00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00025 package edu.uoc.ocr;
00026
00027 import java.io.BufferedInputStream;
00028 import java.io.BufferedOutputStream;
00029 import java.io.File;
00030 import java.io.FileInputStream;
00031 import java.io.FileNotFoundException;
00032 import java.io.FileOutputStream;
00033 import java.io.IOException;
00034 import java.io.InputStream;
00035 import java.io.OutputStream;
00036 import java.io.RandomAccessFile;
00037 import java.net.HttpURLConnection;
00038 import java.net.MalformedURLException;
00039 import java.net.URL;
00040 import java.util.zip.GZIPInputStream;
00041 import java.util.zip.ZipEntry;
00042 import java.util.zip.ZipInputStream;
00043
00044 import org.xeustechnologies.jtar.TarEntry;
00045 import org.xeustechnologies.jtar.TarInputStream;
00046
00047 import com.googlecode.tesseract.android.TessBaseAPI;
00048
00049 import android.app.ProgressDialog;
00050 import android.content.Context;
00051 import android.os.AsyncTask;
00052 import android.util.Log;
00053
00060 final class OcrInitAsyncTask extends AsyncTask<String, String, Boolean> {
/** Log tag: the simple name of this class. */
private static final String TAG = OcrInitAsyncTask.class.getSimpleName();

/** Activity whose buttons are hidden while the task runs and which is told the outcome. */
private final CaptureActivityForOcr activity;

/** Base context of {@link #activity}; used to resolve string resources. */
private final Context context;

/** OCR engine that is initialised once the data files are in place. */
private final TessBaseAPI baseApi;

/** Determinate (horizontal) progress dialog driven by the progress updates. */
private final ProgressDialog dialog;

/** Second dialog supplied by the caller; this task only ever dismisses it. */
private final ProgressDialog indeterminateDialog;

/** Language code used to build the "&lt;code&gt;.traineddata" file name and to initialise the engine. */
private final String languageCode;

/**
 * Name shown in progress messages. Not final: it is replaced with another label while the
 * OSD data is being installed.
 */
private String languageName;

/**
 * Creates the task. All references are stored as given; nothing is started here.
 *
 * @param activity            activity that owns the OCR session
 * @param baseApi             engine instance to initialise
 * @param dialog              dialog used to show download/unpack progress
 * @param indeterminateDialog dialog dismissed when the task finishes
 * @param languageCode        code of the language whose data must be present
 * @param languageName        name of that language for progress messages
 */
OcrInitAsyncTask(CaptureActivityForOcr activity, TessBaseAPI baseApi,
    ProgressDialog dialog, ProgressDialog indeterminateDialog,
    String languageCode, String languageName) {
  this.activity = activity;
  this.context = activity.getBaseContext();
  this.baseApi = baseApi;
  this.dialog = dialog;
  this.indeterminateDialog = indeterminateDialog;
  this.languageCode = languageCode;
  this.languageName = languageName;
}
00099
00100 @Override
00101 protected void onPreExecute() {
00102 super.onPreExecute();
00103 dialog.setTitle(context.getString(R.string.dialogo_1));
00104 dialog.setMessage(context.getString(R.string.dialogo_12));
00105 dialog.setIndeterminate(false);
00106 dialog.setProgressStyle(ProgressDialog.STYLE_HORIZONTAL);
00107 dialog.setCancelable(false);
00108 dialog.show();
00109 activity.setButtonVisibility(false);
00110 }
00111
00120 protected Boolean doInBackground(String... params) {
00121
00122
00123
00124 String destinationFilenameBase = languageCode + ".traineddata";
00125
00126
00127 String destinationDirBase = params[0];
00128
00129
00130 File tessdataDir = new File(destinationDirBase + File.separator
00131 + "tessdata");
00132 if (!tessdataDir.exists() && !tessdataDir.mkdirs()) {
00133 Log.e(TAG, "Couldn't make directory " + tessdataDir);
00134 return false;
00135 }
00136
00137
00138 File downloadFile = new File(tessdataDir, destinationFilenameBase);
00139
00140
00141
00142
00143
00144 File incomplete = new File(tessdataDir, destinationFilenameBase
00145 + ".download");
00146 File tesseractTestFile = new File(tessdataDir, languageCode
00147 + ".traineddata");
00148 if (incomplete.exists()) {
00149 incomplete.delete();
00150 if (tesseractTestFile.exists()) {
00151 tesseractTestFile.delete();
00152 }
00153 }
00154
00155
00156 boolean installSuccess = false;
00157 if (!tesseractTestFile.exists()) {
00158 Log.d(TAG, "Language data for " + languageCode + " not found in "
00159 + tessdataDir.toString());
00160
00161
00162
00163 try {
00164 Log.d(TAG, "Checking for language data ("
00165 + destinationFilenameBase
00166 + ".zip) in application assets...");
00167
00168
00169 installSuccess = installFromAssets(destinationFilenameBase
00170 + ".zip", tessdataDir, downloadFile);
00171 } catch (IOException e) {
00172 Log.e(TAG, "IOException", e);
00173 } catch (Exception e) {
00174 Log.e(TAG, "Got exception", e);
00175 }
00176
00177 if (!installSuccess) {
00178
00179 Log.d(TAG, "Downloading " + destinationFilenameBase + ".gz...");
00180 try {
00181 installSuccess = downloadFile(destinationFilenameBase,
00182 downloadFile);
00183 if (!installSuccess) {
00184 Log.e(TAG, "Download failed");
00185 return false;
00186 }
00187 } catch (IOException e) {
00188 Log.e(TAG,
00189 "IOException received in doInBackground. Is a network connection available?");
00190 return false;
00191 }
00192 }
00193
00194
00195
00196 String extension = destinationFilenameBase.substring(
00197 destinationFilenameBase.lastIndexOf('.'),
00198 destinationFilenameBase.length());
00199 if (extension.equals(".tar")) {
00200 try {
00201 untar(new File(tessdataDir.toString() + File.separator
00202 + destinationFilenameBase), tessdataDir);
00203 installSuccess = true;
00204 } catch (IOException e) {
00205 Log.e(TAG, "Untar failed");
00206 return false;
00207 }
00208 }
00209
00210 } else {
00211 Log.d(TAG, "Language data for " + languageCode
00212 + " already installed in " + tessdataDir.toString());
00213 installSuccess = true;
00214 }
00215
00216
00217 File osdFile = new File(tessdataDir, CaptureActivityForOcr.OSD_FILENAME_BASE);
00218 boolean osdInstallSuccess = false;
00219 if (!osdFile.exists()) {
00220
00221
00222 languageName = context.getString(R.string.dialogo_13);
00223 try {
00224
00225 String[] badFiles = {
00226 CaptureActivityForOcr.OSD_FILENAME + ".gz.download",
00227 CaptureActivityForOcr.OSD_FILENAME + ".gz",
00228 CaptureActivityForOcr.OSD_FILENAME };
00229 for (String filename : badFiles) {
00230 File file = new File(tessdataDir, filename);
00231 if (file.exists()) {
00232 file.delete();
00233 }
00234 }
00235
00236 Log.d(TAG, "Checking for OSD data ("
00237 + CaptureActivityForOcr.OSD_FILENAME_BASE
00238 + ".zip) in application assets...");
00239
00240 osdInstallSuccess = installFromAssets(
00241 CaptureActivityForOcr.OSD_FILENAME_BASE + ".zip",
00242 tessdataDir, new File(CaptureActivityForOcr.OSD_FILENAME));
00243 } catch (IOException e) {
00244 Log.e(TAG, "IOException", e);
00245 } catch (Exception e) {
00246 Log.e(TAG, "Got exception", e);
00247 }
00248
00249 if (!osdInstallSuccess) {
00250
00251 Log.d(TAG, "Downloading " + CaptureActivityForOcr.OSD_FILENAME
00252 + ".gz...");
00253 try {
00254 osdInstallSuccess = downloadFile(
00255 CaptureActivityForOcr.OSD_FILENAME, new File(tessdataDir,
00256 CaptureActivityForOcr.OSD_FILENAME));
00257 if (!osdInstallSuccess) {
00258 Log.e(TAG, "Download failed");
00259 return false;
00260 }
00261 } catch (IOException e) {
00262 Log.e(TAG,
00263 "IOException received in doInBackground. Is a network connection available?");
00264 return false;
00265 }
00266 }
00267
00268
00269 try {
00270 untar(new File(tessdataDir.toString() + File.separator
00271 + CaptureActivityForOcr.OSD_FILENAME), tessdataDir);
00272 } catch (IOException e) {
00273 Log.e(TAG, "Untar failed");
00274 return false;
00275 }
00276
00277 } else {
00278 Log.d(TAG, "OSD file already present in " + tessdataDir.toString());
00279 osdInstallSuccess = true;
00280 }
00281
00282
00283
00284 try {
00285 dialog.dismiss();
00286 } catch (IllegalArgumentException e) {
00287
00288 }
00289
00290
00291 if (baseApi.init(destinationDirBase + File.separator, languageCode)) {
00292 return installSuccess && osdInstallSuccess;
00293 }
00294 return false;
00295 }
00296
00310 private boolean downloadFile(String sourceFilenameBase, File destinationFile)
00311 throws IOException {
00312 try {
00313 return downloadGzippedFileHttp(
00314 new URL(CaptureActivityForOcr.DOWNLOAD_BASE + sourceFilenameBase
00315 + ".gz"), destinationFile);
00316 } catch (MalformedURLException e) {
00317 throw new IllegalArgumentException("Bad URL string.");
00318 }
00319 }
00320
00334 private boolean downloadGzippedFileHttp(URL url, File destinationFile)
00335 throws IOException {
00336
00337 Log.d(TAG, "Sending GET request to " + url + "...");
00338 publishProgress(context.getString(R.string.dialogo_3) + " " + languageName + "...", "0");
00339 HttpURLConnection urlConnection = null;
00340 urlConnection = (HttpURLConnection) url.openConnection();
00341 urlConnection.setAllowUserInteraction(false);
00342 urlConnection.setInstanceFollowRedirects(true);
00343 urlConnection.setRequestMethod("GET");
00344 urlConnection.connect();
00345 if (urlConnection.getResponseCode() != HttpURLConnection.HTTP_OK) {
00346 Log.e(TAG, "Did not get HTTP_OK response.");
00347 Log.e(TAG, "Response code: " + urlConnection.getResponseCode());
00348 Log.e(TAG, "Response message: "
00349 + urlConnection.getResponseMessage().toString());
00350 return false;
00351 }
00352 int fileSize = urlConnection.getContentLength();
00353 InputStream inputStream = urlConnection.getInputStream();
00354 File tempFile = new File(destinationFile.toString() + ".gz.download");
00355
00356
00357 Log.d(TAG, "Streaming download to " + destinationFile.toString()
00358 + ".gz.download...");
00359 final int BUFFER = 8192;
00360 FileOutputStream fileOutputStream = null;
00361 Integer percentComplete;
00362 int percentCompleteLast = 0;
00363 try {
00364 fileOutputStream = new FileOutputStream(tempFile);
00365 } catch (FileNotFoundException e) {
00366 Log.e(TAG, "Exception received when opening FileOutputStream.", e);
00367 }
00368 int downloaded = 0;
00369 byte[] buffer = new byte[BUFFER];
00370 int bufferLength = 0;
00371 while ((bufferLength = inputStream.read(buffer, 0, BUFFER)) > 0) {
00372 fileOutputStream.write(buffer, 0, bufferLength);
00373 downloaded += bufferLength;
00374 percentComplete = (int) ((downloaded / (float) fileSize) * 100);
00375 if (percentComplete > percentCompleteLast) {
00376 publishProgress(context.getString(R.string.dialogo_3) + " " + languageName
00377 + "...", percentComplete.toString());
00378 percentCompleteLast = percentComplete;
00379 }
00380 }
00381 fileOutputStream.close();
00382 if (urlConnection != null) {
00383 urlConnection.disconnect();
00384 }
00385
00386
00387
00388 try {
00389 Log.d(TAG, "Unzipping...");
00390 gunzip(tempFile,
00391 new File(tempFile.toString().replace(".gz.download", "")));
00392 return true;
00393 } catch (FileNotFoundException e) {
00394 Log.e(TAG, "File not available for unzipping.");
00395 } catch (IOException e) {
00396 Log.e(TAG, "Problem unzipping file.");
00397 }
00398 return false;
00399 }
00400
00412 private void gunzip(File zippedFile, File outFilePath)
00413 throws FileNotFoundException, IOException {
00414 int uncompressedFileSize = getGzipSizeUncompressed(zippedFile);
00415 Integer percentComplete;
00416 int percentCompleteLast = 0;
00417 int unzippedBytes = 0;
00418 final Integer progressMin = 0;
00419 int progressMax = 100 - progressMin;
00420 publishProgress(context.getString(R.string.dialogo_7) + " " + languageName + "...",
00421 progressMin.toString());
00422
00423
00424 String extension = zippedFile.toString().substring(
00425 zippedFile.toString().length() - 16);
00426 if (extension.equals(".tar.gz.download")) {
00427 progressMax = 50;
00428 }
00429 GZIPInputStream gzipInputStream = new GZIPInputStream(
00430 new BufferedInputStream(new FileInputStream(zippedFile)));
00431 OutputStream outputStream = new FileOutputStream(outFilePath);
00432 BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(
00433 outputStream);
00434
00435 final int BUFFER = 8192;
00436 byte[] data = new byte[BUFFER];
00437 int len;
00438 while ((len = gzipInputStream.read(data, 0, BUFFER)) > 0) {
00439 bufferedOutputStream.write(data, 0, len);
00440 unzippedBytes += len;
00441 percentComplete = (int) ((unzippedBytes / (float) uncompressedFileSize) * progressMax)
00442 + progressMin;
00443
00444 if (percentComplete > percentCompleteLast) {
00445 publishProgress(context.getString(R.string.dialogo_7) + " " + languageName
00446 + "...", percentComplete.toString());
00447 percentCompleteLast = percentComplete;
00448 }
00449 }
00450 gzipInputStream.close();
00451 bufferedOutputStream.flush();
00452 bufferedOutputStream.close();
00453
00454 if (zippedFile.exists()) {
00455 zippedFile.delete();
00456 }
00457 }
00458
00468 private int getGzipSizeUncompressed(File zipFile) throws IOException {
00469 RandomAccessFile raf = new RandomAccessFile(zipFile, "r");
00470 raf.seek(raf.length() - 4);
00471 int b4 = raf.read();
00472 int b3 = raf.read();
00473 int b2 = raf.read();
00474 int b1 = raf.read();
00475 raf.close();
00476 return (b1 << 24) | (b2 << 16) + (b3 << 8) + b4;
00477 }
00478
00492 private void untar(File tarFile, File destinationDir) throws IOException {
00493 Log.d(TAG, "Untarring...");
00494 final int uncompressedSize = getTarSizeUncompressed(tarFile);
00495 Integer percentComplete;
00496 int percentCompleteLast = 0;
00497 int unzippedBytes = 0;
00498 final Integer progressMin = 50;
00499 final int progressMax = 100 - progressMin;
00500 publishProgress(context.getString(R.string.dialogo_7) + " " + languageName + "...",
00501 progressMin.toString());
00502
00503
00504 TarInputStream tarInputStream = new TarInputStream(
00505 new BufferedInputStream(new FileInputStream(tarFile)));
00506 TarEntry entry;
00507 while ((entry = tarInputStream.getNextEntry()) != null) {
00508 int len;
00509 final int BUFFER = 8192;
00510 byte data[] = new byte[BUFFER];
00511 String pathName = entry.getName();
00512 String fileName = pathName.substring(pathName.lastIndexOf('/'),
00513 pathName.length());
00514 OutputStream outputStream = new FileOutputStream(destinationDir
00515 + fileName);
00516 BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(
00517 outputStream);
00518
00519 Log.d(TAG, "Writing " + fileName.substring(1, fileName.length())
00520 + "...");
00521 while ((len = tarInputStream.read(data, 0, BUFFER)) != -1) {
00522 bufferedOutputStream.write(data, 0, len);
00523 unzippedBytes += len;
00524 percentComplete = (int) ((unzippedBytes / (float) uncompressedSize) * progressMax)
00525 + progressMin;
00526 if (percentComplete > percentCompleteLast) {
00527 publishProgress(context.getString(R.string.dialogo_7) + " "
00528 + languageName + "...", percentComplete.toString());
00529 percentCompleteLast = percentComplete;
00530 }
00531 }
00532 bufferedOutputStream.flush();
00533 bufferedOutputStream.close();
00534 }
00535 tarInputStream.close();
00536
00537 if (tarFile.exists()) {
00538 tarFile.delete();
00539 }
00540 }
00541
00550 private int getTarSizeUncompressed(File tarFile) throws IOException {
00551 int size = 0;
00552 TarInputStream tis = new TarInputStream(new BufferedInputStream(
00553 new FileInputStream(tarFile)));
00554 TarEntry entry;
00555 while ((entry = tis.getNextEntry()) != null) {
00556 if (!entry.isDirectory()) {
00557 size += entry.getSize();
00558 }
00559 }
00560 return size;
00561 }
00562
00576 private boolean installFromAssets(String sourceFilename, File modelRoot,
00577 File destinationFile) throws IOException {
00578 String extension = sourceFilename.substring(
00579 sourceFilename.lastIndexOf('.'), sourceFilename.length());
00580 try {
00581 if (extension.equals(".zip")) {
00582 return installZipFromAssets(sourceFilename, modelRoot,
00583 destinationFile);
00584 } else {
00585 throw new IllegalArgumentException("Extension " + extension
00586 + " is unsupported.");
00587 }
00588 } catch (FileNotFoundException e) {
00589 Log.d(TAG, "Language not packaged in application assets.");
00590 }
00591 return false;
00592 }
00593
00608 private boolean installZipFromAssets(String sourceFilename,
00609 File destinationDir, File destinationFile) throws IOException,
00610 FileNotFoundException {
00611
00612 publishProgress(context.getString(R.string.dialogo_7) + " " + languageName + "...",
00613 "0");
00614 ZipInputStream inputStream = new ZipInputStream(context.getAssets()
00615 .open(sourceFilename));
00616
00617
00618
00619 for (ZipEntry entry = inputStream.getNextEntry(); entry != null; entry = inputStream
00620 .getNextEntry()) {
00621 destinationFile = new File(destinationDir, entry.getName());
00622
00623 if (entry.isDirectory()) {
00624 destinationFile.mkdirs();
00625 } else {
00626
00627
00628 long zippedFileSize = entry.getSize();
00629
00630
00631 FileOutputStream outputStream = new FileOutputStream(
00632 destinationFile);
00633 final int BUFFER = 8192;
00634
00635
00636 BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(
00637 outputStream, BUFFER);
00638 int unzippedSize = 0;
00639
00640
00641 int count = 0;
00642 Integer percentComplete = 0;
00643 Integer percentCompleteLast = 0;
00644 byte[] data = new byte[BUFFER];
00645 while ((count = inputStream.read(data, 0, BUFFER)) != -1) {
00646 bufferedOutputStream.write(data, 0, count);
00647 unzippedSize += count;
00648 percentComplete = (int) ((unzippedSize / (long) zippedFileSize) * 100);
00649 if (percentComplete > percentCompleteLast) {
00650 publishProgress(context.getString(R.string.dialogo_7) + " "
00651 + languageName + "...",
00652 percentComplete.toString(), "0");
00653 percentCompleteLast = percentComplete;
00654 }
00655 }
00656 bufferedOutputStream.close();
00657 }
00658 inputStream.closeEntry();
00659 }
00660 inputStream.close();
00661 return true;
00662 }
00663
00673 @Override
00674 protected void onProgressUpdate(String... message) {
00675 super.onProgressUpdate(message);
00676 int percentComplete = 0;
00677
00678 percentComplete = Integer.parseInt(message[1]);
00679 dialog.setMessage(message[0]);
00680 dialog.setProgress(percentComplete);
00681 dialog.show();
00682 }
00683
00684 @Override
00685 protected void onPostExecute(Boolean result) {
00686 super.onPostExecute(result);
00687
00688 try {
00689 indeterminateDialog.dismiss();
00690 } catch (IllegalArgumentException e) {
00691
00692 }
00693
00694 if (result) {
00695
00696 activity.resumeOCR();
00697 activity.showLanguageName();
00698 } else {
00699 activity.showErrorMessage(
00700 context.getString(R.string.dialogo_10),
00701 context.getString(R.string.dialogo_11));
00702 }
00703 }
00704 }