OCR Configurable  1.0
 Todo Clases Namespaces Archivos Funciones Variables
OcrInitAsyncTask.java
Ir a la documentación de este archivo.
00001 /*
00002  * Copyright 2011 Robert Theis
00003  * Copyright 2012 Jaime Navarro Santapau
00004  *
00005  * Licensed under the Apache License, Version 2.0 (the "License");
00006  * you may not use this file except in compliance with the License.
00007  * You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00025 package edu.uoc.ocr;
00026 
00027 import java.io.BufferedInputStream;
00028 import java.io.BufferedOutputStream;
00029 import java.io.File;
00030 import java.io.FileInputStream;
00031 import java.io.FileNotFoundException;
00032 import java.io.FileOutputStream;
00033 import java.io.IOException;
00034 import java.io.InputStream;
00035 import java.io.OutputStream;
00036 import java.io.RandomAccessFile;
00037 import java.net.HttpURLConnection;
00038 import java.net.MalformedURLException;
00039 import java.net.URL;
00040 import java.util.zip.GZIPInputStream;
00041 import java.util.zip.ZipEntry;
00042 import java.util.zip.ZipInputStream;
00043 
00044 import org.xeustechnologies.jtar.TarEntry;
00045 import org.xeustechnologies.jtar.TarInputStream;
00046 
00047 import com.googlecode.tesseract.android.TessBaseAPI;
00048 
00049 import android.app.ProgressDialog;
00050 import android.content.Context;
00051 import android.os.AsyncTask;
00052 import android.util.Log;
00053 
00060 final class OcrInitAsyncTask extends AsyncTask<String, String, Boolean> {
00061         private static final String TAG = OcrInitAsyncTask.class.getSimpleName();
00062 
00063         private CaptureActivityForOcr activity;
00064         private Context context;
00065         private TessBaseAPI baseApi;
00066         private ProgressDialog dialog;
00067         private ProgressDialog indeterminateDialog;
00068         private final String languageCode;
00069         private String languageName;
00070 
00088         OcrInitAsyncTask(CaptureActivityForOcr activity, TessBaseAPI baseApi,
00089                         ProgressDialog dialog, ProgressDialog indeterminateDialog,
00090                         String languageCode, String languageName) {
00091                 this.activity = activity;
00092                 this.context = activity.getBaseContext();
00093                 this.baseApi = baseApi;
00094                 this.dialog = dialog;
00095                 this.indeterminateDialog = indeterminateDialog;
00096                 this.languageCode = languageCode;
00097                 this.languageName = languageName;
00098         }
00099 
00100         @Override
00101         protected void onPreExecute() {
00102                 super.onPreExecute();
00103                 dialog.setTitle(context.getString(R.string.dialogo_1));
00104                 dialog.setMessage(context.getString(R.string.dialogo_12));
00105                 dialog.setIndeterminate(false);
00106                 dialog.setProgressStyle(ProgressDialog.STYLE_HORIZONTAL);
00107                 dialog.setCancelable(false);
00108                 dialog.show();
00109                 activity.setButtonVisibility(false);
00110         }
00111 
00120         protected Boolean doInBackground(String... params) {
00121                 // Check whether we need Cube data or Tesseract data.
00122                 // Example Cube data filename: "tesseract-ocr-3.01.eng.tar"
00123                 // Example Tesseract data filename: "eng.traineddata"
00124                 String destinationFilenameBase = languageCode + ".traineddata";
00125 
00126                 // Check for, and create if necessary, folder to hold model data
00127                 String destinationDirBase = params[0]; // The storage directory, minus
00128                                                                                                 // the
00129                                                                                                 // "tessdata" subdirectory
00130                 File tessdataDir = new File(destinationDirBase + File.separator
00131                                 + "tessdata");
00132                 if (!tessdataDir.exists() && !tessdataDir.mkdirs()) {
00133                         Log.e(TAG, "Couldn't make directory " + tessdataDir);
00134                         return false;
00135                 }
00136 
00137                 // Create a reference to the file to save the download in
00138                 File downloadFile = new File(tessdataDir, destinationFilenameBase);
00139 
00140                 // Check if an incomplete download is present. If a *.download file is
00141                 // there, delete it and
00142                 // any (possibly half-unzipped) Tesseract and Cube data files that may
00143                 // be there.
00144                 File incomplete = new File(tessdataDir, destinationFilenameBase
00145                                 + ".download");
00146                 File tesseractTestFile = new File(tessdataDir, languageCode
00147                                 + ".traineddata");
00148                 if (incomplete.exists()) {
00149                         incomplete.delete();
00150                         if (tesseractTestFile.exists()) {
00151                                 tesseractTestFile.delete();
00152                         }
00153                 }
00154 
00155                 // If language data files are not present, install them
00156                 boolean installSuccess = false;
00157                 if (!tesseractTestFile.exists()) {
00158                         Log.d(TAG, "Language data for " + languageCode + " not found in "
00159                                         + tessdataDir.toString());
00160 
00161                         // Check assets for language data to install. If not present,
00162                         // download from Internet
00163                         try {
00164                                 Log.d(TAG, "Checking for language data ("
00165                                                 + destinationFilenameBase
00166                                                 + ".zip) in application assets...");
00167                                 // Check for a file like "eng.traineddata.zip" or
00168                                 // "tesseract-ocr-3.01.eng.tar.zip"
00169                                 installSuccess = installFromAssets(destinationFilenameBase
00170                                                 + ".zip", tessdataDir, downloadFile);
00171                         } catch (IOException e) {
00172                                 Log.e(TAG, "IOException", e);
00173                         } catch (Exception e) {
00174                                 Log.e(TAG, "Got exception", e);
00175                         }
00176 
00177                         if (!installSuccess) {
00178                                 // File was not packaged in assets, so download it
00179                                 Log.d(TAG, "Downloading " + destinationFilenameBase + ".gz...");
00180                                 try {
00181                                         installSuccess = downloadFile(destinationFilenameBase,
00182                                                         downloadFile);
00183                                         if (!installSuccess) {
00184                                                 Log.e(TAG, "Download failed");
00185                                                 return false;
00186                                         }
00187                                 } catch (IOException e) {
00188                                         Log.e(TAG,
00189                                                         "IOException received in doInBackground. Is a network connection available?");
00190                                         return false;
00191                                 }
00192                         }
00193 
00194                         // If we have a tar file at this point because we downloaded v3.01+
00195                         // data, untar it
00196                         String extension = destinationFilenameBase.substring(
00197                                         destinationFilenameBase.lastIndexOf('.'),
00198                                         destinationFilenameBase.length());
00199                         if (extension.equals(".tar")) {
00200                                 try {
00201                                         untar(new File(tessdataDir.toString() + File.separator
00202                                                         + destinationFilenameBase), tessdataDir);
00203                                         installSuccess = true;
00204                                 } catch (IOException e) {
00205                                         Log.e(TAG, "Untar failed");
00206                                         return false;
00207                                 }
00208                         }
00209 
00210                 } else {
00211                         Log.d(TAG, "Language data for " + languageCode
00212                                         + " already installed in " + tessdataDir.toString());
00213                         installSuccess = true;
00214                 }
00215 
00216                 // If OSD data file is not present, download it
00217                 File osdFile = new File(tessdataDir, CaptureActivityForOcr.OSD_FILENAME_BASE);
00218                 boolean osdInstallSuccess = false;
00219                 if (!osdFile.exists()) {
00220                         // Check assets for language data to install. If not present,
00221                         // download from Internet
00222                         languageName = context.getString(R.string.dialogo_13);
00223                         try {
00224                                 // Check for, and delete, partially-downloaded OSD files
00225                                 String[] badFiles = {
00226                                                 CaptureActivityForOcr.OSD_FILENAME + ".gz.download",
00227                                                 CaptureActivityForOcr.OSD_FILENAME + ".gz",
00228                                                 CaptureActivityForOcr.OSD_FILENAME };
00229                                 for (String filename : badFiles) {
00230                                         File file = new File(tessdataDir, filename);
00231                                         if (file.exists()) {
00232                                                 file.delete();
00233                                         }
00234                                 }
00235 
00236                                 Log.d(TAG, "Checking for OSD data ("
00237                                                 + CaptureActivityForOcr.OSD_FILENAME_BASE
00238                                                 + ".zip) in application assets...");
00239                                 // Check for "osd.traineddata.zip"
00240                                 osdInstallSuccess = installFromAssets(
00241                                                 CaptureActivityForOcr.OSD_FILENAME_BASE + ".zip",
00242                                                 tessdataDir, new File(CaptureActivityForOcr.OSD_FILENAME));
00243                         } catch (IOException e) {
00244                                 Log.e(TAG, "IOException", e);
00245                         } catch (Exception e) {
00246                                 Log.e(TAG, "Got exception", e);
00247                         }
00248 
00249                         if (!osdInstallSuccess) {
00250                                 // File was not packaged in assets, so download it
00251                                 Log.d(TAG, "Downloading " + CaptureActivityForOcr.OSD_FILENAME
00252                                                 + ".gz...");
00253                                 try {
00254                                         osdInstallSuccess = downloadFile(
00255                                                         CaptureActivityForOcr.OSD_FILENAME, new File(tessdataDir,
00256                                                                         CaptureActivityForOcr.OSD_FILENAME));
00257                                         if (!osdInstallSuccess) {
00258                                                 Log.e(TAG, "Download failed");
00259                                                 return false;
00260                                         }
00261                                 } catch (IOException e) {
00262                                         Log.e(TAG,
00263                                                         "IOException received in doInBackground. Is a network connection available?");
00264                                         return false;
00265                                 }
00266                         }
00267 
00268                         // Untar the OSD tar file
00269                         try {
00270                                 untar(new File(tessdataDir.toString() + File.separator
00271                                                 + CaptureActivityForOcr.OSD_FILENAME), tessdataDir);
00272                         } catch (IOException e) {
00273                                 Log.e(TAG, "Untar failed");
00274                                 return false;
00275                         }
00276 
00277                 } else {
00278                         Log.d(TAG, "OSD file already present in " + tessdataDir.toString());
00279                         osdInstallSuccess = true;
00280                 }
00281 
00282                 // Dismiss the progress dialog box, revealing the indeterminate dialog
00283                 // box behind it
00284                 try {
00285                         dialog.dismiss();
00286                 } catch (IllegalArgumentException e) {
00287                         // Catch "View not attached to window manager" error, and continue
00288                 }
00289 
00290                 // Initialize the OCR engine
00291                 if (baseApi.init(destinationDirBase + File.separator, languageCode)) {
00292                         return installSuccess && osdInstallSuccess;
00293                 }
00294                 return false;
00295         }
00296 
00310         private boolean downloadFile(String sourceFilenameBase, File destinationFile)
00311                         throws IOException {
00312                 try {
00313                         return downloadGzippedFileHttp(
00314                                         new URL(CaptureActivityForOcr.DOWNLOAD_BASE + sourceFilenameBase
00315                                                         + ".gz"), destinationFile);
00316                 } catch (MalformedURLException e) {
00317                         throw new IllegalArgumentException("Bad URL string.");
00318                 }
00319         }
00320 
00334         private boolean downloadGzippedFileHttp(URL url, File destinationFile)
00335                         throws IOException {
00336                 // Send an HTTP GET request for the file
00337                 Log.d(TAG, "Sending GET request to " + url + "...");
00338                 publishProgress(context.getString(R.string.dialogo_3) + " " + languageName + "...", "0");
00339                 HttpURLConnection urlConnection = null;
00340                 urlConnection = (HttpURLConnection) url.openConnection();
00341                 urlConnection.setAllowUserInteraction(false);
00342                 urlConnection.setInstanceFollowRedirects(true);
00343                 urlConnection.setRequestMethod("GET");
00344                 urlConnection.connect();
00345                 if (urlConnection.getResponseCode() != HttpURLConnection.HTTP_OK) {
00346                         Log.e(TAG, "Did not get HTTP_OK response.");
00347                         Log.e(TAG, "Response code: " + urlConnection.getResponseCode());
00348                         Log.e(TAG, "Response message: "
00349                                         + urlConnection.getResponseMessage().toString());
00350                         return false;
00351                 }
00352                 int fileSize = urlConnection.getContentLength();
00353                 InputStream inputStream = urlConnection.getInputStream();
00354                 File tempFile = new File(destinationFile.toString() + ".gz.download");
00355 
00356                 // Stream the file contents to a local file temporarily
00357                 Log.d(TAG, "Streaming download to " + destinationFile.toString()
00358                                 + ".gz.download...");
00359                 final int BUFFER = 8192;
00360                 FileOutputStream fileOutputStream = null;
00361                 Integer percentComplete;
00362                 int percentCompleteLast = 0;
00363                 try {
00364                         fileOutputStream = new FileOutputStream(tempFile);
00365                 } catch (FileNotFoundException e) {
00366                         Log.e(TAG, "Exception received when opening FileOutputStream.", e);
00367                 }
00368                 int downloaded = 0;
00369                 byte[] buffer = new byte[BUFFER];
00370                 int bufferLength = 0;
00371                 while ((bufferLength = inputStream.read(buffer, 0, BUFFER)) > 0) {
00372                         fileOutputStream.write(buffer, 0, bufferLength);
00373                         downloaded += bufferLength;
00374                         percentComplete = (int) ((downloaded / (float) fileSize) * 100);
00375                         if (percentComplete > percentCompleteLast) {
00376                                 publishProgress(context.getString(R.string.dialogo_3) + " " + languageName
00377                                                 + "...", percentComplete.toString());
00378                                 percentCompleteLast = percentComplete;
00379                         }
00380                 }
00381                 fileOutputStream.close();
00382                 if (urlConnection != null) {
00383                         urlConnection.disconnect();
00384                 }
00385 
00386                 // Uncompress the downloaded temporary file into place, and remove the
00387                 // temporary file
00388                 try {
00389                         Log.d(TAG, "Unzipping...");
00390                         gunzip(tempFile,
00391                                         new File(tempFile.toString().replace(".gz.download", "")));
00392                         return true;
00393                 } catch (FileNotFoundException e) {
00394                         Log.e(TAG, "File not available for unzipping.");
00395                 } catch (IOException e) {
00396                         Log.e(TAG, "Problem unzipping file.");
00397                 }
00398                 return false;
00399         }
00400 
00412         private void gunzip(File zippedFile, File outFilePath)
00413                         throws FileNotFoundException, IOException {
00414                 int uncompressedFileSize = getGzipSizeUncompressed(zippedFile);
00415                 Integer percentComplete;
00416                 int percentCompleteLast = 0;
00417                 int unzippedBytes = 0;
00418                 final Integer progressMin = 0;
00419                 int progressMax = 100 - progressMin;
00420                 publishProgress(context.getString(R.string.dialogo_7) + " " + languageName + "...",
00421                                 progressMin.toString());
00422 
00423                 // If the file is a tar file, just show progress to 50%
00424                 String extension = zippedFile.toString().substring(
00425                                 zippedFile.toString().length() - 16);
00426                 if (extension.equals(".tar.gz.download")) {
00427                         progressMax = 50;
00428                 }
00429                 GZIPInputStream gzipInputStream = new GZIPInputStream(
00430                                 new BufferedInputStream(new FileInputStream(zippedFile)));
00431                 OutputStream outputStream = new FileOutputStream(outFilePath);
00432                 BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(
00433                                 outputStream);
00434 
00435                 final int BUFFER = 8192;
00436                 byte[] data = new byte[BUFFER];
00437                 int len;
00438                 while ((len = gzipInputStream.read(data, 0, BUFFER)) > 0) {
00439                         bufferedOutputStream.write(data, 0, len);
00440                         unzippedBytes += len;
00441                         percentComplete = (int) ((unzippedBytes / (float) uncompressedFileSize) * progressMax)
00442                                         + progressMin;
00443 
00444                         if (percentComplete > percentCompleteLast) {
00445                                 publishProgress(context.getString(R.string.dialogo_7) + " " + languageName
00446                                                 + "...", percentComplete.toString());
00447                                 percentCompleteLast = percentComplete;
00448                         }
00449                 }
00450                 gzipInputStream.close();
00451                 bufferedOutputStream.flush();
00452                 bufferedOutputStream.close();
00453 
00454                 if (zippedFile.exists()) {
00455                         zippedFile.delete();
00456                 }
00457         }
00458 
00468         private int getGzipSizeUncompressed(File zipFile) throws IOException {
00469                 RandomAccessFile raf = new RandomAccessFile(zipFile, "r");
00470                 raf.seek(raf.length() - 4);
00471                 int b4 = raf.read();
00472                 int b3 = raf.read();
00473                 int b2 = raf.read();
00474                 int b1 = raf.read();
00475                 raf.close();
00476                 return (b1 << 24) | (b2 << 16) + (b3 << 8) + b4;
00477         }
00478 
00492         private void untar(File tarFile, File destinationDir) throws IOException {
00493                 Log.d(TAG, "Untarring...");
00494                 final int uncompressedSize = getTarSizeUncompressed(tarFile);
00495                 Integer percentComplete;
00496                 int percentCompleteLast = 0;
00497                 int unzippedBytes = 0;
00498                 final Integer progressMin = 50;
00499                 final int progressMax = 100 - progressMin;
00500                 publishProgress(context.getString(R.string.dialogo_7) + " " + languageName + "...",
00501                                 progressMin.toString());
00502 
00503                 // Extract all the files
00504                 TarInputStream tarInputStream = new TarInputStream(
00505                                 new BufferedInputStream(new FileInputStream(tarFile)));
00506                 TarEntry entry;
00507                 while ((entry = tarInputStream.getNextEntry()) != null) {
00508                         int len;
00509                         final int BUFFER = 8192;
00510                         byte data[] = new byte[BUFFER];
00511                         String pathName = entry.getName();
00512                         String fileName = pathName.substring(pathName.lastIndexOf('/'),
00513                                         pathName.length());
00514                         OutputStream outputStream = new FileOutputStream(destinationDir
00515                                         + fileName);
00516                         BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(
00517                                         outputStream);
00518 
00519                         Log.d(TAG, "Writing " + fileName.substring(1, fileName.length())
00520                                         + "...");
00521                         while ((len = tarInputStream.read(data, 0, BUFFER)) != -1) {
00522                                 bufferedOutputStream.write(data, 0, len);
00523                                 unzippedBytes += len;
00524                                 percentComplete = (int) ((unzippedBytes / (float) uncompressedSize) * progressMax)
00525                                                 + progressMin;
00526                                 if (percentComplete > percentCompleteLast) {
00527                                         publishProgress(context.getString(R.string.dialogo_7) + " "
00528                                                         + languageName + "...", percentComplete.toString());
00529                                         percentCompleteLast = percentComplete;
00530                                 }
00531                         }
00532                         bufferedOutputStream.flush();
00533                         bufferedOutputStream.close();
00534                 }
00535                 tarInputStream.close();
00536 
00537                 if (tarFile.exists()) {
00538                         tarFile.delete();
00539                 }
00540         }
00541 
00550         private int getTarSizeUncompressed(File tarFile) throws IOException {
00551                 int size = 0;
00552                 TarInputStream tis = new TarInputStream(new BufferedInputStream(
00553                                 new FileInputStream(tarFile)));
00554                 TarEntry entry;
00555                 while ((entry = tis.getNextEntry()) != null) {
00556                         if (!entry.isDirectory()) {
00557                                 size += entry.getSize();
00558                         }
00559                 }
00560                 return size;
00561         }
00562 
00576         private boolean installFromAssets(String sourceFilename, File modelRoot,
00577                         File destinationFile) throws IOException {
00578                 String extension = sourceFilename.substring(
00579                                 sourceFilename.lastIndexOf('.'), sourceFilename.length());
00580                 try {
00581                         if (extension.equals(".zip")) {
00582                                 return installZipFromAssets(sourceFilename, modelRoot,
00583                                                 destinationFile);
00584                         } else {
00585                                 throw new IllegalArgumentException("Extension " + extension
00586                                                 + " is unsupported.");
00587                         }
00588                 } catch (FileNotFoundException e) {
00589                         Log.d(TAG, "Language not packaged in application assets.");
00590                 }
00591                 return false;
00592         }
00593 
00608         private boolean installZipFromAssets(String sourceFilename,
00609                         File destinationDir, File destinationFile) throws IOException,
00610                         FileNotFoundException {
00611                 // Attempt to open the zip archive
00612                 publishProgress(context.getString(R.string.dialogo_7) + " " + languageName + "...",
00613                                 "0");
00614                 ZipInputStream inputStream = new ZipInputStream(context.getAssets()
00615                                 .open(sourceFilename));
00616 
00617                 // Loop through all the files and folders in the zip archive (but there
00618                 // should just be one)
00619                 for (ZipEntry entry = inputStream.getNextEntry(); entry != null; entry = inputStream
00620                                 .getNextEntry()) {
00621                         destinationFile = new File(destinationDir, entry.getName());
00622 
00623                         if (entry.isDirectory()) {
00624                                 destinationFile.mkdirs();
00625                         } else {
00626                                 // Note getSize() returns -1 when the zipfile does not have the
00627                                 // size set
00628                                 long zippedFileSize = entry.getSize();
00629 
00630                                 // Create a file output stream
00631                                 FileOutputStream outputStream = new FileOutputStream(
00632                                                 destinationFile);
00633                                 final int BUFFER = 8192;
00634 
00635                                 // Buffer the output to the file
00636                                 BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(
00637                                                 outputStream, BUFFER);
00638                                 int unzippedSize = 0;
00639 
00640                                 // Write the contents
00641                                 int count = 0;
00642                                 Integer percentComplete = 0;
00643                                 Integer percentCompleteLast = 0;
00644                                 byte[] data = new byte[BUFFER];
00645                                 while ((count = inputStream.read(data, 0, BUFFER)) != -1) {
00646                                         bufferedOutputStream.write(data, 0, count);
00647                                         unzippedSize += count;
00648                                         percentComplete = (int) ((unzippedSize / (long) zippedFileSize) * 100);
00649                                         if (percentComplete > percentCompleteLast) {
00650                                                 publishProgress(context.getString(R.string.dialogo_7) + " "
00651                                                                 + languageName + "...",
00652                                                                 percentComplete.toString(), "0");
00653                                                 percentCompleteLast = percentComplete;
00654                                         }
00655                                 }
00656                                 bufferedOutputStream.close();
00657                         }
00658                         inputStream.closeEntry();
00659                 }
00660                 inputStream.close();
00661                 return true;
00662         }
00663 
00673         @Override
00674         protected void onProgressUpdate(String... message) {
00675                 super.onProgressUpdate(message);
00676                 int percentComplete = 0;
00677 
00678                 percentComplete = Integer.parseInt(message[1]);
00679                 dialog.setMessage(message[0]);
00680                 dialog.setProgress(percentComplete);
00681                 dialog.show();
00682         }
00683 
00684         @Override
00685         protected void onPostExecute(Boolean result) {
00686                 super.onPostExecute(result);
00687 
00688                 try {
00689                         indeterminateDialog.dismiss();
00690                 } catch (IllegalArgumentException e) {
00691                         // Catch "View not attached to window manager" error, and continue
00692                 }
00693 
00694                 if (result) {
00695                         // Restart recognition
00696                         activity.resumeOCR();
00697                         activity.showLanguageName();
00698                 } else {
00699                         activity.showErrorMessage(
00700                                         context.getString(R.string.dialogo_10),
00701                                         context.getString(R.string.dialogo_11));
00702                 }
00703         }
00704 }
 Todo Clases Namespaces Archivos Funciones Variables