TessBaseAPI.java example

Explorer

android-anpr-master
- app
  - src
    - main
      - java
        com
        DVLA
        testapp
        app
        HttpRequest.java
        ImgProcess.java
        MainActivity.java
        OpenCV.java
        Texture.java
        boxLetter.java
        boxLetterComapre.java
        customCompare.java
        histCountBox.java
        vehRecView.java
        vehRecord.java
        googlecode
        leptonica
        android
        AdaptiveMap.java
        Binarize.java
        Box.java
        Constants.java
        Convert.java
        Enhance.java
        JpegIO.java
        Pix.java
        Pixa.java
        ReadFile.java
        Rotate.java
        Scale.java
        Skew.java
        WriteFile.java
        tesseract
        android
        PageIterator.java
        ResultIterator.java
        TessBaseAPI.java
- libraries
  - opencv
    - build
      - source
        aidl
        debug
        org
        opencv
        engine
        OpenCVEngineInterface.java
        release
        org
        opencv
        engine
        OpenCVEngineInterface.java
        buildConfig
        debug
        org
        opencv
        BuildConfig.java
        release
        org
        opencv
        BuildConfig.java
        r
        debug
        org
        opencv
        R.java
        release
        org
        opencv
        R.java
    - gen
      - org
        opencv
        BuildConfig.java
        Manifest.java
        R.java
        engine
        OpenCVEngineInterface.java
    - src
      - org
        opencv
        android
        AsyncServiceHelper.java
        BaseLoaderCallback.java
        CameraBridgeViewBase.java
        FpsMeter.java
        InstallCallbackInterface.java
        JavaCameraView.java
        LoaderCallbackInterface.java
        NativeCameraView.java
        OpenCVLoader.java
        StaticHelper.java
        Utils.java
        calib3d
        Calib3d.java
        StereoBM.java
        StereoSGBM.java
        contrib
        Contrib.java
        FaceRecognizer.java
        StereoVar.java
        core
        Algorithm.java
        Core.java
        CvException.java
        CvType.java
        Mat.java
        MatOfByte.java
        MatOfDMatch.java
        MatOfDouble.java
        MatOfFloat.java
        MatOfFloat4.java
        MatOfFloat6.java
        MatOfInt.java
        MatOfInt4.java
        MatOfKeyPoint.java
        MatOfPoint.java
        MatOfPoint2f.java
        MatOfPoint3.java
        MatOfPoint3f.java
        MatOfRect.java
        Point.java
        Point3.java
        Range.java
        Rect.java
        RotatedRect.java
        Scalar.java
        Size.java
        TermCriteria.java
        features2d
        DMatch.java
        DescriptorExtractor.java
        DescriptorMatcher.java
        FeatureDetector.java
        Features2d.java
        GenericDescriptorMatcher.java
        KeyPoint.java
        gpu
        DeviceInfo.java
        Gpu.java
        TargetArchs.java
        highgui
        Highgui.java
        VideoCapture.java
        imgproc
        CLAHE.java
        Imgproc.java
        Moments.java
        Subdiv2D.java
        ml
        CvANN_MLP.java
        CvANN_MLP_TrainParams.java
        CvBoost.java
        CvBoostParams.java
        CvDTree.java
        CvDTreeParams.java
        CvERTrees.java
        CvGBTrees.java
        CvGBTreesParams.java
        CvKNearest.java
        CvNormalBayesClassifier.java
        CvParamGrid.java
        CvRTParams.java
        CvRTrees.java
        CvSVM.java
        CvSVMParams.java
        CvStatModel.java
        EM.java
        Ml.java
        objdetect
        CascadeClassifier.java
        HOGDescriptor.java
        Objdetect.java
        photo
        Photo.java
        utils
        Converters.java
        video
        BackgroundSubtractor.java
        BackgroundSubtractorMOG.java
        BackgroundSubtractorMOG2.java
        KalmanFilter.java
        Video.java

/*
 * Copyright (C) 2011 Google Inc.
 * Copyright (C) 2011 Robert Theis
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.googlecode.tesseract.android;

import android.graphics.Bitmap;
import android.graphics.Rect;
import android.util.Log;

import com.googlecode.leptonica.android.Pixa;
import com.googlecode.leptonica.android.Pix;
import com.googlecode.leptonica.android.ReadFile;

import java.io.File;

/**
 * Java interface for the Tesseract OCR engine. Does not implement all available
 * JNI methods, but does implement enough to be useful. Comments are adapted
 * from original Tesseract source.
 * 
 * @author alanv@google.com (Alan Viverette)
 */
public class TessBaseAPI {
    /**
     * Used by the native implementation of the class.
     */
    private int mNativeData;

    static {
        System.loadLibrary("lept");
        System.loadLibrary("tess");

        nativeClassInit();
    }

    public static final class PageSegMode {
    	/** Orientation and script detection only. */
    	public static final int PSM_OSD_ONLY = 0;

    	/** Automatic page segmentation with orientation and script detection. (OSD) */
    	public static final int PSM_AUTO_OSD = 1;

    	/** Fully automatic page segmentation, but no OSD, or OCR. */
    	public static final int PSM_AUTO_ONLY = 2;

    	/** Fully automatic page segmentation, but no OSD. */
    	public static final int PSM_AUTO = 3;

    	/** Assume a single column of text of variable sizes. */
    	public static final int PSM_SINGLE_COLUMN = 4;

    	/** Assume a single uniform block of vertically aligned text. */
    	public static final int PSM_SINGLE_BLOCK_VERT_TEXT = 5;

    	/** Assume a single uniform block of text. (Default.) */
    	public static final int PSM_SINGLE_BLOCK = 6;

    	/** Treat the image as a single text line. */
    	public static final int PSM_SINGLE_LINE = 7;

    	/** Treat the image as a single word. */
    	public static final int PSM_SINGLE_WORD = 8;

    	/** Treat the image as a single word in a circle. */
    	public static final int PSM_CIRCLE_WORD = 9;

    	/** Treat the image as a single character. */
    	public static final int PSM_SINGLE_CHAR = 10;

    	/** Find as much text as possible in no particular order. */
    	public static final int PSM_SPARSE_TEXT = 11;

    	/** Sparse text with orientation and script detection. */
    	public static final int PSM_SPARSE_TEXT_OSD = 12;

    	/** Number of enum entries. */
    	public static final int PSM_COUNT = 13;
    }
    
    /** Whitelist of characters to recognize. */
    public static final String VAR_CHAR_WHITELIST = "tessedit_char_whitelist";

    /** Blacklist of characters to not recognize. */
    public static final String VAR_CHAR_BLACKLIST = "tessedit_char_blacklist";
    
    /** Run Tesseract only - fastest */
    public static final int OEM_TESSERACT_ONLY = 0;
    
    /** Run Cube only - better accuracy, but slower */
    public static final int OEM_CUBE_ONLY = 1;
    
    /** Run both and combine results - best accuracy */
    public static final int OEM_TESSERACT_CUBE_COMBINED = 2;
    
    /** Default OCR engine mode. */
    public static final int OEM_DEFAULT = 3;

    
    /**
     * Elements of the page hierarchy, used in {@link ResultIterator} to provide
     * functions that operate on each level without having to have 5x as many
     * functions.
     * <p>
     * NOTE: At present {@link #RIL_PARA} and {@link #RIL_BLOCK} are equivalent
     * as there is no paragraph internally yet.
     */
    public static final class PageIteratorLevel {
        /** Block of text/image/separator line. */
        public static final int RIL_BLOCK = 0;

        /** Paragraph within a block. */
        public static final int RIL_PARA = 1;

        /** Line within a paragraph. */
        public static final int RIL_TEXTLINE = 2;

        /** Word within a text line. */
        public static final int RIL_WORD = 3;

        /** Symbol/character within a word. */
        public static final int RIL_SYMBOL = 4;
    };
    
    /**
     * Constructs an instance of TessBaseAPI.
     */
    public TessBaseAPI() {
        nativeConstruct();
    }

    /**
     * Called by the GC to clean up the native data that we set up when we
     * construct the object.
     * 
     * Altered from original version to avoid a crash-causing bug in OCR Test application.
     */
    @Override
    protected void finalize() throws Throwable {
      // TODO Find out why finalize() is getting called when we change languages, even though
      // we're still using the object. Is bypassing nativeFinalize() OK if we still call
      // baseApi.end() in the Activity's onDestroy()?

      try {
        Log.d("TessBaseAPI.java", "finalize(): NOT calling nativeFinalize() due to premature garbage collection");
        //nativeFinalize();
      } finally {
        Log.d("TessBaseAPI.java", "finalize(): calling super.finalize()");
        super.finalize();
      }
    }

    /**
     * Initializes the Tesseract engine with a specified language model. Returns
     * <code>true</code> on success.
     * <p>
     * Instances are now mostly thread-safe and totally independent, but some
     * global parameters remain. Basically it is safe to use multiple
     * TessBaseAPIs in different threads in parallel, UNLESS you use SetVariable
     * on some of the Params in classify and textord. If you do, then the effect
     * will be to change it for all your instances.
     * <p>
     * The datapath must be the name of the parent directory of tessdata and
     * must end in / . Any name after the last / will be stripped. The language
     * is (usually) an ISO 639-3 string or <code>null</code> will default to eng.
     * It is entirely safe (and eventually will be efficient too) to call Init
     * multiple times on the same instance to change language, or just to reset
     * the classifier.
     * <p>
	 * The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
     * that multiple languages are to be loaded. Eg hin+eng will load Hindi and
     * English. Languages may specify internally that they want to be loaded
     * with one or more other languages, so the ~ sign is available to override
     * that. Eg if hin were set to load eng by default, then hin+~eng would force
     * loading only hin. The number of loaded languages is limited only by
     * memory, with the caveat that loading additional languages will impact
     * both speed and accuracy, as there is more work to do to decide on the
     * applicable language, and there is more chance of hallucinating incorrect
     * words.
	 * <p>
     * <b>WARNING:</b> On changing languages, all Tesseract parameters are reset
     * back to their default values. (Which may vary between languages.)
     * <p>
     * If you have a rare need to set a Variable that controls initialization
     * for a second call to Init you should explicitly call End() and then use
     * SetVariable before Init. This is only a very rare use case, since there
     * are very few uses that require any parameters to be set before Init.
     *
     * @param datapath the parent directory of tessdata ending in a forward
     *            slash
     * @param language (optional) an ISO 639-3 string representing the language(s)
     * @return <code>true</code> on success
     */
    public boolean init(String datapath, String language) {
        if (datapath == null)
            throw new IllegalArgumentException("Data path must not be null!");
        if (!datapath.endsWith(File.separator))
            datapath += File.separator;

        File tessdata = new File(datapath + "tessdata");
        if (!tessdata.exists() || !tessdata.isDirectory())
            throw new IllegalArgumentException("Data path must contain subfolder tessdata!");

        return nativeInit(datapath, language);
    }

    /**
     * Initializes the Tesseract engine with the specified language model(s). Returns
     * <code>true</code> on success.
     *
     * @param datapath the parent directory of tessdata ending in a forward
     *            slash
     * @param language (optional) an ISO 639-3 string representing the language(s)
     * @param mode the OCR engine mode to be set
     * @return <code>true</code> on success
     */
    public boolean init(String datapath, String language, int ocrEngineMode) {
        if (datapath == null)
            throw new IllegalArgumentException("Data path must not be null!");
        if (!datapath.endsWith(File.separator))
            datapath += File.separator;

        File tessdata = new File(datapath + "tessdata");
        if (!tessdata.exists() || !tessdata.isDirectory())
            throw new IllegalArgumentException("Data path must contain subfolder tessdata!");

        return nativeInitOem(datapath, language, ocrEngineMode);	
    }

    /**
     * Returns the languages string used in the last valid initialization.
     * If the last initialization specified "deu+hin" then that will be
     * returned. If hin loaded eng automatically as well, then that will
     * not be included in this list. To find the languages actually
     * loaded use GetLoadedLanguagesAsVector.
     * 
     * @return the last-used language code
     */
    public String getInitLanguagesAsString() {
    	return nativeGetInitLanguagesAsString();
    }
    
    /**
     * Frees up recognition results and any stored image data, without actually
     * freeing any recognition data that would be time-consuming to reload.
     * Afterwards, you must call SetImage or SetRectangle before doing any
     * Recognize or Get* operation.
     */
    public void clear() {
        nativeClear();
    }

    /**
     * Closes down tesseract and free up all memory. End() is equivalent to
     * destructing and reconstructing your TessBaseAPI.
     * <p>
     * Once End() has been used, none of the other API functions may be used
     * other than Init and anything declared above it in the class definition.
     */
    public void end() {
        nativeEnd();
    }

    /**
     * Set the value of an internal "variable" (of either old or new types).
     * Supply the name of the variable and the value as a string, just as you
     * would in a config file.
     * <p>
     * Example: <code>setVariable(VAR_TESSEDIT_CHAR_BLACKLIST, "xyz"); to ignore x, y and z. * setVariable(VAR_BLN_NUMERICMODE, "1"); to set numeric-only mode. * </code>
     * <p>
     * setVariable() may be used before open(), but settings will revert to
     * defaults on close().
     *
     * @param var name of the variable
     * @param value value to set
     * @return false if the name lookup failed
     */
    public boolean setVariable(String var, String value) {
        return nativeSetVariable(var, value);
    }

    /**
     * Sets the page segmentation mode. This controls how much processing the
     * OCR engine will perform before recognizing text.
     *
     * @param mode the page segmentation mode to set
     */
    public void setPageSegMode(int mode) {
        nativeSetPageSegMode(mode);
    }

    /**
     * Sets debug mode. This controls how much information is displayed in the
     * log during recognition.
     *
     * @param enabled <code>true</code> to enable debugging mode
     */
    public void setDebug(boolean enabled) {
        nativeSetDebug(enabled);
    }

    /**
     * Restricts recognition to a sub-rectangle of the image. Call after
     * SetImage. Each SetRectangle clears the recogntion results so multiple
     * rectangles can be recognized with the same image.
     *
     * @param rect the bounding rectangle
     */
    public void setRectangle(Rect rect) {
        setRectangle(rect.left, rect.top, rect.width(), rect.height());
    }

    /**
     * Restricts recognition to a sub-rectangle of the image. Call after
     * SetImage. Each SetRectangle clears the recogntion results so multiple
     * rectangles can be recognized with the same image.
     *
     * @param left the left bound
     * @param top the right bound
     * @param width the width of the bounding box
     * @param height the height of the bounding box
     */
    public void setRectangle(int left, int top, int width, int height) {
        nativeSetRectangle(left, top, width, height);
    }

    /**
     * Provides an image for Tesseract to recognize.
     *
     * @param file absolute path to the image file
     */
    public void setImage(File file) {
        Pix image = ReadFile.readFile(file);

        if (image == null) {
            throw new RuntimeException("Failed to read image file");
        }

        nativeSetImagePix(image.getNativePix());
    }

    /**
     * Provides an image for Tesseract to recognize. Does not copy the image
     * buffer. The source image must persist until after Recognize or
     * GetUTF8Chars is called.
     *
     * @param bmp bitmap representation of the image
     */
    public void setImage(Bitmap bmp) {
        Pix image = ReadFile.readBitmap(bmp);

        if (image == null) {
            throw new RuntimeException("Failed to read bitmap");
        }

        nativeSetImagePix(image.getNativePix());
    }

    /**
     * Provides a Leptonica pix format image for Tesseract to recognize. Clones
     * the pix object. The source image may be destroyed immediately after
     * SetImage is called, but its contents may not be modified.
     *
     * @param image Leptonica pix representation of the image
     */
    public void setImage(Pix image) {
        nativeSetImagePix(image.getNativePix());
    }

    /**
     * Provides an image for Tesseract to recognize. Copies the image buffer.
     * The source image may be destroyed immediately after SetImage is called.
     * SetImage clears all recognition results, and sets the rectangle to the
     * full image, so it may be followed immediately by a GetUTF8Text, and it
     * will automatically perform recognition.
     *
     * @param imagedata byte representation of the image
     * @param width image width
     * @param height image height
     * @param bpp bytes per pixel
     * @param bpl bytes per line
     */
    public void setImage(byte[] imagedata, int width, int height, int bpp, int bpl) {
        nativeSetImageBytes(imagedata, width, height, bpp, bpl);
    }

    /**
     * The recognized text is returned as a String which is coded as UTF8.
     *
     * @return the recognized text
     */
    public String getUTF8Text() {
        // Trim because the text will have extra line breaks at the end
        String text = nativeGetUTF8Text();

        return text.trim();
    }

    /**
     * Returns the mean confidence of text recognition.
     *
     * @return the mean confidence
     */
    public int meanConfidence() {
        return nativeMeanConfidence();
    }

    /**
     * Returns all word confidences (between 0 and 100) in an array. The number
     * of confidences should correspond to the number of space-delimited words
     * in GetUTF8Text().
     *
     * @return an array of word confidences (between 0 and 100) for each
     *         space-delimited word returned by GetUTF8Text()
     */
    public int[] wordConfidences() {
        int[] conf = nativeWordConfidences();

        // We shouldn't return null confidences
        if (conf == null)
            conf = new int[0];

        return conf;
    }

    /**
     * Returns the result of page layout analysis as a Pixa, in reading order.
     * 
     * @return Pixa contaning page layout bounding boxes
     */
    public Pixa getRegions() {
        return new Pixa(nativeGetRegions(), 0, 0);
    }
    
    /**
     * Returns the textlines as a Pixa.
     * 
     * Block IDs are not returned.
     * 
     * @return Pixa containing textlines
     */
    public Pixa getTextlines() {
	return new Pixa(nativeGetTextlines(), 0, 0);
    }
    
    /**
     * Returns the strips as a Pixa.
     * 
     * Block IDs are not returned.
     * 
     * @return Pixa containing strips
     */
    public Pixa getStrips() {
	return new Pixa(nativeGetStrips(), 0, 0);
    }    
    
    /**
     * Returns the word bounding boxes as a Pixa, in reading order.
     * 
     * @return Pixa containing word bounding boxes 
     */
    public Pixa getWords() {
        return new Pixa(nativeGetWords(), 0, 0);
    }

    public ResultIterator getResultIterator() {
        int nativeResultIterator = nativeGetResultIterator();

        if (nativeResultIterator == 0) {
            return null;
        }

        return new ResultIterator(nativeResultIterator);
    }
    
    /**
     * Make a HTML-formatted string with hOCR markup from the internal data
     * structures.  
     * 
     * @param page is 0-based but will appear in the output as 1-based. 
     * @return HTML-formatted string with hOCR markup
     */
    public String getHOCRText(int page){
        return nativeGetHOCRText(page);
    }
    
    /**
     * Set the name of the input file. Needed only for training and
     * reading a UNLV zone file.
     * 
     * @param name input file name
     */
    public void setInputName(String name){
        nativeSetInputName(name);
    } 
    
    /**
     * Set the name of the output files. 
     * Needed only for debugging. 
     * @param name output file name
     */
    public void setOutputName(String name){
        nativeSetOutputName(name);
    } 
    
    /**
     * Read a "config" file containing a set of variable, value pairs.
     * Searches the standard places: <i>tessdata/configs, tessdata/tessconfigs</i>.
     * 
     * @param filename the configuration filename, without path
     */
    public void ReadConfigFile(String filename){
        nativeReadConfigFile(filename);
    }
    
    /**
     * The recognized text is returned as coded in the same format as a UTF8 
     * box file used in training.
     * 
     * @param page is a 0-based page index that will appear in the box file.
     */
    public String getBoxText(int page){
        return nativeGetBoxText(page);
    }

    // ******************
    // * Native methods *
    // ******************

    /**
     * Initializes static native data. Must be called on object load.
     */
    private static native void nativeClassInit();

    /**
     * Initializes native data. Must be called on object construction.
     */
    private native void nativeConstruct();

    /**
     * Finalizes native data. Must be called on object destruction.
     */
    private native void nativeFinalize();

    private native boolean nativeInit(String datapath, String language);
    
    private native boolean nativeInitOem(String datapath, String language, int mode);

    private native String nativeGetInitLanguagesAsString();
    
    private native void nativeClear();

    private native void nativeEnd();

    private native void nativeSetImageBytes(
            byte[] imagedata, int width, int height, int bpp, int bpl);

    private native void nativeSetImagePix(int nativePix);

    private native void nativeSetRectangle(int left, int top, int width, int height);

    private native String nativeGetUTF8Text();

    private native int nativeMeanConfidence();

    private native int[] nativeWordConfidences();

    private native boolean nativeSetVariable(String var, String value);

    private native void nativeSetDebug(boolean debug);

    private native void nativeSetPageSegMode(int mode);
    
    private native int nativeGetRegions();

    private native int nativeGetTextlines();

    private native int nativeGetStrips();

    private native int nativeGetWords();

    private native int nativeGetResultIterator();
    
    private native String nativeGetBoxText(int page_number);
    
    private native String nativeGetHOCRText(int page_number);
    
    private native void nativeSetInputName(String name);
    
    private native void nativeSetOutputName(String name);
    
    private native void nativeReadConfigFile(String fileName);
}