/*
* Copyright (C) 2012 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.googlecode.tesseract.android;
import com.googlecode.tesseract.android.TessBaseAPI.PageIteratorLevel;
/**
 * Java-side wrapper around a native page iterator.
 * <p>
 * An instance stores the integer handle it was constructed with and forwards
 * that handle to the static native methods declared at the bottom of this
 * class. The {@code lept} and {@code tess} native libraries are loaded when
 * the class is initialized.
 */
public class PageIterator {
    static {
        // Loaded in this exact order: "lept" first, then "tess"
        // (presumably tess depends on lept -- confirm before reordering).
        for (String library : new String[] {"lept", "tess"}) {
            System.loadLibrary(library);
        }
    }

    /** Handle (pointer value) of the native page iterator backing this object. */
    private final int mNativePageIterator;

    /**
     * Wraps an existing native page iterator.
     *
     * @param nativeHandle handle of the native page iterator; stored as-is
     *        and passed unchanged to every native call made by this object
     */
    /* package */PageIterator(int nativeHandle) {
        this.mNativePageIterator = nativeHandle;
    }

    /**
     * Rewinds the iterator so that it points at the start of the page.
     */
    public void begin() {
        final int handle = this.mNativePageIterator;
        nativeBegin(handle);
    }

    /**
     * Advances to the start of the next object at the requested level of the
     * page hierarchy.
     * <p>
     * NOTE: {@link PageIteratorLevel#RIL_SYMBOL} skips non-text blocks, while
     * every other {@link PageIteratorLevel} value visits each non-text block
     * exactly once. A non-text block can be thought of as holding a single
     * paragraph, with a single line, with a single imaginary word.
     * <p>
     * Calls to {@link #next} using different levels may be freely mixed.
     * <p>
     * Words in right-to-left scripts are iterated correctly provided the
     * appropriate language has been loaded into Tesseract.
     *
     * @param level the page iterator level. See {@link PageIteratorLevel}.
     * @return {@code false} if the end of the page was reached, {@code true}
     *         otherwise.
     */
    public boolean next(int level) {
        final boolean advanced = nativeNext(this.mNativePageIterator, level);
        return advanced;
    }

    /**
     * Returns the bounding box of the current object at the given level.
     * <p>
     * Coordinate system:
     * <ul>
     * <li>Integer coordinates lie on the cracks between pixels.</li>
     * <li>The top-left corner of the top-left pixel of the image is (0,0);
     * the bottom-right corner of the bottom-right pixel is
     * (width, height).</li>
     * <li>A bounding box runs from the top-left of its top-left contained
     * pixel to the bottom-right of its bottom-right contained pixel, so the
     * box of the single top-left pixel of the image is (0,0)-&gt;(1,1).</li>
     * <li>If an image rectangle has been set in the API, the returned
     * coordinates relate to the original (full) image rather than to that
     * rectangle.</li>
     * </ul>
     * The returned bounding box may clip foreground pixels from a grey image.
     * <p>
     * NOTE(review): this method was historically described as returning
     * "x, y, w, h". The array is produced entirely by native code that is not
     * visible from this class, so verify the element order (in particular
     * whether the last two entries are width/height or right/bottom) against
     * the native implementation before relying on it.
     *
     * @param level the page iterator level. See {@link PageIteratorLevel}.
     * @return the bounding rectangle of the current object at the given
     *         level, exactly as returned by the native layer
     */
    public int[] getBoundingBox(int level) {
        final int[] box = nativeBoundingBox(this.mNativePageIterator, level);
        return box;
    }

    // Native entry points. Names and parameter types form the JNI binding and
    // must stay in sync with the native library.

    private static native void nativeBegin(int handle);

    private static native boolean nativeNext(int handle, int level);

    private static native int[] nativeBoundingBox(int handle, int level);
}