/*
* Copyright (C) 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.googlecode.leptonica.android;
/**
 * Image binarization methods.
 * <p>
 * Every public method is a thin, argument-checking wrapper around a native
 * routine from the {@code lept} library, which is loaded when this class is
 * initialized.
 *
 * @author alanv@google.com (Alan Viverette)
 */
public class Binarize {
    static {
        System.loadLibrary("lept");
    }

    // Otsu thresholding constants

    /** Desired tile X dimension; actual size may vary */
    public static final int OTSU_SIZE_X = 32;

    /** Desired tile Y dimension; actual size may vary */
    public static final int OTSU_SIZE_Y = 32;

    /** Desired X smoothing value */
    public static final int OTSU_SMOOTH_X = 2;

    /** Desired Y smoothing value */
    public static final int OTSU_SMOOTH_Y = 2;

    /** Fraction of the max Otsu score, typically 0.1 */
    public static final float OTSU_SCORE_FRACTION = 0.1f;

    /**
     * Performs locally-adaptive Otsu threshold binarization with default
     * parameters ({@link #OTSU_SIZE_X}, {@link #OTSU_SIZE_Y},
     * {@link #OTSU_SMOOTH_X}, {@link #OTSU_SMOOTH_Y} and
     * {@link #OTSU_SCORE_FRACTION}).
     *
     * @param pixs An 8 bpp PIX source image.
     * @return A 1 bpp thresholded PIX image.
     * @throws IllegalArgumentException if {@code pixs} is null or is not 8 bpp.
     * @throws RuntimeException if the native call fails to produce an image.
     */
    public static Pix otsuAdaptiveThreshold(Pix pixs) {
        return otsuAdaptiveThreshold(
                pixs, OTSU_SIZE_X, OTSU_SIZE_Y, OTSU_SMOOTH_X, OTSU_SMOOTH_Y, OTSU_SCORE_FRACTION);
    }

    /**
     * Performs locally-adaptive Otsu threshold binarization.
     * <p>
     * Notes:
     * <ol>
     * <li>The Otsu method finds a single global threshold for an image. This
     * function allows a locally adapted threshold to be found for each tile
     * into which the image is broken up.
     * <li>The array of threshold values, one for each tile, constitutes a
     * highly downscaled image. This array is optionally smoothed using a
     * convolution. The full width and height of the convolution kernel are (2 *
     * smoothX + 1) and (2 * smoothY + 1).
     * <li>The minimum tile dimension allowed is 16. If such small tiles are
     * used, it is recommended to use smoothing, because without smoothing, each
     * small tile determines the splitting threshold independently. A tile that
     * is entirely in the image bg will then hallucinate fg, resulting in a very
     * noisy binarization. The smoothing should be large enough that no tile is
     * only influenced by one type (fg or bg) of pixels, because it will force a
     * split of its pixels.
     * <li>To get a single global threshold for the entire image, use input
     * values of sizeX and sizeY that are larger than the image. For this
     * situation, the smoothing parameters are ignored.
     * <li>The threshold values partition the image pixels into two classes: one
     * whose values are less than the threshold and another whose values are
     * greater than or equal to the threshold. This is the same use of
     * 'threshold' as in pixThresholdToBinary().
     * <li>The scoreFraction is the fraction of the maximum Otsu score, which is
     * used to determine the range over which the histogram minimum is searched.
     * See numaSplitDistribution() for details on the underlying method of
     * choosing a threshold.
     * <li>This enables a modified version of the Otsu criterion for
     * splitting the distribution of pixels in each tile into a fg and bg part.
     * The modification consists of searching for a minimum in the histogram
     * over a range of pixel values where the Otsu score is within a defined
     * fraction, scoreFraction, of the max score. To get the original Otsu
     * algorithm, set scoreFraction == 0.
     * </ol>
     *
     * @param pixs An 8 bpp PIX source image.
     * @param sizeX Desired tile X dimension; actual size may vary.
     * @param sizeY Desired tile Y dimension; actual size may vary.
     * @param smoothX Half-width of convolution kernel applied to threshold
     *            array: use 0 for no smoothing.
     * @param smoothY Half-height of convolution kernel applied to threshold
     *            array: use 0 for no smoothing.
     * @param scoreFraction Fraction of the max Otsu score; typ. 0.1 (use 0.0
     *            for standard Otsu).
     * @return A 1 bpp thresholded PIX image.
     * @throws IllegalArgumentException if {@code pixs} is null or is not 8 bpp.
     * @throws RuntimeException if the native call fails to produce an image.
     */
    public static Pix otsuAdaptiveThreshold(
            Pix pixs, int sizeX, int sizeY, int smoothX, int smoothY, float scoreFraction) {
        if (pixs == null) {
            throw new IllegalArgumentException("Source pix must be non-null");
        }
        if (pixs.getDepth() != 8) {
            throw new IllegalArgumentException("Source pix depth must be 8bpp");
        }

        int nativePix = nativeOtsuAdaptiveThreshold(
                pixs.mNativePix, sizeX, sizeY, smoothX, smoothY, scoreFraction);

        // A zero handle is how the native layer signals failure.
        if (nativePix == 0) {
            throw new RuntimeException("Failed to perform Otsu adaptive threshold on image");
        }

        return new Pix(nativePix);
    }

    /**
     * Performs Sauvola binarization.
     * <p>
     * Notes:
     * <ol>
     * <li>The window width and height are 2 * whsize + 1. The minimum
     * value for whsize is 2; typically it is &gt;= 7.
     * <li>For nx == ny == 1, this defaults to pixSauvolaBinarize().
     * <li>Why a tiled version?
     * <ul>
     * <li>Because the mean value accumulator is a uint32, overflow
     * can occur for an image with more than 16M pixels.
     * <li>The mean value accumulator array for 16M pixels is 64 MB.
     * The mean square accumulator array for 16M pixels is 128 MB.
     * Using tiles reduces the size of these arrays.
     * <li>Each tile can be processed independently, in parallel,
     * on a multicore processor.
     * </ul>
     * <li>The Sauvola threshold is determined from the formula:
     * <pre>
     *   t = m * (1 - k * (1 - s / 128))
     * where:
     *   t = local threshold
     *   m = local mean
     *   k = factor (&gt;= 0) [ typ. 0.35 ]
     *   s = local standard deviation, which is maximized at
     *       127.5 when half the samples are 0 and half are 255.
     * </pre>
     * <li>The basic idea of Niblack and Sauvola binarization is that
     * the local threshold should be less than the median value, and the larger
     * the variance, the closer to the median it should be chosen. Typical
     * values for k are between 0.2 and 0.5.
     * </ol>
     *
     * @param pixs An 8 bpp PIX source image.
     * @param whsize Window half-width for measuring local statistics
     * @param factor Factor for reducing threshold due to variance; &gt;= 0
     * @param nx Subdivision into tiles; &gt;= 1
     * @param ny Subdivision into tiles; &gt;= 1
     * @return A 1 bpp thresholded PIX image.
     * @throws IllegalArgumentException if {@code pixs} is null or is not 8 bpp.
     * @throws RuntimeException if the native call fails to produce an image.
     */
    public static Pix sauvolaBinarizeTiled(Pix pixs, int whsize, float factor, int nx, int ny) {
        if (pixs == null) {
            throw new IllegalArgumentException("Source pix must be non-null");
        }
        if (pixs.getDepth() != 8) {
            throw new IllegalArgumentException("Source pix depth must be 8bpp");
        }

        int nativePix = nativeSauvolaBinarizeTiled(pixs.mNativePix, whsize, factor, nx, ny);

        // A zero handle is how the native layer signals failure. The message
        // names this operation so failures are not mistaken for Otsu failures.
        if (nativePix == 0) {
            throw new RuntimeException("Failed to perform Sauvola binarization on image");
        }

        return new Pix(nativePix);
    }

    // ***************
    // * NATIVE CODE *
    // ***************

    // NOTE(review): image handles cross the JNI boundary as 32-bit ints. If
    // these are native pointers, confirm this is safe on 64-bit targets.

    private static native int nativeOtsuAdaptiveThreshold(
            int nativePix, int sizeX, int sizeY, int smoothX, int smoothY, float scoreFract);

    private static native int nativeSauvolaBinarizeTiled(
            int nativePix, int whsize, float factor, int nx, int ny);
}