OCR.java example

Explorer

curso-avanzado-android-master
- AR_dni
  - gen
    - com
      - moob
        BuildConfig.java
  - src
    - com
      - itwizard
        mezzofanti
        OCR.java
      - moob
        AR_dniActivity.java
        Preview.java
- AlarmExample
  - gen
    - com
      - tid
        examples
        alarmservice
        BuildConfig.java
  - src
    - com
      - tid
        examples
        alarmservice
        AlarmExampleActivity.java
        NotifReceiver.java
        ToastReceiver.java
- BluetoothExamples
  - src
    - com
      - tid
        examples
        bluetooth
        BluetoothActivity.java
- Ejemplo112_ sharedpref
  - src
    - com
      - tid
        Ejemplo112_sharedpref
        Ejemplo112_sharedpref.java
- FragmentDemo
  - src
    - com
      - demo
        AsyncImageLoader.java
        Commons.java
        DynamicFragmentActivity.java
        FileDownloader.java
        MainActivity.java
        MultiDexClassLoader.java
        TestFragment.java
- HoneycombFragments Open
  - src
    - com
      - mamlambo
        tutorial
        fragments
        SampleListActivity.java
        SampleListFragment.java
        SampleViewerActivity.java
        SampleViewerFragment.java
- NotePad
  - gen
    - com
      - example
        android
        notepad
        BuildConfig.java
  - src
    - com
      - example
        android
        notepad
        NoteEditor.java
        NotePad.java
        NotePadProvider.java
        NotesList.java
        NotesLiveFolder.java
        TitleEditor.java
  - tests
    - src
      - com
        example
        android
        notepad
        NotePadTest.java
- NotePadTest
  - gen
    - com
      - jayway
        test
        BuildConfig.java
  - src
    - com
      - jayway
        test
        NotePadTest.java
- OCRTest
  - gen
    - com
      - moob
        ocrdnitest
        BuildConfig.java
  - src
    - com
      - moob
        ocrdnitest
        BeepManager.java
        CaptureActivity.java
        CaptureActivityHandler.java
        DecodeHandler.java
        DecodeThread.java
        FinishListener.java
        LuminanceSource.java
        OcrCharacterHelper.java
        OcrInitAsyncTask.java
        OcrRecognizeAsyncTask.java
        OcrResult.java
        OcrResultFailure.java
        OcrResultText.java
        PlanarYUVLuminanceSource.java
        PreferencesActivity.java
        ViewfinderView.java
    - edu
      - sfsu
        cs
        orange
        ocr
        camera
        AutoFocusManager.java
        CameraConfigurationManager.java
        CameraManager.java
        PreviewCallback.java
        language
        LanguageCodeHelper.java
- ObserverExample
  - src
    - com
      - tid
        examples
        observer
        FakeWebService.java
        MediaObserver.java
        ObserverActivity.java
- animaciones_xml
  - src
    - com
      - tid
        animacionxml
        Animaciones_xml.java
- customizados_textview
  - src
    - com
      - tid
        custom_element
        CustomTextView.java
        Customizados_textview.java
- ejemplo101_parcelables
  - src
    - com
      - tid
        Ejemplo101_parcelables
        Ejemplo101_parcelables.java
        RecibeExtra.java
        model
        DTOParcel.java
        DTOSerialz.java
- ejemplo102_barcode
  - src
    - com
      - tid
        Ejemplo102_barcode
        Ejemplo102_barcode.java
- ejemplo10_setextra
  - src
    - com
      - tid
        Ejemplo10_setextra
        Ejemplo10_setextra.java
        RecibeExtra.java
- ejemplo113_ficherosRES
  - src
    - com
      - tid
        Ejemplo113_ficherosRES
        Ejemplo113_ficherosRES.java
- ejemplo113_ficherosSd
  - src
    - com
      - tid
        Ejemplo113_ficherosSd
        Ejemplo113_ficherosSd.java
- ejemplo11_alm_bbdd
  - src
    - com
      - tid
        Ejemplo11_alm_bbdd
        DatabaseHelper.java
        Ejemplo11_alm_bbdd.java
- ejemplo121_notificaciontoast
  - src
    - com
      - tid
        Ejemplo121_notificaciontoast
        Ejemplo121_notificaciontoast.java
        activity2.java
- ejemplo12_listAdapter
  - src
    - com
      - tid
        Ejemplo12_listAdapter
        Ejemplo12_listAdapter.java
        MyListAdapter.java
- ejemplo12_notificaciones
  - src
    - com
      - tid
        Ejemplo12_notificaciones
        Ejemplo12_notificaciones.java
        activityNotificacion.java
- ejemplo130_ndk
  - src
    - com
      - tid
        ejemplondk
        Ejemplo130_ndkActivity.java
        NdkLib.java
- ejemplo1_1_hellondk
  - src
    - com
      - tid
        Ejemplo11Ndk
        EjemploDendk.java
- ejemplo1_helloworld
  - src
    - com
      - tid
        helloWorld
        helloWord.java
- ejemplo2_livecycle
  - src
    - com
      - tid
        livecycle
        LiveCycle2.java
        livecycle1.java
- ejemplo3_servicio
  - src
    - com
      - tid
        servicio
        ServiceExample.java
        ServicioManager.java
- ejemplo4_clienteExterno
  - gen
    - com
      - tid
        servicioclient
        service
        IServiceExample2.java
  - src
    - com
      - tid
        ejemplo41
        ejemplo41_clienteExterno.java
- ejemplo4_servicio_client
  - gen
    - com
      - tid
        servicioclient
        service
        IServiceExample2.java
  - src
    - com
      - tid
        servicioclient
        service
        ServiceExample2.java
        ThreadExecuted.java
        servicioclient.java
- ejemplo51_broadcastWakeup
  - src
    - com
      - tid
        broadcastWakeup
        BroadcastWakeup.java
        ejemplo5_broadcastWakeup.java
- ejemplo52_broadcastEmiter
  - src
    - com
      - tid
        ejemplo52_broadcastEmiter
        ejemplo52_broadcastEmiter.java
- ejemplo5_broadcastReciver
  - src
    - com
      - tid
        ejemplo5BroadcastRec
        RecibidorPrincipal.java
        ejemplo5Broadcast.java
- ejemplo61_customContentProv
  - gen
    - com
      - tid
        ejemplo61_customContentProv
        Manifest.java
  - src
    - com
      - tid
        ejemplo61_customContentProv
        CustomContentProvider.java
        ejemplo61_customContentProv.java
- ejemplo62_externalContent
  - src
    - com
      - tid
        ejemplo62_externalContent
        ejemplo62_externalContent.java
- ejemplo6_contentProvider
  - src
    - com
      - tid
        ejemplo6_contentProvider
        ejemplo6_contentProvider.java
- ejemplo71_multipantalla
  - src
    - com
      - tid
        Ejemplo71_multipantalla
        Ejemplo71_multipantalla.java
- ejemplo72_multifuente_xml
  - src
    - com
      - tid
        Ejemplo7_multifuente_xmlActivity
        Ejemplo7_multifuente_xmlActivity.java
- ejemplo73_manifest
  - src
    - com
      - tid
        Ejemplo73_manifest
        Activity_imagen.java
        Ejemplo73_manifestActivity.java
- ejemplo7_ViewsYXML
  - src
    - com
      - tid
        ejemplo7_ViewsYXML
        ejemplo7_ViewsYXML.java
- ejemplo81_widgetBotones
  - src
    - com
      - tid
        Ejemplo81_widgetBotones
        Ejemplo81_widgetBotones.java
        MyWidgetProvider.java
        ViewService.java
- ejemplo8_widget
  - src
    - com
      - tid
        Ejemplo8_widget
        Widget8.java
- ejemplo91_threadHandler
  - src
    - com
      - tid
        Ejemplo91_threadHandler
        Ejemplo91_threadHandler.java
        ThreadLoop.java
- ejemplo92_asynctask
  - src
    - com
      - tid
        Ejemplo92_asynctask
        Ejemplo92_asynctask.java
- ejemplo9_threads
  - src
    - com
      - tid
        Ejemplo9_threads
        Ejemplo9_threads.java
- ejemploAnalisis
  - gen
    - com
      - tid
        analisis
        BuildConfig.java
  - src
    - com
      - tid
        analisis
        EjemploAnalisisActivity.java
- ejemploRobotium
  - gen
    - com
      - tid
        robotium
        BuildConfig.java
  - src
    - com
      - tid
        robotium
        EjemploRobotiumActivity.java
- ejemploRobotium-test
  - gen
    - com
      - tid
        robotium
        test
        BuildConfig.java
  - src
    - com
      - tid
        robotium
        test
        EjemploTesting.java
- ejemplo_opengl_java
  - gen
    - com
      - tid
        opengljava
        BuildConfig.java
  - src
    - com
      - tid
        opengljava
        Ejemplo_opengl_java.java
        GLUT.java
        OpenGLUtils.java
- ejemplo_systemservices
  - src
    - com
      - tid
        systemservices
        Ejemplo_systemservices.java
- ejemplo_testingui
  - gen
    - com
      - tid
        testingui
        BuildConfig.java
  - src
    - com
      - tid
        testingui
        Ejemplo_testingui.java
- estilos_android
  - src
    - com
      - tid
        estilos
        Estilos_android.java
- fragment_ejemplo
  - src
    - com
      - tid
        Fragment_ejemplo
        DetailsActivity.java
        Fragment_ejemplo.java
- javaexamples
  - src
    - example.java
- seminario_1
  - src
    - com
      - aurigae
        seminario1
        SeminarioAlt.java
        seminario1.java
- sensores_ejemplo
  - src
    - com
      - tid
        Sensores_ejemplo.java
- videoPlayer
  - gen
    - com
      - moob
        video
        BuildConfig.java
  - src
    - com
      - moob
        video
        VideoPlayerActivity.java
- widget82_widgetBoton
  - src
    - com
      - tid
        widget82_widgetboton
        WIget82_widgetBoton.java
        WidgetBoton1.java

/*
 * Copyright (C) 2009 IT Wizard.
 * http://www.itwizard.ro
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.itwizard.mezzofanti;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;

import android.content.SharedPreferences;
import android.graphics.Bitmap;
import android.graphics.Typeface;
import android.text.SpannableString;
import android.text.Spanned;
import android.text.style.StyleSpan;
import android.util.Log;

/**
 * The wrapper for the JNI-OCR library
 */
public final class OCR 
{
	// Shared OCR configuration; static, so all users of this class see the same settings.
	public static OCRConfig mConfig = new OCRConfig();	
	// Directory of the OCR language data; AddUserWord() appends to "<language>.user-words" in it.
	public static final String DATA_PATH = "/sdcard/tessdata/";
	private static OCR m_OCRinstance = null;		// there is only one ocr instance, publicly access it with get()
	// Tag handed to every Log.v() call of this class.
	private static final String TAG = "TAG";

	// Java-lib variables
	// Page segmentation modes; the selected one lives in OCRConfig.m_iPSMMode and is passed to setPageSegModeNative().
	public final static int PSM_AUTO = 0;           // Fully automatic page segmentation. (Default.)
	public final static int PSM_SINGLE_COLUMN = 1;  // Assume a single column of text of variable sizes.
	public final static int PSM_SINGLE_BLOCK = 2;   // Assume a single uniform block of text.
	public final static int PSM_SINGLE_LINE = 3;    // Treat the image as a single text line.
	public final static int PSM_SINGLE_WORD = 4;    // Treat the image as a single word.
	public final static int PSM_SINGLE_CHAR = 5;    // Treat the image as a single character.
	public final static int PSM_COUNT = 6;          // Number of enum entries.

	// Results of the last ImgOCRAndFilter(int[], ...) run; static, so they are overwritten by every run.
	public static Word[] m_asWords = null;				// vector to keep all info about each word in the results
	public static SpannableString m_ssOCRResult = null;	// the parsed result (all errors are changed to '#', all non-dictionary words become 'italic')
	public static int m_iMeanConfidence = 0;			// the mean overall confidence

	// Line-mode flag of the last ImgOCRAndFilter(int[], ...) call.
	private boolean m_bLineMode = false;
	public boolean m_bIsLibraryInitialized = false;	// is the library initialized

	/**
	 * One recognized word together with the attributes the result
	 * post-processing works with.
	 */
	public final class Word
	{
		String m_sBody;			// text of the word (sanitized when unknown-char replacement is enabled)
		int m_iConfidence;		// OCR confidence reported for this word
		boolean m_bIsValidWord;	// true when the word passed the dictionary check
		boolean m_bNewLine;		// true when the raw word contained a line feed
		int m_iEnd;				// end offset of the word inside the overall result text

		/**
		 * Creates an empty word: no text, zero confidence, not a dictionary word.
		 */
		public Word()
		{
			m_iEnd = 0;
			m_bNewLine = false;
			m_bIsValidWord = false;
			m_iConfidence = 0;
			m_sBody = "";
		}

		/**
		 * Creates a word from the raw OCR output.
		 * When mConfig.m_bReplaceUnknownChars is set, line breaks are removed and
		 * every run of characters outside the accepted set is replaced by '#'.
		 * A word containing '#' is never considered valid; otherwise the raw
		 * body is checked against the dictionary.
		 *
		 * @param body the raw word text as delivered by the OCR engine
		 * @param conf the confidence for this word
		 */
		public Word(String body, int conf)
		{
			String cleaned = body;
			if (mConfig.m_bReplaceUnknownChars)
			{
				cleaned = body.replaceAll("[\n\r]+", "")
						.replaceAll("[^A-Za-z0-9!?.,%$@&*(){}<>:;'\"-]+", "#");
			}
			m_sBody = cleaned;
			m_iConfidence = conf;

			// the dictionary lookup deliberately receives the raw body, as before
			m_bIsValidWord = !cleaned.contains("#") && MyIsValidDictionaryWord(body);

			m_bNewLine = body.contains("\n");
			Log.v(TAG, "Word("+m_sBody+","+conf+")");
		}
	} // end class Word

	// -------------------------------------------------------------------

	/**
	 * Stores the OCR configuration and offers helpers to query the
	 * installed OCR languages.
	 */
	public final static class OCRConfig
	{
		public int m_iMinOveralConfidence;		// the overall confidence for all decoded text
		public int m_iMinWordConfidence;		// the confidence per each word
		public int m_iPSMMode = PSM_AUTO; 		// the OCR scanning mode (one of the OCR.PSM_* values)
		public boolean m_bUseBWFilter;			// enable or not the BW filter
		public boolean m_bShowValidWordsOnly;	// if enabled, we will display only the dictionary-valid words after OCR
		public boolean m_bReplaceUnknownChars;	// replace strange characters with '#'

		private byte m_bImgDivisor = 2;			// the image divisor factor (2 or 4, see SetImgDivisor)
		private String m_sLanguage = "eng";		// current language (short code)
		public String m_asLanguages[] = null; 	// all available languages; null until they have been read

		/**
		 * Creates a configuration filled with the factory defaults.
		 */
		public OCRConfig()
		{
			LoadFabricDefaults();
		}

		/**
		 * Resets every setting to its factory default value.
		 */
		public void LoadFabricDefaults()
		{
			m_iMinOveralConfidence = 60;
			m_iMinWordConfidence = 50;
			m_iPSMMode = OCR.PSM_AUTO;
			SetImgDivisor(2);
			m_bUseBWFilter = false;
			m_bShowValidWordsOnly = false;
			m_bReplaceUnknownChars = true;
			m_sLanguage = "eng";
			Log.v(TAG, "LoadFabricDefaults(): " + m_sLanguage);
		}

		/**
		 * Loads the settings.
		 * NOTE: the preference lookups (PreferencesActivity.KEY_MIN_OVERALL_CONFIDENCE,
		 * KEY_MIN_WORD_CONFIDENCE, KEY_SPEED_QUALITY, KEY_USE_IMAGE_LIGHT_FILTER,
		 * KEY_SET_OCR_LANGUAGE) are disabled in this version, so fixed values are
		 * written and {@code prefs} is not read. The values are written to the
		 * shared OCR.mConfig instance, not necessarily to this object.
		 *
		 * @param prefs the shared preferences (currently unused)
		 * @return true on success, false if an exception occurred
		 */
		public boolean GetSettings(SharedPreferences prefs)
		{
			Log.v(TAG, "GetSettings() -----------------------------------------------");

			try
			{
				OCR.mConfig.m_iMinOveralConfidence = 60;
				OCR.mConfig.m_iMinWordConfidence = 50;
				OCR.mConfig.SetImgDivisor(2);
				OCR.mConfig.m_bUseBWFilter = false;
				OCR.mConfig.m_sLanguage = "eng";
			}
			catch (Exception ex)
			{
				Log.v(TAG, "exception: GetSettings()" + ex.toString());
				return false;
			}

			Log.v(TAG, "GetSettings: [" + 
					OCR.mConfig.m_iMinOveralConfidence +","+ 
					OCR.mConfig.m_iMinWordConfidence +","+ 
					OCR.mConfig.GetImgDivisor() +","+
					OCR.mConfig.m_bUseBWFilter +","+
					OCR.mConfig.m_sLanguage  +"]"
					);

			return true;
		}

		/**
		 * Sets the image divisor for the OCR. Any value other than 2 or 4 is ignored.
		 * @param d 2/4
		 */
		public void SetImgDivisor(int d)
		{
			if (d!=2 && d!=4)
				return;
			m_bImgDivisor = (byte)d;
		}

		/**
		 * @return the image divisor
		 */
		public byte GetImgDivisor()
		{
			return m_bImgDivisor;
		}

		/**
		 * @return OCR dictionary language (short code)
		 */
		public String GetLanguage()
		{
			Log.v(TAG, "GetLanguage(): " + m_sLanguage);
			return m_sLanguage;
		}

		/**
		 * @return the short codes of all available OCR dictionaries joined by ',';
		 *         an empty string when the language list has not been read yet
		 */
		public String GetLanguages()
		{
			// same null guard as GetvLanguages()/IsLanguageInstalled(): the list is null until it is read
			if (m_asLanguages == null)
				return "";

			StringBuilder joined = new StringBuilder();
			for (int i=0; i<m_asLanguages.length; i++)
			{
				if (i > 0)
					joined.append(',');
				joined.append(m_asLanguages[i]);
			}
			return joined.toString();
		}

		/**
		 * @return a vector of strings, each the long name of an available
		 *         OCR-dictionary language; null when the language list is not set
		 */
		public String[] GetvLanguages()
		{
			if (m_asLanguages == null)
				return null;

			String ret[] = new String[m_asLanguages.length];
			for (int i=0; i<m_asLanguages.length; i++)
				ret[i] = LanguageMore(m_asLanguages[i]);
			return ret;
		}

		/**
		 * Check if a language is installed.
		 * @param lang the language to check for, as short code or long name
		 * @return true if the language is in the available list; false otherwise
		 *         (also for a null argument or when the list is not set)
		 */
		public boolean IsLanguageInstalled(String lang)
		{
			if (m_asLanguages == null || lang == null)
				return false;

			for (int i=0; i<m_asLanguages.length; i++)
			{
				if (lang.equals(m_asLanguages[i]))
					return true;
				if (lang.equals(LanguageMore(m_asLanguages[i])))
					return true;
			}
			return false;
		}

		/**
		 * @param lang the language in short format
		 * @return the language in long-string format, or "error" for an unknown (or null) code
		 */
		public String LanguageMore(String lang)
		{
			// constant-first equals() keeps a null code from throwing
			if ( "eng".equals(lang) )
				return "English";
			if ( "spa".equals(lang) )
				return "Spanish";
			if ( "fra".equals(lang) )
				return "French";
			if ( "ita".equals(lang) )
				return "Italian";
			if ( "deu".equals(lang) )
				return "German";
			return "error";
		}

		/** 
		 * @return the current OCR language in long-string format
		 */
		public String GetLanguageMore()
		{
			return LanguageMore(m_sLanguage);
		}

	} // end class OCRConfig




	// -------------------------------------------------------------------
	/**
	 * constructor
	 */
	private OCR()
	{
		String text = libVer();

		classInitNative();
		initializeNativeDataNative();

		mConfig.m_asLanguages = getLanguagesNative(); 	// this should be called at the beginning, 
		// such a way nobody calls without a valid language
		// if not called, the recognizeNative will not work
		Log.v(TAG, "OCR Initialize() done - libver "+ text + " no-langs-installed=" + mConfig.m_asLanguages.length);
	}

	/**
	 * Creates the single OCR instance if none exists yet; further calls do nothing.
	 */
	public static void Initialize()
	{
		if (m_OCRinstance != null)
			return;

		m_OCRinstance = new OCR();
	}

	/**
	 * Accessor for the single OCR instance.
	 * @return the ocr instance; null while no instance has been created
	 */
	public static OCR get()
	{
		return OCR.m_OCRinstance;
	}

	/**
	 * Cleanup function: runs OCRClean() when the library is flagged as
	 * initialized and clears that flag afterwards.
	 */
	public void Destructor()  
	{
		Log.v(TAG, "OCR - finalize");

		// nothing to release when the library was never initialized
		if (!m_bIsLibraryInitialized)
			return;

		OCRClean();
		m_bIsLibraryInitialized = false;
	}

	/**
	 * Queries the native library for the available OCR languages and
	 * stores them in the shared configuration.
	 */
	public static void ReadAvailableLanguages()
	{
		final String[] available = getLanguagesNative();
		mConfig.m_asLanguages = available;
	}

	/**
	 * Selects the OCR-dictionary language. The language must be one of the
	 * short codes in the available-languages list; on a match the
	 * configuration is updated and the native API is opened with it.
	 *
	 * @param lang the language to be set
	 * @return true if the language is in the list and was opened, false otherwise
	 *         (also when the language list has not been read)
	 */
	public boolean SetLanguage(String lang)
	{
		Log.v(TAG, "setLanguage to "+lang);

		final String[] installed = mConfig.m_asLanguages;
		if (installed == null)
			return false;

		Log.v(TAG, "noLangs=" + installed.length);

		for (String candidate : installed)
		{
			if (candidate.compareTo(lang) != 0)
				continue;

			mConfig.m_sLanguage = lang;
			openNative(lang);
			Log.v(TAG, "setLang succeded");
			return true;
		}

		Log.v(TAG, "setLang failed");
		return false;
	}


	/**
	 * Clears the API and the buffers used by the last OCR execution.
	 * Destructor() calls this when the library is flagged as initialized.
	 */
	public void OCRClean()
	{
		clearResultsNative();		// api clear
		releaseImageNative();		// free the image buffers

		// The next two calls are left commented out in the original source:
		//cleanupNativeDataNative(); 	// free internal buffers (test for crash if image not allocated)
		//closeNative(); 				// end api - called by the destructor itself
		closeDebug();				// clean close debug	
	}

	/**
	 * Fetches the mean confidence from the native library and keeps it in
	 * the static m_iMeanConfidence field.
	 */
	public void SaveMeanConfidence() 
	{
		final int mean = meanConfidenceNative();
		m_iMeanConfidence = mean;
	}

	/**
	 * Add a user word to the OCR-dictionary.
	 * The word is appended to the "<language>.user-words" file under DATA_PATH,
	 * the native API is closed and reopened with the current language, and
	 * matching entries of the last OCR result are flagged as valid words.
	 *
	 * @param newword the word to add
	 * @return true if the word was added; false if isValidWord() already accepts
	 *         it or if writing/reopening failed
	 */
	public boolean AddUserWord(String newword)
	{
		if (isValidWord(newword))
			return false;

		try 
		{
			BufferedWriter writer = new BufferedWriter(new FileWriter(DATA_PATH + mConfig.GetLanguage() +".user-words",true));
			try
			{
				writer.write(newword+"\n");
			}
			finally
			{
				// close even when the write fails, so the file handle is not leaked
				writer.close();
			}

			closeNative();
			openNative(mConfig.GetLanguage());

			// change the validity in the local structure; there may be no OCR result yet
			if (m_asWords != null)
			{
				for (int i=0; i<m_asWords.length; i++)
				{
					Word word = m_asWords[i];
					if (word != null && word.m_sBody != null && word.m_sBody.equals(newword))
						word.m_bIsValidWord = true;
				}
			}

			return true;

		} catch (Exception e) {
			Log.v(TAG, e.toString());
			return false;
		}
	}

	/**
	 * Get the internal picture that was used during the OCR. The OCR engine uses a filtered image, and not the original.
	 * Using this function, one can see why there were problems during OCR.
	 * The image is read from the file "out/tessinput.tif" and unpacked from
	 * 1 bit per pixel (8 pixels per byte, most significant bit first) into a bitmap.
	 * 
	 * @param pic_width the picture width
	 * @param pic_height the picture height
	 * @return the internal image as a bitmap, or null if the file could not be
	 *         read, is too short for the given size, or the bitmap could not be created
	 */
	public Bitmap GetInternImage(int pic_width, int pic_height)
	{	
		// offset of the first pixel byte inside the file, as used by the original code
		final int TIF_DATA_OFFSET = 258;

		// from disk file
		Log.v(TAG, "getInternImage w=" + pic_width + " h=" + pic_height);
		byte[] tif = null;
		try
		{
			tif = GetBytesFromFile(new File("out/" + "tessinput.tif"), pic_width, pic_height, 1);
		}
		catch (Exception ex)
		{
			Log.v(TAG, "exception: " + ex.toString());
		}

		if (tif == null)
		{
			Log.v(TAG, "warning: tif is null");
			return null;
		}

		// Refuse a file that is too short instead of failing later with an
		// ArrayIndexOutOfBoundsException in the middle of the unpacking loop.
		if (pic_width > 0 && pic_height > 0)
		{
			long lastBit = (long) (pic_height - 1) * pic_width + ((pic_width - 1) / 8) * 8L;
			long lastByte = TIF_DATA_OFFSET + lastBit / 8;
			if (lastByte >= tif.length)
			{
				Log.v(TAG, "warning: tif is too short, len=" + tif.length);
				return null;
			}
		}

		Bitmap bmp2 = null;
		try
		{
			bmp2 = Bitmap.createBitmap(pic_width, pic_height, Bitmap.Config.RGB_565);
		}
		catch (Throwable th)
		{
			Log.v(TAG, "throwable: " + th.toString());
			return null;
		}

		// NOTE(review): the byte index treats the pixel data as one continuous bit
		// stream; for widths that are not a multiple of 8 this may not match the
		// row layout of the file - confirm against the writer of tessinput.tif.
		int [] pixbuf = new int[8];
		for (int h=0; h<pic_height; h++)
			for (int w=0; w<pic_width; w++)
			{
				if (w%8 == 0)
				{
					// read 8b -> 8pixels
					int packed = tif[TIF_DATA_OFFSET + (h*pic_width + w)/8];
					// Store them in a buffer. Each bit is taken from the untouched
					// byte; the previous code shifted the value cumulatively and
					// therefore sampled bits 0,1,3,6,... instead of 0..7.
					for (int i=0; i<8; i++)
					{
						int bit = (packed >> i) & 0x00000001;
						pixbuf[7-i] = (bit != 0) ? 0x00ffffff : 0;
					}
				}
				bmp2.setPixel(w, h, pixbuf[w%8]);				
			}

		return bmp2;

	}








	/**
	 * Interprets a (signed) Java byte as an unsigned value.
	 * @param b value to be converted
	 * @return the value in the range 0..255
	 */
	public int unsignedByte2Int(byte b) 
	{
		// negative bytes represent 128..255
		return (b < 0) ? (b + 256) : b;
	}

	/**
	 * Reads a whole file into a byte buffer.
	 * The width/height/bpp arguments are only used for a size sanity warning;
	 * the complete file is always returned.
	 *
	 * @param file the image-file to be read
	 * @param width the image width
	 * @param height the image height
	 * @param bpp bytes per pixel
	 * @return a vector with the file content, no compression; null if the file
	 *         is larger than Integer.MAX_VALUE bytes
	 * @throws IOException if the file cannot be opened or not completely read
	 */
	public byte[] GetBytesFromFile(File file, int width, int height, int bpp) throws IOException 
	{
		InputStream is = new FileInputStream(file);
		try
		{
			// Get the size of the file
			long length = file.length();
			// long arithmetic: width*height*bpp can overflow an int for large images
			if (length < (long) width * height * bpp)
			{
				// Only a warning: for the 1-bit tif case a pixel takes 1/8 byte,
				// so a "too small" file can still be complete.
				Log.v(TAG, "warning: file len is too small "+length);
			}

			// An array cannot be larger than Integer.MAX_VALUE elements.
			if (length > Integer.MAX_VALUE) {
				Log.v(TAG, "warning: length more than max allowed value");
				return null;
			}

			// Create the byte array to hold the data
			byte[] bytes = new byte[(int)length];

			// Read in the bytes; read() may deliver fewer bytes than requested
			int offset = 0;
			while (offset < bytes.length) {
				int numRead = is.read(bytes, offset, bytes.length-offset);
				if (numRead < 0)
					break;
				offset += numRead;
			}

			// Ensure all the bytes have been read in
			if (offset < bytes.length) {
				throw new IOException("Could not completely read file "+file.getName());
			}

			return bytes;
		}
		finally
		{
			// close on every path (early return and exceptions included)
			is.close();
		}
	}	

	/*
	 * ----------------------------------------------------------------------------------------
	 * OCR functions
	 * ----------------------------------------------------------------------------------------
	 */

	/**
	 * Transforms a 3-bytes-per-pixel image to black/white with 1 byte per
	 * pixel, in place: after the call the first width*height bytes of the
	 * buffer hold 0xff (white) or 0x00 (black).
	 * The pixels are processed in runs of width/16 pixels; a pixel becomes
	 * white when its intensity is close enough to the brightest intensity of
	 * its run. Nothing is done when bpp is 1.
	 *
	 * @param bw_img the image
	 * @param width image width
	 * @param height image height
	 * @param bpp bytes per pixel
	 * @return the new bpp (always 1)
	 */
	public int bwFilterImage(byte[] bw_img, int width, int height, int bpp)
	{
		final int MAX_FLOOR = 0x30;		// lowest value the per-run maximum can take
		final int WHITE_MARGIN = 0x15;	// max distance to the run maximum for a white pixel

		final int W_RED = 3;
		final int W_GREEN = 6;
		final int W_BLUE = 1;

		//!! TBD if bpp=1
		if (bpp == 1)
			return 1;

		final int pixelCount = width * height;
		// a run length below 1 gives the same result as runs of one pixel
		final int runLength = Math.max(1, width/16);

		for (int runStart = 0; runStart < pixelCount; runStart += runLength)
		{
			final int runEnd = Math.min(runStart + runLength, pixelCount);

			// pass 1: brightest intensity of the run (read before any pixel of the run is overwritten)
			int brightest = MAX_FLOOR;
			for (int p = runStart; p < runEnd; p++)
			{
				int weighted = (bw_img[p*3+0] & 0xFF)*W_RED
						+ (bw_img[p*3+1] & 0xFF)*W_GREEN
						+ (bw_img[p*3+2] & 0xFF)*W_BLUE;
				int level = weighted / 10;
				if (level > brightest)
					brightest = level;
			}

			// pass 2: threshold every pixel of the run against that maximum
			for (int p = runStart; p < runEnd; p++)
			{
				int weighted = (bw_img[p*3+0] & 0xFF)*W_RED
						+ (bw_img[p*3+1] & 0xFF)*W_GREEN
						+ (bw_img[p*3+2] & 0xFF)*W_BLUE;
				int level = weighted / 10;

				boolean white = (brightest - level) < WHITE_MARGIN;
				bw_img[p] = white ? (byte) -1 /* =0xff */ : (byte) 0x00;
			}
		}

		// we give up 3Bpp, and replace them with 1Bpp (Bpp = Byte Per Pixel)
		return 1;
	}

	/** 
	 * Apply OCR to a byte[] image: the image is handed to the native
	 * library, the whole area is selected, the configured page segmentation
	 * mode is set and the recognition is run. The BW filter step is disabled
	 * in this version.
	 *
	 * @param bw_img the image as a vector (no compression)
	 * @param width image width
	 * @param height image height
	 * @param bpp bytes per pixel
	 * @return the OCR result; the exception message if an exception occurred
	 */
	public String ImgOCRAndFilter(byte[] bw_img, int width, int height, int bpp)
	{
		try
		{
			// copy the image in the internal OCR buffers
			setImageNative(bw_img, width, height, bpp);
			// set rectangle to be OCRized
			setRectangleNative(0, 0, width, height);
			// set the page segmentation mode
			setPageSegModeNative(mConfig.m_iPSMMode);
			// OCR the buffered image
			return recognizeNative();
		}
		catch (Exception ex)
		{
			return ex.getMessage();
		}
	}


	/**
	 * OCR an int[] image and post-process the result.
	 * The raw OCR text is split at '/' into words, each word gets its
	 * confidence, and the text is then polished:
	 * in line-mode only sufficiently trusted words are kept; otherwise (when a
	 * minimum word confidence is configured) rejected words are replaced by "... ".
	 * The words are stored in m_asWords and the displayable text, with suspect
	 * words in italic, in m_ssOCRResult. If anything fails, the exception text
	 * is returned and m_asWords is reset to an empty list.
	 *
	 * @param bw_img the image, no compression
	 * @param width image width
	 * @param height image height
	 * @param bHorizontalDisp if image is horizontal or vertical
	 * @param bLineMode true to run in line-mode
	 * @return the polished OCR text, or the exception text on failure
	 */
	public String ImgOCRAndFilter(int[] bw_img, int width, int height, boolean bHorizontalDisp, boolean bLineMode)
	{
		String text_ret = "";
		m_bLineMode = bLineMode;
		try {
			// line-mode is always handled as horizontal
			final boolean horizontal = bHorizontalDisp || bLineMode;

			// copy the image in the internal OCR buffers
			setImageNative(bw_img, width, height, mConfig.m_bUseBWFilter, horizontal);

			// set rectangle to be OCRized
			if (horizontal)
				setRectangleNative(0, 0, width, height);
			else 
				setRectangleNative(0, 0, height, width);

			// set the page segmentation mode
			setPageSegModeNative(mConfig.m_iPSMMode);

			// OCR the buffered image
			String bulk_text = recognizeNative();
			text_ret = bulk_text;
			Log.v(TAG, "OCR brute text is ["+bulk_text+"]");

			// get the confidence for each word; a missing array counts as a length mismatch
			int wconf[]	= wordConfidencesNative();
			int confCount = (wconf == null) ? 0 : wconf.length;
			Log.v(TAG, "no confidences="+confCount);

			// get the list of words decoded
			String st[] = bulk_text.split("/");
			Log.v(TAG, "no words=" + st.length);

			// split() can return an empty array, so the word count must not go negative
			int len = Math.max(0, st.length-1);
			boolean bLenMatch = (wconf != null && confCount == len);
			if (!bLenMatch)
			{
				Log.v(TAG, "errror: the word len do not match --------------------------");
			}

			// create the list of words and attributes; publish it only when complete
			Word[] words = new Word[len];
			for (int i=0; i<len; i++)
				words[i] = new Word( st[i], bLenMatch ? wconf[i] : 0 );
			m_asWords = words;

			Log.v(TAG, "allocated memory, linemode=" + m_bLineMode);

			if (m_bLineMode)
			{
				// in line-mode we keep only valid dictionary words and words with a large OCR trust
				text_ret = buildLineModeText();
			}
			else if (mConfig.m_iMinWordConfidence > 0)
			{
				// polish the results according to the configuration, if not in line-mode
				text_ret = buildFullModeText();
			}
		}
		catch (Exception ex)
		{
			text_ret = ex.toString();
			Log.v(TAG, "Exception (ImgOCRAndFilter): " + ex.toString());
			// Drop any missing/partial/stale word list: its offsets do not belong
			// to the error text and would break the span creation below.
			m_asWords = new Word[0];
		}

		// create the spannable string, to be displayed
		m_ssOCRResult = new SpannableString(text_ret);

		if (!m_bLineMode)
			italicizeSuspectWords();

		return text_ret;
	}

	/**
	 * Line-mode polishing: builds the text from the words in m_asWords that are trusted enough.
	 */
	private String buildLineModeText()
	{
		StringBuilder text = new StringBuilder();
		StringBuilder debug = new StringBuilder();

		for (Word word : m_asWords)
		{
			debug.append("\t[").append(word.m_sBody).append("/")
				.append(word.m_bIsValidWord).append("/")
				.append(word.m_iConfidence).append("]");

			final boolean longEnough = word.m_sBody.length() >= 3;

			// words with a very high confidence are always shown
			if (longEnough && word.m_iConfidence > 80)
			{
				text.append(word.m_sBody).append(" ");
				continue;
			}

			// this is a void word (in line mode we do not display anything for it);
			// IsValidLineModeWord checks if the word contains signs or numbers
			if (!longEnough || !IsValidLineModeWord(word.m_sBody))
				continue;

			// show the valid dictionary words and the words above the configured confidence
			if (word.m_bIsValidWord || word.m_iConfidence > mConfig.m_iMinWordConfidence)
				text.append(word.m_sBody).append(" ");
		}

		Log.v(TAG, "polished text is ["+ text +"]");
		Log.v(TAG, "debug text is ["+ debug +"]");
		return text.toString();
	}

	/**
	 * Full-mode polishing: builds the text from m_asWords, replaces rejected words
	 * by "... " and records the end offset of every word in m_iEnd.
	 */
	private String buildFullModeText()
	{
		StringBuilder text = new StringBuilder();
		StringBuilder debug = new StringBuilder();

		for (Word word : m_asWords)
		{
			debug.append("\t[").append(word.m_sBody).append("/")
				.append(word.m_bIsValidWord).append("/")
				.append(word.m_iConfidence).append("]");

			final boolean voidWord = word.m_sBody.compareTo(" ") == 0
					|| word.m_sBody.contains("#")
					|| word.m_sBody.length() == 0;

			if (voidWord)
			{
				// void or unreadable word
				Log.v(TAG, "1: [" + word.m_sBody + "]");
				text.append("... ");
			}
			else if (word.m_bIsValidWord)
			{
				// show the valid dictionary words
				Log.v(TAG, "2: ["+word.m_sBody+"]");
				text.append(word.m_sBody).append(" ");
			}
			else if (word.m_iConfidence > mConfig.m_iMinWordConfidence)
			{
				// invalid dictionary word, but pretty good confidence - show it
				Log.v(TAG, "3: ["+word.m_sBody+"]");
				text.append(word.m_sBody).append(" ");
			}
			else
			{
				text.append("... ");
			}

			if (word.m_bNewLine)
				text.append("\n");
			word.m_iEnd = text.length();
		}

		Log.v(TAG, "polished text is ["+ text +"]");
		Log.v(TAG, "debug text is ["+ debug +"]");
		return text.toString();
	}

	/**
	 * Marks every non-dictionary or low-confidence word of m_asWords as italic in m_ssOCRResult.
	 */
	private void italicizeSuspectWords()
	{
		int start = 0;
		for (Word word : m_asWords)
		{
			final int end = word.m_iEnd;
			final boolean lowConfidence = word.m_iConfidence != 0
					&& word.m_iConfidence < mConfig.m_iMinWordConfidence;

			if (!word.m_bIsValidWord || lowConfidence)
			{
				// Typeface.ITALIC is the same constant the original reached via Typeface.SERIF.MONOSPACE.ITALIC
				m_ssOCRResult.setSpan(new StyleSpan(Typeface.ITALIC), start, end, Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
				Log.v(TAG, "ss: "+start+"-"+end);
			}
			start = end;
		}
	}

	

	/**
	 * Get the index of one word, given the position in the decoded text. 
	 * @param iPosInString the position in the string
	 * @return the index of the word whose text range contains iPosInString, or -1 if no word contains it (e.g. the position is past the end of the text)
	 */
	public static int GetWordIndex(int iPosInString)
	{
		// Each word owns the half-open range [end of the previous word, its own m_iEnd)
		// of the decoded text; the range of the first word starts at 0.
		int rangeStart = 0;
		for (int idx = 0; idx < m_asWords.length; idx++)
		{
			final int rangeEnd = m_asWords[idx].m_iEnd;
			if (rangeStart <= iPosInString && iPosInString < rangeEnd)
			{
				return idx;
			}
			rangeStart = rangeEnd;
		}

		// no word range contains the requested position
		return -1;
	}



	/**
	 *  OCR a non-coded image from a file, 1 pixel = bpp bytes
	 * @param filename the image-file
	 * @param width image width
	 * @param height image height
	 * @param bpp bytes per pixel
	 * @return the OCR result, or an error message if the file could not be loaded or an exception occurred
	 */
	public String FileOCR(String filename, int width, int height, int bpp)
	{
		try
		{
			// step 1: read the image bytes; a null result is reported as a load failure
			final byte[] bw_img = GetBytesFromFile(new File(filename), width, height, bpp);
			if (bw_img == null)
			{
				return "could not load file\n";
			}

			// step 2: run the OCR over the loaded bytes and hand back its text
			return ImgOCRAndFilter(bw_img, width, height, bpp);
		}
		catch (Exception ex)
		{
			// getMessage() is null for exceptions created without a detail message
			// (e.g. most NullPointerExceptions); fall back to toString() so that the
			// error path never hands null back to the caller.
			final String msg = ex.getMessage();
			return (msg != null) ? msg : ex.toString();
		}
	}


	
	
	
	

	/**
	 * Verifies whether the provided word is valid for line-mode, where we have more constraints than in full-mode
	 * @param s - the word
	 * @return validity
	 */
	private boolean IsValidLineModeWord(String s)
	{
		// A purely alphabetic word is accepted outright. Otherwise any '#' rejects
		// the word, and whatever is left is decided by the composed-word check.
		final boolean lettersOnly = s.matches("[a-zA-Z]+");
		return lettersOnly || (!s.contains("#") && IsValidComposedWord(s));
	}

	/**
	 * This is used in 'word' constructor, we do not want single-letters and signs to be valid dictionary words
	 * @param text the word to be searched
	 * @return validity of the word, according to the dictionary
	 */
	public static boolean MyIsValidDictionaryWord(String text)
	{
		// Anything shorter than two characters (digits, signs, single letters) is
		// never treated as a dictionary word; longer text goes to the composed-word check.
		final boolean longEnough = text.length() >= 2;
		return longEnough && IsValidComposedWord(text);
	}

	/**
	 * Wrapper for JNI-IsValidWord - all chars converted to lower case, check for composed-words (ex: white-grey)
	 * @param word the word to be searched
	 * @return validity of the word, according to the dictionary
	 */
	public static boolean IsValidComposedWord(String word)
	{
		// Lower-case with a fixed locale: the default-locale toLowerCase() maps 'I' to
		// the dotless i (U+0131) under e.g. a Turkish locale, which the letter filter
		// below would then treat as a separator and split the word in the wrong place.
		String lWord = word.toLowerCase(java.util.Locale.ENGLISH);

		// this checks for composed words, ex: well-equipped, grey-white, Montenegro's:
		// every non-letter becomes a separator and each part is looked up on its own
		String ss = lWord.replaceAll("[^a-zA-Z]", " ");
		String [] vect = ss.split(" ");

		for (String part : vect)
		{
			// consecutive separators produce empty parts - those are skipped
			if (part.length() > 0 && !get().isValidWord(part))
			{
				// even if one part is not valid, the whole word is rejected
				Log.v(TAG, "word: ["+part +"] is not valid");
				return false;
			}
		}

		return true;
	}

	/*
	 * ----------------------------------------------------------------------------------------
	 * Java-lib variables
	 * ----------------------------------------------------------------------------------------
	 */
	/* The methods implemented by the 'ocr' native library, which is packaged
	 * with this application.
	 */

	// general initialization/cleanup/setup functions
	public native void    	classInitNative();				// init the lib (1st to be called at startup)
	public native void    	initializeNativeDataNative();	// allocate the lib buffers (2nd to be called)
	public native boolean 	openNative(String sLanguage);	// init the api with a language

	public native void    	cleanupNativeDataNative();		// delete the lib buffers
	public native void		clearResultsNative();			// api clear
	public native void		closeNative();					// api.end, called by the destructor automatically

	public native void		setVariableNative(				// set a lib variable (name/value pair, both passed as strings)
			String var, String value);    



	// language functions    
	public static native String[]  getLanguagesNative();	// get the language list
	public native int		getShardsNative(				// get the shard of the language
			String lang);
	public native boolean 	isValidWord(String word);		// is the word valid according to the installed language 



	// aux functions before OCR
	public native void		setImageNative(					// copy a byte[] image to the internal api buffers
			byte[] image, int width, int height, int bpp);
	public native void		setImageNative(					// copy an int[] image to the internal api buffers
			int[] image, int width, int height, 			// NOTE(review): the meaning of bBWFilter/bHorizDisp is not
			boolean bBWFilter, boolean bHorizDisp);			// visible here - confirm against the native implementation
	public native void		releaseImageNative();			// release the internal api buffers
	public native void		setRectangleNative(				// set the rectangle where OCR will focus 
			int left, int top, int width, int height);
	public native void		setPageSegModeNative(			// set the page segmentation mode
			int mode);


	// OCR
	public native String	recognizeNative(				// do OCR over the parameter image
			byte[] image, int width, int height, int bpp);
	public native String	recognizeNative();				// do OCR over the image in the api buffers (all ocr)
	public native String	recognizeNative(int nopass);	// do OCR over the image in the api buffers, 
	// 		with nopass selecting the passes (options: 0=all, 1 or 2 passes)


	// aux functions, to be run after OCR    
	public native int		meanConfidenceNative();			// mean confidence (last OCR)
	public native int[]		wordConfidencesNative(); 		// confidence for each word (last OCR)
	public native String 	getBoxText();					// get the box for each letter


	// debug functions    
	public native void    	closeDebug();					// clean close the debug (if any)
	public native String 	libVer();						// get the lib version

	// NOTE(review): presumably written and read by the native side only - confirm before touching it from Java
	public int mNativeData; 								// storage space for the library's internal buffers


	/* this is used to load the 'ocr' library on application
	 * startup. The library has already been unpacked into
	 * /data/data/com.../lib/libocr.so at installation time by the package manager.
	 */
	static {
		// runs once, when the class is initialized, so the 'ocr' library is loaded
		// before any of the native methods declared above can be invoked
		System.loadLibrary("ocr");
	}
}