Java Examples for com.aspose.pdf.Document.CallBackGetHocr

The following Java examples will help you understand the usage of com.aspose.pdf.Document.CallBackGetHocr. These source code samples are taken from different open source projects.

Example 1
Project: Aspose_Pdf_Java-master  File: ConvertingNonSearchablePDFToSearchablePDFDocument.java View source code
/**
 * Converts the PDF at {@code myDir + "outFile.pdf"} using an OCR callback and
 * saves the result as {@code myDir + "output971.pdf"}.
 *
 * <p>For every image handed to the callback, the image is written to a temporary
 * JPEG, the external {@code tesseract} executable is run on it in hOCR mode, and
 * the produced hOCR markup is returned as a string. The {@code tesseract}
 * executable must be resolvable by the operating system.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final String myDir = "PathToDir";
    Document doc = new Document(myDir + "outFile.pdf");
    // Create the callback that recognizes text in the PDF images. Any external OCR engine
    // that supports the hOCR standard (http://en.wikipedia.org/wiki/HOCR) can be used.
    // This example uses the free Tesseract OCR engine
    // (http://en.wikipedia.org/wiki/Tesseract_%28software%29).
    CallBackGetHocr cbgh = new CallBackGetHocr() {

        /**
         * Runs Tesseract on the given image and returns its hOCR output.
         *
         * @param img image to recognize
         * @return the hOCR markup, or an empty string when any step fails
         */
        @Override
        public String invoke(java.awt.image.BufferedImage img) {
            File imageFile = new File(myDir + "test.jpg");
            // Tesseract 3.03 and later name the hOCR output "out.hocr";
            // older releases write "out.html".
            File htmlOutput = new File(myDir + "out.html");
            File hocrOutput = new File(myDir + "out.hocr");
            // Remove leftovers from an earlier run so stale markup is never returned.
            htmlOutput.delete();
            hocrOutput.delete();
            try {
                if (!writeImage(img, imageFile)) {
                    return "";
                }
                if (!runTesseract(myDir + "test.jpg", myDir + "out")) {
                    return "";
                }
                return readText(htmlOutput.exists() ? htmlOutput : hocrOutput);
            } finally {
                // Delete the temp files on every path, including failures.
                htmlOutput.delete();
                hocrOutput.delete();
                imageFile.delete();
            }
        }

        /** Writes the image as JPEG; returns false if nothing usable was written. */
        private boolean writeImage(java.awt.image.BufferedImage img, File target) {
            try {
                // ImageIO.write returns false when no registered writer accepts the image.
                if (!ImageIO.write(img, "jpg", target)) {
                    System.err.println("No JPEG writer could encode the image for " + target);
                    return false;
                }
                return true;
            } catch (IOException e) {
                e.printStackTrace();
                return false;
            }
        }

        /** Runs tesseract in hOCR mode; returns false if it could not be run to completion. */
        private boolean runTesseract(String imagePath, String outputBase) {
            System.out.println("tesseract" + " " + imagePath + " " + outputBase + " hocr");
            try {
                // Passing the arguments as a list keeps paths containing spaces intact.
                java.lang.Process process = new ProcessBuilder("tesseract", imagePath, outputBase, "hocr")
                        .redirectErrorStream(true)
                        .start();
                // Drain the child's output so it cannot block on a full pipe buffer.
                java.io.InputStream childOutput = process.getInputStream();
                try {
                    byte[] buffer = new byte[4096];
                    while (childOutput.read(buffer) != -1) {
                        // discard
                    }
                } finally {
                    childOutput.close();
                }
                int exitCode = process.waitFor();
                if (exitCode != 0) {
                    System.err.println("tesseract exited with code " + exitCode);
                }
                return true;
            } catch (IOException e) {
                e.printStackTrace();
                return false;
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can observe the interruption.
                Thread.currentThread().interrupt();
                e.printStackTrace();
                return false;
            }
        }

        /** Reads the whole file as UTF-8 text; returns an empty string if it does not exist. */
        private String readText(File file) {
            StringBuilder fileContents = new StringBuilder((int) file.length());
            Scanner scanner = null;
            try {
                scanner = new Scanner(file, "UTF-8");
                String lineSeparator = System.getProperty("line.separator");
                while (scanner.hasNextLine()) {
                    fileContents.append(scanner.nextLine()).append(lineSeparator);
                }
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            } finally {
                if (scanner != null) {
                    scanner.close();
                }
            }
            return fileContents.toString();
        }
    };
    // End callBack
    doc.convert(cbgh);
    doc.save(myDir + "output971.pdf");
}