Java Examples for com.aspose.pdf.Document.CallBackGetHocr
The following Java examples show how to use com.aspose.pdf.Document.CallBackGetHocr. These source code samples are taken from various open source projects.
Example 1
Project: Aspose_Pdf_Java-master File: ConvertingNonSearchablePDFToSearchablePDFDocument.java View source code |
public static void main(String[] args) { final String myDir = "PathToDir"; Document doc = new Document(myDir + "outFile.pdf"); // Create callBack - logic recognize text for pdf images. Use outer OCR supports HOCR standard(http://en.wikipedia.org/wiki/HOCR). // We have used free google tesseract OCR(http://en.wikipedia.org/wiki/Tesseract_%28software%29) CallBackGetHocr cbgh = new CallBackGetHocr() { @Override public String invoke(java.awt.image.BufferedImage img) { File outputfile = new File(myDir + "test.jpg"); try { ImageIO.write(img, "jpg", outputfile); } catch (IOException e1) { e1.printStackTrace(); } try { java.lang.Process process = Runtime.getRuntime().exec("tesseract" + " " + myDir + "test.jpg" + " " + myDir + "out hocr"); System.out.println("tesseract" + " " + myDir + "test.jpg" + " " + myDir + "out hocr"); process.waitFor(); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } // reading out.html to string File file = new File(myDir + "out.html"); StringBuilder fileContents = new StringBuilder((int) file.length()); Scanner scanner = null; try { scanner = new Scanner(file); String lineSeparator = System.getProperty("line.separator"); while (scanner.hasNextLine()) { fileContents.append(scanner.nextLine() + lineSeparator); } } catch (FileNotFoundException e) { e.printStackTrace(); } finally { if (scanner != null) scanner.close(); } // deleting temp files File fileOut = new File(myDir + "out.html"); if (fileOut.exists()) { fileOut.delete(); } File fileTest = new File(myDir + "test.jpg"); if (fileTest.exists()) { fileTest.delete(); } return fileContents.toString(); } }; // End callBack doc.convert(cbgh); doc.save(myDir + "output971.pdf"); }