//----------------------------------------------------------------------------//
//                                                                            //
//                        T e s s e r a c t O r d e r                         //
//                                                                            //
//----------------------------------------------------------------------------//
// <editor-fold defaultstate="collapsed" desc="hdr">                          //
//  Copyright © Hervé Bitteur and others 2000-2013. All rights reserved.      //
//  This software is released under the GNU General Public License.           //
//  Goto http://kenai.com/projects/audiveris to report bugs or suggestions.   //
//----------------------------------------------------------------------------//
// </editor-fold>
package omr.text.tesseract;

import java.awt.Rectangle;

import omr.WellKnowns;

import omr.sheet.SystemInfo;

import omr.text.FontInfo;
import omr.text.TextChar;
import omr.text.TextLine;
import omr.text.TextWord;

import tesseract.TessBridge;
import tesseract.TessBridge.PIX;
import tesseract.TessBridge.ResultIterator;
import tesseract.TessBridge.ResultIterator.Level;
import tesseract.TessBridge.TessBaseAPI;
import tesseract.TessBridge.TessBaseAPI.SegmentationMode;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import javax.imageio.ImageIO;
import javax.imageio.ImageWriter;
import javax.imageio.spi.IIORegistry;
import javax.imageio.stream.ImageOutputStream;

/**
 * Class {@code TesseractOrder} carries a processing order submitted
 * to Tesseract OCR program.
 * <p>
 * An order is built from a {@link BufferedImage} (converted to a native PIX
 * at construction time) and is meant to be processed once: {@link #process()}
 * releases the native image and API when it completes.
 *
 * @author Hervé Bitteur
 */
public class TesseractOrder
{
    //~ Static fields/initializers ---------------------------------------------

    /** Usual logger utility */
    private static final Logger logger = LoggerFactory.getLogger(
            TesseractOrder.class);

    /** To avoid repetitive warnings if OCR binding failed */
    private static boolean userWarned;

    /** Needed (for OpenJDK) to register TIFF support. */
    static {
        IIORegistry registry = IIORegistry.getDefaultInstance();
        registry.registerServiceProvider(
                new com.sun.media.imageioimpl.plugins.tiff.TIFFImageWriterSpi());
        registry.registerServiceProvider(
                new com.sun.media.imageioimpl.plugins.tiff.TIFFImageReaderSpi());
    }

    //~ Instance fields --------------------------------------------------------
    //
    /** Containing system. */
    private final SystemInfo system;

    /** Serial number for this order. */
    private final int serial;

    /** Image label. */
    private final String label;

    /** Should we keep a disk copy of the image?. */
    private final boolean keepImage;

    /** Language specification. */
    private final String lang;

    /** Desired handling of layout. */
    private final SegmentationMode segMode;

    /** The dedicated API (null until process() runs, and again afterwards). */
    private TessBaseAPI api;

    /** The image being processed (null once released by finish()). */
    private PIX image;

    //~ Constructors -----------------------------------------------------------
    //
    //----------------//
    // TesseractOrder //
    //----------------//
    /**
     * Creates a new TesseractOrder object.
     *
     * @param system        The containing system
     * @param label         A debugging label (such as glyph id)
     * @param serial        A unique id for this order instance
     * @param keepImage     True to keep a disk copy of the image
     * @param lang          The language specification
     * @param segMode       The desired page segmentation mode
     * @param bufferedImage The image to process
     *
     * @throws UnsatisfiedLinkError When bridge to C++ could not be loaded
     * @throws IOException          When temporary Tiff buffer failed
     * @throws RuntimeException     When PIX image failed
     */
    public TesseractOrder (SystemInfo system,
                           String label,
                           int serial,
                           boolean keepImage,
                           String lang,
                           SegmentationMode segMode,
                           BufferedImage bufferedImage)
            throws UnsatisfiedLinkError, IOException
    {
        this.system = system;
        this.label = label;
        this.serial = serial;
        this.keepImage = keepImage;
        this.lang = lang;
        this.segMode = segMode;

        // Build a PIX from the image provided
        // (toTiffBuffer reads keepImage/serial/label, hence assigned above)
        ByteBuffer buf = toTiffBuffer(bufferedImage);
        image = PIX.readMemTiff(buf, buf.capacity(), 0);

        if (image == null) {
            logger.warn("Invalid image {}", label);
            throw new RuntimeException("Invalid image");
        }
    }

    //~ Methods ----------------------------------------------------------------
    //
    //---------//
    // process //
    //---------//
    /**
     * Actually allocate a Tesseract API and recognize the image.
     * <p>
     * Native resources (image and API) are released before this method
     * returns normally, so an order can be processed only once; a further
     * call logs a warning and returns null.
     *
     * @return the sequence of lines found, or null if OCR failed
     * @throws RuntimeException wrapping an UnsatisfiedLinkError when the
     *                          Tesseract bridge could not be linked
     */
    public List<TextLine> process ()
    {
        if (image == null) {
            // The native image was already freed by a previous run
            logger.warn("Tesseract order {} already processed", label);

            return null;
        }

        try {
            api = new TessBaseAPI(WellKnowns.OCR_FOLDER.getPath());

            // Init API with proper language
            if (!api.Init(lang)) {
                logger.warn(
                        "Could not initialize Tesseract with lang {}",
                        lang);

                return finish(null);
            }

            // Set API image
            api.SetImage(image);

            // Perform layout analysis according to segmentation mode
            api.SetPageSegMode(segMode);
            api.AnalyseLayout();

            // Perform image recognition
            if (api.Recognize() != 0) {
                logger.warn("Error in Tesseract recognize");

                return finish(null);
            }

            // Extract lines
            return finish(getLines());
        } catch (UnsatisfiedLinkError ex) {
            // Warn the user only once about the missing native bridge
            if (!userWarned) {
                logger.warn("Could not link Tesseract bridge", ex);
                logger.warn(
                        "java.library.path={}",
                        System.getProperty("java.library.path"));
                userWarned = true;
            }

            throw new RuntimeException(ex);
        }
    }

    //--------//
    // finish //
    //--------//
    /**
     * A convenient way to cleanup Tesseract resources while ending
     * the current processing.
     * <p>
     * References are cleared once released, so that a repeated call cannot
     * free the same native resources twice.
     *
     * @param lines the lines found, if any
     * @return the lines found, if any
     */
    private List<TextLine> finish (List<TextLine> lines)
    {
        if (image != null) {
            PIX.freeData(image);
            image = null;
        }

        if (api != null) {
            api.End();
            api = null;
        }

        return lines;
    }

    //---------//
    // getFont //
    //---------//
    /**
     * Map Tesseract3 font attributes to our own FontInfo class.
     *
     * @param att Font attributes out of OCR, perhaps null
     * @return our FontInfo structure, or null
     */
    private FontInfo getFont (TessBridge.FontAttributes att)
    {
        if (att != null) {
            return new FontInfo(
                    att.isBold,
                    att.isItalic,
                    att.isUnderlined,
                    att.isMonospace,
                    att.isSerif,
                    att.isSmallcaps,
                    att.pointsize,
                    att.fontName);
        } else {
            return null;
        }
    }

    //----------//
    // getLines //
    //----------//
    /**
     * Build the hierarchy of TextLine / TextWord / TextChar instances
     * out of the results of OCR recognition.
     *
     * @return the sequence of lines, or null if the OCR output could not be
     *         decoded (no iterator, no font info on a word, or any exception)
     */
    private List<TextLine> getLines ()
    {
        // A long dash not wider than one interline is considered a plain "-"
        final int maxDashWidth = system.getScoreSystem().getScale().
                getInterline();
        ResultIterator it = api.GetIterator();

        if (it == null) {
            // Nothing to iterate on (presumably no recognition results)
            logger.warn("No tesseract result iterator for {}", label);

            return null;
        }

        List<TextLine> lines = new ArrayList<>(); // Lines built so far
        TextLine line = null; // Line being built
        TextWord word = null; // Word being built

        try {
            do {
                // Skip empty stuff
                if (it.Empty(Level.SYMBOL)) {
                    continue;
                }

                // Start of line?
                if (it.IsAtBeginningOf(Level.TEXTLINE)) {
                    line = new TextLine(system);
                    logger.debug("{} {}", label, line);
                    lines.add(line);
                }

                // Start of word?
                if (it.IsAtBeginningOf(Level.WORD)) {
                    FontInfo fontInfo = getFont(it.WordFontAttributes());

                    if (fontInfo == null) {
                        logger.debug("No font info on {}", label);

                        return null;
                    }

                    word = new TextWord(
                            it.BoundingBox(Level.WORD),
                            it.GetUTF8Text(Level.WORD),
                            it.Baseline(Level.WORD),
                            (int) Math.rint(it.Confidence(Level.WORD)),
                            fontInfo,
                            line);
                    logger.debug("    {}", word);
                    line.appendWord(word);

                    //                    // Heuristic... (just to test)
                    //                    boolean isDict = it.WordIsFromDictionary();
                    //                    boolean isNumeric = it.WordIsNumeric();
                    //                    boolean isLatin = encoder.canEncode(wordContent);
                    //                    int conf = (int) Math.rint(it.Confidence(WORD));
                    //                    int len = wordContent.length();
                    //                    boolean isValid = isLatin
                    //                            && (conf >= 80
                    //                            || (conf >= 50 && ((isDict && len > 1) || isNumeric)));
                }

                // Char/symbol to be processed
                // Fix long "—" vs short "-"
                String charValue = it.GetUTF8Text(Level.SYMBOL);
                Rectangle charBox = it.BoundingBox(Level.SYMBOL);

                if (charValue.equals("—") && charBox.width <= maxDashWidth) {
                    charValue = "-";

                    // Containing word value will be updated later
                }

                word.addChar(new TextChar(charBox, charValue));
            } while (it.Next(Level.SYMBOL));

            return lines;
        } catch (Exception ex) {
            // Boundary with native OCR output: any decoding failure
            // (including a symbol met before any line/word start) ends here
            logger.warn("Error decoding tesseract output", ex);

            return null;
        } finally {
            // Always release the native iterator
            it.delete();
        }
    }

    //--------------//
    // toTiffBuffer //
    //--------------//
    /**
     * Convert the given image into a TIFF-formatted ByteBuffer for
     * passing it directly to Tesseract.
     * A copy of the tiff buffer can be saved on disk, if so desired.
     *
     * @param image the input image
     * @return a buffer in TIFF format, whose capacity is the TIFF data size
     * @throws IOException if no TIFF writer is available, if the image could
     *                     not be encoded, or if the disk copy could not be
     *                     written
     */
    private ByteBuffer toTiffBuffer (BufferedImage image)
            throws IOException
    {
        Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName(
                "tiff");

        if (!writers.hasNext()) {
            throw new IOException("No ImageIO writer available for TIFF");
        }

        ImageWriter writer = writers.next();
        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        try (final ImageOutputStream ios = ImageIO.createImageOutputStream(
                baos)) {
            if (ios == null) {
                throw new IOException("Could not create image output stream");
            }

            writer.setOutput(ios);
            writer.write(image);
        } finally {
            // Release the resources held by the writer (runs after ios close)
            writer.dispose();
        }

        byte[] bytes = baos.toByteArray();
        ByteBuffer buf = ByteBuffer.allocate(bytes.length);
        buf.put(bytes);

        // Should we keep a local copy of this buffer on disk?
        if (keepImage) {
            String name = String.format("%03d-", serial)
                          + ((label != null) ? label : "");
            File file = new File(WellKnowns.TEMP_FOLDER, name + ".tif");

            // Make sure the TEMP directory exists (with any missing parent)
            if (!WellKnowns.TEMP_FOLDER.exists()
                && !WellKnowns.TEMP_FOLDER.mkdirs()
                && !WellKnowns.TEMP_FOLDER.exists()) {
                throw new IOException(
                        "Could not create folder " + WellKnowns.TEMP_FOLDER);
            }

            try (final FileOutputStream fos = new FileOutputStream(file)) {
                fos.write(bytes);
            }
        }

        return buf;
    }
}