package net.hearthstats.game.ocr;

import net.sourceforge.tess4j.Tesseract;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.awt.*;
import java.awt.image.BufferedImage;
import java.awt.image.BufferedImageOp;
import java.awt.image.ColorConvertOp;
import java.awt.image.RescaleOp;

/**
 * Base class for performing OCR. Subclasses can override methods to customise the handling of OCR on different
 * types of text.
 *
 * <p>The overall pipeline, driven by {@link #process(BufferedImage)}, is: crop, filter, save a debug copy,
 * run Tesseract, parse the raw text, and then ask the subclass whether another iteration is required.
 */
public abstract class OcrBase {

  protected static final Logger debugLog = LoggerFactory.getLogger(OcrBase.class);

  /** Factor by which the cropped image is enlarged before being handed to the OCR engine. */
  private static final int UPSCALE_FACTOR = 3;

  /** Mask covering the red, green and blue channels of a packed ARGB pixel. */
  private static final int RGB_MASK = 0x00FFFFFF;

  /**
   * Crops the image to the expected location of the text to OCR. Different iterations may crop the image
   * differently if necessary.
   *
   * @param image A full screenshot image that needs to be cropped
   * @param iteration The iteration number, zero-based
   * @return the cropped region of the screenshot that should contain the text
   */
  protected abstract BufferedImage crop(BufferedImage image, int iteration);

  /**
   * The filename of the image written to disk for debugging.
   *
   * @return the filename to save the debug image under, or {@code null} if no copy should be saved
   */
  protected abstract String getFilename();

  /**
   * Defines which page segmentation mode to use in Tesseract. This defines what type of OCR is performed on the image,
   * which if set inappropriately means nothing will be detected. Each subclass should pick the most appropriate mode
   * for the type of text undergoing OCR.
   *
   * @see net.sourceforge.tess4j.TessAPI.TessPageSegMode
   * @param iteration The iteration number, zero-based
   * @return a value from TessAPI.TessPageSegMode
   */
  protected abstract int getTesseractPageSegMode(int iteration);

  /**
   * Parse an OCR string to fix up any obvious errors, such as 'I' instead of '1' in a number.
   *
   * @param ocrResult A string generated by OCR
   * @param iteration The iteration number, zero-based
   * @return The OCR string with errors fixed, if possible
   */
  protected abstract String parseString(String ocrResult, int iteration);

  /**
   * Some OCR might require multiple iterations to find the right spot. Implementations decide, after each
   * attempt, whether another attempt is needed. Return {@code false} unconditionally if only one OCR attempt
   * should ever be made.
   *
   * @param ocrResult The parsed string produced by the attempt that has just completed
   * @param iteration The number of attempts completed so far - one-based, unlike other methods where it is
   *                  zero-based!
   * @return true if OCR should be processed again, or false if it's OK to continue with this OCR result
   */
  protected abstract boolean tryProcessingAgain(String ocrResult, int iteration);

  /**
   * Runs the complete OCR pipeline on a screenshot, repeating it for as long as
   * {@link #tryProcessingAgain(String, int)} asks for another attempt.
   *
   * @param image A full screenshot image containing the text to recognise
   * @return the parsed result of the last OCR attempt
   * @throws OcrException if filtering the image or running the OCR engine fails
   */
  public String process(BufferedImage image) throws OcrException {
    String result;
    int iteration = 0;

    do {
      // Pass the current iteration so that subclasses can try a different region on each attempt.
      BufferedImage croppedImage = crop(image, iteration);

      BufferedImage filteredImage;
      try {
        filteredImage = filter(croppedImage, iteration);
      } finally {
        croppedImage.flush();
      }

      String rawResult;
      try {
        saveCopy(filteredImage, iteration);
        rawResult = performOcr(filteredImage, iteration);
      } finally {
        filteredImage.flush();
      }

      result = parseString(rawResult, iteration);
      iteration++;
      // From here on 'iteration' is the count of completed attempts, hence one-based for tryProcessingAgain.
    } while (tryProcessingAgain(result, iteration));

    debugLog.debug("OCR recognised \"{}\"", result);

    return result;
  }

  /**
   * Filters the image to make it easier to OCR: converts the colour space, enlarges the image, inverts the
   * colours and increases the contrast.
   *
   * @param image A cropped image
   * @param iteration The iteration number, zero-based
   * @return a new, enlarged and filtered image; the supplied image is not modified
   * @throws OcrException if the contrast adjustment fails
   */
  protected BufferedImage filter(BufferedImage image, int iteration) throws OcrException {
    int width = image.getWidth();
    int height = image.getHeight();
    int bigWidth = width * UPSCALE_FACTOR;
    int bigHeight = height * UPSCALE_FACTOR;

    // Convert into the colour space of a plain RGB image.
    // NOTE(review): the target is TYPE_INT_RGB rather than TYPE_BYTE_GRAY, so this step may not actually
    // remove colour - confirm whether that is intended before changing it, as it affects OCR accuracy.
    BufferedImage grayscale = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
    BufferedImageOp grayscaleConv = new ColorConvertOp(
        image.getColorModel().getColorSpace(),
        grayscale.getColorModel().getColorSpace(),
        null);
    grayscaleConv.filter(image, grayscale);

    // Blow it up for OCR.
    BufferedImage newImage = new BufferedImage(bigWidth, bigHeight, BufferedImage.TYPE_INT_RGB);
    Graphics g = newImage.createGraphics();
    try {
      g.drawImage(grayscale, 0, 0, bigWidth, bigHeight, null);
    } finally {
      g.dispose();
    }
    grayscale.flush();

    // Invert the image. For an 8-bit channel, 255 - c is the same as c ^ 0xFF, so flipping the low 24 bits
    // inverts red, green and blue at once without allocating a Color object per pixel.
    for (int x = 0; x < bigWidth; x++) {
      for (int y = 0; y < bigHeight; y++) {
        newImage.setRGB(x, y, newImage.getRGB(x, y) ^ RGB_MASK);
      }
    }

    // Increase contrast.
    try {
      RescaleOp rescaleOp = new RescaleOp(1.8f, -30, null);
      rescaleOp.filter(newImage, newImage);  // Source and destination are the same.
    } catch (Exception e) {
      throw new OcrException("Error rescaling OCR image", e);
    }

    return newImage;
  }

  /**
   * Save a copy of the image to disk for use when debugging inaccurate OCR. Nothing is saved if
   * {@link #getFilename()} returns {@code null}.
   *
   * @param image An image to be processed by OCR. Should already be cropped and filtered.
   * @param iteration The iteration number, zero-based (not used by this default implementation)
   */
  protected void saveCopy(BufferedImage image, int iteration) {
    String filename = getFilename();
    if (filename != null) {
      // Use the value that was null-checked rather than asking the subclass a second time.
      BackgroundImageSave.savePngImage(image, filename);
    }
  }

  /**
   * Perform the actual OCR using Tesseract.
   *
   * @param image An image to be processed by OCR. Should be cropped and filtered to ensure the contrast is sufficient.
   * @param iteration The iteration number, zero-based; used to select the page segmentation mode
   * @return The text that was recognised in the image, with leading and trailing whitespace removed
   * @throws OcrException if Tesseract fails for any reason
   */
  protected String performOcr(BufferedImage image, int iteration) throws OcrException {
    try {
      Tesseract instance = Tesseract.getInstance();
      instance.setPageSegMode(getTesseractPageSegMode(iteration));
      String output = instance.doOCR(image);
      return output.trim();
    } catch (Exception e) {
      throw new OcrException("Error performing OCR", e);
    }
  }
}