/** * Licensed to The Apereo Foundation under one or more contributor license * agreements. See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * * The Apereo Foundation licenses this file to you under the Educational * Community License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of the License * at: * * http://opensource.org/licenses/ecl2.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * */ package org.opencastproject.analysis.text; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import org.opencastproject.textextractor.api.TextFrame; import org.opencastproject.textextractor.tesseract.TesseractTextExtractor; import org.opencastproject.util.IoSupport; import org.opencastproject.util.StreamHelper; import org.apache.commons.io.FileUtils; import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.net.URL; /** * Test case for class {@link TesseractTextExtractor}. 
*/
public class TesseractTextExtractorTest {

  /** Classpath location of the test image */
  protected String testPath = "/image.tiff";

  /** Temporary copy of the test image, created in {@link #setUp()} and removed in {@link #tearDown()} */
  protected File testFile = null;

  /** Path to the tesseract binary */
  protected static String tesseractbinary = TesseractTextExtractor.TESSERACT_BINARY_DEFAULT;

  /** The tesseract text analyzer under test */
  protected TesseractTextExtractor analyzer = null;

  /** The text without punctuation */
  protected String text = "Land and Vegetation Key players on the";

  /** Additional options for tesseract */
  protected String addopts = "-psm 3";

  /** True if the tesseract binary could be executed; set once by {@link #testTesseract()} */
  private static boolean tesseractInstalled = true;

  /** Logging facility */
  private static final Logger logger = LoggerFactory.getLogger(TesseractTextExtractorTest.class);

  /**
   * Probes for a working tesseract installation by running <code>tesseract -v</code>.
   * <p>
   * Any failure (binary missing, non-zero exit status, interruption) is logged and recorded in
   * {@link #tesseractInstalled} so that tests needing the binary can be skipped instead of failing.
   */
  @BeforeClass
  public static void testTesseract() {
    StreamHelper stdout = null;
    StreamHelper stderr = null;
    StringBuffer errorBuffer = new StringBuffer();
    Process p = null;
    try {
      String[] command = {tesseractbinary, "-v"};
      p = new ProcessBuilder(command).start();
      stdout = new StreamHelper(p.getInputStream());
      stderr = new StreamHelper(p.getErrorStream(), errorBuffer);
      int status = p.waitFor();
      stdout.stopReading();
      stderr.stopReading();
      if (status != 0) {
        // Carry the exit status in the message so the warning logged below is meaningful
        throw new IllegalStateException("'" + tesseractbinary + " -v' exited with status " + status);
      }
    } catch (Throwable t) {
      if (t instanceof InterruptedException) {
        // Do not swallow the interruption: restore the flag for whoever is running this test class
        Thread.currentThread().interrupt();
      }
      logger.warn("Skipping text analysis tests due to unsatisifed tesseract installation");
      logger.warn(t.getMessage(), t);
      logger.warn(errorBuffer.toString());
      tesseractInstalled = false;
    } finally {
      IoSupport.closeQuietly(stdout);
      IoSupport.closeQuietly(stderr);
      IoSupport.closeQuietly(p);
    }
  }

  /**
   * Copies the test image from the classpath into a temporary file and creates the extractor under test.
   *
   * @throws java.lang.Exception
   *           if the test image is not on the classpath or cannot be copied to the temporary file
   */
  @Before
  public void setUp() throws Exception {
    URL imageUrl = this.getClass().getResource(testPath);
    if (imageUrl == null) {
      // Fail with a clear message instead of an anonymous NullPointerException while copying
      throw new IllegalStateException("Test image " + testPath + " not found on the classpath");
    }
    // NOTE(review): the temporary file gets a ".jpg" suffix although the resource is a TIFF - confirm this is intended
    testFile = File.createTempFile("ocrtest", ".jpg");
    FileUtils.copyURLToFile(imageUrl, testFile);
    analyzer = new TesseractTextExtractor(tesseractbinary);
    analyzer.setAdditionalOptions(addopts);
  }

  /**
   * Removes the temporary copy of the test image.
   *
   * @throws java.lang.Exception
   *           declared by the signature only; the quiet delete used here is not expected to throw
   */
  @After
  public void tearDown() throws Exception {
    FileUtils.deleteQuietly(testFile);
  }

  /**
   * Test method for {@link org.opencastproject.textextractor.tesseract.TesseractTextExtractor#getBinary()}.
   */
  @Test
  public void testGetBinary() {
    assertEquals(tesseractbinary, analyzer.getBinary());
  }

  /**
   * Test method for {@link org.opencastproject.textextractor.tesseract.TesseractTextExtractor#getAdditionalOptions()}.
   */
  @Test
  public void testGetAdditionalOptions() {
    assertEquals(addopts, analyzer.getAdditionalOptions());
  }

  /**
   * Test method for {@link org.opencastproject.textextractor.tesseract.TesseractTextExtractor#extract(java.io.File)}.
   * <p>
   * Reported as skipped (rather than silently passing) when no usable tesseract binary was found.
   */
  @Test
  public void testAnalyze() throws Exception {
    // Fully qualified so that the file's import block does not have to change
    org.junit.Assume.assumeTrue(tesseractInstalled);
    TextFrame frame = analyzer.extract(testFile);
    assertTrue(frame.hasText());
  }

}