/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.ocr;
import org.apache.commons.lang.SystemUtils;
import org.apache.tika.TikaTest;
import org.junit.Test;
import java.io.File;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
 * Unit tests for {@link TesseractOCRConfig}: the values exposed by a freshly
 * constructed config, values overridden from properties resources on the test
 * classpath, and the argument checks performed by the setters.
 */
public class TesseractOCRConfigTest extends TikaTest {

    /**
     * Builds a config from a properties resource on the test classpath.
     *
     * <p>Fails with an explicit message when the resource is missing, so that a
     * lost fixture does not surface as an unrelated assertion failure later on.
     * The stream is closed here; closing an already-closed stream has no effect
     * per the {@link java.io.Closeable} contract, so this is safe whether or not
     * the config constructor closes it as well.
     *
     * @param resourcePath absolute classpath location of the properties file
     * @return the config built from that resource
     * @throws Exception if constructing the config or closing the stream fails
     */
    private static TesseractOCRConfig loadConfigFromResource(String resourcePath) throws Exception {
        InputStream stream = TesseractOCRConfigTest.class.getResourceAsStream(resourcePath);
        assertTrue("Test resource not found: " + resourcePath, stream != null);
        try {
            return new TesseractOCRConfig(stream);
        } finally {
            stream.close();
        }
    }

    /** Checks every getter of a config created with the no-arg constructor. */
    @Test
    public void testNoConfig() throws Exception {
        TesseractOCRConfig config = new TesseractOCRConfig();
        assertEquals("Invalid default tesseractPath value", "", config.getTesseractPath());
        assertEquals("Invalid default tessdataPath value", "", config.getTessdataPath());
        assertEquals("Invalid default language value", "eng", config.getLanguage());
        assertEquals("Invalid default pageSegMode value", "1", config.getPageSegMode());
        assertEquals("Invalid default minFileSizeToOcr value", 0, config.getMinFileSizeToOcr());
        assertEquals("Invalid default maxFileSizeToOcr value", Integer.MAX_VALUE, config.getMaxFileSizeToOcr());
        assertEquals("Invalid default timeout value", 120, config.getTimeout());
        assertEquals("Invalid default ImageMagickPath value", "", config.getImageMagickPath());
        assertEquals("Invalid default density value", 300, config.getDensity());
        assertEquals("Invalid default depth value", 4, config.getDepth());
        assertEquals("Invalid default colorspace value", "gray", config.getColorspace());
        assertEquals("Invalid default filter value", "triangle", config.getFilter());
        assertEquals("Invalid default resize value", 900, config.getResize());
    }

    /**
     * Loads {@code TesseractOCRConfig-partial.properties} and checks both the
     * values expected to be overridden and those expected to keep their defaults.
     */
    @Test
    public void testPartialConfig() throws Exception {
        TesseractOCRConfig config =
                loadConfigFromResource("/test-properties/TesseractOCRConfig-partial.properties");

        assertEquals("Invalid default tesseractPath value", "", config.getTesseractPath());
        assertEquals("Invalid default tessdataPath value", "", config.getTessdataPath());
        assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage());
        assertEquals("Invalid default pageSegMode value", "1", config.getPageSegMode());
        assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr());
        assertEquals("Invalid default maxFileSizeToOcr value", Integer.MAX_VALUE, config.getMaxFileSizeToOcr());
        assertEquals("Invalid overridden timeout value", 240, config.getTimeout());
        assertEquals("Invalid default ImageMagickPath value", "", config.getImageMagickPath());
        assertEquals("Invalid overridden density value", 200, config.getDensity());
        assertEquals("Invalid overridden depth value", 8, config.getDepth());
        assertEquals("Invalid overridden filter value", "box", config.getFilter());
        assertEquals("Invalid overridden resize value", 300, config.getResize());
    }

    /**
     * Loads {@code TesseractOCRConfig-full.properties} and checks the overridden
     * values. The path-valued settings are only asserted on Unix-like systems.
     */
    @Test
    public void testFullConfig() throws Exception {
        TesseractOCRConfig config =
                loadConfigFromResource("/test-properties/TesseractOCRConfig-full.properties");

        if (SystemUtils.IS_OS_UNIX) {
            assertEquals("Invalid overridden tesseractPath value", "/opt/tesseract" + File.separator, config.getTesseractPath());
            assertEquals("Invalid overridden tessdataPath value", "/usr/local/share" + File.separator, config.getTessdataPath());
            assertEquals("Invalid overridden ImageMagickPath value", "/usr/local/bin/", config.getImageMagickPath());
        }
        assertEquals("Invalid overridden language value", "fra+deu", config.getLanguage());
        assertEquals("Invalid overridden pageSegMode value", "2", config.getPageSegMode());
        assertEquals("Invalid overridden minFileSizeToOcr value", 1, config.getMinFileSizeToOcr());
        assertEquals("Invalid overridden maxFileSizeToOcr value", 2000000, config.getMaxFileSizeToOcr());
        assertEquals("Invalid overridden timeout value", 240, config.getTimeout());
        assertEquals("Invalid overridden density value", 200, config.getDensity());
        assertEquals("Invalid overridden depth value", 8, config.getDepth());
        assertEquals("Invalid overridden filter value", "box", config.getFilter());
        assertEquals("Invalid overridden resize value", 300, config.getResize());
    }

    /** Each listed language string must be accepted and read back unchanged. */
    @Test
    public void testValidateValidLanguage() {
        List<String> validLanguages = Arrays.asList(
                "eng", "slk_frak", "chi_tra", "eng+fra", "tgk+chi_tra+slk_frak");

        TesseractOCRConfig config = new TesseractOCRConfig();

        for (String language : validLanguages) {
            config.setLanguage(language);
            assertEquals("Valid language not set", language, config.getLanguage());
        }
    }

    /** Each listed language string must be rejected with an IllegalArgumentException. */
    @Test
    public void testValidateInvalidLanguage() {
        List<String> invalidLanguages = Arrays.asList(
                "", "+", "en", "en+", "eng+fra+", "rm -rf *");

        TesseractOCRConfig config = new TesseractOCRConfig();

        for (String language : invalidLanguages) {
            try {
                config.setLanguage(language);
                fail("Invalid language set: " + language);
            } catch (IllegalArgumentException e) {
                // expected exception thrown
            }
        }
    }

    /**
     * "0" and "10" must be accepted; "14" must be rejected.
     *
     * <p>Only the invalid call is guarded, so a rejected valid value fails the
     * test instead of being mistaken for the expected exception.
     */
    @Test
    public void testValidatePageSegMode() {
        TesseractOCRConfig config = new TesseractOCRConfig();
        // Valid values: an exception thrown here propagates and fails the test.
        config.setPageSegMode("0");
        config.setPageSegMode("10");
        try {
            config.setPageSegMode("14");
            fail("Invalid pageSegMode set: 14");
        } catch (IllegalArgumentException e) {
            // expected exception thrown
        }
    }

    /**
     * 300 and 400 must be accepted; 1 must be rejected.
     *
     * <p>Only the invalid call is guarded, so a rejected valid value fails the
     * test instead of being mistaken for the expected exception.
     */
    @Test
    public void testValidateDensity() {
        TesseractOCRConfig config = new TesseractOCRConfig();
        // Valid values: an exception thrown here propagates and fails the test.
        config.setDensity(300);
        config.setDensity(400);
        try {
            config.setDensity(1);
            fail("Invalid density set: 1");
        } catch (IllegalArgumentException e) {
            // expected exception thrown
        }
    }

    /**
     * 4 and 8 must be accepted; 6 must be rejected.
     *
     * <p>Only the invalid call is guarded, so a rejected valid value fails the
     * test instead of being mistaken for the expected exception.
     */
    @Test
    public void testValidateDepth() {
        TesseractOCRConfig config = new TesseractOCRConfig();
        // Valid values: an exception thrown here propagates and fails the test.
        config.setDepth(4);
        config.setDepth(8);
        try {
            config.setDepth(6);
            fail("Invalid depth set: 6");
        } catch (IllegalArgumentException e) {
            // expected exception thrown
        }
    }

    /**
     * "Triangle" and "box" must be accepted; "abc" must be rejected.
     *
     * <p>Only the invalid call is guarded, so a rejected valid value fails the
     * test instead of being mistaken for the expected exception.
     */
    @Test
    public void testValidateFilter() {
        TesseractOCRConfig config = new TesseractOCRConfig();
        // Valid values: an exception thrown here propagates and fails the test.
        config.setFilter("Triangle");
        config.setFilter("box");
        try {
            config.setFilter("abc");
            fail("Invalid filter set: abc");
        } catch (IllegalArgumentException e) {
            // expected exception thrown
        }
    }

    /**
     * 200 and 400 must be accepted; 1000 must be rejected.
     *
     * <p>Only the invalid call is guarded, so a rejected valid value fails the
     * test instead of being mistaken for the expected exception.
     */
    @Test
    public void testValidateResize() {
        TesseractOCRConfig config = new TesseractOCRConfig();
        // Valid values: an exception thrown here propagates and fails the test.
        config.setResize(200);
        config.setResize(400);
        try {
            config.setResize(1000);
            fail("Invalid resize set: 1000");
        } catch (IllegalArgumentException e) {
            // expected exception thrown
        }
    }
}