package io.lumify.tesseract; import io.lumify.core.model.properties.LumifyProperties; import io.lumify.test.GraphPropertyWorkerTestBase; import org.apache.commons.io.IOUtils; import org.json.JSONObject; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.runners.MockitoJUnitRunner; import org.securegraph.Metadata; import org.securegraph.Property; import org.securegraph.Vertex; import org.securegraph.Visibility; import org.securegraph.property.StreamingPropertyValue; import java.io.ByteArrayInputStream; import java.io.File; import java.util.List; import java.util.Map; import static org.junit.Assert.*; import static org.securegraph.util.IterableUtils.toList; @RunWith(MockitoJUnitRunner.class) public class TesseractGraphPropertyWorkerTest extends GraphPropertyWorkerTestBase { private Visibility visibility = new Visibility(""); @Test public void testTesseractTestImage01() throws Exception { byte[] imageData = getResourceAsByteArray(TesseractGraphPropertyWorkerTest.class, "testImage01.jpg"); Metadata metadata = new Metadata(); LumifyProperties.MIME_TYPE.setMetadata(metadata, "image/jpg", visibility); StreamingPropertyValue value = new StreamingPropertyValue(new ByteArrayInputStream(imageData), byte[].class); Vertex v1 = getGraph().prepareVertex("v1", visibility) .addPropertyValue("k1", "image", value, metadata, visibility) .save(getGraphAuthorizations()); TesseractGraphPropertyWorker gpw = new TesseractGraphPropertyWorker(); run(gpw, getWorkerPrepareData(), v1, v1.getProperty("k1", "image"), new ByteArrayInputStream(imageData)); v1 = getGraph().getVertex("v1", getGraphAuthorizations()); List<Property> textProperties = toList(LumifyProperties.TEXT.getProperties(v1)); assertEquals(1, textProperties.size()); Property textProperty = textProperties.get(0); StreamingPropertyValue textValue = (StreamingPropertyValue) textProperty.getValue(); assertNotNull("textValue was null", textValue); String textValueString = IOUtils.toString(textValue.getInputStream()); assertTrue("does not contain Tesseract", textValueString.contains("Tesseract")); assertEquals(1, getGraphPropertyQueue().size()); JSONObject graphPropertyQueueItem = getGraphPropertyQueue().peek(); assertEquals(textProperty.getName(), graphPropertyQueueItem.getString("propertyName")); assertEquals(textProperty.getKey(), graphPropertyQueueItem.getString("propertyKey")); assertEquals(v1.getId(), graphPropertyQueueItem.getString("graphVertexId")); } @Override protected Map getConfigurationMap() { Map map = super.getConfigurationMap(); File tessdataDir = new File("/usr/share/tesseract-ocr/tessdata/"); if (tessdataDir.exists()) { map.put(TesseractGraphPropertyWorker.CONFIG_DATA_PATH, tessdataDir.getAbsolutePath()); } tessdataDir = new File("/usr/local/share/tessdata"); if (tessdataDir.exists()) { map.put(TesseractGraphPropertyWorker.CONFIG_DATA_PATH, tessdataDir.getAbsolutePath()); } return map; } }