package io.lumify.phoneNumber;
import com.google.common.base.Charsets;
import com.google.inject.Injector;
import io.lumify.core.config.HashMapConfigurationLoader;
import io.lumify.core.ingest.graphProperty.GraphPropertyWorkData;
import io.lumify.core.ingest.graphProperty.GraphPropertyWorkerPrepareData;
import io.lumify.core.ingest.graphProperty.TermMentionFilter;
import io.lumify.core.model.ontology.OntologyRepository;
import io.lumify.core.model.properties.LumifyProperties;
import io.lumify.core.model.termMention.TermMentionRepository;
import io.lumify.core.model.workQueue.WorkQueueRepository;
import io.lumify.core.security.DirectVisibilityTranslator;
import io.lumify.core.security.VisibilityTranslator;
import io.lumify.core.user.User;
import io.lumify.web.clientapi.model.VisibilityJson;
import org.apache.hadoop.fs.FileSystem;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
import org.securegraph.*;
import org.securegraph.inmemory.InMemoryAuthorizations;
import org.securegraph.inmemory.InMemoryGraph;
import org.securegraph.property.StreamingPropertyValue;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
import static org.securegraph.util.IterableUtils.toList;
@RunWith(MockitoJUnitRunner.class)
public class PhoneNumberGraphPropertyWorkerTest {
private static final String PHONE_TEXT = "This terrorist's phone number is 410-678-2230, and his best buddy's phone number is +44 (0)207 437 0478";
private static final String PHONE_NEW_LINES = "This terrorist's phone\n number is 410-678-2230, and his best buddy's phone number\n is +44 (0)207 437 0478";
private static final String PHONE_MISSING = "This is a sentence without any phone numbers in it.";
@Mock
private User user;
@Mock
private OntologyRepository ontologyRepository;
@Mock
private WorkQueueRepository workQueueRepository;
private PhoneNumberGraphPropertyWorker extractor;
private InMemoryAuthorizations authorizations;
private InMemoryGraph graph;
private Visibility visibility;
private VisibilityTranslator visibilityTranslator = new DirectVisibilityTranslator();
private VisibilityJson visibilityJson;
@Before
public void setUp() throws Exception {
Map config = new HashMap();
config.put("ontology.intent.concept.person", "http://lumify.io/test#person");
config.put("ontology.intent.concept.location", "http://lumify.io/test#location");
config.put("ontology.intent.concept.organization", "http://lumify.io/test#organization");
config.put("ontology.intent.relationship.artifactHasEntity", "http://lumify.io/test#artifactHasEntity");
config.put("ontology.intent.concept.phoneNumber", "http://lumify.io/test#phoneNumber");
io.lumify.core.config.Configuration configuration = new HashMapConfigurationLoader(config).createConfiguration();
when(ontologyRepository.getRequiredConceptIRIByIntent("phoneNumber")).thenReturn("http://lumify.io/test#phoneNumber");
extractor = new PhoneNumberGraphPropertyWorker();
extractor.setConfiguration(configuration);
extractor.setVisibilityTranslator(visibilityTranslator);
extractor.setOntologyRepository(ontologyRepository);
extractor.setWorkQueueRepository(workQueueRepository);
FileSystem hdfsFileSystem = null;
authorizations = new InMemoryAuthorizations(TermMentionRepository.VISIBILITY_STRING);
Injector injector = null;
List<TermMentionFilter> termMentionFilters = new ArrayList<TermMentionFilter>();
GraphPropertyWorkerPrepareData workerPrepareData = new GraphPropertyWorkerPrepareData(config, termMentionFilters, hdfsFileSystem, user, authorizations, injector);
graph = InMemoryGraph.create();
visibility = new Visibility("");
visibilityJson = new VisibilityJson();
extractor.setGraph(graph);
extractor.prepare(workerPrepareData);
}
@Test
public void testPhoneNumberExtraction() throws Exception {
InputStream in = asStream(PHONE_TEXT);
VertexBuilder vertexBuilder = graph.prepareVertex("v1", visibility);
StreamingPropertyValue textPropertyValue = new StreamingPropertyValue(in, String.class);
LumifyProperties.TEXT.setProperty(vertexBuilder, textPropertyValue, visibility);
LumifyProperties.VISIBILITY_JSON.setProperty(vertexBuilder, visibilityJson, visibility);
Vertex vertex = vertexBuilder.save(authorizations);
Property property = vertex.getProperty(LumifyProperties.TEXT.getPropertyName());
GraphPropertyWorkData workData = new GraphPropertyWorkData(visibilityTranslator, vertex, property, null, null);
in = asStream(PHONE_TEXT);
extractor.execute(in, workData);
List<Vertex> termMentions = toList(vertex.getVertices(Direction.OUT, LumifyProperties.TERM_MENTION_LABEL_HAS_TERM_MENTION, authorizations));
assertEquals("Incorrect number of phone numbers extracted", 2, termMentions.size());
boolean foundFirst = false;
boolean foundSecond = false;
for (Vertex term : termMentions) {
String title = LumifyProperties.TERM_MENTION_TITLE.getPropertyValue(term);
if (title.equals("+14106782230")) {
foundFirst = true;
assertEquals(33, LumifyProperties.TERM_MENTION_START_OFFSET.getPropertyValue(term, 0));
assertEquals(45, LumifyProperties.TERM_MENTION_END_OFFSET.getPropertyValue(term, 0));
} else if (title.equals("+442074370478")) {
foundSecond = true;
assertEquals(84, LumifyProperties.TERM_MENTION_START_OFFSET.getPropertyValue(term, 0));
assertEquals(103, LumifyProperties.TERM_MENTION_END_OFFSET.getPropertyValue(term, 0));
}
}
assertTrue("+14106782230 not found", foundFirst);
assertTrue("+442074370478 not found", foundSecond);
}
@Test
public void testPhoneNumberExtractionWithNewlines() throws Exception {
InputStream in = asStream(PHONE_NEW_LINES);
VertexBuilder vertexBuilder = graph.prepareVertex("v1", visibility);
StreamingPropertyValue textPropertyValue = new StreamingPropertyValue(in, String.class);
LumifyProperties.TEXT.setProperty(vertexBuilder, textPropertyValue, visibility);
LumifyProperties.VISIBILITY_JSON.setProperty(vertexBuilder, visibilityJson, visibility);
Vertex vertex = vertexBuilder.save(authorizations);
Property property = vertex.getProperty(LumifyProperties.TEXT.getPropertyName());
GraphPropertyWorkData workData = new GraphPropertyWorkData(visibilityTranslator, vertex, property, null, null);
in = asStream(PHONE_NEW_LINES);
extractor.execute(in, workData);
List<Vertex> termMentions = toList(vertex.getVertices(Direction.OUT, LumifyProperties.TERM_MENTION_LABEL_HAS_TERM_MENTION, authorizations));
assertEquals("Incorrect number of phone numbers extracted", 2, termMentions.size());
boolean foundFirst = false;
boolean foundSecond = false;
for (Vertex term : termMentions) {
String title = LumifyProperties.TERM_MENTION_TITLE.getPropertyValue(term);
if (title.equals("+14106782230")) {
foundFirst = true;
assertEquals(34, LumifyProperties.TERM_MENTION_START_OFFSET.getPropertyValue(term, 0));
assertEquals(46, LumifyProperties.TERM_MENTION_END_OFFSET.getPropertyValue(term, 0));
} else if (title.equals("+442074370478")) {
foundSecond = true;
assertEquals(86, LumifyProperties.TERM_MENTION_START_OFFSET.getPropertyValue(term, 0));
assertEquals(105, LumifyProperties.TERM_MENTION_END_OFFSET.getPropertyValue(term, 0));
}
}
assertTrue("+14106782230 not found", foundFirst);
assertTrue("+442074370478 not found", foundSecond);
}
@Test
public void testNegativePhoneNumberExtraction() throws Exception {
InputStream in = asStream(PHONE_MISSING);
VertexBuilder vertexBuilder = graph.prepareVertex("v1", visibility);
StreamingPropertyValue textPropertyValue = new StreamingPropertyValue(in, String.class);
LumifyProperties.TEXT.setProperty(vertexBuilder, textPropertyValue, visibility);
Vertex vertex = vertexBuilder.save(authorizations);
Property property = vertex.getProperty(LumifyProperties.TEXT.getPropertyName());
GraphPropertyWorkData workData = new GraphPropertyWorkData(visibilityTranslator, vertex, property, null, null);
in = asStream(PHONE_MISSING);
extractor.execute(in, workData);
List<Vertex> termMentions = toList(vertex.getVertices(Direction.OUT, LumifyProperties.TERM_MENTION_LABEL_HAS_TERM_MENTION, authorizations));
assertTrue("Phone number extracted when there were no phone numbers", termMentions.isEmpty());
}
private InputStream asStream(final String text) {
return new ByteArrayInputStream(text.getBytes(Charsets.UTF_8));
}
}