//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.templates;
import static java.util.Collections.singleton;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.resource.ResourceInitializationException;
import org.junit.Test;
import uk.gov.dstl.baleen.types.templates.TemplateRecord;
import uk.gov.dstl.baleen.types.templates.TemplateField;
/**
 * Tests for {@link TemplateAnnotator}.
 *
 * <p>
 * Each test writes a YAML record definition into {@code tempDirectory},
 * runs the annotator against that directory and then checks which
 * {@link TemplateRecord} and {@link TemplateField} annotations exist in the
 * JCas. The document under test, the {@code PARAn} constants,
 * {@code tempDirectory}, {@code YAMLMAPPER}, {@code createRecord} and
 * {@code processJCas} are all inherited from
 * {@link AbstractRecordAnnotatorTest}.
 */
public class TemplateAnnotatorTest extends AbstractRecordAnnotatorTest {

  /** Creates the test for the {@link TemplateAnnotator} class. */
  public TemplateAnnotatorTest() {
    super(TemplateAnnotator.class);
  }

  /**
   * A field selected by a plain structural selector is expected to cover the
   * selected paragraph and carry its text as the value.
   */
  @Test
  public void testCreateFieldAnnotationsFromSelectorFile()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createGoodRecordDefinition();
    try {
      processWithDefinitions();
      assertRecordCoversParas2to4();
      assertSingleField(53, 105, PARA2, PARA2);
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * Configuring a type name of {@code "NotAStructure"} is expected to fail
   * with a {@link ResourceInitializationException}.
   */
  @Test(expected = ResourceInitializationException.class)
  public void testCanNotInitializeWithIncorrectStructureType()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    processJCas(TemplateAnnotator.PARAM_RECORD_DEFINITIONS_DIRECTORY, tempDirectory.toString(),
        TemplateAnnotator.PARAM_TYPE_NAMES, new String[] { "NotAStructure" });
  }

  /**
   * A record definition with an empty following path is expected to yield a
   * record spanning offsets 212 to 265, with the single field covering
   * {@code PARA5}.
   */
  @Test
  public void testCreateRecordWhenNoFollowingPath()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createNoFollowingRecordDefinition();
    try {
      processJCas(TemplateAnnotator.PARAM_RECORD_DEFINITIONS_DIRECTORY, tempDirectory.toString(),
          TemplateAnnotator.PARAM_TYPE_NAMES, new String[] { "Paragraph" });

      TemplateRecord record = JCasUtil.selectSingle(jCas, TemplateRecord.class);
      assertEquals(212, record.getBegin());
      assertEquals(265, record.getEnd());
      assertEquals("\n" + PARA5, record.getCoveredText());

      assertSingleField(213, 265, PARA5, PARA5);
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * A field with a default value is expected to be created as an empty span
   * at offset 212 whose value is the configured default.
   */
  @Test
  public void testCreateDefaultFieldAnnotationsWithDefault()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createGoodRecordDefinitionWithDefaultAndMissing();
    try {
      processWithDefinitions();
      assertSingleField(212, 212, "", "default value");
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * A required field defined by {@link #createGoodRecordDefinitionRequiredMissing()}
   * is expected to produce no {@link TemplateField} annotation.
   */
  @Test
  public void testCreateDefaultFieldAnnotationsMising()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createGoodRecordDefinitionRequiredMissing();
    try {
      processWithDefinitions();
      assertEquals(0, JCasUtil.select(jCas, TemplateField.class).size());
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * Uses the same definition and the same assertions as
   * {@link #testCreateDefaultFieldAnnotationsWithDefault()}; kept so the
   * existing test name continues to run.
   */
  @Test
  public void testCreateDefaultFieldAnnotationsMisingWithDefaultValue()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createGoodRecordDefinitionWithDefaultAndMissing();
    try {
      processWithDefinitions();
      assertSingleField(212, 212, "", "default value");
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * A field with a regular expression is expected to cover only the matched
   * text ({@code "cat"}) rather than the whole selected paragraph.
   */
  @Test
  public void testCreateFieldAnnotationsFromSelectorFileWithRegex()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createGoodRecordDefinitionWithRegex();
    try {
      processWithDefinitions();
      assertRecordCoversParas2to4();
      assertSingleField(69, 72, "cat", "cat");
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * A required field whose regular expression matches is expected to cover
   * the matched text ({@code "rat"}).
   */
  @Test
  public void testCreateFieldAnnotationsFromSelectorFileWithRegexRequired()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createGoodRecordDefinitionWithRegexRequired();
    try {
      processWithDefinitions();
      assertRecordCoversParas2to4();
      assertSingleField(122, 125, "rat", "rat");
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * When the regular expression matches, the matched text
   * ({@code "jumped"}) is expected to be used instead of the configured
   * default value.
   */
  @Test
  public void testCreateFieldAnnotationsFromSelectorFileWithRegexDefaultNotNeeded()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createGoodRecordDefinitionWithRegexDefaultNotNeeded();
    try {
      processWithDefinitions();
      assertRecordCoversParas2to4();
      assertSingleField(179, 185, "jumped", "jumped");
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * When the regular expression does not match, an empty span at offset 159
   * carrying the default value ({@code "horse"}) is expected.
   */
  @Test
  public void testCreateFieldAnnotationsFromSelectorFileWithRegexDefaultUsed()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createGoodRecordDefinitionWithRegexDefaultNeeded();
    try {
      processWithDefinitions();
      assertRecordCoversParas2to4();
      assertSingleField(159, 159, "", "horse");
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * A required field whose regular expression does not match is expected to
   * produce no field annotation, while the record annotation still exists.
   */
  @Test
  public void testCreateFieldAnnotationsFromSelectorFileWithRegexMissingRequired()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createGoodRecordDefinitionWithRegexRequiredAndMissing();
    try {
      processWithDefinitions();
      assertFalse(JCasUtil.exists(jCas, TemplateField.class));
      assertTrue(JCasUtil.exists(jCas, TemplateRecord.class));
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * A field defined with the selector {@code "Table"} is expected to produce
   * no field annotation, while the record annotation is still created.
   */
  @Test
  public void testMultipleElementsSelectedForField()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createBadRecordDefinition();
    try {
      processWithDefinitions();
      assertRecordCoversParas2to4();
      assertFalse(JCasUtil.exists(jCas, TemplateField.class));
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * Asserts that exactly one {@link TemplateRecord} exists and that it spans
   * offsets 52 to 212, covering paragraphs 2 to 4 with a newline on either
   * side.
   *
   * @return the single record annotation found in the JCas
   */
  protected TemplateRecord assertRecordCoversParas2to4() {
    TemplateRecord record = JCasUtil.selectSingle(jCas, TemplateRecord.class);
    assertEquals(52, record.getBegin());
    assertEquals(212, record.getEnd());
    assertEquals(String.join("\n", "", PARA2, PARA3, PARA4, ""), record.getCoveredText());
    return record;
  }

  /**
   * A definition built without a name or paths is expected to create the
   * field annotation but no {@link TemplateRecord} annotation.
   */
  @Test
  public void testDefaultRecord()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createDefaultRecordDefinition();
    try {
      processWithDefinitions();
      assertEquals(0, JCasUtil.select(jCas, TemplateRecord.class).size());
      assertSingleField(53, 105, PARA2, PARA2);
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * A record definition with no fields is expected to create the record
   * annotation (offsets 158 to 212) and no field annotations.
   */
  @Test
  public void testNoFieldsInRecord()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    Path definitionFile = createNoFieldsRecordDefinition();
    try {
      processWithDefinitions();

      TemplateRecord record = JCasUtil.selectSingle(jCas, TemplateRecord.class);
      assertEquals(158, record.getBegin());
      assertEquals(212, record.getEnd());
      assertEquals(String.join("\n", "", PARA4, ""), record.getCoveredText());

      Collection<TemplateField> fields = JCasUtil.select(jCas, TemplateField.class);
      assertEquals(0, fields.size());
      assertFalse(JCasUtil.contains(jCas, record, TemplateField.class));
    } finally {
      Files.delete(definitionFile);
    }
  }

  /**
   * Runs the annotator with only the record definitions directory
   * configured, pointing it at {@code tempDirectory}.
   */
  private void processWithDefinitions()
      throws AnalysisEngineProcessException, ResourceInitializationException, IOException {
    processJCas(TemplateAnnotator.PARAM_RECORD_DEFINITIONS_DIRECTORY, tempDirectory.toString());
  }

  /**
   * Asserts that exactly one {@link TemplateField} exists in the JCas and
   * that it has the given span, covered text and value.
   *
   * @param begin expected begin offset
   * @param end expected end offset
   * @param coveredText expected document text covered by the annotation
   * @param value expected value stored on the annotation
   */
  private void assertSingleField(int begin, int end, String coveredText, String value) {
    TemplateField field = JCasUtil.selectSingle(jCas, TemplateField.class);
    assertEquals(begin, field.getBegin());
    assertEquals(end, field.getEnd());
    assertEquals(coveredText, field.getCoveredText());
    assertEquals(value, field.getValue());
  }

  /**
   * Writes a single record definition as YAML to a new temporary file in
   * {@code tempDirectory}.
   *
   * <p>
   * If writing fails, the newly created file is removed before the failure
   * propagates, so that a half-written definition is not left behind in the
   * directory the annotator reads from.
   *
   * @param recordDefinition the definition to serialise
   * @return the path of the written definition file; the caller is
   *         responsible for deleting it
   * @throws IOException if the file cannot be created or written
   */
  private Path writeRecordDefinition(TemplateRecordConfiguration recordDefinition) throws IOException {
    Path definitionFile = Files.createTempFile(tempDirectory,
        AbstractRecordAnnotatorTest.class.getSimpleName(), ".yml");
    boolean written = false;
    try {
      YAMLMAPPER.writeValue(definitionFile.toFile(), singleton(recordDefinition));
      written = true;
    } finally {
      if (!written) {
        try {
          Files.deleteIfExists(definitionFile);
        } catch (IOException ignored) {
          // Best-effort cleanup only: the write failure already propagating
          // is the error worth reporting, so it must not be masked.
        }
      }
    }
    return definitionFile;
  }

  /** Record named "test" with one field selecting {@code Paragraph:nth-of-type(2)}. */
  private Path createGoodRecordDefinition() throws IOException {
    return createRecord("test", new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)"));
  }

  /**
   * Definition without name or paths, holding one required field selecting
   * {@code Paragraph:nth-of-type(5)}.
   */
  private Path createGoodRecordDefinitionRequiredMissing() throws IOException {
    TemplateFieldConfiguration field = new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(5)");
    field.setRequired(true);

    List<TemplateFieldConfiguration> fields = new ArrayList<>();
    fields.add(field);
    return writeRecordDefinition(new TemplateRecordConfiguration(fields, 0));
  }

  /**
   * Definition without name or paths, holding one field selecting
   * {@code Paragraph:nth-of-type(5)} with the default value
   * {@code "default value"}.
   */
  private Path createGoodRecordDefinitionWithDefaultAndMissing() throws IOException {
    TemplateFieldConfiguration field = new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(5)");
    field.setDefaultValue("default value");

    List<TemplateFieldConfiguration> fields = new ArrayList<>();
    fields.add(field);
    return writeRecordDefinition(new TemplateRecordConfiguration(fields, 0));
  }

  /** Record "test" whose field extracts the text between "brown " and " jumped" in paragraph selector 2. */
  private Path createGoodRecordDefinitionWithRegex() throws IOException {
    TemplateFieldConfiguration field = new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)");
    field.setRegex("(?<=brown )(.*)(?= jumped)");
    return createRecord("test", field);
  }

  /** As {@link #createGoodRecordDefinitionWithRegex()} but on paragraph selector 3 and marked required. */
  private Path createGoodRecordDefinitionWithRegexRequired() throws IOException {
    TemplateFieldConfiguration field = new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(3)");
    field.setRegex("(?<=brown )(.*)(?= jumped)");
    field.setRequired(true);
    return createRecord("test", field);
  }

  /**
   * Record "test" with a required field on paragraph selector 3 whose regex
   * looks for text between "white " and " jumped".
   */
  private Path createGoodRecordDefinitionWithRegexRequiredAndMissing() throws IOException {
    TemplateFieldConfiguration field = new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(3)");
    field.setRegex("(?<=white )(.*)(?= jumped)");
    field.setRequired(true);
    return createRecord("test", field);
  }

  /**
   * Record "test" with a field on paragraph selector 4 whose regex looks for
   * text between "ant " and " over", with default value "crawled".
   */
  private Path createGoodRecordDefinitionWithRegexDefaultNotNeeded() throws IOException {
    TemplateFieldConfiguration field = new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(4)");
    field.setRegex("(?<=ant )(.*)(?= over)");
    field.setDefaultValue("crawled");
    return createRecord("test", field);
  }

  /**
   * Record "test" with a field on paragraph selector 4 whose regex looks for
   * text between "white " and " jumped", with default value "horse".
   */
  private Path createGoodRecordDefinitionWithRegexDefaultNeeded() throws IOException {
    TemplateFieldConfiguration field = new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(4)");
    field.setRegex("(?<=white )(.*)(?= jumped)");
    field.setDefaultValue("horse");
    return createRecord("test", field);
  }

  /** Record "test" whose single field uses the selector {@code "Table"}. */
  private Path createBadRecordDefinition() throws IOException {
    return createRecord("test", new TemplateFieldConfiguration("field", "Table"));
  }

  /**
   * Definition without name or paths, holding one field selecting
   * {@code Paragraph:nth-of-type(2)}.
   */
  private Path createDefaultRecordDefinition() throws IOException {
    List<TemplateFieldConfiguration> fields = new ArrayList<>();
    fields.add(new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)"));
    return writeRecordDefinition(new TemplateRecordConfiguration(fields, 0));
  }

  /**
   * Record "test" bounded by paragraph selectors 3 (preceding) and 5
   * (following), with an empty field list.
   */
  private Path createNoFieldsRecordDefinition() throws IOException {
    List<TemplateFieldConfiguration> fields = new ArrayList<>();
    String precedingPath = "Paragraph:nth-of-type(3)";
    String followingPath = "Paragraph:nth-of-type(5)";
    return writeRecordDefinition(
        new TemplateRecordConfiguration("test", precedingPath, followingPath, fields, 0));
  }

  /**
   * Record "test" with preceding paragraph selector 5, an empty following
   * path, and one field selecting {@code Paragraph:nth-of-type(6)}.
   */
  private Path createNoFollowingRecordDefinition() throws IOException {
    List<TemplateFieldConfiguration> fields = new ArrayList<>();
    fields.add(new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(6)"));
    String precedingPath = "Paragraph:nth-of-type(5)";
    String followingPath = "";
    return writeRecordDefinition(
        new TemplateRecordConfiguration("test", precedingPath, followingPath, fields, 0));
  }
}