//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.templates;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.resource.ResourceInitializationException;
import org.junit.Test;
import uk.gov.dstl.baleen.annotators.testing.AbstractAnnotatorTest;
import uk.gov.dstl.baleen.types.templates.TemplateFieldDefinition;
public class TemplateFieldDefinitionAnnotatorTest extends AbstractAnnotatorTest {
private static final String FIELD_TEXT = "Full Name <<field:PersonFullName>> \n";
private static final String FIELD2_TEXT = FIELD_TEXT + " Description: \n" + " <<field:Description>> More text\n";
private static final String FIELD_REGEX_TEXT = "Email address: \n"
+ " <<field:email regex=\"\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\b\">> More text\n";
private static final String FIELD_HTML_REGEX = "HTML: <<field:html regex=\"/^<([a-z]+)([^<]+)*(?:>(.*)<\\/\\1>|\\s+\\/>)$/\">> More text >>\n";
private static final String FIELD_NEIGHBOURS = "<<field:one>><<field:two>>";
private static final String FIELD_ILLEGAL_REGEX = "Error: <<field:error regex=\"(\">>";
private static final String FIELD_DEFAULT_TEXT = "<<field:ten defaultValue=\"10\">>";
private static final String FIELD_REQUIRED_TEXT = "<<field:required required=\"true\">>";
private static final String FIELD_REPEAT_TEXT = "<<field:required repeat>>";
private static final String FIELD_REGEX_DEFAULT_REQUIRED_TEXT = "<<field:all regex=\"\\d?:\\s\\d?\" defaultValue=\"not found\" required=\"true\" repeat=\"true\">>";
private static final String FIELD_REGEX_DEFAULT_REQUIRED_TEXT_LENIENT = "<<field:all regex=\"\\d?:\\s\\d?\" defaultValue=\"not found\" required repeat>>";
public TemplateFieldDefinitionAnnotatorTest() {
super(TemplateFieldDefinitionAnnotator.class);
}
@Test
public void annotateField() throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD_TEXT);
processJCas();
TemplateFieldDefinition field = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
assertEquals(10, field.getBegin());
assertEquals(34, field.getEnd());
assertEquals("PersonFullName", field.getName());
assertEquals("<<field:PersonFullName>>", field.getCoveredText());
assertNull(field.getDefaultValue());
assertFalse(field.getRequired());
}
@Test
public void annotate2Fields() throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD2_TEXT);
processJCas();
TemplateFieldDefinition field1 = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
assertEquals(10, field1.getBegin());
assertEquals(34, field1.getEnd());
assertEquals("PersonFullName", field1.getName());
assertEquals("<<field:PersonFullName>>", field1.getCoveredText());
assertNull(field1.getDefaultValue());
assertFalse(field1.getRequired());
TemplateFieldDefinition field2 = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 1);
assertEquals(53, field2.getBegin());
assertEquals(74, field2.getEnd());
assertEquals("Description", field2.getName());
assertEquals("<<field:Description>>", field2.getCoveredText());
assertNull(field2.getDefaultValue());
assertFalse(field2.getRequired());
}
@Test
public void annotateFieldNeighbours() throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD_NEIGHBOURS);
processJCas();
TemplateFieldDefinition field1 = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
assertEquals(0, field1.getBegin());
assertEquals(13, field1.getEnd());
assertEquals("one", field1.getName());
assertEquals("<<field:one>>", field1.getCoveredText());
assertNull(field1.getDefaultValue());
assertFalse(field1.getRequired());
TemplateFieldDefinition field2 = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 1);
assertEquals(13, field2.getBegin());
assertEquals(26, field2.getEnd());
assertEquals("two", field2.getName());
assertEquals("<<field:two>>", field2.getCoveredText());
assertNull(field2.getDefaultValue());
assertFalse(field2.getRequired());
}
@Test
public void annotateFieldWithRegex() throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD_REGEX_TEXT);
processJCas();
TemplateFieldDefinition field = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
assertEquals(17, field.getBegin());
assertEquals(82, field.getEnd());
assertEquals("<<field:email regex=\"\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\b\">>", field.getCoveredText());
assertEquals("email", field.getName());
assertEquals("\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\b", field.getRegex());
assertNull(field.getDefaultValue());
}
@Test
public void annotateFieldWithHtmlRegex() throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD_HTML_REGEX);
processJCas();
TemplateFieldDefinition field = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
assertEquals(6, field.getBegin());
assertEquals(90, field.getEnd());
assertEquals("<<field:html regex=\"/^<([a-z]+)([^<]+)*(?:>(.*)<\\/\\1>|\\s+\\/>)$/\">>",
field.getCoveredText());
assertEquals("html", field.getName());
assertEquals("/^<([a-z]+)([^<]+)*(?:>(.*)<\\/\\1>|\\s+\\/>)$/", field.getRegex());
assertNull(field.getDefaultValue());
assertFalse(field.getRepeat());
}
@Test(expected = AnalysisEngineProcessException.class)
public void annotateFieldWithIllegalRegex() throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD_ILLEGAL_REGEX);
processJCas();
}
@Test
public void annotateFieldWithDefaultValue() throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD_DEFAULT_TEXT);
processJCas();
TemplateFieldDefinition field = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
assertEquals(0, field.getBegin());
assertEquals(FIELD_DEFAULT_TEXT.length(), field.getEnd());
assertEquals("ten", field.getName());
assertEquals(FIELD_DEFAULT_TEXT, field.getCoveredText());
assertEquals("10", field.getDefaultValue());
assertFalse(field.getRequired());
assertFalse(field.getRepeat());
}
@Test
public void annotateFieldRequired() throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD_REQUIRED_TEXT);
processJCas();
TemplateFieldDefinition field = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
assertEquals(0, field.getBegin());
assertEquals(FIELD_REQUIRED_TEXT.length(), field.getEnd());
assertEquals("required", field.getName());
assertEquals(FIELD_REQUIRED_TEXT, field.getCoveredText());
assertTrue(field.getRequired());
assertFalse(field.getRepeat());
assertNull(field.getDefaultValue());
}
@Test
public void annotateFieldRepeat() throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD_REPEAT_TEXT);
processJCas();
TemplateFieldDefinition field = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
assertEquals(0, field.getBegin());
assertEquals(FIELD_REPEAT_TEXT.length(), field.getEnd());
assertEquals("required", field.getName());
assertEquals(FIELD_REPEAT_TEXT, field.getCoveredText());
assertFalse(field.getRequired());
assertTrue(field.getRepeat());
assertNull(field.getDefaultValue());
}
@Test
public void annotateFieldAllAttributes() throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD_REGEX_DEFAULT_REQUIRED_TEXT);
processJCas();
TemplateFieldDefinition field = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
assertEquals(0, field.getBegin());
assertEquals(FIELD_REGEX_DEFAULT_REQUIRED_TEXT.length(), field.getEnd());
assertEquals("all", field.getName());
assertEquals(FIELD_REGEX_DEFAULT_REQUIRED_TEXT, field.getCoveredText());
assertTrue(field.getRequired());
assertTrue(field.getRepeat());
assertEquals("\\d?:\\s\\d?", field.getRegex());
assertEquals("not found", field.getDefaultValue());
}
@Test
public void annotateFieldAllAttributesLenient()
throws AnalysisEngineProcessException, ResourceInitializationException {
jCas.setDocumentText(FIELD_REGEX_DEFAULT_REQUIRED_TEXT_LENIENT);
processJCas();
TemplateFieldDefinition field = JCasUtil.selectByIndex(jCas, TemplateFieldDefinition.class, 0);
assertEquals(0, field.getBegin());
assertEquals(FIELD_REGEX_DEFAULT_REQUIRED_TEXT_LENIENT.length(), field.getEnd());
assertEquals("all", field.getName());
assertEquals(FIELD_REGEX_DEFAULT_REQUIRED_TEXT_LENIENT, field.getCoveredText());
assertTrue(field.getRequired());
assertTrue(field.getRepeat());
assertEquals("\\d?:\\s\\d?", field.getRegex());
assertEquals("not found", field.getDefaultValue());
}
}