//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.cleaners;
import static org.junit.Assert.assertEquals;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.junit.Test;
import uk.gov.dstl.baleen.annotators.cleaners.CleanPunctuation;
import uk.gov.dstl.baleen.annotators.testing.Annotations;
import uk.gov.dstl.baleen.annotators.testing.AnnotatorTestBase;
import uk.gov.dstl.baleen.types.common.Person;
import uk.gov.dstl.baleen.types.semantic.Entity;
import uk.gov.dstl.baleen.types.semantic.Location;
/**
*
*
*/
public class CleanPunctuationTest extends AnnotatorTestBase{
private static final String TEXT = "(Yay), we've pulled (out (the right)) number (of brackets!)";
private static final String TEXT_BRACKETS = "((Oh dear), we've (pulled out) too many opening brackets";
@Test
public void testStripPunctuation() throws Exception{
AnalysisEngine cleaner = AnalysisEngineFactory.createEngine(CleanPunctuation.class);
jCas.setDocumentText("Alice met Bill. \"Who goes there?\", he said.");
//Test a space after
Annotations.createPerson(jCas, 0, 6, "Alice ");
//Test a preceding space and a trailing punctuation, with a different value
Annotations.createPerson(jCas, 9, 15, "Bill.");
//Test a different type, with multiple trailing punctuation
Annotations.createLocation(jCas, 26, 34, "there?\",", null);
//Test an entity made entirely of punctuation
Annotations.createEntity(jCas, 31, 34, null);
cleaner.process(jCas);
assertEquals(2, JCasUtil.select(jCas, Person.class).size());
assertEquals(1, JCasUtil.select(jCas, Location.class).size());
assertEquals(3, JCasUtil.select(jCas, Entity.class).size());
Person p = JCasUtil.selectByIndex(jCas, Person.class, 0);
assertEquals("Alice", p.getCoveredText());
assertEquals("Alice", p.getValue());
p = JCasUtil.selectByIndex(jCas, Person.class, 1);
assertEquals("Bill", p.getCoveredText());
assertEquals("Bill", p.getValue());
Location l = JCasUtil.selectByIndex(jCas, Location.class, 0);
assertEquals("there", l.getCoveredText());
assertEquals("there", l.getValue());
cleaner.destroy();
}
@Test
public void testBracketsOpen() throws Exception{
AnalysisEngine cleanBracketAE = AnalysisEngineFactory.createEngine(CleanPunctuation.class);
jCas.setDocumentText(TEXT_BRACKETS);
Annotations.createEntity(jCas, 0, jCas.getDocumentText().length(), TEXT_BRACKETS);
cleanBracketAE.process(jCas);
assertEquals(1, JCasUtil.select(jCas, Entity.class).size());
Entity ent = JCasUtil.selectByIndex(jCas, Entity.class, 0);
assertEquals("(Oh dear), we've (pulled out) too many opening brackets", ent.getCoveredText());
assertEquals("(Oh dear), we've (pulled out) too many opening brackets", ent.getValue());
}
@Test
public void testBracketsClose() throws Exception{
AnalysisEngine cleanBracketAE = AnalysisEngineFactory.createEngine(CleanPunctuation.class);
String t = "(Oh dear), we've pulled out too many (closing brackets!))";
jCas.setDocumentText(t);
Annotations.createEntity(jCas, 0,t.length(), TEXT_BRACKETS);
cleanBracketAE.process(jCas);
assertEquals(1, JCasUtil.select(jCas, Entity.class).size());
Entity ent = JCasUtil.selectByIndex(jCas, Entity.class, 0);
assertEquals("(Oh dear), we've pulled out too many (closing brackets!)", ent.getCoveredText());
assertEquals("(Oh dear), we've pulled out too many (closing brackets!)", ent.getValue());
}
@Test
public void testBracketsNone() throws Exception{
AnalysisEngine cleanBracketAE = AnalysisEngineFactory.createEngine(CleanPunctuation.class);
jCas.setDocumentText(TEXT);
Annotations.createEntity(jCas, 0,TEXT.length(), TEXT);
cleanBracketAE.process(jCas);
assertEquals(1, JCasUtil.select(jCas, Entity.class).size());
Entity ent = JCasUtil.selectByIndex(jCas, Entity.class, 0);
assertEquals(TEXT, ent.getCoveredText());
assertEquals(TEXT, ent.getValue());
}
}