//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.misc;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import com.google.common.collect.ImmutableSet;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.types.common.Nationality;
import uk.gov.dstl.baleen.types.common.Organisation;
import uk.gov.dstl.baleen.types.common.Quantity;
import uk.gov.dstl.baleen.types.semantic.Location;
import uk.gov.dstl.baleen.uima.BaleenAnnotator;
/**
 * Converts the following cases into organisations:
 *
 * <ul>
 *   <li>People of **LOCATION**</li>
 *   <li>**NATIONALITY** people</li>
 *   <li>**QUANTITY** people</li>
 * </ul>
 *
 * The surrounding "people" text is matched case-insensitively against the document text.
 * Optionally removes the original entity.
 *
 * @baleen.javadoc
 */
public class People extends BaleenAnnotator {
  /**
   * Should the original entity be removed?
   *
   * @baleen.config true
   */
  public static final String PARAM_REMOVE_ORIGINAL = "removeOriginal";

  @ConfigurationParameter(name = PARAM_REMOVE_ORIGINAL, defaultValue = "true")
  boolean removeOriginal;

  /** Text that must immediately precede a Location for it to be converted (trailing space included). */
  private static final String PEOPLE_OF_PREFIX = "people of ";

  /** Text that must immediately follow a Nationality or Quantity for it to be converted. */
  private static final String PEOPLE_SUFFIX = " people";

  /**
   * Runs the three conversions in turn: "people of LOCATION", "NATIONALITY people" and
   * "QUANTITY people".
   *
   * @param jCas the document being processed
   * @throws AnalysisEngineProcessException declared by the overridden method; not thrown directly
   *     by this implementation
   */
  @Override
  protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
    peopleOfLocation(jCas);
    nationalityPeople(jCas);
    quantityPeople(jCas);
  }

  /**
   * Adds an Organisation covering "people of " plus the Location for every Location that is
   * immediately preceded by that text. Matching originals are removed after the iteration has
   * finished, if {@code removeOriginal} is set.
   *
   * @param jCas the document being processed
   */
  private void peopleOfLocation(JCas jCas) {
    String text = jCas.getDocumentText();
    List<Location> toRemove = new ArrayList<>();

    for (Location loc : JCasUtil.select(jCas, Location.class)) {
      if (isPrecededBy(text, loc.getBegin(), PEOPLE_OF_PREFIX)) {
        // Extend the span backwards so that it also covers the "people of " prefix
        Organisation o =
            new Organisation(jCas, loc.getBegin() - PEOPLE_OF_PREFIX.length(), loc.getEnd());
        addToJCasIndex(o);

        if (removeOriginal) {
          toRemove.add(loc);
        }
      }
    }

    removeFromJCasIndex(toRemove);
  }

  /**
   * Adds an Organisation covering the Nationality plus " people" for every Nationality that is
   * immediately followed by that text. Matching originals are removed after the iteration has
   * finished, if {@code removeOriginal} is set.
   *
   * @param jCas the document being processed
   */
  private void nationalityPeople(JCas jCas) {
    String text = jCas.getDocumentText();
    List<Nationality> toRemove = new ArrayList<>();

    for (Nationality nat : JCasUtil.select(jCas, Nationality.class)) {
      if (isFollowedBy(text, nat.getEnd(), PEOPLE_SUFFIX)) {
        // Extend the span forwards so that it also covers the " people" suffix
        Organisation o =
            new Organisation(jCas, nat.getBegin(), nat.getEnd() + PEOPLE_SUFFIX.length());
        addToJCasIndex(o);

        if (removeOriginal) {
          toRemove.add(nat);
        }
      }
    }

    removeFromJCasIndex(toRemove);
  }

  /**
   * Adds an Organisation covering the Quantity plus " people" for every Quantity that is
   * immediately followed by that text. Matching originals are removed after the iteration has
   * finished, if {@code removeOriginal} is set.
   *
   * @param jCas the document being processed
   */
  private void quantityPeople(JCas jCas) {
    String text = jCas.getDocumentText();
    List<Quantity> toRemove = new ArrayList<>();

    for (Quantity quant : JCasUtil.select(jCas, Quantity.class)) {
      if (isFollowedBy(text, quant.getEnd(), PEOPLE_SUFFIX)) {
        // Extend the span forwards so that it also covers the " people" suffix
        Organisation o =
            new Organisation(jCas, quant.getBegin(), quant.getEnd() + PEOPLE_SUFFIX.length());
        addToJCasIndex(o);

        if (removeOriginal) {
          toRemove.add(quant);
        }
      }
    }

    removeFromJCasIndex(toRemove);
  }

  /**
   * Tests whether {@code prefix} occurs in {@code text} directly before offset {@code begin},
   * ignoring case.
   *
   * <p>Only the characters of the candidate region are inspected, so no copy of the document text
   * is made. A null text, or a region that would fall outside the text, yields {@code false}.
   *
   * <p>NOTE(review): no word-boundary check is made, so e.g. "townspeople of X" also matches; this
   * mirrors the previous behaviour.
   *
   * @param text the document text, may be null
   * @param begin offset at which the annotation starts
   * @param prefix the text expected to end at {@code begin}
   * @return true if the text immediately before {@code begin} equals {@code prefix}, ignoring case
   */
  private static boolean isPrecededBy(String text, int begin, String prefix) {
    return text != null
        && text.regionMatches(true, begin - prefix.length(), prefix, 0, prefix.length());
  }

  /**
   * Tests whether {@code suffix} occurs in {@code text} starting at offset {@code end}, ignoring
   * case.
   *
   * <p>Only the characters of the candidate region are inspected, so no copy of the document text
   * is made. A null text, or a region that would fall outside the text, yields {@code false}.
   *
   * <p>NOTE(review): no word-boundary check is made, so e.g. "X peoples" also matches; this
   * mirrors the previous behaviour.
   *
   * @param text the document text, may be null
   * @param end offset at which the annotation ends
   * @param suffix the text expected to start at {@code end}
   * @return true if the text starting at {@code end} begins with {@code suffix}, ignoring case
   */
  private static boolean isFollowedBy(String text, int end, String suffix) {
    return text != null && text.regionMatches(true, end, suffix, 0, suffix.length());
  }

  /**
   * Builds the action descriptor for this annotator from two type sets: Location, Nationality and
   * Quantity (the types this annotator reads) and Organisation (the type it creates).
   *
   * @return the action descriptor for this annotator
   */
  @Override
  public AnalysisEngineAction getAction() {
    return new AnalysisEngineAction(
        ImmutableSet.of(Location.class, Nationality.class, Quantity.class),
        ImmutableSet.of(Organisation.class));
  }
}