package de.berlin.hu.uima.ae.filter;
import de.berlin.hu.util.Constants;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.jcas.JCas;
import org.u_compare.shared.semantic.NamedEntity;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Annotator that removes {@link NamedEntity} annotations whose covered text ends with one of
 * the suffixes in {@link #INVALID_SUFFIXES} (currently only {@code "ase"}).
 *
 * <p>Entities whose source equals {@link Constants#GOLDSTANDARD} are never removed.
 */
public class SuffixFilter extends JCasAnnotator_ImplBase {
    /** Suffixes that mark an entity as invalid. Entries may have any (non-zero) length. */
    private static final String[] INVALID_SUFFIXES = {
        "ase"
    };

    /**
     * Running total of entities removed by this instance across all calls to
     * {@link #process(JCas)}. It is only incremented here and not exposed.
     */
    private int numberOfFilteredEntities = 0;

    /**
     * Removes every non-gold-standard {@link NamedEntity} whose covered text ends with an
     * invalid suffix from the indexes of the given CAS.
     *
     * @param aJCas the CAS whose {@link NamedEntity} annotations are filtered
     * @throws AnalysisEngineProcessException declared by the overridden method; not thrown
     *         directly by this implementation
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        FSIndex chemicalIndex = aJCas.getAnnotationIndex(NamedEntity.type);
        Iterator<?> chemicalIterator = chemicalIndex.iterator();

        // Kept local so that removed annotations are not referenced after this call returns.
        List<NamedEntity> invalidChemicals = new ArrayList<NamedEntity>();

        while (chemicalIterator.hasNext()) {
            NamedEntity chemical = (NamedEntity) chemicalIterator.next();
            if (Constants.GOLDSTANDARD.equals(chemical.getSource())) {
                // Gold-standard annotations are always kept.
                continue;
            }
            if (isInvalid(chemical.getCoveredText())) {
                invalidChemicals.add(chemical);
                numberOfFilteredEntities++;
            }
        }

        // Removal is deferred until iteration has finished so that the index is not
        // modified while it is being traversed.
        for (NamedEntity invalidChemical : invalidChemicals) {
            invalidChemical.removeFromIndexes();
        }
    }

    /**
     * Tells whether the given entity text ends with one of the invalid suffixes.
     *
     * @param entity the covered text of an entity; may be {@code null}
     * @return {@code true} if {@code entity} ends with an entry of {@link #INVALID_SUFFIXES},
     *         {@code false} otherwise (including for {@code null})
     */
    private boolean isInvalid(String entity) {
        if (entity == null) {
            return false;
        }
        for (String suffix : INVALID_SUFFIXES) {
            // endsWith also covers entities shorter than the suffix (returns false) and
            // suffixes of any length, so no manual substring extraction is needed.
            if (entity.endsWith(suffix)) {
                return true;
            }
        }
        return false;
    }
}