package de.berlin.hu.uima.ae.filter;
import de.berlin.hu.util.Constants;
import opennlp.uima.Token;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.jcas.JCas;
import org.u_compare.shared.semantic.NamedEntity;
import java.util.*;
/**
 * Removes chemical {@link NamedEntity} annotations whose dominant part-of-speech tag is
 * {@code FW} or {@code LS}.
 *
 * <p>NOTE(review): {@code FW} and {@code LS} are presumably the Penn Treebank tags for
 * "foreign word" and "list item marker" -- confirm against the tagger model in use.
 *
 * <p>Entities whose source equals {@link Constants#GOLDSTANDARD} are never removed.
 */
public class PosFilter extends JCasAnnotator_ImplBase {

    /** POS tags that cause an entity to be removed. */
    private static final Set<String> INVALID_POS_TAGS =
            Collections.unmodifiableSet(new HashSet<String>(Arrays.asList("FW", "LS")));

    /**
     * POS tag for every character offset of the document processed last. Offsets that are not
     * covered by any token hold {@code null}. The field itself is {@code null} until
     * {@link #process(JCas)} has been called.
     */
    private String[] posTags;

    /** Number of entities removed so far; accumulated over all {@link #process(JCas)} calls. */
    private int numberOfFilteredEntities = 0;

    /**
     * Builds the per-character POS tag table from the {@link Token} annotations and removes every
     * non-gold-standard {@link NamedEntity} for which {@link #isInvalid(int, int)} is true.
     * Afterwards the running total of removed entities is printed to standard output.
     *
     * @param aJCas the CAS whose entity annotations are filtered in place
     * @throws AnalysisEngineProcessException declared by the overridden method; not thrown by
     *         the code in this method itself
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        FSIndex chemicalIndex = aJCas.getAnnotationIndex(NamedEntity.type);
        FSIndex tokenIndex = aJCas.getAnnotationIndex(Token.type);

        // A CAS without document text yields an empty table instead of a NullPointerException.
        String documentText = aJCas.getDocumentText();
        posTags = new String[documentText == null ? 0 : documentText.length()];

        Iterator tokenIterator = tokenIndex.iterator();
        while (tokenIterator.hasNext()) {
            Token token = (Token) tokenIterator.next();
            // Clamp to the table so a token with stray offsets cannot abort the document.
            int from = Math.max(0, token.getBegin());
            int to = Math.min(posTags.length, token.getEnd());
            if (from < to) {
                Arrays.fill(posTags, from, to, token.getPos());
            }
        }

        List<NamedEntity> invalidChemicals = new ArrayList<NamedEntity>();
        Iterator chemicalIterator = chemicalIndex.iterator();
        while (chemicalIterator.hasNext()) {
            NamedEntity chemical = (NamedEntity) chemicalIterator.next();
            if (Constants.GOLDSTANDARD.equals(chemical.getSource())) {
                continue;
            }
            if (isInvalid(chemical.getBegin(), chemical.getEnd())) {
                invalidChemicals.add(chemical);
            }
        }

        // Removal is deferred until iteration has finished so the index is not modified
        // while it is being iterated.
        for (NamedEntity invalidChemical : invalidChemicals) {
            invalidChemical.removeFromIndexes();
        }
        numberOfFilteredEntities += invalidChemicals.size();

        System.out.println("PosFilter: " + numberOfFilteredEntities);
    }

    /**
     * Returns the POS tag that covers the most characters in {@code [begin, end)} of the document
     * processed last.
     *
     * <p>Characters not covered by any token count towards a {@code null} tag, so {@code null} is
     * returned when untagged characters dominate. When several tags share the highest count, the
     * one that comes first in the iteration order of the internal {@link HashMap} is returned.
     *
     * @param begin inclusive start offset; values below 0 are treated as 0
     * @param end exclusive end offset; values beyond the document length are clamped to it
     * @return the dominant tag, or {@code null} if the (clamped) span is empty, if no document
     *         has been processed yet, or if most characters of the span are untagged
     */
    public String getTag(int begin, int end) {
        if (posTags == null) {
            return null;
        }
        int from = Math.max(0, begin);
        int to = Math.min(posTags.length, end);

        Map<String, Integer> tagCounts = new HashMap<String, Integer>();
        for (int i = from; i < to; i++) {
            String posTag = posTags[i];
            Integer count = tagCounts.get(posTag);
            tagCounts.put(posTag, count == null ? 1 : count + 1);
        }

        // Strict '>' keeps the first maximal entry in iteration order, which is the element a
        // stable descending sort of the entry list would place first.
        String dominantTag = null;
        int dominantCount = 0;
        for (Map.Entry<String, Integer> entry : tagCounts.entrySet()) {
            if (entry.getValue() > dominantCount) {
                dominantCount = entry.getValue();
                dominantTag = entry.getKey();
            }
        }
        return dominantTag;
    }

    /**
     * Tells whether the dominant POS tag of {@code [begin, end)} is one of the filtered tags.
     *
     * @param begin inclusive start offset of the span
     * @param end exclusive end offset of the span
     * @return {@code true} if {@link #getTag(int, int)} yields {@code FW} or {@code LS};
     *         {@code false} otherwise, including when no tag could be determined
     */
    public boolean isInvalid(int begin, int end) {
        return INVALID_POS_TAGS.contains(getTag(begin, end));
    }
}