package edu.isistan.uima.unified.analysisengines.srl;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.SubProgressMonitor;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ExternalResource;
import edu.isistan.uima.unified.analysisengines.AnnotationGenerator;
import edu.isistan.uima.unified.sharedresources.ProgressMonitorResource;
import edu.isistan.uima.unified.typesystems.nlp.SDDependency;
import edu.isistan.uima.unified.typesystems.nlp.Sentence;
import edu.isistan.uima.unified.typesystems.nlp.Token;
import edu.isistan.uima.unified.typesystems.srl.Role;
import edu.isistan.uima.unified.utils.IteratorUtil;
public class SDSRLAnnotator extends JCasAnnotator_ImplBase {
@ExternalResource(key="monitor")
private ProgressMonitorResource monitorResource;
private IProgressMonitor subMonitor;
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
super.initialize(aContext);
}
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
//
subMonitor = new SubProgressMonitor(monitorResource.getMonitor(), 1, SubProgressMonitor.PREPEND_MAIN_LABEL_TO_SUBTASK);
subMonitor.subTask("Annotating SRL-like structures and roles (Custom)");
//
AnnotationIndex<Annotation> sAnnotations = aJCas.getAnnotationIndex(Sentence.type);
AnnotationIndex<Annotation> dAnnotations = aJCas.getAnnotationIndex(SDDependency.type);
//
subMonitor.beginTask(this.getClass().getSimpleName(), sAnnotations.size());
//
for(Annotation sAnnotation : sAnnotations) {
FSIterator<Annotation> depIterator = dAnnotations.subiterator(sAnnotation);
//Create an iterator for all SUBJ / NSUBJ / AGENT
FSIterator<Annotation> subjectIterator = IteratorUtil.getStringFilteredIterator(aJCas, depIterator.copy(), SDDependency.type, "relation", new String[] { "SUBJ", "NSUBJ", "AGENT" });
//Create an iterator for all DOBJ
FSIterator<Annotation> dobjectIterator = IteratorUtil.getStringFilteredIterator(aJCas, depIterator.copy(), SDDependency.type, "relation", new String[] { "DOBJ" });
//Create an iterator for all NSUBJPASS
FSIterator<Annotation> nsubjIterator = IteratorUtil.getStringFilteredIterator(aJCas, depIterator.copy(), SDDependency.type, "relation", new String[] { "NSUBJPASS" });
//Create an iterator for all IOBJ
FSIterator<Annotation> iobjectIterator = IteratorUtil.getStringFilteredIterator(aJCas, depIterator.copy(), SDDependency.type, "relation", new String[] { "IOBJ" });
//Create an iterator for all PREP
FSIterator<Annotation> prepIterator = IteratorUtil.getStringFilteredIterator(aJCas, depIterator.copy(), SDDependency.type, "relation", new String[] { "PREP" });
//Create an iterator for all CONJ
FSIterator<Annotation> conjunctionIterator = IteratorUtil.getStringFilteredIterator(aJCas, depIterator.copy(), SDDependency.type, "relation", new String[] { "CONJ" });
//Create an iterator for all NN
FSIterator<Annotation> nounCompoundIterator = IteratorUtil.getStringFilteredIterator(aJCas, depIterator.copy(), SDDependency.type, "relation", new String[] { "NN" });
Role verbRole = null;
Role subjectRole = null;
Role directObjectRole = null;
Role indirectObjectRole = null;
List<Token> visitedTokens = new ArrayList<Token>();
while(subjectIterator.hasNext()) {
verbRole = null;
subjectRole = null;
directObjectRole = null;
indirectObjectRole = null;
//Recording data for annotation
List<String> verbDescriptions = new ArrayList<String>();
List<List<Token>> verbOcurrencies = new ArrayList<List<Token>>();
List<String> directObjectDescriptions = new ArrayList<String>();
List<List<Token>> directObjectOcurrencies = new ArrayList<List<Token>>();
List<String> subjectDescriptions = new ArrayList<String>();
List<List<Token>> subjectOcurrencies = new ArrayList<List<Token>>();
List<String> indirectObjectDescriptions = new ArrayList<String>();
List<List<Token>> indirectObjectOcurrencies = new ArrayList<List<Token>>();
Annotation subjectAnnotation = subjectIterator.next();
SDDependency subjectDependency = (SDDependency) subjectAnnotation;
//Getting verb
Token verb = subjectDependency.getGov();
if(!visitedTokens.contains(verb)) {
List<Token> verbs = new ArrayList<Token>();
List<Token> subjects = new ArrayList<Token>();
List<Token> directObjects = new ArrayList<Token>();
List<Token> indirectObjects = new ArrayList<Token>();
//Finding related verbs
related(aJCas, verb, conjunctionIterator, verbDescriptions, verbOcurrencies, verbs);
//Generating verb role annotation
verbRole = AnnotationGenerator.generateRole(calculateBegin(verbOcurrencies), calculateEnd(verbOcurrencies), "VERB", verbDescriptions, verbOcurrencies, aJCas);
//Getting subject
Token subject = subjectDependency.getDep();
//Finding related subjects
related(aJCas, subject, conjunctionIterator, subjectDescriptions, subjectOcurrencies, subjects);
//Expanding subjects
expand(aJCas, subject, nounCompoundIterator, subjectDescriptions, subjectOcurrencies, subjects);
//Generating subject role annotation
subjectRole = AnnotationGenerator.generateRole(calculateBegin(subjectOcurrencies), calculateEnd(subjectOcurrencies), "SUBJECT", subjectDescriptions, subjectOcurrencies, aJCas);
for(Token potentialVerb : verbs) {
boolean dObjectAsSubjPassive = false;
//Finding direct object
FSIterator<Annotation> directObjectIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, dobjectIterator.copy(), SDDependency.type, "gov", potentialVerb);
//Search direct object as a passive subject
if(!directObjectIterator.hasNext()) {
directObjectIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, nsubjIterator.copy(), SDDependency.type, "gov", potentialVerb);
if(directObjectIterator.hasNext())
dObjectAsSubjPassive = true;
}
//It should be at maximum one direct object dependency
if(directObjectIterator.hasNext()) {
Annotation doAnnotation = directObjectIterator.next();
SDDependency doDependency = (SDDependency) doAnnotation;
//Getting direct object
Token directObject = doDependency.getDep();
//Finding related direct objects
related(aJCas, directObject, conjunctionIterator, directObjectDescriptions, directObjectOcurrencies, directObjects);
//Expanding direct objects
expand(aJCas, directObject, nounCompoundIterator, directObjectDescriptions, directObjectOcurrencies, directObjects);
//Generating direct object role annotation
directObjectRole = AnnotationGenerator.generateRole(calculateBegin(directObjectOcurrencies), calculateEnd(directObjectOcurrencies), "DIRECTOBJECT", directObjectDescriptions, directObjectOcurrencies, aJCas);
}
//Finding indirect object
FSIterator<Annotation> indirectObjectIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, iobjectIterator.copy(), SDDependency.type, "gov", potentialVerb);
//Search indirect object as a preposition object
if(!indirectObjectIterator.hasNext())
indirectObjectIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, prepIterator.copy(), SDDependency.type, "gov", potentialVerb);
//Search indirect object as a passive subject
if(!dObjectAsSubjPassive && !indirectObjectIterator.hasNext())
indirectObjectIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, nsubjIterator.copy(), SDDependency.type, "gov", potentialVerb);
//It should be at maximum one indirect object dependency
if(indirectObjectIterator.hasNext()) {
Annotation ioAnnotation = indirectObjectIterator.next();
SDDependency ioDependency = (SDDependency) ioAnnotation;
//Getting indirect object
Token indirectObject = ioDependency.getDep();
//Finding related indirect objects
related(aJCas, indirectObject, conjunctionIterator, indirectObjectDescriptions, indirectObjectOcurrencies, indirectObjects);
//Expanding indirect objects
expand(aJCas, indirectObject, nounCompoundIterator, indirectObjectDescriptions, indirectObjectOcurrencies, indirectObjects);
//Generating indirect object role annotation
indirectObjectRole = AnnotationGenerator.generateRole(calculateBegin(indirectObjectOcurrencies), calculateEnd(indirectObjectOcurrencies), "INDIRECTOBJECT", indirectObjectDescriptions, indirectObjectOcurrencies, aJCas);
}
}
//Calculate begin and end positions
int begin = calculateBegin(subjectOcurrencies, verbOcurrencies, directObjectOcurrencies, indirectObjectOcurrencies);
int end = calculateEnd(subjectOcurrencies, verbOcurrencies, directObjectOcurrencies, indirectObjectOcurrencies);
AnnotationGenerator.generateStructure(begin, end, subjectRole, verbRole, directObjectRole, indirectObjectRole, aJCas);
//Adding to visited list
for(Token visitedVerb : verbs)
visitedTokens.add(visitedVerb);
}
}
while(dobjectIterator.hasNext()) {
verbRole = null;
subjectRole = null;
directObjectRole = null;
indirectObjectRole = null;
Annotation objectAnnotation = dobjectIterator.next();
SDDependency objectDependency = (SDDependency) objectAnnotation;
//Getting verb
Token verb = objectDependency.getGov();
if(!visitedTokens.contains(verb)) {
List<Token> verbs = new ArrayList<Token>();
List<Token> subjects = new ArrayList<Token>();
List<Token> directObjects = new ArrayList<Token>();
List<Token> indirectObjects = new ArrayList<Token>();
//Recording data for annotation
List<String> verbDescriptions = new ArrayList<String>();
List<List<Token>> verbOcurrencies = new ArrayList<List<Token>>();
List<String> directObjectDescriptions = new ArrayList<String>();
List<List<Token>> directObjectOcurrencies = new ArrayList<List<Token>>();
List<String> subjectDescriptions = new ArrayList<String>();
List<List<Token>> subjectOcurrencies = new ArrayList<List<Token>>();
List<String> indirectObjectDescriptions = new ArrayList<String>();
List<List<Token>> indirectObjectOcurrencies = new ArrayList<List<Token>>();
//Finding related verbs
related(aJCas, verb, conjunctionIterator, verbDescriptions, verbOcurrencies, verbs);
//Generating verb role annotation
verbRole = AnnotationGenerator.generateRole(calculateBegin(verbOcurrencies), calculateEnd(verbOcurrencies), "VERB", verbDescriptions, verbOcurrencies, aJCas);
//Getting direct object
Token directObject = objectDependency.getDep();
//Finding related direct objects
related(aJCas, directObject, conjunctionIterator, directObjectDescriptions, directObjectOcurrencies, directObjects);
//Expanding direct objects
expand(aJCas, directObject, nounCompoundIterator, directObjectDescriptions, directObjectOcurrencies, directObjects);
//Generating direct object role annotation
directObjectRole = AnnotationGenerator.generateRole(calculateBegin(directObjectOcurrencies), calculateEnd(directObjectOcurrencies), "DIRECTOBJECT", directObjectDescriptions, directObjectOcurrencies, aJCas);
for(Token potentialVerb : verbs) {
//Finding subjects (there shouldn't be any)
FSIterator<Annotation> subjIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, subjectIterator.copy(), SDDependency.type, "gov", potentialVerb);
//It should be at maximum one subject dependency
if(subjIterator.hasNext()) {
Annotation doAnnotation = subjIterator.next();
SDDependency doDependency = (SDDependency) doAnnotation;
//Getting subject
Token subject = doDependency.getDep();
//Finding related subjects
related(aJCas, subject, conjunctionIterator, subjectDescriptions, subjectOcurrencies, subjects);
//Expanding subjects
expand(aJCas, subject, nounCompoundIterator, subjectDescriptions, subjectOcurrencies, subjects);
//Generating subject role annotation
subjectRole = AnnotationGenerator.generateRole(calculateBegin(subjectOcurrencies), calculateEnd(subjectOcurrencies), "SUBJECT", subjectDescriptions, subjectOcurrencies, aJCas);
}
//Finding indirect object
FSIterator<Annotation> indirectObjectIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, iobjectIterator.copy(), SDDependency.type, "gov", potentialVerb);
//Search indirect object as a preposition object
if(!indirectObjectIterator.hasNext())
indirectObjectIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, prepIterator.copy(), SDDependency.type, "gov", potentialVerb);
//Search indirect object as a passive subject object
if(!indirectObjectIterator.hasNext())
indirectObjectIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, nsubjIterator.copy(), SDDependency.type, "gov", potentialVerb);
//It should be at maximum one indirect object dependency
if(indirectObjectIterator.hasNext()) {
Annotation ioAnnotation = indirectObjectIterator.next();
SDDependency ioDependency = (SDDependency) ioAnnotation;
//Getting indirect object
Token indirectObject = ioDependency.getDep();
//Finding related indirect objects
related(aJCas, indirectObject, conjunctionIterator, indirectObjectDescriptions, indirectObjectOcurrencies, indirectObjects);
//Expanding indirect objects
expand(aJCas, indirectObject, nounCompoundIterator, indirectObjectDescriptions, indirectObjectOcurrencies, indirectObjects);
//Generating indirect object role annotation
indirectObjectRole = AnnotationGenerator.generateRole(calculateBegin(indirectObjectOcurrencies), calculateEnd(indirectObjectOcurrencies), "INDIRECTOBJECT", indirectObjectDescriptions, indirectObjectOcurrencies, aJCas);
}
}
//Calculate begin and end positions
int begin = calculateBegin(subjectOcurrencies, verbOcurrencies, directObjectOcurrencies, indirectObjectOcurrencies);
int end = calculateEnd(subjectOcurrencies, verbOcurrencies, directObjectOcurrencies, indirectObjectOcurrencies);
AnnotationGenerator.generateStructure(begin, end, subjectRole, verbRole, directObjectRole, indirectObjectRole, aJCas);
//Adding to visited list
for(Token visitedVerb : verbs)
visitedTokens.add(visitedVerb);
}
}
//
subMonitor.worked(1);
}
//
subMonitor.done();
}
@Override
public void destroy() {
super.destroy();
}
private void related(JCas aJCas, Token token, FSIterator<Annotation> conjunctionIterator, List<String> descriptions, List<List<Token>> ocurrencies, List<Token> tokens) {
descriptions.add(token.getCoveredText());
List<Token> rootOcurrencies = new ArrayList<Token>();
rootOcurrencies.add(token);
ocurrencies.add(rootOcurrencies);
tokens.add(token);
FSIterator<Annotation> relatedIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, conjunctionIterator.copy(), SDDependency.type, "gov", token);
while(relatedIterator.hasNext()) {
Annotation relatedAnnotation = relatedIterator.next();
SDDependency relatedDependency = (SDDependency) relatedAnnotation;
Token related = relatedDependency.getDep();
List<Token> relatedOcurrencies = new ArrayList<Token>();
descriptions.add(related.getCoveredText());
relatedOcurrencies.add(related);
ocurrencies.add(relatedOcurrencies);
tokens.add(related);
}
}
private void expand(JCas aJCas, Token token, FSIterator<Annotation> nounCompoundIterator, List<String> descriptions, List<List<Token>> ocurrencies, List<Token> tokens) {
for (int i = 0; i < ocurrencies.size(); i++) {
List<Token> tokenOcurrencies = ocurrencies.get(i);
//To expand, it should be of size one
if(tokenOcurrencies.size() == 1) {
Token tokenToExpand = tokenOcurrencies.get(0);
FSIterator<Annotation> compoundIterator = IteratorUtil.getFeatureFilteredIterator(aJCas, nounCompoundIterator.copy(), SDDependency.type, "gov", tokenToExpand);
while(compoundIterator.hasNext()) {
Annotation compoundAnnotation = compoundIterator.next();
SDDependency compoundDependency = (SDDependency) compoundAnnotation;
Token expand = compoundDependency.getDep();
String description = descriptions.get(i);
description = expand.getCoveredText() + " " + description;
//descriptions.remove(i);
descriptions.set(i, description);
tokenOcurrencies.add(tokenOcurrencies.size(), expand);
//tokens.add(expand);
}
}
}
}
private int calculateBegin(
List<List<Token>> subjectOcurrencies,
List<List<Token>> verbOcurrencies,
List<List<Token>> directObjectOcurrencies,
List<List<Token>> indirectObjectOcurrencies) {
int begin = -1;
begin = calculateBegin(begin, subjectOcurrencies);
begin = calculateBegin(begin, verbOcurrencies);
begin = calculateBegin(begin, directObjectOcurrencies);
begin = calculateBegin(begin, indirectObjectOcurrencies);
return begin;
}
private int calculateBegin(List<List<Token>> ocurrencies) {
return calculateBegin(-1, ocurrencies);
}
private int calculateBegin(int begin, List<List<Token>> ocurrencies) {
if(ocurrencies != null)
for(List<Token> ocurrency : ocurrencies)
for(Token token : ocurrency)
if(begin == -1 || begin > token.getBegin())
begin = token.getBegin();
return begin;
}
private int calculateEnd(
List<List<Token>> subjectOcurrencies,
List<List<Token>> verbOcurrencies,
List<List<Token>> directObjectOcurrencies,
List<List<Token>> indirectObjectOcurrencies) {
int end = -1;
end = calculateEnd(end, subjectOcurrencies);
end = calculateEnd(end, verbOcurrencies);
end = calculateEnd(end, directObjectOcurrencies);
end = calculateEnd(end, indirectObjectOcurrencies);
return end;
}
private int calculateEnd(List<List<Token>> ocurrencies) {
return calculateEnd(-1, ocurrencies);
}
private int calculateEnd(int end, List<List<Token>> ocurrencies) {
if(ocurrencies != null)
for(List<Token> ocurrency : ocurrencies)
for(Token token : ocurrency)
if(end == -1 || end < token.getEnd())
end = token.getEnd();
return end;
}
}