package edu.isistan.uima.unified.analysisengines.srl;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.zip.ZipFile;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.SubProgressMonitor;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.descriptor.ExternalResource;
import se.lth.cs.srl.corpus.Word;
import se.lth.cs.srl.corpus.Yield;
import se.lth.cs.srl.languages.Language;
import se.lth.cs.srl.languages.Language.L;
import se.lth.cs.srl.pipeline.Pipeline;
import edu.isistan.uima.unified.analysisengines.AnnotationGenerator;
import edu.isistan.uima.unified.sharedresources.ProgressMonitorResource;
import edu.isistan.uima.unified.typesystems.nlp.CoNLLDependency;
import edu.isistan.uima.unified.typesystems.nlp.Sentence;
import edu.isistan.uima.unified.typesystems.nlp.Token;
import edu.isistan.uima.unified.typesystems.srl.Argument;
public class CoNLLSRLAnnotator extends JCasAnnotator_ImplBase {
@ConfigurationParameter(name="model")
private String modelName;
@ConfigurationParameter(name="propbank")
private String propBankName;
@ConfigurationParameter(name="nombank")
private String nomBankName;
//
private ZipFile zipFile;
private Pipeline srl;
//
private CoNLLSRLAnnotatorHelper helper;
//
@ExternalResource(key="monitor")
private ProgressMonitorResource monitorResource;
private IProgressMonitor subMonitor;
//
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
super.initialize(aContext);
try {
//modelName = (String) aContext.getConfigParameterValue("model");
Language.setLanguage(L.eng);
zipFile = new ZipFile(new File(modelName));
srl = Pipeline.fromZipFile(zipFile);
//srl=Pipeline.fromZipFile(zipFile, new Step[] {Step.pd, Step.ai, Step.ac} );
//propBankName = (String) aContext.getConfigParameterValue("propbank");
//nomBankName = (String) aContext.getConfigParameterValue("nombank");
helper = new CoNLLSRLAnnotatorHelper(propBankName, nomBankName);
}
catch (Exception e) {
e.printStackTrace();
}
finally {
try {
zipFile.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
if(srl == null)
return;
//
subMonitor = new SubProgressMonitor(monitorResource.getMonitor(), 1, SubProgressMonitor.PREPEND_MAIN_LABEL_TO_SUBTASK);
subMonitor.subTask("Annotating SRL predicates and arguments (Matetools)");
//
//String docText = aJCas.getDocumentText();
AnnotationIndex<Annotation> sAnnotations = aJCas.getAnnotationIndex(Sentence.type);
AnnotationIndex<Annotation> tAnnotations = aJCas.getAnnotationIndex(Token.type);
AnnotationIndex<Annotation> dAnnotations = aJCas.getAnnotationIndex(CoNLLDependency.type);
//
subMonitor.beginTask(this.getClass().getSimpleName(), sAnnotations.size());
//
for(Annotation sAnnotation : sAnnotations) {
//Sentence sentenceAnnotation = (Sentence) sAnnotation;
//String sentence = sAnnotation.getCoveredText();
Iterator<Annotation> tokenIterator = tAnnotations.subiterator(sAnnotation);
List<Token> tokenList = new LinkedList<Token>();
while(tokenIterator.hasNext()) {
Annotation tAnnotation = tokenIterator.next();
tokenList.add((Token)tAnnotation);
}
Iterator<Annotation> dependencyIterator = dAnnotations.subiterator(sAnnotation);
List<CoNLLDependency> dependencyList = new LinkedList<CoNLLDependency>();
while(dependencyIterator.hasNext()) {
Annotation dAnnotation = dependencyIterator.next();
dependencyList.add((CoNLLDependency)dAnnotation);
}
Token[] tokenAnnotations = new Token[tokenList.size()];
for(int i = 0; i < tokenList.size(); i++)
tokenAnnotations[i] = tokenList.get(i);
String[] tokensArray = new String[tokenAnnotations.length];
for(int i = 0; i < tokenAnnotations.length; i++)
tokensArray[i] = tokenAnnotations[i].getCoveredText();
String[] lemmasArray = new String[tokenAnnotations.length];
for(int i = 0; i < tokenAnnotations.length; i++)
lemmasArray[i] = tokenAnnotations[i].getLemma();
String[] posArray = new String[tokenAnnotations.length];
for(int i = 0; i < tokenAnnotations.length; i++)
posArray[i] = tokenAnnotations[i].getPos();
String[] morphArray = new String[tokenAnnotations.length];
for(int i = 0; i < tokenAnnotations.length; i++)
morphArray[i] = tokenAnnotations[i].getMorph();
int[] headArray = new int[tokenAnnotations.length];
String[] labelArray = new String[tokenAnnotations.length];
for(int i = 0; i < dependencyList.size(); i++) {
CoNLLDependency dependency = dependencyList.get(i);
Token source = dependency.getSource();
Token target = dependency.getTarget();
int sourceIndex;
int targetIndex= tokenList.indexOf(target);
int index = targetIndex;
int head;
String label = dependency.getRelation();
if(source != null) {
sourceIndex = tokenList.indexOf(source);
head = sourceIndex + 1;
}
else {
sourceIndex = -1;
head = 0;
}
headArray[index] = head;
labelArray[index] = label;
}
ArrayList<String> forms = new ArrayList<String>();
forms.add("<ROOT>");
for(int i = 0; i < tokensArray.length; i++)
forms.add(tokensArray[i]);
ArrayList<String> lemmas = new ArrayList<String>();
lemmas.add("<ROOT>");
for(int i = 0; i < lemmasArray.length; i++)
lemmas.add(lemmasArray[i]);
ArrayList<String> poss = new ArrayList<String>();
poss.add("<ROOT-POS>");
for(int i = 0; i < posArray.length; i++)
poss.add(posArray[i]);
ArrayList<String> morphs = new ArrayList<String>();
morphs.add("<ROOT-PFEAT>");
for(int i = 0; i < morphArray.length; i++)
morphs.add(morphArray[i]);
ArrayList<Integer> heads = new ArrayList<Integer>();
heads.add(-1);
for(int i = 0; i < headArray.length; i++)
heads.add(headArray[i]);
ArrayList<String> labels = new ArrayList<String>();
labels.add("NONE");
for(int i = 0; i < labelArray.length; i++)
labels.add(labelArray[i]);
String[] formsComplete = forms.toArray(new String[0]);
String[] lemmasComplete = lemmas.toArray(new String[0]);
String[] possComplete = poss.toArray(new String[0]);
String[] morphsComplete;
if(morphArray[0] != null && !morphArray[0].isEmpty())
morphsComplete = morphs.toArray(new String[0]);
else
morphsComplete = possComplete.clone();
int[] headsComplete = new int[heads.size()];
for(int i = 0; i < heads.size(); i++)
headsComplete[i] = heads.get(i).intValue();
String[] labelsComplete = labels.toArray(new String[0]);
se.lth.cs.srl.corpus.Sentence sentence;
sentence = new se.lth.cs.srl.corpus.Sentence(formsComplete, lemmasComplete, possComplete, morphsComplete, headsComplete, labelsComplete);
srl.parseSentence(sentence);
for(se.lth.cs.srl.corpus.Predicate predicate : sentence.getPredicates()) {
String label = predicate.getSense();
int rootIndex = sentence.indexOf(predicate) - 1;
Token root = tokenList.get(rootIndex);
String kind = predicate.getPOS().startsWith("V") ? "PROPBANK" : "NOMBANK";
String description = kind.equals("PROPBANK") ? helper.getPropbankPredicateDescription(label) : helper.getNombankPredicateDescription(label);
boolean passiveVoice = predicate.isPassiveVoiceEng();
List<Argument> arguments = new LinkedList<Argument>();
SortedSet<Yield> yields = new TreeSet<Yield>();
Map<Word, String> argumentMap = predicate.getArgMap();
for(Word argument : argumentMap.keySet())
yields.addAll(argument.getYield(predicate, argumentMap.get(argument), argumentMap.keySet()).explode());
for(Yield yield : yields) {
String argumentLabel = yield.getArgLabel();
String argumentDescription = kind.equals("PROPBANK") ? helper.getPropbankArgumentDescription(label, argumentLabel) : helper.getNombankArgumentDescription(label, argumentLabel);
Word first = yield.first();
int firstIndex = sentence.indexOf(first);
Word last = yield.last();
int lastIndex = sentence.indexOf(last);
int argumentRootIndex = firstIndex - 1;
Token argumentRoot = tokenList.get(argumentRootIndex);
List<Token> yieldList = new LinkedList<Token>();
for (int index = firstIndex; index <= lastIndex; index++) {
Token yieldToken = tokenList.get(index - 1);
yieldList.add(yieldToken);
}
int yieldBegin;
int yieldEnd;
if(yield.isContinuous()) {
yieldBegin = tokenList.get(firstIndex - 1).getBegin();
yieldEnd = tokenList.get(lastIndex - 1).getEnd();
}
else {
yieldBegin = argumentRoot.getBegin();
yieldEnd = argumentRoot.getEnd();
}
Argument argument = AnnotationGenerator.generateArgument(yieldBegin, yieldEnd, argumentLabel, argumentDescription, argumentRoot, yieldList, aJCas);
arguments.add(argument);
}
int begin = root.getBegin();
int end = root.getEnd();
AnnotationGenerator.generatePredicate(begin, end, label, description, root, kind, passiveVoice, arguments, aJCas);
}
//
subMonitor.worked(1);
}
//
subMonitor.done();
}
@Override
public void destroy() {
if(zipFile != null) {
try {
zipFile.close();
} catch (IOException e) {
e.printStackTrace();
}
zipFile = null;
}
if(srl != null)
srl = null;
super.destroy();
}
}