package edu.isistan.uima.unified.analysisengines.wsd;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import net.didion.jwnl.JWNLException;
import net.didion.jwnl.data.POS;
import net.didion.jwnl.data.Synset;
import net.didion.jwnl.dictionary.Dictionary;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.SubProgressMonitor;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.descriptor.ExternalResource;
import edu.isistan.uima.unified.algorithms.similarity.SimilarityMeasure;
import edu.isistan.uima.unified.analysisengines.wordnet.JWNLInitialization;
import edu.isistan.uima.unified.sharedresources.ProgressMonitorResource;
import edu.isistan.uima.unified.typesystems.nlp.Sentence;
import edu.isistan.uima.unified.typesystems.wordnet.Sense;
/**
 * Annotator that selects one sense (and its gloss) for every {@link Sense}
 * annotation found inside each {@link Sentence} of the CAS.
 * <p>
 * For a target word, every candidate sense is scored by summing, over all other
 * sense-annotated words of the same sentence, the highest similarity between
 * that candidate and any candidate sense of the other word. The candidate with
 * the highest total wins; on ties the first candidate is kept.
 * <p>
 * Initialization is best-effort: if the dictionary or the similarity measure
 * cannot be created, the failure is printed and {@link #process(JCas)} becomes
 * a no-op.
 */
public class BanerjeeWSDAnnotator extends JCasAnnotator_ImplBase {
	/** First argument handed to {@code JWNLInitialization.init}. */
	@ConfigurationParameter(name="jwnl")
	private String jwnlName;
	/** Second argument handed to {@code JWNLInitialization.init}. */
	@ConfigurationParameter(name="wordnet")
	private String wordnetName;
	/** Dictionary used to resolve sense identifiers into synsets; null when initialization failed. */
	protected Dictionary dictionary;
	/** Name of the similarity measure; "Rago", "Lesk", "Lin" and "JCn" are mapped to a simType. */
	@ConfigurationParameter(name="similarity")
	private String similarityName;
	/** Similarity measure used to compare two synsets; null when initialization failed. */
	protected SimilarityMeasure measure;
	/** Provides the parent progress monitor that per-CAS progress is reported to. */
	@ExternalResource(key="monitor")
	private ProgressMonitorResource monitorResource;
	/** Progress monitor of the CAS currently being processed. */
	private IProgressMonitor subMonitor;

	/**
	 * Obtains the dictionary (initializing JWNL first if nobody did yet) and
	 * builds the configured similarity measure.
	 * <p>
	 * Any failure is printed and swallowed on purpose; the fields that could not
	 * be set stay null and {@link #process(JCas)} checks for that.
	 *
	 * @param aContext the UIMA context, forwarded to the superclass
	 * @throws ResourceInitializationException if the superclass initialization fails
	 */
	@Override
	public void initialize(UimaContext aContext) throws ResourceInitializationException {
		super.initialize(aContext);
		try {
			if(!JWNLInitialization.isInit())
				JWNLInitialization.init(jwnlName, wordnetName);
			dictionary = JWNLInitialization.getDictionary();
			measure = SimilarityMeasure.newInstance(buildSimilarityParameters());
		} catch (Exception e) {
			// Best-effort: the annotator degrades to a no-op instead of failing the pipeline.
			e.printStackTrace();
		}
	}

	/**
	 * Builds the parameter map handed to {@code SimilarityMeasure.newInstance}.
	 * <p>
	 * "Lin" and "JCn" additionally receive the "infocontent" and "mapping" file
	 * locations, both built from the MODELS_PATH environment variable.
	 *
	 * @return the parameters for the configured similarity measure
	 */
	private Map<String, String> buildSimilarityParameters() {
		Map<String, String> params = new HashMap<String, String>();
		params.put("cache", "50000");
		if(similarityName.equals("Rago"))
			params.put("simType", "edu.isistan.uima.unified.algorithms.similarity.Rago");
		else if(similarityName.equals("Lesk"))
			params.put("simType", "edu.isistan.uima.unified.algorithms.similarity.Lesk");
		else if(similarityName.equals("Lin"))
			params.put("simType", "edu.isistan.uima.unified.algorithms.similarity.Lin");
		else if(similarityName.equals("JCn"))
			params.put("simType", "edu.isistan.uima.unified.algorithms.similarity.JCn");
		if(similarityName.equals("Lin") || similarityName.equals("JCn")) {
			String modelsPath = System.getenv("MODELS_PATH");
			params.put("infocontent", "file:" + modelsPath + "similarity/ic-bnc-resnik-add1.dat");
			params.put("mapping", "file:" + modelsPath + "similarity/domain_independent.txt");
		}
		return params;
	}

	/**
	 * Disambiguates the sense annotations of the CAS sentence by sentence,
	 * reporting one unit of work per sentence to the progress monitor.
	 * <p>
	 * Does nothing when the dictionary or the similarity measure is missing.
	 *
	 * @param aJCas the CAS holding the Sentence and Sense annotations
	 * @throws AnalysisEngineProcessException declared by the overridden method
	 */
	@Override
	public void process(JCas aJCas) throws AnalysisEngineProcessException {
		// initialize() swallows failures, so either collaborator may be absent.
		if(dictionary == null || measure == null)
			return;
		subMonitor = new SubProgressMonitor(monitorResource.getMonitor(), 1, SubProgressMonitor.PREPEND_MAIN_LABEL_TO_SUBTASK);
		subMonitor.subTask("Annotating word sense disambiguation (Banerjee)");
		AnnotationIndex<Annotation> sentenceAnnotations = aJCas.getAnnotationIndex(Sentence.type);
		AnnotationIndex<Annotation> senseAnnotations = aJCas.getAnnotationIndex(Sense.type);
		subMonitor.beginTask(this.getClass().getSimpleName(), sentenceAnnotations.size());
		try {
			for(Annotation sAnnotation : sentenceAnnotations) {
				// Gather the sense annotations covered by this sentence.
				Iterator<Annotation> senseIterator = senseAnnotations.subiterator(sAnnotation);
				List<Sense> senseList = new LinkedList<Sense>();
				while(senseIterator.hasNext())
					senseList.add((Sense) senseIterator.next());
				disambiguate(senseList);
				subMonitor.worked(1);
			}
		} finally {
			// Close the monitor even if disambiguation fails half-way.
			subMonitor.done();
		}
	}

	/**
	 * Closes the dictionary and drops the reference to it.
	 * <p>
	 * NOTE(review): the dictionary is obtained from JWNLInitialization and may be
	 * shared with other components - confirm that closing it here is safe.
	 */
	@Override
	public void destroy() {
		if(dictionary != null) {
			dictionary.close();
			dictionary = null;
		}
		super.destroy();
	}

	/**
	 * Chooses the best sense for every word of one sentence and stores it, with
	 * its gloss, on the annotation.
	 * <p>
	 * Words without candidate senses are left untouched. When the synset of the
	 * winning sense could not be resolved, the sense is still set but the gloss
	 * is not.
	 *
	 * @param senses the sense annotations of one sentence, in index order
	 */
	private void disambiguate(List<Sense> senses) {
		// Copy once: the list is accessed by position in nested loops.
		Sense[] words = senses.toArray(new Sense[senses.size()]);
		for(int wt = 0; wt < words.length; wt++) {
			Sense target = words[wt];
			StringArray targetSenses = target.getSenses();
			// Nothing to choose from; indexing the array below would fail.
			if(targetSenses == null || targetSenses.size() == 0)
				continue;
			int candidates = targetSenses.size();
			Synset[] targetSynsets = new Synset[candidates];
			double[] score = new double[candidates];
			for(int sti = 0; sti < candidates; sti++) {
				// Resolved once per candidate and reused for every context word.
				targetSynsets[sti] = lookupSynset(target, targetSenses.get(sti));
				score[sti] = scoreAgainstContext(targetSynsets[sti], words, wt);
			}
			// Strict comparison keeps the first candidate on ties.
			int bestsense = 0;
			for(int sti = 1; sti < candidates; sti++) {
				if(score[sti] > score[bestsense])
					bestsense = sti;
			}
			target.setSense(targetSenses.get(bestsense));
			Synset disambiguatedSynset = targetSynsets[bestsense];
			if(disambiguatedSynset != null)
				target.setGloss(disambiguatedSynset.getGloss());
		}
	}

	/**
	 * Scores one candidate synset of the target word against the rest of the
	 * sentence: for each other word the highest positive similarity to any of
	 * its candidate senses is added to the total.
	 * <p>
	 * Every context word contributes all of its candidate senses, even if it was
	 * already disambiguated earlier in the sentence.
	 *
	 * @param targetSynset the candidate synset, or null if it could not be resolved
	 * @param words all sense annotations of the sentence
	 * @param targetIndex position of the target word in {@code words}; it is skipped
	 * @return the accumulated score, 0 when {@code targetSynset} is null
	 */
	private double scoreAgainstContext(Synset targetSynset, Sense[] words, int targetIndex) {
		double total = 0;
		if(targetSynset == null)
			return total;
		for(int wj = 0; wj < words.length; wj++) {
			if(wj == targetIndex)
				continue;
			Sense context = words[wj];
			StringArray contextSenses = context.getSenses();
			if(contextSenses == null)
				continue;
			double maxscore = 0;
			for(int sjk = 0; sjk < contextSenses.size(); sjk++) {
				try {
					Synset contextSynset = dictionary.getSynsetAt(POS.getPOSForLabel(context.getPos().toLowerCase()), Long.parseLong(contextSenses.get(sjk)));
					double similarity = measure.getSimilarity(targetSynset, contextSynset);
					if(similarity > maxscore)
						maxscore = similarity;
				} catch (NumberFormatException e) {
					// A malformed sense identifier contributes nothing to the score.
					e.printStackTrace();
				} catch (JWNLException e) {
					e.printStackTrace();
				}
			}
			total += maxscore;
		}
		return total;
	}

	/**
	 * Resolves a sense identifier of the given word into a synset, using the
	 * word's lower-cased part-of-speech label.
	 *
	 * @param word the annotation the sense identifier belongs to
	 * @param senseId the sense identifier, parsed as a long
	 * @return the synset returned by the dictionary, or null when the identifier
	 *         is not a number or the dictionary lookup fails
	 */
	private Synset lookupSynset(Sense word, String senseId) {
		try {
			return dictionary.getSynsetAt(POS.getPOSForLabel(word.getPos().toLowerCase()), Long.parseLong(senseId));
		} catch (NumberFormatException e) {
			e.printStackTrace();
		} catch (JWNLException e) {
			e.printStackTrace();
		}
		return null;
	}
}