package com.cse10.classifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import weka.core.OptionHandler;
import weka.core.stemmers.Stemmer;
import java.io.Serializable;
import java.util.*;
/**
 * Weka {@link Stemmer} that delegates to a Stanford CoreNLP pipeline in order to
 * reduce a word to its lemma (dictionary base form).
 *
 * <p>The underlying {@link StanfordCoreNLP} pipeline is not serializable, so it is
 * held in a {@code transient} field and rebuilt on first use after deserialization.
 * The lazy rebuild is not synchronized.
 *
 * Created by Chamath on 12/20/2014
 */
public class StanfordCoreNLPLemmatizer implements Stemmer, OptionHandler, Serializable {

    private static final long serialVersionUID = 1L;

    /** Annotators required to produce a lemma for each token. */
    private static final String ANNOTATORS = "tokenize, ssplit, pos, lemma";

    /** CoreNLP pipeline; transient because it cannot be serialized. */
    protected transient StanfordCoreNLP pipeline;

    /** Value reported by {@link #getRevision()}. */
    protected String currentVersion;

    /** Option strings exposed through the {@link OptionHandler} methods. */
    protected String[] options;

    /**
     * Creates a lemmatizer with an eagerly constructed CoreNLP pipeline and the
     * default option strings.
     */
    public StanfordCoreNLPLemmatizer() {
        this.pipeline = createPipeline();
        this.currentVersion = "1.0";
        this.options = new String[] {"-S", "Stanford Core NLP"};
    }

    /** Builds a pipeline configured with the annotators needed for lemmatization. */
    private static StanfordCoreNLP createPipeline() {
        Properties props = new Properties();
        props.setProperty("annotators", ANNOTATORS);
        return new StanfordCoreNLP(props);
    }

    /** Returns the pipeline, recreating it if the field is unset (e.g. after deserialization). */
    private StanfordCoreNLP getPipeline() {
        if (pipeline == null) {
            pipeline = createPipeline();
        }
        return pipeline;
    }

    /**
     * Converts the given word into its base form.
     *
     * <p>If the text contains several tokens, only the lemma of the first token is
     * returned.
     *
     * @param word the text to lemmatize; may be {@code null} or blank
     * @return the lemma of the first token found in {@code word}, or {@code word}
     *         itself when it is {@code null}, blank, or produces no lemma
     */
    @Override
    public String stem(String word) {
        // Nothing to lemmatize: hand the input back instead of failing on an empty result.
        if (word == null || word.trim().isEmpty()) {
            return word;
        }

        Annotation document = new Annotation(word);
        getPipeline().annotate(document);

        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
        if (sentences == null) {
            return word;
        }

        for (CoreMap sentence : sentences) {
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            if (tokens == null || tokens.isEmpty()) {
                continue;
            }
            // Only the first token's lemma is ever returned, so stop at the first one found.
            String lemma = tokens.get(0).get(CoreAnnotations.LemmaAnnotation.class);
            return lemma != null ? lemma : word;
        }

        // The pipeline produced no tokens at all.
        return word;
    }

    /**
     * Returns the revision string of this lemmatizer.
     *
     * @return the current version string
     */
    @Override
    public String getRevision() {
        return currentVersion;
    }

    /**
     * Enumerates the currently stored option strings.
     *
     * <p>NOTE(review): the elements are plain {@code String}s, kept as-is for backward
     * compatibility; Weka's {@code OptionHandler} contract appears to expect
     * {@code weka.core.Option} elements here - confirm against the Weka version in use.
     *
     * @return an enumeration over the stored option strings (empty if none are set)
     */
    @Override
    @SuppressWarnings({"rawtypes", "unchecked"}) // raw return type kept so the override matches the existing signature
    public Enumeration listOptions() {
        Vector<String> listed = new Vector<String>();
        if (options != null) {
            for (String option : options) {
                listed.add(option);
            }
        }
        return listed.elements();
    }

    /**
     * Replaces the stored option strings.
     *
     * @param options the new options; {@code null} is treated as an empty array
     * @throws Exception declared by the interface; never thrown by this implementation
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        // Copy so later changes to the caller's array do not alter this object's state.
        this.options = (options == null) ? new String[0] : options.clone();
    }

    /**
     * Returns the stored option strings.
     *
     * @return a copy of the current options, never {@code null}
     */
    @Override
    public String[] getOptions() {
        return (options == null) ? new String[0] : options.clone();
    }
}