package jhazm;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
*
* @author Mojtaba Khallash
*/
public class POSTagger {
public static POSTagger instance;
private MaxentTagger tagger;
public POSTagger() throws IOException {
this("resources/models/persian.tagger");
}
public POSTagger(String pathToModel) throws IOException {
this.tagger = new MaxentTagger(pathToModel);
}
public static POSTagger i() throws IOException {
if (instance != null) return instance;
instance = new POSTagger();
return instance;
}
public List<List<TaggedWord>> batchTags(List<List<String>> sentences) {
List<List<TaggedWord>> result = new ArrayList<>();
for (List<String> sentence : sentences) {
result.add(batchTag(sentence));
}
return result;
}
public List<TaggedWord> batchTag(List<String> sentence) {
String[] sen = new String[sentence.size()];
for (int i = 0; i < sentence.size(); i++)
sen[i] = sentence.get(i).replace(" ", "_");
List newSent = Sentence.toWordList(sen);
List taggedSentence = this.tagger.tagSentence(newSent);
List<TaggedWord> taggedSen = new ArrayList<>();
for (int i = 0; i < taggedSentence.size(); i++) {
TaggedWord tw = (TaggedWord)taggedSentence.get(i);
tw.setWord(sentence.get(i));
taggedSen.add(tw);
}
return taggedSen;
}
}