package jhazm;
import com.infomancers.collections.yield.Yielder;
import edu.stanford.nlp.ling.TaggedWord;
import jhazm.tokenizer.SentenceTokenizer;
import jhazm.tokenizer.WordTokenizer;
import org.maltparser.concurrent.ConcurrentMaltParserModel;
import org.maltparser.concurrent.ConcurrentMaltParserService;
import org.maltparser.concurrent.graph.ConcurrentDependencyGraph;
import org.maltparser.core.exception.MaltChainedException;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.List;
/**
 * Dependency parser that chains sentence tokenization, word tokenization,
 * POS tagging and (optionally) lemmatization, then hands CoNLL-formatted
 * token rows to a MaltParser model loaded from {@code modelFile}.
 *
 * <p>Collaborators that are not supplied explicitly (sentence tokenizer,
 * word tokenizer, tagger, parser model) are created on first use. None of
 * the lazy initialisation in this class is synchronised.
 *
 * @author Mojtaba Khallash
 */
public class DependencyParser {
    /** Shared instance handed out by {@link #i()}; created on first request. */
    public static DependencyParser instance;

    /** POS tagger; when {@code null}, {@link #getTagger()} creates a default one. */
    public POSTagger tagger;

    /** Splits raw text into sentences; created lazily by {@link #getSentenceTokenizer()}. */
    private SentenceTokenizer sentenceTokenizer;

    /** Splits a sentence into words; created lazily by {@link #getWordTokenizer()}. */
    private WordTokenizer wordTokenizer;

    /** Optional text normalizer; when {@code null}, text is parsed as given. */
    private Normalizer normalizer;

    /** Optional lemmatizer; when {@code null}, the lemma column is written as "_". */
    private Lemmatizer lemmatizer;

    /** File system path of the MaltParser model file. */
    private String modelFile;

    /** Parser model, loaded from {@link #modelFile} on first use. */
    private ConcurrentMaltParserModel model;

    /**
     * Creates a parser with no tagger and no lemmatizer, using the model at
     * {@code resources/models/langModel.mco}.
     */
    public DependencyParser() {
        this(null, null, "resources/models/langModel.mco");
    }

    /**
     * Creates a parser with the given collaborators.
     *
     * @param tagger     POS tagger to use, or {@code null} to create one lazily
     * @param lemmatizer lemmatizer to use, or {@code null} to skip lemmatization
     * @param modelFile  path of the MaltParser model file
     */
    public DependencyParser(POSTagger tagger, Lemmatizer lemmatizer, String modelFile) {
        this.tagger = tagger;
        this.lemmatizer = lemmatizer;
        this.modelFile = modelFile;
    }

    /**
     * Returns the shared instance, creating it with the default constructor
     * if it does not exist yet.
     *
     * @return the shared {@code DependencyParser}
     */
    public static DependencyParser i() {
        if (instance == null) {
            instance = new DependencyParser();
        }
        return instance;
    }

    /**
     * Returns the sentence tokenizer, creating a default one on first call.
     *
     * @return the sentence tokenizer in use
     */
    public SentenceTokenizer getSentenceTokenizer() {
        if (sentenceTokenizer == null) {
            sentenceTokenizer = new SentenceTokenizer();
        }
        return sentenceTokenizer;
    }

    /**
     * Replaces the sentence tokenizer.
     *
     * @param value tokenizer to use; {@code null} restores lazy creation
     */
    public void setSentenceTokenizer(SentenceTokenizer value) {
        this.sentenceTokenizer = value;
    }

    /**
     * Returns the word tokenizer, creating a default one on first call.
     *
     * @return the word tokenizer in use
     * @throws IOException if creating the default tokenizer fails
     */
    public WordTokenizer getWordTokenizer() throws IOException {
        if (wordTokenizer == null) {
            wordTokenizer = new WordTokenizer();
        }
        return wordTokenizer;
    }

    /**
     * Replaces the word tokenizer.
     *
     * @param value tokenizer to use; {@code null} restores lazy creation
     */
    public void setWordTokenizer(WordTokenizer value) {
        this.wordTokenizer = value;
    }

    /**
     * Returns the normalizer. Unlike the tokenizers, it is never created
     * implicitly.
     *
     * @return the normalizer, or {@code null} if none was set
     */
    public Normalizer getNormalizer() {
        return normalizer;
    }

    /**
     * Sets the normalizer applied by {@link #rawParse(String)}.
     *
     * @param normalizer normalizer to use, or {@code null} to disable normalization
     */
    public void setNormalizer(Normalizer normalizer) {
        this.normalizer = normalizer;
    }

    /**
     * Returns the lemmatizer. It is never created implicitly.
     *
     * @return the lemmatizer, or {@code null} if none was set
     */
    public Lemmatizer getLemmatizer() {
        return lemmatizer;
    }

    /**
     * Sets the lemmatizer used when building CoNLL rows.
     *
     * @param lemmatizer lemmatizer to use, or {@code null} to emit "_" as lemma
     */
    public void setLemmatizer(Lemmatizer lemmatizer) {
        this.lemmatizer = lemmatizer;
    }

    /**
     * Returns the POS tagger, creating a default one on first call.
     *
     * @return the tagger in use
     * @throws IOException if creating the default tagger fails
     */
    public POSTagger getTagger() throws IOException {
        if (tagger == null) {
            tagger = new POSTagger();
        }
        return tagger;
    }

    /**
     * Replaces the POS tagger.
     *
     * @param value tagger to use; {@code null} restores lazy creation
     */
    public void setTagger(POSTagger value) {
        this.tagger = value;
    }

    /**
     * Returns the parser model, loading it from {@link #modelFile} the first
     * time it is requested.
     *
     * @return the loaded parser model
     * @throws IOException          if the model path cannot be turned into a URL
     * @throws MaltChainedException if MaltParser fails to initialise the model
     */
    private ConcurrentMaltParserModel getModel()
            throws IOException,
                   MaltChainedException {
        if (model != null) {
            return model;
        }
        URL modelLocation = new File(this.modelFile).toURI().toURL();
        this.model = ConcurrentMaltParserService.initializeParserModel(modelLocation);
        return model;
    }

    /**
     * Parses raw text: normalizes it when a normalizer is set, splits it into
     * sentences and delegates to {@link #rawParses(List)}.
     *
     * @param text raw input text
     * @return one dependency graph per sentence that was parsed
     * @throws IOException declared by the signature
     */
    public Iterable<ConcurrentDependencyGraph> rawParse(String text)
            throws IOException {
        String input = text;
        if (this.normalizer != null) {
            input = this.normalizer.run(input);
        }
        List<String> sentences = getSentenceTokenizer().tokenize(input);
        return rawParses(sentences);
    }

    /**
     * Wraps a list of raw sentences in a {@link YieldParsedSentence}. The
     * tokenizing, tagging and parsing work is done inside that yielder, not
     * in this method.
     *
     * @param sentences raw, untokenized sentences
     * @return an iterable producing dependency graphs for the sentences
     * @throws IOException declared by the signature
     */
    public Iterable<ConcurrentDependencyGraph> rawParses(List<String> sentences)
            throws IOException {
        return new YieldParsedSentence(sentences);
    }

    /**
     * Parses a single POS-tagged sentence. Each token becomes one
     * tab-separated row: 1-based index, word, lemma ("_" without a
     * lemmatizer), tag, tag again, and "_".
     *
     * @param sentence tagged words of one sentence, in order
     * @return the dependency graph produced by the model
     * @throws IOException          if the model file cannot be located
     * @throws MaltChainedException if MaltParser fails to load or parse
     */
    public ConcurrentDependencyGraph rawParse(List<TaggedWord> sentence)
            throws IOException,
                   MaltChainedException {
        String[] rows = new String[sentence.size()];
        for (int position = 0; position < sentence.size(); position++) {
            TaggedWord token = sentence.get(position);
            String word = token.word();
            String lemma = "_";
            if (this.lemmatizer != null) {
                lemma = this.lemmatizer.lemmatize(word);
            }
            String tag = token.tag();
            // The tag is written twice, filling two consecutive columns.
            rows[position] = String.format("%s\t%s\t%s\t%s\t%s\t%s",
                    position + 1, word, lemma, tag, tag, "_");
        }
        return parse(rows);
    }

    /**
     * Parses one sentence given as ready-made token rows.
     *
     * @param conllSentence one row per token, as built by {@link #rawParse(List)}
     * @return the dependency graph produced by the model
     * @throws IOException          if the model file cannot be located
     * @throws MaltChainedException if MaltParser fails to load or parse
     */
    public ConcurrentDependencyGraph parse(String[] conllSentence)
            throws IOException,
                   MaltChainedException {
        ConcurrentMaltParserModel parserModel = this.getModel();
        return parserModel.parse(conllSentence);
    }

    /**
     * Yielder that walks a list of raw sentences and, for each one,
     * tokenizes, tags and parses it with the enclosing parser.
     */
    class YieldParsedSentence extends Yielder<ConcurrentDependencyGraph> {
        /** Raw sentences to parse. */
        private final List<String> sentences;

        /** Index of the sentence handled most recently; -1 before the first. */
        private int index;

        /**
         * @param sentences raw sentences to parse, in order
         */
        public YieldParsedSentence(List<String> sentences) {
            this.sentences = sentences;
            index = -1;
        }

        /**
         * Advances to the next sentence and, if one remains, yields its
         * dependency graph. Any exception is printed and swallowed, so a
         * failing sentence yields nothing.
         */
        // NOTE(review): how iteration proceeds after a call that yields
        // nothing depends on the external Yielder base class - confirm
        // before changing the statements below.
        @Override
        protected void yieldNextCore() {
            try {
                index++;
                if (index < sentences.size()) {
                    String sentence = sentences.get(index);
                    List<String> words = getWordTokenizer().tokenize(sentence);
                    yieldReturn(rawParse(getTagger().batchTag(words)));
                }
            }
            catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }
}