// Copyright 2015 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package lemming.lemma.toutanova;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import lemming.lemma.LemmaCandidateSet;
import lemming.lemma.LemmaInstance;
import lemming.lemma.Lemmatizer;
import lemming.lemma.LemmatizerGenerator;
import lemming.lemma.toutanova.ToutanovaTrainer.ToutanovaOptions;
/**
 * Lemmatizer and candidate generator backed by a {@link ToutanovaModel}.
 *
 * <p>The one-best decoder, the n-best decoder and the lemma cache are all
 * {@code transient} and are created on first use, so an instance whose
 * transient fields are {@code null} rebuilds them the first time it is
 * queried.
 */
public class ToutanovaLemmatizer implements Lemmatizer, LemmatizerGenerator {

    private static final long serialVersionUID = 1L;

    /** Model passed to every decoder and used for indexing and OOV checks. */
    private ToutanovaModel model_;

    /** One-best decoder used by {@link #lemmatize}; created lazily. */
    private transient Decoder decoder_;

    /** N-best decoder used by {@link #addCandidates}; created lazily. */
    private transient NbestDecoder nbest_decoder_;

    /** Supplies the decoder instance and the n-best rank. */
    private ToutanovaOptions options_;

    /** Lemmas already returned by {@link #lemmatize}, keyed by instance; created lazily. */
    private transient Map<LemmaInstance, String> cache_;

    /**
     * Creates a lemmatizer over the given model.
     *
     * @param options options providing the decoder instance and the n-best rank
     * @param model   model used for decoding, indexing and OOV checks
     */
    public ToutanovaLemmatizer(ToutanovaOptions options, ToutanovaModel model) {
        this.options_ = options;
        this.model_ = model;
    }

    /**
     * Returns the lemma for {@code instance}, decoding it only if it has not
     * been seen by this object before.
     *
     * @param instance the instance to lemmatize
     * @return the cached or freshly decoded lemma; {@code "_"} when the
     *         decoder output is {@code null} or empty
     */
    @Override
    public String lemmatize(LemmaInstance instance) {
        if (cache_ == null) {
            cache_ = new HashMap<>();
        }

        String known = cache_.get(instance);
        if (known != null) {
            return known;
        }

        // The decoder is obtained before the instance is indexed, matching
        // the order in which the two steps have always run.
        Decoder decoder = oneBestDecoder();
        ToutanovaInstance indexed = getToutanovaInstance(instance);
        String output = decoder.decode(indexed).getOutput();

        // A missing or empty prediction is replaced by the placeholder "_".
        String lemma = (output != null && !output.isEmpty()) ? output : "_";
        cache_.put(instance, lemma);
        return lemma;
    }

    /** Returns the one-best decoder, creating and initialising it with the model on first use. */
    private Decoder oneBestDecoder() {
        if (decoder_ == null) {
            decoder_ = options_.getDecoderInstance();
            decoder_.init(model_);
        }
        return decoder_;
    }

    /** Returns the n-best decoder, creating and initialising it with the model on first use. */
    private NbestDecoder nbestDecoder() {
        if (nbest_decoder_ == null) {
            nbest_decoder_ = new ZeroOrderNbestDecoder(options_.getNbestRank());
            nbest_decoder_.init(model_);
        }
        return nbest_decoder_;
    }

    /**
     * Wraps {@code instance} in a {@link ToutanovaInstance} (second constructor
     * argument {@code null}) and lets the model add its indexes to it, passing
     * {@code false} as the second argument of {@code addIndexes}.
     */
    private ToutanovaInstance getToutanovaInstance(LemmaInstance instance) {
        ToutanovaInstance wrapped = new ToutanovaInstance(instance, null);
        model_.addIndexes(wrapped, false);
        return wrapped;
    }

    /**
     * @return the model this lemmatizer was constructed with
     */
    public ToutanovaModel getModel() {
        return model_;
    }

    /**
     * Decodes the n-best outputs for {@code instance} and calls
     * {@code set.getCandidate(...)} once per output. Does nothing further when
     * the decoder returns {@code null}.
     *
     * @param instance the instance to generate candidates for
     * @param set      the candidate set queried with every decoded output
     *                 (presumably registering it as a candidate; confirm
     *                 against {@code LemmaCandidateSet})
     */
    @Override
    public void addCandidates(LemmaInstance instance, LemmaCandidateSet set) {
        // Indexing happens before the decoder is (lazily) built.
        ToutanovaInstance indexed = getToutanovaInstance(instance);
        Collection<lemming.lemma.toutanova.Result> nbest = nbestDecoder().decode(indexed);

        if (nbest != null) {
            for (lemming.lemma.toutanova.Result hypothesis : nbest) {
                set.getCandidate(hypothesis.getOutput());
            }
        }
    }

    /**
     * Reports whether the model considers {@code instance} out-of-vocabulary.
     *
     * @param instance the instance to check
     * @return the result of {@code model_.isOOV(instance)}
     */
    @Override
    public boolean isOOV(LemmaInstance instance) {
        return model_.isOOV(instance);
    }
}