// Copyright 2015 Thomas Müller
// This file is part of MarMoT, which is licensed under GPLv3.
package lemming.lemma;
import java.util.Map;
import lemming.lemma.SimpleLemmatizerTrainer.SimpleLemmatizerTrainerOptions;
import marmot.util.Counter;
/**
 * Lookup-table lemmatizer backed by a map from keys to counters of lemmas.
 *
 * <p>Two kinds of keys are consulted: a POS-specific key built by
 * {@link #toKey(LemmaInstance)} (form, a tab, then the POS tag) and a plain
 * key built by {@link #toSimpleKey(LemmaInstance)} (the form alone). Which
 * keys {@link #lemmatize(LemmaInstance)} tries, and what it does when nothing
 * usable is found, is controlled by the options passed to the constructor.
 */
public class SimpleLemmatizer implements LemmatizerGenerator {

    private static final long serialVersionUID = 1L;

    /** Placed between the form and the POS tag in keys built by {@link #toKey(LemmaInstance)}. */
    private static final String SEPARATOR = "\t";

    /** Lookup table; keys are produced by {@link #toKey} and {@link #toSimpleKey}. */
    private final Map<String, Counter<String>> map_;

    /** Switches read by {@link #lemmatize(LemmaInstance)}. */
    private final SimpleLemmatizerTrainerOptions options_;

    /**
     * Creates a lemmatizer over an existing lookup table.
     *
     * @param options switches controlling the lookup strategy
     * @param map     lookup table from keys to lemma counters; stored by
     *                reference, not copied
     */
    public SimpleLemmatizer(SimpleLemmatizerTrainerOptions options, Map<String, Counter<String>> map) {
        map_ = map;
        options_ = options;
    }

    /**
     * Builds the POS-specific lookup key for an instance.
     *
     * @param instance the instance to build a key for
     * @return the form, a tab and the POS tag concatenated, or {@code null}
     *         if the instance has no POS tag
     */
    public static String toKey(LemmaInstance instance) {
        String posTag = instance.getPosTag();
        if (posTag == null) {
            return null;
        }
        // Plain concatenation yields the same string as the former
        // String.format("%s%s%s", ...) call (a null form still renders as
        // "null") without parsing a format string on every lookup.
        return instance.getForm() + SEPARATOR + posTag;
    }

    /**
     * Builds the POS-independent lookup key for an instance.
     *
     * @param instance the instance to build a key for
     * @return the form of the instance (may be {@code null} if the form is)
     */
    public static String toSimpleKey(LemmaInstance instance) {
        return instance.getForm();
    }

    /**
     * Looks up a lemma for the instance.
     *
     * <p>The POS-specific key is tried first if {@code getUsePos()} is set,
     * then the plain form key if {@code getUseBackup()} is set. The first key
     * that yields a usable counter (see {@link #findUsableLemmas(String)})
     * decides the result, which is that counter's {@code max()} -- presumably
     * the most frequent lemma; confirm against {@code Counter}.
     *
     * @param instance the instance to lemmatize
     * @return the looked-up lemma; otherwise the form itself if
     *         {@code getHandleUnseen()} is set; otherwise {@code null}
     */
    @Override
    public String lemmatize(LemmaInstance instance) {
        if (options_.getUsePos()) {
            Counter<String> lemmas = findUsableLemmas(toKey(instance));
            if (lemmas != null) {
                return lemmas.max();
            }
        }

        if (options_.getUseBackup()) {
            Counter<String> lemmas = findUsableLemmas(toSimpleKey(instance));
            if (lemmas != null) {
                return lemmas.max();
            }
        }

        if (options_.getHandleUnseen()) {
            return instance.getForm();
        }
        return null;
    }

    /**
     * Fetches the counter stored under {@code key} if it may be used to
     * answer a {@link #lemmatize(LemmaInstance)} request.
     *
     * @param key lookup key, may be {@code null}
     * @return the counter, or {@code null} if the key is {@code null}, the
     *         key is not in the table, or {@code getAbstainIfAmbigous()} is
     *         set and the counter does not hold exactly one lemma
     */
    private Counter<String> findUsableLemmas(String key) {
        if (key == null) {
            return null;
        }
        Counter<String> lemmas = map_.get(key);
        if (lemmas == null) {
            return null;
        }
        if (options_.getAbstainIfAmbigous() && lemmas.size() != 1) {
            return null;
        }
        return lemmas;
    }

    /**
     * Requests a candidate from {@code set} for every lemma in the counter.
     *
     * @param lemmas counter whose keys are the lemmas; {@code null} is a no-op
     * @param set    candidate set; {@code getCandidate} is called once per
     *               lemma (presumably creating the candidate if missing --
     *               confirm against {@code LemmaCandidateSet})
     */
    private void addCandidates(Counter<String> lemmas, LemmaCandidateSet set) {
        if (lemmas == null) {
            return;
        }
        for (Map.Entry<String, Double> lemma : lemmas.entrySet()) {
            set.getCandidate(lemma.getKey());
        }
    }

    /**
     * Adds every lemma stored for the instance to the candidate set.
     *
     * <p>Both the POS-specific key and the plain form key are consulted,
     * independent of the options that steer {@link #lemmatize(LemmaInstance)}.
     *
     * @param instance the instance to generate candidates for
     * @param set      the candidate set to add to
     */
    @Override
    public void addCandidates(LemmaInstance instance, LemmaCandidateSet set) {
        String posKey = toKey(instance);
        if (posKey != null) {
            addCandidates(map_.get(posKey), set);
        }

        String simpleKey = toSimpleKey(instance);
        if (simpleKey != null) {
            addCandidates(map_.get(simpleKey), set);
        }
    }

    /**
     * Tells whether the form of the instance is missing from the table.
     *
     * @param instance the instance to check
     * @return {@code true} if the form is {@code null} or no entry is stored
     *         under the plain form key
     */
    @Override
    public boolean isOOV(LemmaInstance instance) {
        String key = toSimpleKey(instance);
        // Guard the null key the same way lemmatize and addCandidates do, so
        // a null form never reaches the map (some Map implementations reject
        // null keys).
        return key == null || map_.get(key) == null;
    }
}