package experimental.analyzer.tagger; import java.util.Collection; import java.util.Collections; import java.util.LinkedList; import java.util.List; import experimental.analyzer.Analyzer; import experimental.analyzer.AnalyzerInstance; import experimental.analyzer.AnalyzerReading; import experimental.analyzer.AnalyzerTag; import marmot.core.FeatureVector; import marmot.core.State; import marmot.core.lattice.ZeroOrderSumLattice; import marmot.morph.MorphModel; import marmot.morph.MorphTagger; import marmot.morph.MorphWeightVector; import marmot.morph.Sentence; import marmot.morph.Word; import marmot.util.SymbolTable; public class TaggerAnalyzer implements Analyzer { private static final long serialVersionUID = 1L; private MorphTagger tagger_; private double log_threshold_; public TaggerAnalyzer(MorphTagger tagger, double threshold) { tagger_ = tagger; log_threshold_ = Math.log(threshold); } @Override public Collection<AnalyzerReading> analyze(AnalyzerInstance instance) { MorphModel model = (MorphModel) tagger_.getModel(); Word word = new Word(instance.getForm(), null, null); model.addIndexes(word, false); Sentence sentence = new Sentence(Collections.singletonList(word)); ZeroOrderSumLattice lattice = (ZeroOrderSumLattice) tagger_.getSumLattice(false, sentence); List<List<State>> states = lattice.prune(log_threshold_); assert states.size() == 2 : states; List<State> tags = states.get(0); SymbolTable<String> pos_table = model.getTagTables().get(0); SymbolTable<String> morph_table = model.getTagTables().get(1); Collection<AnalyzerReading> readings = new LinkedList<>(); for (State state : tags) { int morph_index = state.getIndex(); int tag_index = state.getSubLevelState().getIndex(); String pos_tag = pos_table.toSymbol(tag_index); String morph_tag = morph_table.toSymbol(morph_index); AnalyzerTag tag = new AnalyzerTag(pos_tag, morph_tag); AnalyzerReading reading = new AnalyzerReading(tag, null); readings.add(reading); } return readings; } @Override public String represent(AnalyzerInstance instance) { MorphModel model = (MorphModel) tagger_.getModel(); Word word = new Word(instance.getForm(), null, null); model.addIndexes(word, false); Sentence sentence = new Sentence(Collections.singletonList(word)); MorphWeightVector weights = (MorphWeightVector) tagger_.getWeightVector(); FeatureVector vector = weights.extractStateFeatures(sentence, 0); return vector.toString(); } @Override public int getNumTags() { throw new UnsupportedOperationException(); } @Override public boolean isUnknown(AnalyzerInstance instance) { MorphModel model = (MorphModel) tagger_.getModel(); int form_index = model.getWordTable().toIndex(instance.getForm()); return model.isOOV(form_index); } }