package experimental.analyzer.tagger; import java.util.Collection; import java.util.Collections; import java.util.LinkedList; import experimental.analyzer.Analyzer; import experimental.analyzer.AnalyzerInstance; import experimental.analyzer.AnalyzerReading; import experimental.analyzer.AnalyzerResult; import experimental.analyzer.AnalyzerTag; import experimental.analyzer.AnalyzerTrainer; import marmot.core.Options; import marmot.core.Sequence; import marmot.morph.MorphModel; import marmot.morph.MorphOptions; import marmot.morph.MorphTagger; import marmot.morph.Sentence; import marmot.morph.Word; public class TaggerAnalyzerTrainer extends AnalyzerTrainer { public double getFscore(MorphTagger tagger, Collection<AnalyzerInstance> instances, double threshold) { TaggerAnalyzer analyzer = new TaggerAnalyzer(tagger, threshold); AnalyzerResult result = AnalyzerResult.test(analyzer, instances); double fscore = result.getFscore(); return fscore; } @Override public Analyzer train(Collection<AnalyzerInstance> instances) { Collection<Sequence> sentences = new LinkedList<>(); for (AnalyzerInstance instance : instances) { for (AnalyzerReading reading : instance.getReadings()) { AnalyzerTag tag = reading.getTag(); Word word = new Word(instance.getForm(), tag.getPosTag(), tag.getMorphTag()); Sentence sentence = new Sentence(Collections.singletonList(word)); sentences.add(sentence); } } MorphOptions options = new MorphOptions(); options.setProperty(Options.ORDER, "0"); options.setProperty(MorphOptions.FEATURE_TEMPLATES, "affix,sig"); options.setProperty(MorphOptions.OBSERVED_FEATURE, "false"); options.setProperty(Options.PENALTY, "0.0"); if (options_.containsKey(AnalyzerTrainer.FLOAT_DICT_)) { options.setProperty(MorphOptions.FLOAT_TYPE_DICT, options_.get(AnalyzerTrainer.FLOAT_DICT_)); } MorphTagger tagger = (MorphTagger) MorphModel.train(options, sentences, null); double [] thresholds = {0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0.01}; double best_threshold = 0.0; double best_fscore = -1; for (double threshold : thresholds) { double fscore = getFscore(tagger, instances, threshold); if (fscore > best_fscore) { best_fscore = fscore; best_threshold = threshold; } } System.err.println("Best threshold: " + best_threshold); return new TaggerAnalyzer(tagger, best_threshold); } }