/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. */ /** @author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a> */ package cc.mallet.fst.confidence; import java.util.*; import cc.mallet.classify.*; import cc.mallet.fst.*; import cc.mallet.pipe.*; import cc.mallet.types.*; /** * Estimates the confidence of a {@link Segment} extracted by a {@link * Transducer} using a {@link MaxEnt} classifier to classify segments * as "correct" or "incorrect." xxx needs some interface work */ public class MaxEntConfidenceEstimator extends TransducerConfidenceEstimator { MaxEntTrainer meTrainer; MaxEnt meClassifier; Pipe pipe; String correct, incorrect; public MaxEntConfidenceEstimator (Transducer model, double gaussianVariance) { super(model); meTrainer = new MaxEntTrainer (gaussianVariance); } public MaxEntConfidenceEstimator (Transducer model) { this (model, 10.0); } public MaxEnt trainClassifier (InstanceList ilist, String correct, String incorrect) { this.meClassifier = (MaxEnt) meTrainer.train (ilist); this.pipe = ilist.getPipe (); this.correct = correct; this.incorrect = incorrect; InfoGain ig = new InfoGain (ilist); int igl = Math.min (30, ig.numLocations()); for (int i = 0; i < igl; i++) System.out.println ("InfoGain["+ig.getObjectAtRank(i)+"]="+ig.getValueAtRank(i)); return this.meClassifier; } /** Calculates the confidence in the tagging of a {@link Segment}. */ public double estimateConfidenceFor (Segment segment, SumLatticeDefault cachedLattice) { Classification c = this.meClassifier.classify (pipe.instanceFrom(new Instance ( segment, segment.getTruth(), null, null))); return c.getLabelVector().value (this.correct); } }