// TransducerConfidenceEstimator.java example
//
// Explorer
// topic-modeling-master
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

/** 
		@author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a>
*/

package cc.mallet.fst.confidence;


import java.util.logging.*;
import java.util.*;
import java.io.Serializable;

import cc.mallet.extract.LabeledSpan;
import cc.mallet.fst.*;
import cc.mallet.pipe.Noop;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
import cc.mallet.util.MalletLogger;

/**
 * Abstract base class for estimating the confidence of a {@link Segment}
 * extracted by a {@link Transducer}.
 *
 * <p>Subclasses supply the actual scoring by implementing
 * {@link #estimateConfidenceFor(Segment, SumLatticeDefault)}.  This class
 * provides the convenience overloads and the ranking helpers built on top
 * of that method.
 */
abstract public class TransducerConfidenceEstimator implements Serializable
{
	private static final Logger logger = MalletLogger.getLogger(TransducerConfidenceEstimator.class.getName());

	/** The trained Transducer which performed the extractions. */
	protected Transducer model;

	/**
		 Lazily created by
		 {@link #rankSegmentsByConfidence(InstanceList, Object[], Object[])}.
		 This class itself never adds elements to it.
	 */
	java.util.Vector segmentConfidences; 

	/**
		 @param model the Transducer whose extractions will be scored
	 */
	public TransducerConfidenceEstimator (Transducer model) {
		this.model = model;
	}
	
	/**
		 Calculates the confidence in the tagging of a {@link Segment}.
		 Delegates to
		 {@link #estimateConfidenceFor(Segment, SumLatticeDefault)} with a
		 <code>null</code> lattice.
		 @param segment the segment to score
		 @return the confidence estimate for <code>segment</code>
	 */
	public double estimateConfidenceFor (Segment segment) {
		return estimateConfidenceFor (segment, null);
	}

	/**
		 Calculates the confidence in the tagging of a {@link Segment}.
		 @param segment the segment to score
		 @param lattice lattice passed through to the implementation; the
		 single-argument overload passes <code>null</code> here, so
		 implementations must accept <code>null</code>
		 @return the confidence estimate for <code>segment</code>
	 */
	abstract public double estimateConfidenceFor (Segment segment, SumLatticeDefault lattice);

	/**
		 @return the segment-confidence vector; may be <code>null</code>
		 until
		 {@link #rankSegmentsByConfidence(InstanceList, Object[], Object[])}
		 has been called
	 */
	public java.util.Vector getSegmentConfidences () {return this.segmentConfidences;}

	/**
		 Ranks all {@link Segment}s in this {@link InstanceList} by
		 confidence estimate.  Each segment produced by a
		 {@link SegmentIterator} over <code>ilist</code> is scored with
		 {@link #estimateConfidenceFor(Segment)}, the score is stored on the
		 segment via <code>setConfidence</code>, and the segments are then
		 sorted with {@link Collections#sort(List)}.
		 @param ilist list of segmentation instances
		 @param startTags represent the labels for the start states (B-)
		 of all segments
		 @param continueTags represent the labels for the continue state
		 (I-) of all segments
		 @return array of {@link Segment}s ordered by non-decreasing
		 confidence scores, as calculated by <code>estimateConfidenceFor</code>;
		 an empty (zero-length) array if no segments were found
	 */
	public Segment[] rankSegmentsByConfidence (InstanceList ilist, Object[] startTags,
																						 Object[] continueTags) {
		ArrayList segmentList = new ArrayList ();
		SegmentIterator iter = new SegmentIterator (this.model, ilist, startTags, continueTags);			
		if (this.segmentConfidences == null)
			segmentConfidences = new java.util.Vector ();
		while (iter.hasNext ()) {
			Segment segment = (Segment) iter.nextSegment ();
			double confidence = estimateConfidenceFor (segment);
			segment.setConfidence (confidence);
			// Only build the (potentially long) message when it will be emitted.
			if (logger.isLoggable (Level.FINE)) {
				logger.fine ("confidence=" + segment.getConfidence() + " for segment\n"
										 + segment.sequenceToString() + "\n");
			}
			segmentList.add (segment);
		}
		Collections.sort (segmentList);
		// Size the target array to the list: toArray() null-terminates an
		// over-long array, so a fixed length-1 seed would yield {null} when
		// there are no segments.
		return (Segment[]) segmentList.toArray (new Segment[segmentList.size ()]);
	}

	/**
		 Ranks the segments in one {@link Instance}.  The instance is wrapped
		 in a single-element {@link InstanceList} whose pipe is a
		 {@link Noop} built from the instance's data and target alphabets,
		 and the work is delegated to
		 {@link #rankSegmentsByConfidence(InstanceList, Object[], Object[])}.
		 @param instance instance to be segmented
		 @param startTags represent the labels for the start states (e.g. B-)
		 of all segments
		 @param continueTags represent the labels for the continue state
		 (e.g. I-) of all segments
		 @return array of {@link Segment}s ordered by non-decreasing
		 confidence scores, as calculated by <code>estimateConfidenceFor</code>
	 */
	public Segment[] rankSegmentsByConfidence (Instance instance, Object[] startTags,
																						 Object[] continueTags) {
		InstanceList ilist = new InstanceList (new Noop(instance.getDataAlphabet(),instance.getTargetAlphabet()));
		ilist.add (instance);
		return rankSegmentsByConfidence (ilist, startTags, continueTags);
	}

	/**
		 @return the Transducer this estimator was constructed with
	 */
	public Transducer getTransducer() { return this.model; }
}