/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

/**
   @author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a>
*/

package cc.mallet.fst.confidence;

import java.util.logging.*;
import java.util.*;

import cc.mallet.fst.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
import cc.mallet.util.MalletLogger;

/**
 * Abstract class that estimates the confidence of a {@link Sequence}
 * extracted by a {@link Transducer}. Note that this is different from
 * {@link TransducerConfidenceEstimator}, which estimates the
 * confidence for a single {@link Segment}.
 *
 * <p>Subclasses supply the scoring rule through
 * {@link #estimateConfidenceFor}; this class provides the ranking of a
 * whole {@link InstanceList} on top of it.
 */
abstract public class TransducerSequenceConfidenceEstimator
{
	private static final Logger logger =
		MalletLogger.getLogger(TransducerSequenceConfidenceEstimator.class.getName());

	/** The trained Transducer which performed the extractions. */
	protected Transducer model;

	/**
	 * Creates an estimator bound to the given transducer.
	 *
	 * @param model the trained Transducer which performed the extractions
	 */
	public TransducerSequenceConfidenceEstimator (Transducer model) {
		this.model = model;
	}

	/**
	 * Calculates the confidence in the tagging of a {@link Sequence}.
	 *
	 * @param instance the instance whose tagging is being scored
	 * @param startTags labels for the start states (B-) of all segments
	 * @param inTags labels for the continue states (I-) of all segments
	 * @return the confidence score; its scale is defined by the subclass
	 */
	abstract public double estimateConfidenceFor (
		Instance instance, Object[] startTags, Object[] inTags);

	/**
	 * Ranks all {@link Sequence}s in this {@link InstanceList} by
	 * confidence estimate.
	 *
	 * <p>For each instance the best output sequence under {@link #model} is
	 * decoded, the confidence is obtained from
	 * {@link #estimateConfidenceFor}, and both are wrapped in an
	 * {@link InstanceWithConfidence}. Each score is logged at INFO level.
	 *
	 * @param ilist list of segmentation instances
	 * @param startTags represent the labels for the start states (B-) of
	 *        all segments
	 * @param continueTags represent the labels for the continue state (I-)
	 *        of all segments
	 * @return array of {@link InstanceWithConfidence}s, one per instance,
	 *         sorted by the natural ordering of
	 *         {@link InstanceWithConfidence} (described by the original
	 *         author as non-decreasing confidence, as calculated by
	 *         <code>estimateConfidenceFor</code>); an empty array when
	 *         <code>ilist</code> is empty
	 */
	public InstanceWithConfidence[] rankInstancesByConfidence (InstanceList ilist,
	                                                           Object[] startTags,
	                                                           Object[] continueTags) {
		// Size the result from the input so that an empty list yields an empty
		// array. The previous toArray(new InstanceWithConfidence[1]) call
		// returned a one-element array holding null in that case.
		final int numInstances = ilist.size();
		InstanceWithConfidence[] ranked = new InstanceWithConfidence[numInstances];

		for (int i = 0; i < numInstances; i++) {
			Instance instance = ilist.get (i);
			// Best output sequence for this instance's input under the model.
			Sequence predicted =
				new MaxLatticeDefault (model, (Sequence) instance.getData()).bestOutputSequence();
			double confidence = estimateConfidenceFor (instance, startTags, continueTags);
			ranked[i] = new InstanceWithConfidence (instance, confidence, predicted);
			logger.info ("instance#"+i+" confidence="+confidence);
		}

		// Same natural-ordering, stable sort that Collections.sort performed
		// on the intermediate list.
		Arrays.sort (ranked);
		return ranked;
	}
}