/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. */ package cc.mallet.extract; import java.io.Serializable; import java.io.ObjectOutputStream; import java.io.IOException; import java.io.ObjectInputStream; import cc.mallet.fst.confidence.*; import cc.mallet.types.Label; import cc.mallet.types.LabelAlphabet; import cc.mallet.types.Sequence; /** * Created: Oct 26, 2005 * * @author <A HREF="mailto:culotta@cs.umass.edu>culotta@cs.umass.edu</A> */ public class ConfidenceTokenizationFilter implements TokenizationFilter, Serializable { ExtractionConfidenceEstimator confidenceEstimator; TokenizationFilter underlyingFilter; public ConfidenceTokenizationFilter (ExtractionConfidenceEstimator confidenceEstimator, TokenizationFilter underlyingFilter) { super(); this.confidenceEstimator = confidenceEstimator; this.underlyingFilter = underlyingFilter; } public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag, Tokenization input, Sequence seq) { DocumentExtraction extraction = new DocumentExtraction("Extraction", dict, input, seq, null, backgroundTag.toString()); confidenceEstimator.estimateConfidence(extraction); return extraction.getExtractedSpans(); } // Serialization garbage private static final long serialVersionUID = 1; private static final int CURRENT_SERIAL_VERSION = 1; private void writeObject (ObjectOutputStream out) throws IOException { out.writeInt (CURRENT_SERIAL_VERSION); out.writeObject(confidenceEstimator); out.writeObject(underlyingFilter); } private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { in.readInt (); // read version this.confidenceEstimator = (ExtractionConfidenceEstimator) in.readObject(); this.underlyingFilter = (TokenizationFilter) in.readObject(); } }