/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

package cc.mallet.pipe;

import java.io.*;

import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.FeatureVectorSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;

/**
 * Convert the token sequence in the data field of each instance to a feature vector sequence.
 *
 * <p>The instance's data must be a {@link TokenSequence}; it is replaced in place by a
 * {@link FeatureVectorSequence} built against this pipe's data alphabet.
 *
 * @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
 */
public class TokenSequence2FeatureVectorSequence extends Pipe implements Serializable
{
	/** Create AugmentableFeatureVector's in the sequence. */
	boolean augmentable;

	/** Create binary (Augmentable)FeatureVector's in the sequence. */
	boolean binary;

	/**
	 * Forwarded as the last argument of the {@link FeatureVectorSequence} constructor.
	 * NOTE(review): presumably controls whether unseen features may be added to the
	 * data alphabet -- confirm against FeatureVectorSequence.
	 */
	boolean growAlphabet = true;

	/**
	 * Creates a pipe that uses the given data alphabet.
	 *
	 * @param dataDict    alphabet handed to the superclass as the data alphabet
	 * @param binary      whether the produced feature vectors are binary
	 * @param augmentable whether the produced feature vectors are augmentable
	 */
	public TokenSequence2FeatureVectorSequence (Alphabet dataDict, boolean binary, boolean augmentable)
	{
		super (dataDict, null);
		this.augmentable = augmentable;
		this.binary = binary;
	}

	/**
	 * Creates a pipe that uses the given data alphabet, with both
	 * <code>binary</code> and <code>augmentable</code> set to false.
	 *
	 * @param dataDict alphabet handed to the superclass as the data alphabet
	 */
	public TokenSequence2FeatureVectorSequence (Alphabet dataDict)
	{
		this (dataDict, false, false);
	}

	/**
	 * Creates a pipe backed by a freshly created, empty {@link Alphabet}.
	 *
	 * @param binary      whether the produced feature vectors are binary
	 * @param augmentable whether the produced feature vectors are augmentable
	 */
	public TokenSequence2FeatureVectorSequence (boolean binary, boolean augmentable)
	{
		// Delegate instead of repeating the field assignments of the full constructor.
		this (new Alphabet(), binary, augmentable);
	}

	/**
	 * Creates a pipe backed by a freshly created {@link Alphabet}, with both
	 * <code>binary</code> and <code>augmentable</code> set to false.
	 */
	public TokenSequence2FeatureVectorSequence ()
	{
		this (false, false);
	}

	/**
	 * Replaces the data of <code>carrier</code> with a {@link FeatureVectorSequence}
	 * constructed from its current data and this pipe's data alphabet.
	 *
	 * @param carrier instance whose data field holds a {@link TokenSequence}
	 * @return the same <code>carrier</code> object, with its data field replaced
	 * @throws ClassCastException if the instance's data is not a {@link TokenSequence}
	 */
	public Instance pipe (Instance carrier)
	{
		// Evaluated in the same order as the constructor arguments they feed.
		Alphabet dataAlphabet = (Alphabet) getDataAlphabet();
		TokenSequence tokens = (TokenSequence) carrier.getData();
		carrier.setData (new FeatureVectorSequence (dataAlphabet, tokens,
		                                            binary, augmentable, growAlphabet));
		return carrier;
	}

	/**
	 * Sets the flag that {@link #pipe(Instance)} forwards to the
	 * {@link FeatureVectorSequence} constructor.
	 *
	 * @param growAlphabet new value of the flag (defaults to true)
	 */
	public void setGrowAlphabet (boolean growAlphabet)
	{
		this.growAlphabet = growAlphabet;
	}

	// Serialization

	private static final long serialVersionUID = 1;
	private static final int CURRENT_SERIAL_VERSION = 0;

	/**
	 * Writes the format version followed by the three flags, in the order
	 * <code>augmentable</code>, <code>binary</code>, <code>growAlphabet</code>.
	 *
	 * @param out stream to write to
	 * @throws IOException if the underlying stream fails
	 */
	private void writeObject (ObjectOutputStream out) throws IOException
	{
		out.writeInt (CURRENT_SERIAL_VERSION);
		out.writeBoolean (augmentable);
		out.writeBoolean (binary);
		out.writeBoolean (growAlphabet);
	}

	/**
	 * Reads back the state written by <code>writeObject</code>, in the same order.
	 *
	 * @param in stream to read from
	 * @throws InvalidObjectException if the stream carries a format version this
	 *         class does not know how to decode
	 * @throws IOException if the underlying stream fails
	 * @throws ClassNotFoundException declared for the serialization contract; not
	 *         thrown by this method directly
	 */
	private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException
	{
		int version = in.readInt ();
		// Refuse unknown layouts rather than misinterpreting the bytes that follow.
		if (version < 0 || version > CURRENT_SERIAL_VERSION) {
			throw new InvalidObjectException (
				"Unsupported serial version " + version
				+ " for TokenSequence2FeatureVectorSequence (newest supported: "
				+ CURRENT_SERIAL_VERSION + ")");
		}
		augmentable = in.readBoolean ();
		binary = in.readBoolean ();
		growAlphabet = in.readBoolean ();
	}
}