/* Copyright (C) 2005 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

package cc.mallet.types;

import java.io.ObjectOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;

import cc.mallet.pipe.TokenSequenceRemoveStopwords;

/**
 * A FeatureSequence that additionally keeps, for every token position, the
 * index of the bigram formed by the previous token and the token at that
 * position. Bigram entries live in their own dictionary, separate from the
 * one handed to the superclass.
 *
 * @author <a href="mailto:mccallum@cs.umass.edu">Andrew McCallum</a>
 */
public class FeatureSequenceWithBigrams extends FeatureSequence
{
    /**
     * Name of the token property consulted on the preceding token while
     * building bigrams: when that property is non-null, no bigram is formed
     * with the token that follows.
     * NOTE(review): presumably set by a stop-word removal pipe -- confirm.
     */
    public final static String deletionMark = "NextTokenDeleted";

    /** Dictionary holding the bigram entries; may be null. */
    Alphabet biDictionary;

    /** One entry per token position: a bigram index, or -1 when no bigram ends there. */
    int[] biFeatures;

    /**
     * Builds the sequence from a token sequence, adding every token's text
     * through the superclass and recording one bigram index per position.
     *
     * <p>The bigram key for a position is the previous token's text, an
     * underscore, and the current token's text. Position 0, and any position
     * whose previous token carries a non-null {@link #deletionMark} property,
     * gets -1. When <code>bigramDictionary</code> is null every formed bigram
     * is recorded as index 0.
     *
     * @param dict             alphabet passed to the superclass constructor
     * @param bigramDictionary alphabet in which bigram keys are looked up; may be null
     * @param ts               tokens to convert
     */
    public FeatureSequenceWithBigrams (Alphabet dict, Alphabet bigramDictionary, TokenSequence ts)
    {
        super (dict, ts.size());
        final int numTokens = ts.size();
        this.biDictionary = bigramDictionary;
        this.biFeatures = new int[numTokens];

        Token previous = null;
        for (int position = 0; position < numTokens; position++) {
            final Token current = ts.get(position);
            super.add(current.getText());

            final int bigram;
            if (previous == null || previous.getProperty(deletionMark) != null) {
                // First token, or the previous token is marked: no bigram ends here.
                bigram = -1;
            } else if (biDictionary == null) {
                // No bigram dictionary supplied: use index 0 as a placeholder.
                bigram = 0;
            } else {
                // NOTE(review): the 'true' flag presumably asks the alphabet to add
                // the key when it is absent -- confirm against Alphabet.lookupIndex.
                bigram = biDictionary.lookupIndex(previous.getText() + "_" + current.getText(), true);
            }
            biFeatures[position] = bigram;

            previous = current;
        }
    }

    /** Returns the dictionary used for bigram entries (null if none was supplied). */
    public Alphabet getBiAlphabet ()
    {
        return biDictionary;
    }

    /**
     * Returns the bigram index stored for the given position, or -1 when no
     * bigram was recorded there.
     */
    public final int getBiIndexAtPosition (int pos)
    {
        return biFeatures[pos];
    }

    /**
     * Returns the bigram dictionary entry for the given position, or null when
     * the position has no bigram (-1) or there is no bigram dictionary.
     * NOTE(review): if FeatureSequence declares the same signature, this
     * replaces it and yields the bigram rather than the single-token entry -- confirm.
     */
    public Object getObjectAtPosition (int pos)
    {
        final int bigram = biFeatures[pos];
        if (bigram == -1 || biDictionary == null)
            return null;
        return biDictionary.lookupObject (bigram);
    }

    // Serialization

    private static final long serialVersionUID = 1;
    private static final int CURRENT_SERIAL_VERSION = 0;
    private static final int NULL_INTEGER = -1;

    /**
     * Writes, in order: the serial version, the bigram dictionary, the number
     * of bigram entries, and then each bigram index.
     */
    private void writeObject (ObjectOutputStream out) throws IOException {
        out.writeInt (CURRENT_SERIAL_VERSION);
        out.writeObject (biDictionary);
        out.writeInt (biFeatures.length);
        for (int bigram : biFeatures)
            out.writeInt (bigram);
    }

    /**
     * Reads the fields back in the order writeObject emitted them. The serial
     * version is consumed from the stream but not otherwise used.
     */
    private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.readInt ();  // serial version: read only to advance past it
        biDictionary = (Alphabet) in.readObject ();
        final int count = in.readInt ();
        biFeatures = new int[count];
        for (int position = 0; position < count; position++)
            biFeatures[position] = in.readInt ();
    }
}