/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. */ /** A subset of features. @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */ package cc.mallet.types; import java.util.BitSet; import java.util.regex.Pattern; import java.io.*; import cc.mallet.types.*; public class FeatureSelection implements AlphabetCarrying, Serializable { Alphabet dictionary; BitSet selectedFeatures; // boolean defaultValue; //Implement this by using it to reverse all the exterior interfaces public FeatureSelection (Alphabet dictionary, BitSet selectedFeatures) { this.dictionary = dictionary; this.selectedFeatures = selectedFeatures; } public FeatureSelection (Alphabet dictionary) { this.dictionary = dictionary; this.selectedFeatures = new BitSet(); } public FeatureSelection (RankedFeatureVector rsv, int numFeatures) { this.dictionary = rsv.getAlphabet(); this.selectedFeatures = new BitSet (dictionary.size()); int numSelections = Math.min (numFeatures, dictionary.size()); for (int i = 0; i < numSelections; i++) selectedFeatures.set (rsv.getIndexAtRank(i)); } /** Creates a FeatureSelection that includes only those features whose names match a given regex. * A static factory method. * @param dictionary A dictionary of fetaure names. Entries must be string. * @param regex Features whose names match this pattern will be included. * @return A new FeatureSelection. * */ public static FeatureSelection createFromRegex (Alphabet dictionary, Pattern regex) { BitSet included = new BitSet (dictionary.size()); for (int i = 0; i < dictionary.size(); i++) { String feature = (String) dictionary.lookupObject (i); if (regex.matcher (feature).matches()) { included.set (i); } } return new FeatureSelection (dictionary, included); } public Object clone () { return new FeatureSelection (dictionary, (BitSet)selectedFeatures.clone()); } public Alphabet getAlphabet () { return dictionary; } public Alphabet[] getAlphabets () { return new Alphabet[] { dictionary }; } public int cardinality () { return selectedFeatures.cardinality(); } public BitSet getBitSet () { return selectedFeatures; } public void add (Object o) { add (dictionary.lookupIndex(o)); } public void add (int index) { assert (index >= 0); selectedFeatures.set (index); } public void remove (Object o) { remove (dictionary.lookupIndex(o)); } public void remove (int index) { selectedFeatures.set (index, false); } public boolean contains (Object o) { int index = dictionary.lookupIndex (o, false); if (index == -1) return false; return contains (index); } public boolean contains (int index) { return selectedFeatures.get (index); } public void or (FeatureSelection fs) { selectedFeatures.or (fs.selectedFeatures); } public int nextSelectedIndex (int index) { return selectedFeatures.nextSetBit (index); } public int nextDeselectedIndex (int index) { return selectedFeatures.nextClearBit (index); } // Serialization private static final long serialVersionUID = 1; private static final int CURRENT_SERIAL_VERSION = 0; static final int NULL_INTEGER = -1; /* Need to check for null pointers. */ private void writeObject (ObjectOutputStream out) throws IOException { int i, size; out.writeInt(CURRENT_SERIAL_VERSION); out.writeObject(dictionary); out.writeObject(selectedFeatures); } private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { int size, i; int version = in.readInt (); dictionary = (Alphabet) in.readObject(); selectedFeatures = (BitSet) in.readObject(); } }