package LBJ2.learn; import LBJ2.classify.DiscreteConjunctiveFeature; import LBJ2.classify.DiscreteReferrer; import LBJ2.classify.Feature; import LBJ2.classify.RealConjunctiveFeature; import LBJ2.classify.RealReferrer; import LBJ2.util.ExceptionlessInputStream; import LBJ2.util.ExceptionlessOutputStream; import LBJ2.util.IVector; /** * Instances of this class are intended to store features that are children * of other features and which do not correspond to their own weights in any * learner's weight vector. While a {@link Lexicon} will store an instance * of this class in its {@link Lexicon#lexiconChildren} field, an instance of * this class will never do so. Also, the {@link #lookupChild(Feature)} * method behaves differently in this class, since it is assumed that * children are stored here. **/ public class ChildLexicon extends Lexicon { /** * The elements of this vector (which correspond to the features in * {@link #lexiconInv}) serve a dual purpose; first, to indicate by * absolute value the number of other features currently stored in this * object that have the corresponding feature as a child, and second, to * indicate by sign if the corresponding feature has been marked for * removal. **/ private IVector parents; // Initialization happens in clear() /** * A reference to the lexicon that uses this lexicon as its child lexicon. **/ private Lexicon parentLexicon; /** Creates an empty lexicon. */ public ChildLexicon() { } /** * Creates an empty lexicon. * * @param p The lexicon that uses this lexicon as its child lexicon. **/ public ChildLexicon(Lexicon p) { parentLexicon = p; } // Lexicon's constructor will call clear(), so there's no need to initialize // parents. /** * Creates an empty lexicon with the given encoding. * * @param p The lexicon that uses this lexicon as its child lexicon. * @param e The encoding to use when adding features to this lexicon. **/ public ChildLexicon(Lexicon p, String e) { super(e); // The super constructor will call clear(), so there's no need to // initialize parents. parentLexicon = p; } /** Clears the data structures associated with this instance. */ public void clear() { super.clear(); parents = new IVector(); } /** * Sets the value of {@link #parentLexicon} and makes sure that any * features marked for removal in this lexicon are the identical objects * also present in the parent. This is useful in particular just after * lexicons have been read from disk. * * @param p The new parent lexicon. **/ public void setParent(Lexicon p) { parentLexicon = p; int N = lexiconInv.size(); for (int i = 0; i < N; ++i) { Feature f = lexiconInv.get(i); if (f != null && parents.get(i) < 0) { Feature pf = p.lookupKey(p.lookup(f)); if (pf == null) { System.err.println("LBJ ERROR: Can't find feature " + f + " in parent lexicon."); new Exception().printStackTrace(); System.exit(1); } lexiconInv.set(i, pf); if (lexicon != null) lexicon.put(pf, lexicon.remove(f)); } } } /** * Removes the mapping for the given feature from this lexicon and returns * the feature object representing it that was stored here. * * @param f The feature to remove. * @return The representation of <code>f</code> that used to be stored * here, or <code>null</code> if it wasn't present. **/ public Feature remove(Feature f) { if (contains(f)) { // contains(Feature) calls lazyMapCreation() int index = lookup(f); int count = parents.get(index); if (count == 0) { f.removeFromChildLexicon(this); // Calls decrementParentCounts lexicon.remove(f); return lexiconInv.set(index, null); } else if (count > 0) { parents.set(index, -count); return lexiconInv.get(index); } else { System.err.println( "LBJ ERROR: Marking feature as removable for the second time: " + f); new Exception().printStackTrace(); System.exit(1); } } return null; } /** * The parent of feature <code>f</code> is being removed, so we decrement * <code>f</code>'s parent counts and remove it if it's ready. * * @param f The child feature whose parent counts need updating and which * may be removed as well. **/ public void decrementParentCounts(Feature f) { int index = lookup(f); int count = parents.get(index); if (count == 0) { System.err.println( "LBJ ERROR: Parent count incorrect for feature " + f); new Exception().printStackTrace(); System.exit(1); } else if (count < 0) { parents.increment(index); if (count == -1) { f.removeFromChildLexicon(this); lexicon.remove(f); lexiconInv.set(index, null); } } else parents.decrement(index); } /** * This method adds the given feature to this lexicon and also recursively * adds its children, if any. * * @param f The feature to look up. * @param label The label of the example containing this feature, or -1 if * we aren't doing per class feature counting. * @return A feature equivalent to <code>f</code> that is stored in this * lexicon. **/ public Feature getChildFeature(Feature f, int label) { return lexiconInv.get(f.childLexiconLookup(this, label)); } /** * Updates the counts in {@link #parents} for the children of * <code>f</code>. * * @param f The feature to look up. * @param label The label of the example containing this feature, or -1 if * we aren't doing per class feature counting. * @return The index of <code>f</code> in this lexicon. **/ public int childLexiconLookup(Feature f, int label) { return lookup(f, true, label); } /** * Updates the counts in {@link #parents} for the children of * <code>f</code>. * * @param f The feature to look up. * @param label The label of the example containing this feature, or -1 if * we aren't doing per class feature counting. * @return The index of <code>f</code> in this lexicon. **/ public int childLexiconLookup(DiscreteConjunctiveFeature f, int label) { int oldSize = lexiconInv.size(); int result = lookup(f, true, label); if (oldSize < lexiconInv.size()) { incrementParentCounts(f.getLeft(), label); incrementParentCounts(f.getRight(), label); } return result; } /** * Updates the counts in {@link #parents} for the children of * <code>f</code>. * * @param f The feature to look up. * @param label The label of the example containing this feature, or -1 if * we aren't doing per class feature counting. * @return The index of <code>f</code> in this lexicon. **/ public int childLexiconLookup(RealConjunctiveFeature f, int label) { int oldSize = lexiconInv.size(); int result = lookup(f, true, label); if (oldSize < lexiconInv.size()) { incrementParentCounts(f.getLeft(), label); incrementParentCounts(f.getRight(), label); } return result; } /** * Updates the counts in {@link #parents} for the children of * <code>f</code>. * * @param f The feature to look up. * @param label The label of the example containing this feature, or -1 if * we aren't doing per class feature counting. * @return The index of <code>f</code> in this lexicon. **/ public int childLexiconLookup(DiscreteReferrer f, int label) { int oldSize = lexiconInv.size(); int result = lookup(f, true, label); if (oldSize < lexiconInv.size()) incrementParentCounts(f.getReferent(), label); return result; } /** * Updates the counts in {@link #parents} for the children of * <code>f</code>. * * @param f The feature to look up. * @param label The label of the example containing this feature, or -1 if * we aren't doing per class feature counting. * @return The index of <code>f</code> in this lexicon. **/ public int childLexiconLookup(RealReferrer f, int label) { int oldSize = lexiconInv.size(); int result = lookup(f, true, label); if (oldSize < lexiconInv.size()) incrementParentCounts(f.getReferent(), label); return result; } /** * Helper method for methods like * {@link #childLexiconLookup(DiscreteConjunctiveFeature,int)} that * actually does the work of looking up the child feature and updating its * parent counts. * * @param f The feature to look up. * @param label The label of the example containing this feature, or -1 if * we aren't doing per class feature counting. **/ protected void incrementParentCounts(Feature f, int label) { int index = f.childLexiconLookup(this, label); // Increment count while preserving sign to indicate mark for removal. if (parents.get(index) == 0) parents.set(index, parentLexicon.contains(f) ? -1 : 1); else if (parents.get(index) > 0) parents.increment(index); else parents.decrement(index); } /** * Unlike the overridden method in {@link Lexicon}, this method simply * checks {@link #lexicon} for the feature and will throw an exception if * it can't be found. * * @param f The feature to look up. * @return If the feature was found in {@link #lexicon}, its associated * integer index is returned. * @throws UnsupportedOperationException If the feature isn't found * anywhere in the lexicon. **/ public int lookupChild(Feature f) { lazyMapCreation(); Integer I = (Integer) lexicon.get(f); if (I != null) return I.intValue(); throw new UnsupportedOperationException( "When calling ChildLexicon.lookupChild(Feature), the feature must " + "be present in the lexicon. (" + f + ")"); } /** <!-- write(ExceptionlessOutputStream) --> * Writes a binary representation of the lexicon. * * @param out The output stream. **/ public void write(ExceptionlessOutputStream out) { int size = lexiconInv.size(); int n = 0; while (n < size && lexiconInv.get(n) != null) ++n; int i = n; while (i < size && lexiconInv.get(i) == null) ++i; while (i < size) { parents.set(n++, parents.get(i++)); while (i < size && lexiconInv.get(i) == null) ++i; } size = parents.size(); if (n < size) { for (i = size - 1; i >= n; --i) parents.remove(i); parents = new IVector(parents); } lexiconInv.consolidate(); lexicon = null; super.write(out); parents.write(out); } /** <!-- read(ExceptionlessInputStream) --> * Reads a binary representation of the lexicon. * * @param in The input stream. * @param readCounts Whether or not to read the feature counts. **/ public void read(ExceptionlessInputStream in, boolean readCounts) { super.read(in, readCounts); parents.read(in); } /** <!-- printCountTable(boolean) --> * Produces on <code>STDOUT</code> a table of feature counts including a * line indicating the position of {@link #pruneCutoff}. It's probably not * a good idea to call this method unless you know your lexicon is small. * * @param p Whether or not to include package names in the output. **/ public void printCountTable(boolean p) { featureCounts = parents; super.printCountTable(p); featureCounts = null; } }