package LBJ2.classify;

import java.io.IOException;
import java.io.Serializable;
import LBJ2.learn.ChildLexicon;
import LBJ2.learn.Lexicon;
import LBJ2.util.ByteString;
import LBJ2.util.ClassUtils;
import LBJ2.util.ExceptionlessInputStream;
import LBJ2.util.ExceptionlessOutputStream;


/**
  * Objects of this class represent the value of a <code>Classifier</code>'s
  * decision.
  *
  * <p> Equality ({@link #equals(Object)}) and the lexicon serialization
  * ({@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)})
  * compare the package and classifier name strings by reference, so those
  * strings are expected to be <code>intern()</code>ed.  All of the reading
  * methods in this class intern the strings they read.
  *
  * @author Nick Rizzolo
 **/
public abstract class Feature implements Cloneable, Comparable, Serializable
{
  /**
    * The Java <code>package</code> containing the classifier that produced
    * this feature.
   **/
  protected String containingPackage;
  /** The name of the LBJ classifier that produced this feature. */
  protected String generatingClassifier;


  /**
    * For internal use only.  Leaves both name strings <code>null</code>
    * until one of the <code>read</code> methods fills them in.
    *
    * @see #readFeature(ExceptionlessInputStream)
   **/
  protected Feature() { }

  /**
    * Initializing constructor.
    *
    * @param p The package containing the classifier that produced this
    *          feature.
    * @param c The name of the classifier that produced this feature.
   **/
  public Feature(String p, String c) {
    containingPackage = p;
    generatingClassifier = c;
  }


  /** Retrieves this feature's package. */
  public String getPackage() { return containingPackage; }


  /** Retrieves the name of the classifier that produced this feature. */
  public String getGeneratingClassifier() { return generatingClassifier; }


  /**
    * Retrieves this feature's identifier as a string.
    *
    * @return This feature's identifier as a string.
   **/
  public abstract String getStringIdentifier();


  /**
    * Retrieves this feature's identifier as a byte string.
    *
    * @return This feature's identifier as a byte string.
   **/
  public abstract ByteString getByteStringIdentifier();


  /**
    * Gives a string representation of the value of this feature.
    *
    * @return A string representation of the value of this feature.
   **/
  public abstract String getStringValue();


  /**
    * Gives a byte string representation of the value of this feature.
    *
    * @return A byte string representation of the value of this feature.
   **/
  public abstract ByteString getByteStringValue();


  /**
    * Determines whether or not the parameter is equivalent to the string
    * representation of the value of this feature.
    *
    * @param v The string to compare against.
    * @return <code>true</code> iff the parameter is equivalent to the string
    *         representation of the value of this feature.
   **/
  public abstract boolean valueEquals(String v);


  /**
    * Determines if this feature is discrete.
    *
    * @return <code>true</code> iff this is discrete.
   **/
  public abstract boolean isDiscrete();


  /**
    * Determines if this feature contains a byte string identifier field.
    *
    * @return <code>true</code> iff this feature contains a byte string
    *         identifier field.
   **/
  public boolean hasByteStringIdentifier() { return false; }


  /**
    * Determines if this feature contains a string identifier field.
    *
    * @return <code>true</code> iff this feature contains a string identifier
    *         field.
   **/
  public boolean hasStringIdentifier() { return false; }


  /**
    * Determines if this feature is primitive.
    *
    * @return <code>true</code> iff this is primitive.
   **/
  public boolean isPrimitive() { return false; }


  /**
    * Determines if this feature is conjunctive.
    *
    * @return <code>true</code> iff this feature is conjunctive.
   **/
  public boolean isConjunctive() { return false; }


  /**
    * Determines if this feature is a referring feature.
    *
    * @return <code>true</code> iff this feature is a referring feature.
   **/
  public boolean isReferrer() { return false; }


  /**
    * Determines if this feature comes from an array.
    *
    * @return <code>true</code> iff this feature comes from an array.
   **/
  public boolean fromArray() { return false; }


  /**
    * The depth of a feature is one more than the maximum depth of any of its
    * children, or 0 if it has no children.
    *
    * @return The depth of this feature as described above.
   **/
  public int depth() { return 0; }


  /**
    * Returns the index in the generating classifier's value list of this
    * feature's value.
    *
    * @return A non-negative integer index, or -1 if this feature is real or
    *         doesn't have a value list.
   **/
  public short getValueIndex() { return -1; }


  /**
    * Returns the total number of values this feature might possibly be set
    * to.
    *
    * @return Some integer greater than 1 iff this feature is a discrete
    *         feature with a specified value list or a conjunctive feature
    *         whose arguments have value lists, and 0 otherwise.
   **/
  public short totalValues() { return 0; }


  /**
    * If this feature is an array feature, call this method to set its array
    * length; otherwise, this method has no effect.
    *
    * @param l The new length.
   **/
  public void setArrayLength(int l) { }


  /**
    * Returns the strength of this feature if it were to be placed in a
    * mathematical vector space.
   **/
  public abstract double getStrength();


  /**
    * Return the feature that should be used to index this feature into a
    * lexicon.  This method simply calls <code>getFeatureKey(lexicon, true,
    * -1)</code>.
    *
    * @see   #getFeatureKey(Lexicon,boolean,int)
    * @param lexicon The lexicon into which this feature will be indexed.
    * @return A feature object appropriate for use as the key of a map.
   **/
  public Feature getFeatureKey(Lexicon lexicon) {
    return getFeatureKey(lexicon, true, -1);
  }


  /**
    * Return the feature that should be used to index this feature into a
    * lexicon.
    *
    * @param lexicon  The lexicon into which this feature will be indexed.
    * @param training Whether or not the learner is currently training.
    * @param label    The label of the example containing this feature, or -1
    *                 if we aren't doing per class feature counting.
    * @return A feature object appropriate for use as the key of a map.
   **/
  public abstract Feature getFeatureKey(Lexicon lexicon, boolean training,
                                        int label);


  /**
    * Returns a {@link RealFeature} whose value is the strength of the current
    * feature, and whose <code>identifier</code> field contains all the
    * information necessary to distinguish this feature from other features.
    * When defining this method, <code>RealFeature</code>s may simply return
    * themselves.
   **/
  public abstract RealFeature makeReal();


  /**
    * Create a feature representing the conjunction of this feature with the
    * given argument feature.
    *
    * @param f The feature to conjunct with.
    * @param c The classifier producing the resulting feature.
    * @return A feature representing the conjunction of this feature and
    *         <code>f</code>.
   **/
  public abstract Feature conjunction(Feature f, Classifier c);


  /**
    * Create a feature representing the conjunction of this feature with the
    * given discrete argument feature.  This default implementation builds a
    * {@link RealConjunctiveFeature} with <code>f</code> as the first argument
    * and this feature as the second.
    *
    * @param f The discrete feature to conjunct with.
    * @param c The classifier producing the resulting feature.
    * @return A feature representing the conjunction of this feature and
    *         <code>f</code>.
   **/
  protected Feature conjunctWith(DiscreteFeature f, Classifier c) {
    return new RealConjunctiveFeature(c, f, this);
  }


  /**
    * Create a feature representing the conjunction of this feature with the
    * given real argument feature.  This default implementation builds a
    * {@link RealConjunctiveFeature} with <code>f</code> as the first argument
    * and this feature as the second.
    *
    * @param f The real feature to conjunct with.
    * @param c The classifier producing the resulting feature.
    * @return A feature representing the conjunction of this feature and
    *         <code>f</code>.
   **/
  protected Feature conjunctWith(RealFeature f, Classifier c) {
    return new RealConjunctiveFeature(c, f, this);
  }


  /**
    * Returns a new feature object that's identical to this feature except its
    * strength is given by <code>s</code>.
    *
    * @param s The strength of the new feature.
    * @return A new feature object as above, or <code>null</code> if this
    *         feature cannot take the specified strength.
   **/
  public abstract Feature withStrength(double s);


  /**
    * Returns a feature object in which any strings that are being used to
    * represent an identifier or value have been encoded in byte strings.
    *
    * @param e The encoding to use.
    * @return A feature object as above; possibly this object.
   **/
  public abstract Feature encode(String e);


  /**
    * Takes care of any feature-type-specific tasks that need to be taken care
    * of when removing a feature of this type from a {@link ChildLexicon}, in
    * particular updating parent counts and removing children of this feature
    * if necessary.  This default implementation does nothing.
    *
    * @param lex The child lexicon this feature is being removed from.
   **/
  public void removeFromChildLexicon(ChildLexicon lex) { }


  /**
    * Does a feature-type-specific lookup of this feature in the given
    * {@link ChildLexicon}.
    *
    * @param lex   The child lexicon this feature is being looked up in.
    * @param label The label of the example containing this feature, or -1 if
    *              we aren't doing per class feature counting.
    * @return The index of this feature in the given lexicon.
   **/
  public int childLexiconLookup(ChildLexicon lex, int label) {
    return lex.childLexiconLookup(this, label);
  }


  /**
    * The hash code of a <code>Feature</code> is a function of the hash codes
    * of {@link #containingPackage} and {@link #generatingClassifier}.  A
    * <code>null</code> name string contributes 0 rather than causing a
    * <code>NullPointerException</code>.
    *
    * @return The hash code of this <code>Feature</code>.
   **/
  public int hashCode() {
    int packageHash =
      containingPackage == null ? 0 : containingPackage.hashCode();
    int classifierHash =
      generatingClassifier == null ? 0 : generatingClassifier.hashCode();
    return 31 * packageHash + classifierHash;
  }


  /**
    * Two <code>Feature</code>s are equal when their packages and generating
    * classifiers are equivalent.  The name strings are compared by reference,
    * which relies on them having been <code>intern()</code>ed; assertions
    * flag the case where equal strings live in different objects.
    *
    * @param o The object to compare with; may be <code>null</code>.
    * @return <code>true</code> iff the argument is an equivalent
    *         <code>Feature</code>.
   **/
  public boolean equals(Object o) {
    // Must come before any dereference of o so that equals(null) returns
    // false even when assertions are enabled.
    if (!(o instanceof Feature)) return false;

    assert (getClass() == o.getClass())
           == (getClass().getName().equals(o.getClass().getName()))
      : "getClass() doesn't behave as expected.";

    Feature f = (Feature) o;
    if (getClass() != o.getClass() && !classEquivalent(f)) return false;

    // The null guards keep these sanity checks from throwing on features
    // whose name strings have not been filled in.
    assert !(f.containingPackage != null
             && f.containingPackage.equals(containingPackage)
             && f.containingPackage != containingPackage)
      : "Features \"" + f + "\" and \"" + this
        + " have equivalent package strings in different objects.";
    assert !(f.generatingClassifier != null
             && f.generatingClassifier.equals(generatingClassifier)
             && f.generatingClassifier != generatingClassifier)
      : "Features \"" + f + "\" and \"" + this
        + " have equivalent classifier name strings in different objects.";

    return f.containingPackage == containingPackage
           && f.generatingClassifier == generatingClassifier;
  }


  /**
    * Some features are functionally equivalent, differing only in the
    * encoding of their values; this method will return <code>true</code> iff
    * the class of this feature and <code>f</code> are different, but they
    * differ only because they encode their values differently.  This method
    * does not compare the values themselves, however.
    *
    * @param f Another feature.
    * @return See above.
   **/
  public boolean classEquivalent(Feature f) { return false; }


  /**
    * Used to sort features into an order that is convenient both to page
    * through and for the lexicon to read off disk.
    *
    * @param o An object to compare with.
    * @return Integers appropriate for sorting features first by package, then
    *         by classifier name, and then by identifier.
   **/
  public abstract int compareTo(Object o);


  /**
    * Compares only the run-time types, packages, classifier names, and
    * identifiers of the features.  This method must be overridden in order to
    * accomplish the comparison of identifiers, but the overriding method will
    * still have the convenience of calling this method to accomplish the
    * majority of the work.
    *
    * @param o An object to compare with.
    * @return Integers appropriate for sorting features first by package, then
    *         by classifier name, and then by identifier.
   **/
  public int compareNameStrings(Object o) {
    int d = compareTypes(o);
    if (d != 0) return d;

    // compareTypes(Object) returns non-zero for anything that is not a
    // Feature, so the cast is safe here.
    Feature f = (Feature) o;
    d = containingPackage.compareTo(f.containingPackage);
    if (d != 0) return d;
    return generatingClassifier.compareTo(f.generatingClassifier);
  }


  /**
    * Compares only the run-time types of the features.  The criteria, in
    * order, are: discreteness, depth, whether the feature is a referrer,
    * whether it comes from an array, and whether it has a string identifier.
    *
    * @param o An object to compare with.
    * @return Integers appropriate for sorting features by run-time type, or
    *         -1 if <code>o</code> is not a <code>Feature</code>.
   **/
  private int compareTypes(Object o) {
    if (!(o instanceof Feature)) return -1;
    Feature f = (Feature) o;

    // Discrete features sort before real features.
    int d = (f.isDiscrete() ? 1 : 0) - (isDiscrete() ? 1 : 0);
    if (d != 0) return d;

    // Shallower features sort first.
    d = depth() - f.depth();
    if (d != 0) return d;

    // Referrers sort before non-referrers.
    d = (f.isReferrer() ? 1 : 0) - (isReferrer() ? 1 : 0);
    if (d != 0) return d;

    // Array features sort after non-array features.
    d = (fromArray() ? 1 : 0) - (f.fromArray() ? 1 : 0);
    if (d != 0) return d;

    // Features with string identifiers sort after those without.
    return (hasStringIdentifier() ? 1 : 0)
           - (f.hasStringIdentifier() ? 1 : 0);
  }


  /**
    * Writes a string representation of this <code>Feature</code> to the
    * specified buffer.
    *
    * @param buffer The buffer to write to.
   **/
  public abstract void write(StringBuffer buffer);


  /**
    * Writes a string representation of this <code>Feature</code>'s package,
    * generating classifier, and sometimes identifier information to the
    * specified buffer.  This method will need to be overridden to write the
    * identifier information, but at least the overriding method will have the
    * convenience of calling this method to accomplish most of the work first.
    * The package (and the separating dot) is omitted when it is
    * <code>null</code> or empty.
    *
    * @param buffer The buffer to write to.
   **/
  public void writeNameString(StringBuffer buffer) {
    if (containingPackage != null && containingPackage.length() > 0) {
      buffer.append(containingPackage);
      buffer.append(".");
    }

    buffer.append(generatingClassifier);
    if (hasByteStringIdentifier()) buffer.append("|B|");
  }


  /**
    * Writes a complete binary representation of the feature: the run-time
    * class name, the package, and the generating classifier's name.
    *
    * @param out The output stream.
   **/
  public void write(ExceptionlessOutputStream out) {
    out.writeString(getClass().getName());
    out.writeString(containingPackage);
    out.writeString(generatingClassifier);
  }


  /**
    * Writes a string representation of this <code>Feature</code> to the
    * specified buffer, omitting the package name.  The package field is
    * temporarily cleared while {@link #write(StringBuffer)} runs and is
    * always restored afterwards, even if that call throws.
    *
    * @param buffer The buffer to write to.
   **/
  public void writeNoPackage(StringBuffer buffer) {
    String p = containingPackage;
    containingPackage = null;
    try {
      write(buffer);
    }
    finally {
      containingPackage = p;
    }
  }


  /**
    * Writes a binary representation of the feature intended for use by a
    * lexicon, omitting redundant information when possible.
    *
    * @param out The output stream.
    * @param lex The lexicon out of which this feature is being written.
    * @param c   The fully qualified name of the assumed class.  The runtime
    *            class of this feature won't be written if it's equivalent to
    *            <code>c</code>.
    * @param p   The assumed package string.  This feature's package string
    *            won't be written if it's equivalent to <code>p</code>.
    * @param g   The assumed classifier name string.  This feature's
    *            classifier name string won't be written if it's equivalent
    *            to <code>g</code>.
    * @param si  The assumed identifier as a string.  If this feature has a
    *            string identifier, it won't be written if it's equivalent to
    *            <code>si</code>.
    * @param bi  The assumed identifier as a byte string.  If this feature
    *            has a byte string identifier, it won't be written if it's
    *            equivalent to <code>bi</code>.
    * @return The name of the runtime type of this feature.
   **/
  public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c,
                         String p, String g, String si, ByteString bi) {
    String clazz = getClass().getName();
    out.writeString(clazz.equals(c) ? null : clazz);
    // Reference comparison: a string that is equal but not identical to the
    // assumed one is simply written out in full.
    out.writeString(containingPackage == p ? null : containingPackage);
    out.writeString(generatingClassifier == g ? null : generatingClassifier);
    return clazz;
  }


  /**
    * Reads the binary representation of a feature of any type from the given
    * stream.  The stream is expected to first return a string containing the
    * fully qualified class name of the feature.  If the <i>short</i> value
    * <code>-1</code> appears instead, this method returns <code>null</code>.
    * If the named class cannot be instantiated, an error is printed, the
    * stream is closed, and the JVM exits.
    *
    * <p> This method is appropriate for reading features as written by
    * {@link #write(ExceptionlessOutputStream)}.
    *
    * @param in  The input stream.
    * @return The feature read from the stream.
   **/
  public static Feature readFeature(ExceptionlessInputStream in) {
    String name = in.readString();
    if (name == null) return null;
    Class c = ClassUtils.getClass(name);
    Feature result = null;

    try { result = (Feature) c.newInstance(); }
    catch (Exception e) {
      System.err.println("Error instantiating feature '" + name + "':");
      e.printStackTrace();
      in.close();
      System.exit(1);
    }

    result.read(in);
    return result;
  }


  /**
    * Reads the representation of a feature with this object's run-time type
    * from the given stream, overwriting the data in this object.  Strings
    * read from the stream are <code>intern()</code>ed; a <code>null</code>
    * string in the stream is stored as <code>null</code>.
    *
    * <p> This method is appropriate for reading features as written by
    * {@link #write(ExceptionlessOutputStream)}.
    *
    * @param in  The input stream.
   **/
  public void read(ExceptionlessInputStream in) {
    containingPackage = internOrNull(in.readString());
    generatingClassifier = internOrNull(in.readString());
  }


  /**
    * Reads the representation of a feature of any type as stored by a
    * lexicon, omitting redundant information.  If the class cannot be
    * instantiated, an error is printed, the stream is closed, and the JVM
    * exits.
    *
    * <p> This method is appropriate for reading features as written by
    * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}.
    *
    * @param in  The input stream.
    * @param lex The lexicon we are reading in to.
    * @param c   The assumed class.  If no class name is given in the input
    *            stream, a feature of this type is instantiated.
    * @param p   The assumed package string.  If no package name is given in
    *            the input stream, the instantiated feature is given this
    *            package.
    * @param g   The assumed classifier name string.  If no classifier name
    *            is given in the input stream, the instantiated feature is
    *            given this classifier name.
    * @param si  The assumed identifier as a string.  If the feature being
    *            read has a string identifier field and no identifier is
    *            given in the input stream, the feature is given this
    *            identifier.
    * @param bi  The assumed identifier as a byte string.  If the feature
    *            being read has a byte string identifier field and no
    *            identifier is given in the input stream, the feature is
    *            given this identifier.
    * @return The feature read from the stream.
   **/
  public static Feature lexReadFeature(ExceptionlessInputStream in,
                                       Lexicon lex, Class c, String p,
                                       String g, String si, ByteString bi) {
    String name = in.readString();
    if (name != null) c = ClassUtils.getClass(name);
    Feature result = null;

    try { result = (Feature) c.newInstance(); }
    catch (Exception e) {
      System.err.println("Error instantiating feature '" + name + "':");
      e.printStackTrace();
      in.close();
      System.exit(1);
    }

    result.lexRead(in, lex, p, g, si, bi);
    return result;
  }


  /**
    * Reads the representation of a feature with this object's run-time type
    * as stored by a lexicon, overwriting the data in this object.
    *
    * <p> This method is appropriate for reading features as written by
    * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}.
    *
    * @param in  The input stream.
    * @param lex The lexicon we are reading in to.
    * @param p   The assumed package string.  If no package name is given in
    *            the input stream, the instantiated feature is given this
    *            package.
    * @param g   The assumed classifier name string.  If no classifier name
    *            is given in the input stream, the instantiated feature is
    *            given this classifier name.
    * @param si  The assumed identifier as a string.  If the feature being
    *            read has a string identifier field and no identifier is
    *            given in the input stream, the feature is given this
    *            identifier.
    * @param bi  The assumed identifier as a byte string.  If the feature
    *            being read has a byte string identifier field and no
    *            identifier is given in the input stream, the feature is
    *            given this identifier.
   **/
  public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p,
                      String g, String si, ByteString bi) {
    // A null string in the stream means "same as the assumed value"; the
    // assumed values are stored as given, without interning.
    String packageRead = in.readString();
    containingPackage = packageRead == null ? p : packageRead.intern();

    String classifierRead = in.readString();
    generatingClassifier =
      classifierRead == null ? g : classifierRead.intern();
  }


  /** Returns a string representation of this <code>Feature</code>. */
  public String toString() {
    StringBuffer result = new StringBuffer();
    write(result);
    return result.toString();
  }


  /**
    * Returns a string representation of this <code>Feature</code> omitting
    * the package.
   **/
  public String toStringNoPackage() {
    StringBuffer result = new StringBuffer();
    writeNoPackage(result);
    return result.toString();
  }


  /**
    * Returns a shallow clone of this <code>Feature</code>.  If cloning
    * fails, an error is printed and <code>null</code> is returned.
   **/
  public Object clone() {
    Object result = null;

    try { result = super.clone(); }
    catch (Exception e) {
      System.err.println("Can't clone feature '" + this + "':");
      e.printStackTrace();
    }

    return result;
  }


  /**
    * Special handling during deserialization to ensure that
    * <code>Strings</code> are <code>intern()</code>ed.  Fields that were
    * serialized as <code>null</code> stay <code>null</code>.
    *
    * @param in The stream to deserialize from.
   **/
  private void readObject(java.io.ObjectInputStream in)
          throws IOException, ClassNotFoundException {
    in.defaultReadObject();
    containingPackage = internOrNull(containingPackage);
    generatingClassifier = internOrNull(generatingClassifier);
  }


  /**
    * Interns the given string, passing <code>null</code> through unchanged.
    *
    * @param s The string to intern; may be <code>null</code>.
    * @return The canonical instance of <code>s</code>, or <code>null</code>
    *         if <code>s</code> is <code>null</code>.
   **/
  private static String internOrNull(String s) {
    return s == null ? null : s.intern();
  }
}