package LBJ2.classify; import LBJ2.learn.Lexicon; import LBJ2.util.ByteString; import LBJ2.util.ExceptionlessInputStream; import LBJ2.util.ExceptionlessOutputStream; /** * This feature is functionally equivalent to * {@link DiscretePrimitiveFeature}, however its {@link #value} is stored as * a <code>String</code> instead of a {@link ByteString}. Discrete * classifiers return features of this type (or * {@link DiscreteConjunctiveFeature}s or {@link DiscreteReferringFeature}s * that contain features of this type). Before storing these features in a * lexicon, however, they are converted to {@link DiscretePrimitiveFeature}s * using the specified encoding. * * @author Nick Rizzolo **/ public class DiscretePrimitiveStringFeature extends DiscreteFeature { /** * The <code>identifier</code> string distinguishes this * <code>Feature</code> from other <code>Feature</code>s. **/ protected String identifier; /** The discrete value is represented as a string. */ protected String value; /** * For internal use only. * * @see Feature#readFeature(ExceptionlessInputStream) **/ protected DiscretePrimitiveStringFeature() { } /** * Sets both the identifier and the value. The value index and total * allowable values, having not been specified, default to -1 and 0 * respectively. * * @param p The new discrete feature's package. * @param c The name of the classifier that produced this feature. * @param i The new discrete feature's identifier. * @param v The new discrete feature's value. **/ public DiscretePrimitiveStringFeature(String p, String c, String i, String v) { this(p, c, i, v, (short) -1, (short) 0); } /** * Sets the identifier, value, value index, and total allowable values. * * @param p The new discrete feature's package. * @param c The name of the classifier that produced this feature. * @param i The new discrete feature's identifier. * @param v The new discrete feature's value. * @param vi The index corresponding to the value. * @param t The total allowable values for this feature. **/ public DiscretePrimitiveStringFeature(String p, String c, String i, String v, short vi, short t) { super(p, c, vi, t); identifier = i; value = v; } /** * Determines if this feature contains a string identifier field. * * @return <code>true</code> iff this feature contains a string identifier * field. **/ public boolean hasStringIdentifier() { return true; } /** * Determines if this feature is primitive. * * @return <code>true</code> iff this is primitive. **/ public boolean isPrimitive() { return true; } /** * Retrieves this feature's identifier as a string. * * @return This feature's identifier as a string. **/ public String getStringIdentifier() { return identifier; } /** * Retrieves this feature's identifier as a byte string. * * @return This feature's identifier as a byte string. **/ public ByteString getByteStringIdentifier() { return new ByteString(identifier); } /** * Gives a string representation of the value of this feature. * * @return {@link #value}. **/ public String getStringValue() { return value; } /** * Gives a string representation of the value of this feature. * * @return The byte string encoding of {@link #value}. **/ public ByteString getByteStringValue() { return new ByteString(value); } /** * Determines whether or not the parameter is equivalent to the string * representation of the value of this feature. * * @param v The string to compare against. * @return <code>true</code> iff the parameter is equivalent to the string * representation of the value of this feature. **/ public boolean valueEquals(String v) { return v.equals(value); } /** * Return the feature that should be used to index this feature into a * lexicon. If it is a binary feature, we return the feature with an empty * value so that the feature will be mapped to the same weight whether it * is active or not. If the feature can take multiple values, then simply * return the feature object as-is. * * @param lexicon The lexicon into which this feature will be indexed. * @param training Whether or not the learner is currently training. * @param label The label of the example containing this feature, or -1 * if we aren't doing per class feature counting. * @return A feature object appropriate for use as the key of a map. **/ public Feature getFeatureKey(Lexicon lexicon, boolean training, int label) { if (totalValues() == 2) return new DiscretePrimitiveStringFeature( containingPackage, generatingClassifier, identifier, "", (short) -1, (short) 2); return this; } /** * Returns a {@link RealPrimitiveFeature} whose * {@link RealPrimitiveFeature#value value} field is set to the strength of * the current feature, and whose {@link #identifier} field contains all * the information necessary to distinguish this feature from other * features. **/ public RealFeature makeReal() { if (totalValues == 2) return new RealPrimitiveStringFeature( containingPackage, generatingClassifier, identifier, valueIndex); else { StringBuffer id = new StringBuffer(identifier); id.append('_'); id.append(value); return new RealPrimitiveStringFeature( containingPackage, generatingClassifier, id.toString(), 1); } } /** * Returns a new feature object that's identical to this feature except its * strength is given by <code>s</code>. * * @param s The strength of the new feature. * @return A new feature object as above, or <code>null</code> if this * feature cannot take the specified strength. **/ public Feature withStrength(double s) { if (totalValues != 2 || !(s == 0 || s == 1)) return null; return new DiscretePrimitiveStringFeature( containingPackage, generatingClassifier, identifier, "", (short) Math.round(s), (short) 2); } /** * Returns a feature object in which any strings that are being used to * represent an identifier or value have been encoded in byte strings. * * @param e The encoding to use. * @return A feature object as above; possible this object. **/ public Feature encode(String e) { if (e == null || e == "String") return this; ByteString id = identifier.length() == 0 ? ByteString.emptyString : new ByteString(identifier, e); return new DiscretePrimitiveFeature(containingPackage, generatingClassifier, id, new ByteString(value, e), valueIndex, totalValues); } /** * The hash code of a <code>DiscretePrimitiveStringFeature</code> is the * sum of the hash codes of its containing package, identifier, and value. * * @return The hash code of this feature. **/ public int hashCode() { return 31 * super.hashCode() + 17 * identifier.hashCode() + value.hashCode(); } /** * Two <code>DiscretePrimitiveStringFeature</code>s are equivalent when * their containing packages, identifiers, and values are equivalent. * * @param o The object with which to compare this feature. * @return <code>true</code> iff the parameter is an equivalent feature. **/ public boolean equals(Object o) { if (!super.equals(o)) return false; if (o instanceof DiscretePrimitiveStringFeature) { DiscretePrimitiveStringFeature f = (DiscretePrimitiveStringFeature) o; return identifier.equals(f.identifier) && valueIndex > -1 ? valueIndex == f.valueIndex : value.equals(f.value); } DiscretePrimitiveFeature f = (DiscretePrimitiveFeature) o; return f.identifier.equals(identifier) && valueIndex > -1 ? valueIndex == f.valueIndex : f.value.equals(value); } /** * Some features are functionally equivalent, differing only in the * encoding of their values; this method will return <code>true</code> iff * the class of this feature and <code>f</code> are different, but they * differ only because they encode their values differently. This method * does not compare the values themselves, however. * * @param f Another feature. * @return See above. **/ public boolean classEquivalent(Feature f) { return f instanceof DiscretePrimitiveFeature; } /** * Used to sort features into an order that is convenient both to page * through and for the lexicon to read off disk. * * @param o An object to compare with. * @return Integers appropriate for sorting features first by package, then * by identifier, then by value. **/ public int compareTo(Object o) { int d = compareNameStrings(o); if (d != 0) return d; DiscretePrimitiveStringFeature f = (DiscretePrimitiveStringFeature) o; d = identifier.compareTo(f.identifier); if (d != 0) return d; return value.compareTo(f.value); } /** * Writes a string representation of this <code>Feature</code> to the * specified buffer. * * @param buffer The buffer to write to. **/ public void write(StringBuffer buffer) { writeNameString(buffer); buffer.append("("); buffer.append(value); buffer.append(")"); } /** * Writes a string representation of this <code>Feature</code>'s package, * generating classifier, and identifier information to the specified * buffer. * * @param buffer The buffer to write to. **/ public void writeNameString(StringBuffer buffer) { super.writeNameString(buffer); buffer.append(":"); buffer.append(identifier); } /** * Writes a complete binary representation of the feature. * * @param out The output stream. **/ public void write(ExceptionlessOutputStream out) { super.write(out); out.writeString(identifier); out.writeString(value); } /** * Reads the representation of a feaeture with this object's run-time type * from the given stream, overwriting the data in this object. * * @param in The input stream. **/ public void read(ExceptionlessInputStream in) { super.read(in); identifier = in.readString(); value = in.readString(); } /** * Writes a binary representation of the feature intended for use by a * lexicon, omitting redundant information when possible. * * @param out The output stream. * @param lex The lexicon out of which this feature is being written. * @param c The fully qualified name of the assumed class. The runtime * class of this feature won't be written if it's equivalent to * <code>c</code>. * @param p The assumed package string. This feature's package string * won't be written if it's equivalent to <code>p</code>. * @param g The assumed classifier name string. This feature's * classifier name string won't be written if it's equivalent * to <code>g</code>. * @param si The assumed identifier as a string. If this feature has a * string identifier, it won't be written if it's equivalent to * <code>si</code>. * @param bi The assumed identifier as a byte string. If this feature * has a byte string identifier, it won't be written if it's * equivalent to <code>bi</code>. * @return The name of the runtime type of this feature. **/ public String lexWrite(ExceptionlessOutputStream out, Lexicon lex, String c, String p, String g, String si, ByteString bi) { String result = super.lexWrite(out, lex, c, p, g, si, bi); out.writeString(identifier.equals(si) ? null : identifier); out.writeString(value); return result; } /** * Reads the representation of a feature with this object's run-time type * as stored by a lexicon, overwriting the data in this object. * * <p> This method is appropriate for reading features as written by * {@link #lexWrite(ExceptionlessOutputStream,Lexicon,String,String,String,String,ByteString)}. * * @param in The input stream. * @param lex The lexicon we are reading in to. * @param p The assumed package string. If no package name is given in * the input stream, the instantiated feature is given this * package. * @param g The assumed classifier name string. If no classifier name * is given in the input stream, the instantiated feature is * given this classifier name. * @param si The assumed identifier as a string. If the feature being * read has a string identifier field and no identifier is * given in the input stream, the feature is given this * identifier. * @param bi The assumed identifier as a byte string. If the feature * being read has a byte string identifier field and no * identifier is given in the input stream, the feature is * given this identifier. **/ public void lexRead(ExceptionlessInputStream in, Lexicon lex, String p, String g, String si, ByteString bi) { super.lexRead(in, lex, p, g, si, bi); identifier = in.readString(); if (identifier == null) identifier = si; value = in.readString(); } }