package edu.stanford.nlp.ling; import java.io.DataInputStream; import java.io.DataOutputStream; /** * A WordTag corresponds to a tagged (e.g., for part of speech) word * and is implemented with String-valued word and tag. It implements * the Label interface; the <code>value()</code> method for that * interface corresponds to the word of the WordTag. * <p/> * The equality relation for WordTag is defined as identity of both * word and tag. Note that this is different from * <code>TaggedWord</code>, for which equality derives from * <code>ValueLabel</code> and requires only identity of value. * * @author Roger Levy */ public class WordTag implements Label, HasWord, HasTag, Comparable<WordTag> { private String word; private String tag; private static final String DIVIDER = "/"; /** * Create a new <code>WordTag</code>. * * @param word This word is passed to the supertype constructor * @param tag The <code>value()</code> of this label is set as the * tag of this Label */ public WordTag(String word, String tag) { setWord(word); setTag(tag); } public WordTag(String word) { this(word, null); } public <E extends Label & HasTag> WordTag(E word) { this(word.value(), word.tag()); } private WordTag() { } // only used internally for doing setFromString() /** * Create a new <code>WordTag</code> from a Label. The value of * the Label corresponds to the word of the WordTag. * * @param word The <code>value()</code> of this label is set as the * word of the <code>WordTag</code> * @param tag The <code>value()</code> of this label is set as the * tag of the <code>WordTag</code> */ public WordTag(Label word, Label tag) { this(word.value(), tag.value()); } public static WordTag valueOf(String s) { WordTag result = new WordTag(); result.setFromString(s); return result; } public static WordTag valueOf(String s, String tagDivider) { WordTag result = new WordTag(); result.setFromString(s, tagDivider); return result; } /** * Return a String representation of just the "main" value of this label. * * @return the "value" of the label */ public String value() { return word; } public String word() { return value(); } /** * Set the value for the label (if one is stored). * * @param value - the value for the label */ public void setValue(String value) { word = value; } public String tag() { return tag; } public void setWord(String word) { setValue(word); } public void setTag(String tag) { this.tag = tag; } /** * Return a String representation of the label. For a multipart label, * this will return all parts. The <code>toString()</code> method * causes a label to spill its guts. It should always return an * empty string rather than <code>null</code> if there is no value. * * @return a text representation of the full label contents */ @Override public String toString() { return toString(DIVIDER); } public String toString(String divider) { String tag = tag(); if (tag == null) { return word(); } else { return word() + divider + tag; } } /** * Sets a WordTag from decoding * the <code>String</code> passed in. The String is divided according * to the divider character (usually, "/"). We assume that we can * always just * divide on the rightmost divider character, rather than trying to * parse up escape sequences. If the divider character isn't found * in the word, then the whole string becomes the word, and the tag * is <code>null</code>. * * @param wordTagString The word that will go into the <code>Word</code> */ @Override public void setFromString(String wordTagString) { setFromString(wordTagString, DIVIDER); } public void setFromString(String wordTagString, String divider) { int where = wordTagString.lastIndexOf(divider); if (where >= 0) { setWord(wordTagString.substring(0, where).intern()); setTag(wordTagString.substring(where + 1).intern()); } else { setWord(wordTagString.intern()); setTag(null); } } /** A WordTag is equal only to another WordTag with the same word and tag values. */ @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof WordTag)) return false; final WordTag wordTag = (WordTag) o; if (tag != null ? !tag.equals(wordTag.tag) : wordTag.tag != null) return false; if (word != null ? !word.equals(wordTag.word) : wordTag.word != null) return false; return true; } @Override public int hashCode() { int result; result = (word != null ? word.hashCode() : 0); result = 29 * result + (tag != null ? tag.hashCode() : 0); return result; } /** * Orders first by word, then by tag. * * @param wordTag object to compare to * @return result (positive if <code>this</code> is greater than * <code>obj</code>, 0 if equal, negative otherwise) */ public int compareTo(WordTag wordTag) { int first = (word != null ? word().compareTo(wordTag.word()) : 0); if(first != 0) return first; else { if (tag() == null) { if (wordTag.tag() == null) return 0; else return -1; } return tag().compareTo(wordTag.tag()); } } // extra class guarantees correct lazy loading (Bloch p.194) private static class LabelFactoryHolder { private static final LabelFactory lf = new WordTagFactory(); } /** * Return a factory for this kind of label * (i.e., <code>TaggedWord</code>). * The factory returned is always the same one (a singleton). * * @return The label factory */ public LabelFactory labelFactory() { return LabelFactoryHolder.lf; } /** * Return a factory for this kind of label. * * @return The label factory */ public static LabelFactory factory() { return LabelFactoryHolder.lf; } public void read(DataInputStream in) { try { word = in.readUTF(); tag = in.readUTF(); } catch (Exception e) { e.printStackTrace(); } } public void save(DataOutputStream out) { try { out.writeUTF(word); out.writeUTF(tag); } catch (Exception e) { e.printStackTrace(); } } private static final long serialVersionUID = -1859527239216813742L; }