package edu.stanford.nlp.ling; import edu.stanford.nlp.util.CoreMap; /** * This class is mainly for use with RTE in terms of the methods it provides, * but on a more general level, it provides a {@link CoreLabel} that uses its * DocIDAnnotation, SentenceIndexAnnotation, and IndexAnnotation to implement * Comparable/compareTo, hashCode, and equals. This means no other annotations, * including the identity of the word, are taken into account when using these * methods. * * @author rafferty * */ public class IndexedWord extends CoreLabel implements Comparable<IndexedWord> { private static final long serialVersionUID = 3739633991145239829L; /** * The identifier that points to no word. */ public static final IndexedWord NO_WORD = new IndexedWord(null, -1, -1); /** * Various printing options for toString */ public static final String WORD_FORMAT = "WORD_FORMAT"; public static final String WORD_TAG_FORMAT = "WORD_TAG_FORMAT"; public static final String WORD_TAG_INDEX_FORMAT = "WORD_TAG_INDEX_FORMAT"; public static final String VALUE_FORMAT = "VALUE_FORMAT"; public static final String COMPLETE_FORMAT = "COMPLETE_FORMAT"; private static String printFormat = WORD_TAG_FORMAT; /** * Default constructor; uses {@link CoreLabel} default constructor */ public IndexedWord() { super(); } /** * Copy Constructor - relies on {@link CoreLabel} copy constructor * It will set the value, and if the word is not set otherwise, set * the word to the value. * * @param w A Label to initialize this IndexedWord from */ public IndexedWord(Label w) { super(w); if (this.word() == null) this.setWord(this.value()); } /** * Construct an IndexedWord from a CoreLabel just as for a CoreMap. * <i>Implementation note:</i> this is a the same as the constructor * that takes a CoreMap, but is needed to ensure unique most specific * type inference for selecting a constructor at compile-time. * * @param w A Label to initialize this IndexedWord from */ public IndexedWord(CoreLabel w) { this((CoreMap) w); } /** * Copy Constructor - relies on {@link CoreLabel} copy constructor * @param w A Label to initialize this IndexedWord from */ public IndexedWord(CoreMap w) { super(w); if (this.word() == null) this.setWord(this.value()); } /** * Constructor for setting docID, sentenceIndex, and * index without any other annotations. * * @param docID The document ID (arbitrary string) * @param sentenceIndex The sentence number in the document (normally 0-based) * @param index The index of the word in the sentence (normally 0-based) */ public IndexedWord(String docID, int sentenceIndex, int index) { super(); this.set(CoreAnnotations.DocIDAnnotation.class, docID); this.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex); this.set(CoreAnnotations.IndexAnnotation.class, index); } /** * Copies the given label and then sets the docID, sentenceIndex, * and Index; if these differ from those in label, the parameters * will be used (not the label values). * * @param docID The document ID (arbitrary string) * @param sentenceIndex The sentence number in the document (normally 0-based) * @param index The index of the word in the sentence (normally 0-based) * @param label The CoreLabel to initialize all other fields from. */ public IndexedWord(String docID, int sentenceIndex, int index, CoreLabel label) { this(label); this.set(CoreAnnotations.DocIDAnnotation.class, docID); this.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex); this.set(CoreAnnotations.IndexAnnotation.class, index); } /** * This .equals is dependent only on docID, sentenceIndex, and index. * It doesn't consider the actual word value, but assumes that it is * validly represented by token position. * All IndexedWords that lack these fields will be regarded as equal. */ @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof IndexedWord)) return false; //now compare on appropriate keys final IndexedWord otherWord = (IndexedWord) o; String myDocID = getString(CoreAnnotations.DocIDAnnotation.class); String otherDocID = otherWord.getString(CoreAnnotations.DocIDAnnotation.class); if (myDocID == null) { if (otherDocID != null) return false; } else if ( ! myDocID.equals(otherDocID)) { return false; } Integer mySentInd = get(CoreAnnotations.SentenceIndexAnnotation.class); Integer otherSentInd = otherWord.get(CoreAnnotations.SentenceIndexAnnotation.class); if (mySentInd == null) { if (otherSentInd != null) return false; } else if ( ! mySentInd.equals(otherSentInd)) { return false; } Integer myInd = get(CoreAnnotations.IndexAnnotation.class); Integer otherInd = otherWord.get(CoreAnnotations.IndexAnnotation.class); if (myInd == null) { if (otherInd != null) return false; } else if ( ! myInd.equals(otherInd)) { return false; } return true; } /** * This hashCode uses only the docID, sentenceIndex, and index. * See compareTo for more info. */ @Override public int hashCode() { boolean sensible = false; int result = 0; if (get(CoreAnnotations.DocIDAnnotation.class) != null) { result = get(CoreAnnotations.DocIDAnnotation.class).hashCode(); sensible = true; } if (has(CoreAnnotations.SentenceIndexAnnotation.class)) { result = 29 * result + get(CoreAnnotations.SentenceIndexAnnotation.class).hashCode(); sensible = true; } if (has(CoreAnnotations.IndexAnnotation.class)) { result = 29 * result + get(CoreAnnotations.IndexAnnotation.class).hashCode(); sensible = true; } if ( ! sensible) { System.err.println("WARNING!!! You have hashed an IndexedWord with no docID, sentIndex or wordIndex. You will almost certainly lose"); } return result; } /** * NOTE: This compareTo is based on and made to be compatible with the one * from IndexedFeatureLabel. You <em>must</em> have a DocIDAnnotation, * SentenceIndexAnnotation, and IndexAnnotation for this to make sense and * be guaranteed to work properly. Currently, it won't error out and will * try to return something sensible if these are not defined, but that really * isn't proper usage! * * This compareTo method is based not by value elements like the word(), * but on passage position. It puts NO_WORD elements first, and then orders * by document, sentence, and word index. If these do not differ, it * returns equal. * * @param w The IndexedWord to compare with * @return Whether this is less than w or not in the ordering */ public int compareTo(IndexedWord w) { if (this.equals(IndexedWord.NO_WORD)) { if (w.equals(IndexedWord.NO_WORD)) { return 0; } else { return -1; } } if (w.equals(IndexedWord.NO_WORD)) { return 1; } String docID = this.getString(CoreAnnotations.DocIDAnnotation.class); int docComp = docID.compareTo(w.getString(CoreAnnotations.DocIDAnnotation.class)); if (docComp != 0) return docComp; int sentComp = sentIndex() - w.sentIndex(); if (sentComp != 0) return sentComp; return index() - w.index(); } /** * Computes the toString based on whatever the printFormat is * currently set as. */ @Override public String toString() { return toString(printFormat); } public static void setPrintFormat(String printFormat) { IndexedWord.printFormat = printFormat; } /** * Prints the toString in the form of format. * * @param format One of the constants defined for this class. (You must use * one of these constants, because the Strings are compared by ==.) * @return A printed representation */ public String toString(String format) { if (this.equals(NO_WORD)) return "NO_WORD"; StringBuilder result = new StringBuilder(); // word if (format == WORD_FORMAT || format == WORD_TAG_FORMAT || format == WORD_TAG_INDEX_FORMAT) { result.append(word()); // tag if (format == WORD_TAG_FORMAT || format == WORD_TAG_INDEX_FORMAT) { String tag = tag(); if (tag != null && tag.length() != 0) { result.append('-').append(tag); } // index if (format == WORD_TAG_INDEX_FORMAT) { result.append('-').append(sentIndex()).append(':').append(index()); } } // value format } else if (format == VALUE_FORMAT) { result.append(value()); if (index() >= 0) { result.append(':').append(index()); } } else { return super.toString(); } return result.toString(); } public static LabelFactory factory() { return new LabelFactory() { public Label newLabel(String labelStr) { IndexedWord label = new IndexedWord(); label.setValue(labelStr); return label; } public Label newLabel(String labelStr, int options) { return newLabel(labelStr); } public Label newLabel(Label oldLabel) { return new IndexedWord(oldLabel); } public Label newLabelFromString(String encodedLabelStr) { throw new UnsupportedOperationException("This code branch left blank" + " because we do not understand what this method should do."); } }; } /** * {@inheritDoc} */ @Override public LabelFactory labelFactory() { return IndexedWord.factory(); } }