package edu.stanford.nlp.ling;
import edu.stanford.nlp.process.Morphology;

import java.util.Objects;
/**
* A WordLemmaTag corresponds to a pair of a tagged (e.g., for part of speech)
* word and its lemma. WordLemmaTag is implemented with String-valued word,
* lemma and tag.
* It implements the Label interface; the {@code value()} method for that
* interface corresponds to the word of the WordLemmaTag.
* <p/>
* The equality relation for WordLemmaTag is defined as identity of
* word, lemma and tag.
*
* @author Marie-Catherine de Marneffe
*/
public class WordLemmaTag implements Label, Comparable<WordLemmaTag>, HasWord, HasTag {

  private static final long serialVersionUID = -5993410244163988138L;

  /** Default separator between word, lemma and tag in the String form. */
  private static final String DIVIDER = "/";

  private String word;
  private String lemma;
  private String tag;

  /**
   * Create a new {@code WordLemmaTag} with the given word and with
   * {@code null} lemma and tag.
   *
   * @param word The word of this Label
   */
  public WordLemmaTag(String word) {
    this.word = word;
    this.lemma = null;
    setTag(null);
  }

  /**
   * Create a new {@code WordLemmaTag} whose word is the {@code value()}
   * of the given Label; lemma and tag are {@code null}.
   *
   * @param word The Label whose {@code value()} becomes the word
   */
  public WordLemmaTag(Label word) {
    this(word.value());
  }

  /**
   * Create a new {@code WordLemmaTag} with {@code null} word, lemma and tag.
   */
  public WordLemmaTag() {
  }

  /**
   * Create a new {@code WordLemmaTag}. The lemma is computed from the
   * word and tag via {@code Morphology.stemStatic}.
   *
   * @param word The word of this Label
   * @param tag The tag of this Label
   */
  public WordLemmaTag(String word, String tag) {
    WordTag wT = new WordTag(word, tag);
    this.word = word;
    this.lemma = Morphology.stemStatic(wT).word();
    setTag(tag);
  }

  /**
   * Create a new {@code WordLemmaTag} from explicit parts.
   *
   * @param word The word of this Label
   * @param lemma The lemma of this Label
   * @param tag The tag of this Label
   */
  public WordLemmaTag(String word, String lemma, String tag) {
    this(word);
    this.lemma = lemma;
    setTag(tag);
  }

  /**
   * Create a new {@code WordLemmaTag} from two Labels. The value of
   * the first Label corresponds to the word of the WordLemmaTag, the value
   * of the second one to its tag. The lemma is computed from them via
   * {@code Morphology.stemStatic}.
   *
   * @param word The Label whose {@code value()} becomes the word
   * @param tag The Label whose {@code value()} becomes the tag
   */
  public WordLemmaTag(Label word, Label tag) {
    this(word);
    WordTag wT = new WordTag(word, tag);
    this.lemma = Morphology.stemStatic(wT).word();
    setTag(tag.value());
  }

  /**
   * Return a String representation of just the "main" value of this Label,
   * which is the word.
   *
   * @return the "value" of the Label
   */
  @Override
  public String value() {
    return word;
  }

  /**
   * Return the word of this Label (same as {@link #value()}).
   *
   * @return the word, possibly {@code null}
   */
  @Override
  public String word() {
    return value();
  }

  /**
   * Set the value (i.e., the word) for the Label.
   *
   * @param value the value for the Label
   */
  @Override
  public void setValue(String value) {
    word = value;
  }

  /**
   * Set the word for the Label (same as {@link #setValue(String)}).
   *
   * @param word the word for the Label
   */
  @Override
  public void setWord(String word) {
    setValue(word);
  }

  /**
   * Set the lemma for the Label.
   *
   * @param lemma the lemma for the Label
   */
  public void setLemma(String lemma) {
    this.lemma = lemma;
  }

  /**
   * Set the tag for the Label.
   *
   * @param tag the tag for the Label
   */
  @Override
  public final void setTag(String tag) {
    this.tag = tag;
  }

  /**
   * Return the tag of this Label.
   *
   * @return the tag, possibly {@code null}
   */
  @Override
  public String tag() {
    return tag;
  }

  /**
   * Return the lemma of this Label.
   *
   * @return the lemma, possibly {@code null}
   */
  public String lemma() {
    return lemma;
  }

  /**
   * Return a String representation of the Label. For a multipart Label,
   * this will return all parts.
   *
   * @return a text representation of the full label contents: word/lemma/tag
   */
  @Override
  public String toString() {
    return toString(DIVIDER);
  }

  /**
   * Return a String representation of the Label using a custom divider.
   * Parts that are {@code null} are rendered as the text {@code "null"}.
   *
   * @param divider the separator placed between word, lemma and tag
   * @return word, lemma and tag joined by {@code divider}
   */
  public String toString(String divider) {
    return word() + divider + lemma + divider + tag;
  }

  /**
   * Set word, lemma and tag from a String using the default divider
   * ({@code "/"}). See {@link #setFromString(String, String)} for the
   * parsing rules.
   *
   * @param labelStr The String to parse into this {@code WordLemmaTag}
   */
  @Override
  public void setFromString(String labelStr) {
    setFromString(labelStr, DIVIDER);
  }

  /**
   * Set word, lemma and tag from a String that is split on {@code divider}.
   * No escape sequences are interpreted. The rules are:
   * <ul>
   * <li>If the divider isn't found, the whole string becomes the word, and
   * lemma and tag are {@code null}.</li>
   * <li>If the divider is found exactly once, the String is taken to hold
   * word and tag, and the lemma is computed with
   * {@code Morphology.lemmaStatic}.</li>
   * <li>Otherwise the text before the leftmost divider is the word, the text
   * after the rightmost divider is the tag, and everything in between is
   * the lemma.</li>
   * </ul>
   *
   * @param labelStr The String to parse into this {@code WordLemmaTag}
   * @param divider The separator between the parts; must not be empty
   * @throws IllegalArgumentException if {@code divider} is empty
   */
  public void setFromString(String labelStr, String divider) {
    if (divider.isEmpty()) {
      throw new IllegalArgumentException("divider must not be empty");
    }
    int first = labelStr.indexOf(divider);
    if (first < 0) {
      // Must be tested before comparing the two indices: when the divider is
      // absent both searches return -1 and would otherwise look like the
      // single-divider case.
      setWord(labelStr);
      setLemma(null);
      setTag(null);
      return;
    }
    int dividerLength = divider.length();
    int second = labelStr.lastIndexOf(divider);
    String wordPart = labelStr.substring(0, first);
    if (second < first + dividerLength) {
      // Exactly one divider (or overlapping occurrences of a multi-character
      // divider, which are treated as a single one): word and tag only.
      String tagPart = labelStr.substring(first + dividerLength);
      setWord(wordPart);
      setTag(tagPart);
      setLemma(Morphology.lemmaStatic(wordPart, tagPart));
    } else {
      setWord(wordPart);
      setLemma(labelStr.substring(first + dividerLength, second));
      setTag(labelStr.substring(second + dividerLength));
    }
  }

  /**
   * Equality is satisfied only if the compared object is a WordLemmaTag
   * and has String-equal word, lemma and tag fields. Two {@code null}
   * parts are considered equal to each other.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof WordLemmaTag)) {
      return false;
    }
    final WordLemmaTag other = (WordLemmaTag) o;
    // Objects.equals keeps this null-safe, matching the null handling in hashCode().
    return Objects.equals(word(), other.word())
        && Objects.equals(lemma(), other.lemma())
        && Objects.equals(tag(), other.tag());
  }

  /**
   * Hash code built from word, tag and lemma; {@code null} parts are allowed.
   */
  @Override
  public int hashCode() {
    int result;
    result = (word != null ? word.hashCode() : 3);
    result = 29 * result + (tag != null ? tag.hashCode() : 0);
    result = 29 * result + (lemma != null ? lemma.hashCode() : 0);
    return result;
  }

  /**
   * Orders first by word, then by lemma, then by tag. A {@code null} part
   * sorts before any non-null part, so the ordering is consistent with
   * {@link #equals(Object)}.
   *
   * @param wordLemmaTag object to compare to
   * @return result (positive if {@code this} is greater than
   *         {@code wordLemmaTag}, 0 if equal, negative otherwise)
   */
  @Override
  public int compareTo(WordLemmaTag wordLemmaTag) {
    int byWord = compareNullsFirst(word(), wordLemmaTag.word());
    if (byWord != 0) {
      return byWord;
    }
    int byLemma = compareNullsFirst(lemma(), wordLemmaTag.lemma());
    if (byLemma != 0) {
      return byLemma;
    }
    return compareNullsFirst(tag(), wordLemmaTag.tag());
  }

  /** Compares two Strings naturally, placing {@code null} before non-null. */
  private static int compareNullsFirst(String a, String b) {
    if (a == null) {
      return (b == null) ? 0 : -1;
    }
    if (b == null) {
      return 1;
    }
    return a.compareTo(b);
  }

  /** Lazily initialized holder for the shared label factory instance. */
  private static class LabelFactoryHolder {
    private static final LabelFactory lf = new WordLemmaTagFactory();

    private LabelFactoryHolder() {
    }
  }

  /**
   * Return a factory for this kind of label
   * (i.e., {@code WordLemmaTag}).
   * The factory returned is always the same one (a singleton).
   *
   * @return The label factory
   */
  @Override
  public LabelFactory labelFactory() {
    return LabelFactoryHolder.lf;
  }

  /**
   * For debugging only: parses three sample strings (word/tag,
   * word/lemma/tag, and a bare word) and prints the resulting parts.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    WordLemmaTag wLT = new WordLemmaTag();
    wLT.setFromString("hunter/NN");
    System.out.println(wLT.word());
    System.out.println(wLT.lemma());
    System.out.println(wLT.tag());

    WordLemmaTag wLT2 = new WordLemmaTag();
    wLT2.setFromString("bought/buy/V");
    System.out.println(wLT2.word());
    System.out.println(wLT2.lemma());
    System.out.println(wLT2.tag());

    WordLemmaTag wLT3 = new WordLemmaTag();
    wLT3.setFromString("life");
    System.out.println(wLT3.word());
    System.out.println(wLT3.lemma());
    System.out.println(wLT3.tag());
  }

}