/*
 * Copyright (C) 2015 Artificial Intelligence
 * Laboratory @ University of Udine.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
package it.uniud.ailab.dcore.persistence;

import it.uniud.ailab.dcore.annotation.Annotable;
import it.uniud.ailab.dcore.annotation.Annotation;
import it.uniud.ailab.dcore.annotation.annotations.TextAnnotation;
import java.util.Objects;

/**
 * The smallest object of the Distiller, which represents a component of a
 * sentence (in most cases, a word).
 *
 * <p>The stem, the part-of-speech tag and the lemma are all {@code null}
 * until the corresponding setter has been called.</p>
 */
public class Token extends Annotable {

    /**
     * The stem of the word; {@code null} until {@link #setStem(String)} is
     * called.
     */
    private String stem;

    /**
     * The part-of-speech tag of the word; {@code null} until
     * {@link #setPoS(String)} is called.
     */
    private String PoS;

    /**
     * The lemmatized form of the token, that is obtained removing inflectional
     * endings only and returning the base or dictionary form of a word;
     * {@code null} until {@link #setLemma(String)} is called.
     */
    private String lemma;

    /**
     * Creates a token. The text is handed to the {@link Annotable} constructor
     * and is later returned by {@link #getText()}.
     *
     * @param text the text of the token.
     */
    public Token(String text) {
        super(text);
    }

    // <editor-fold desc="Id, stem and PoS">
    /**
     * Set the stem of the token; if you don't have a stemmer for a certain
     * language, the lemmatized version of the word should work fine as well.
     *
     * @param stem the stemmed token
     */
    public void setStem(String stem) {
        this.stem = stem;
    }

    /**
     * Set the POS tag of the token.
     *
     * @param PoS the POS tag.
     */
    public void setPoS(String PoS) {
        this.PoS = PoS;
    }

    /**
     * Set the lemmatized form of the token.
     *
     * @param lemma the lemma for the token.
     */
    public void setLemma(String lemma) {
        this.lemma = lemma;
    }

    /**
     * Returns the text of the token, i.e. the identifier stored by the
     * superclass.
     *
     * @return the text of the token.
     */
    public String getText() {
        return super.getIdentifier();
    }

    /**
     * Returns the stem of the token.
     *
     * @return the stem of the token, or {@code null} if it has not been set.
     */
    public String getStem() {
        return stem;
    }

    /**
     * Returns the POS tag of the token.
     *
     * @return the POS tag of the token, or {@code null} if it has not been
     * set.
     */
    public String getPoS() {
        return PoS;
    }

    /**
     * Returns the lemmatized form of the token.
     *
     * @return the lemma of the token, or {@code null} if it has not been set.
     */
    public String getLemma() {
        return lemma;
    }
    // </editor-fold>

    // <editor-fold desc="Annotations">
    /*
     * DISABLED: the two methods below are commented out and are not part of
     * the class. Their original documentation is kept next to them as plain
     * comments so that it does not attach to an unrelated declaration.
     */

    // Gets all the annotations associated with the token that have been
    // generated by a specific annotator.
    //
    // @param annotator the identifier of an annotator.
    // @return the annotations generated by the specified annotator.
    //
    // public List<TextAnnotation> getAnnotations(String annotator) {
    //     List<TextAnnotation> ret = new ArrayList<>();
    //     for (TextAnnotation ann : this.getAnnotations())
    //     {
    //         if (ann.getAnnotator().equals(annotator)) {
    //             ret.add(ann);
    //         }
    //     }
    //     return ret;
    // }

    // Check if the token has been annotated by a given annotator. Please note
    // that to retrieve all the annotations generated by an annotator you
    // should use getAnnotations() instead.
    //
    // @param annotator the identifier of an annotator.
    // @return the first annotation in the list generated by the given
    // annotator.
    //
    // public TextAnnotation hasAnnotation(String annotator) {
    //     TextAnnotation a = null;
    //     for (TextAnnotation b : this.getAnnotations())
    //     {
    //         if (b.getAnnotator().equals(annotator)) {
    //             a = b;
    //             break;
    //         }
    //     }
    //     return a;
    // }
    // </editor-fold>

    /**
     * A full string representation of the token, which returns not only the
     * text, but also the POS tag, the lemma, the stem and the text annotations
     * of the token.
     *
     * @return a string of the form
     * {@code text {(POS:p), (Lemma: l), (Stem: s), (annotator:annotation)...}};
     * only annotations that are instances of {@link TextAnnotation} are
     * included.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(getText())
                .append(" {(POS:").append(getPoS()).append("), ")
                .append("(Lemma: ").append(getLemma()).append("), ")
                .append("(Stem: ").append(getStem()).append(")");

        for (Annotation a : getAnnotations()) {
            // Annotations of other kinds are skipped.
            if (a instanceof TextAnnotation) {
                sb.append(", (").append(a.getAnnotator()).append(":")
                        .append(((TextAnnotation) a).getAnnotation())
                        .append(")");
            }
        }

        return sb.append("}").toString();
    }

    /**
     * Two tokens are equal if they have the same text, stem and POS tag.
     * Tokens with different annotation may just refer to same word in
     * different sentences; while the annotations are different, the word is
     * the same.
     *
     * For example, "Engineering" per se and the word "Engineering" in
     * "Software Engineering" should be treated as equal, even if they may
     * be annotated with different Wikipedia entities.
     *
     * <p>The comparison is null-safe: a stem or POS tag that has not been set
     * yet only matches another unset value, instead of raising a
     * {@link NullPointerException}.</p>
     *
     * @param obj the token to compare with
     * @return true if the tokens are equal, false otherwise
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final Token other = (Token) obj;
        return Objects.equals(getText(), other.getText())
                && Objects.equals(stem, other.stem)
                && Objects.equals(PoS, other.PoS);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}.
     *
     * <p>Only the text is hashed. Equal tokens always share the same text, so
     * the contract with {@code equals} holds, and the value does not change
     * when {@link #setStem(String)} or {@link #setPoS(String)} is called after
     * the token has been stored in a hash-based collection.</p>
     *
     * @return the hash code of the token text, or 0 if the text is
     * {@code null}.
     */
    @Override
    public int hashCode() {
        return Objects.hashCode(getText());
    }

    /**
     * Returns the identifier of the token, which is its text.
     *
     * @return the text of the token.
     */
    @Override
    public String getIdentifier() {
        return getText();
    }
}