Token.java example

Explorer

distiller-CORE-master
- src
  - main
    - java
      - it
        uniud
        ailab
        dcore
        Blackboard.java
        DistilledOutput.java
        Distiller.java
        DistillerException.java
        DistillerFactory.java
        Pipeline.java
        Stage.java
        annotation
        Annotable.java
        Annotation.java
        AnnotationException.java
        Annotator.java
        DefaultAnnotations.java
        annotations
        CoreferenceChainAnnotation.java
        FeatureAnnotation.java
        InferenceAnnotation.java
        NERAnnotation.java
        ScoredAnnotation.java
        TextAnnotation.java
        UriAnnotation.java
        annotators
        ChunkingNerAnnotator.java
        CoreferenceResolverAnnotator.java
        DocumentPhraseMaximalityAnnotator.java
        GenericEvaluatorAnnotator.java
        GenericNGramGeneratorAnnotator.java
        GenericWikipediaAnnotator.java
        GramMergerAnnotator.java
        ItalianLemmatizerAnnotator.java
        LinearEvaluatorAnnotator.java
        PorterStemmerAnnotator.java
        RawTdidfAnnotator.java
        RegexNGramGeneratorAnnotator.java
        SimpleAnnotationFilterAnnotator.java
        SimpleCutFilterAnnotator.java
        SimpleNGramGeneratorAnnotator.java
        SkylineGramFilterAnnotator.java
        StatisticalAnnotator.java
        StopwordSimpleFilterAnnotator.java
        SyuzhetAnnotator.java
        TagMeGramAnnotator.java
        TagMeTokenAnnotator.java
        WikipediaInferenceAnnotator.java
        eval
        Evaluator.java
        GenericDataset.java
        TrainingSetGenerator.java
        datasets
        SemEval2010.java
        kp
        KeyphraseEvaluator15.java
        KeyphraseEvaluatorAll.java
        training
        KeyphraseTrainingSetGenerator.java
        io
        CsvPrinter.java
        FileWriterStage.java
        GenericSheetPrinter.java
        GramPrinter.java
        IOBlackboard.java
        SentencePrinter.java
        TokenPrinter.java
        launchers
        Launcher.java
        SampleInference.java
        SimpleKE.java
        StanfordKE.java
        persistence
        DocumentComponent.java
        DocumentComposite.java
        Gram.java
        Keyphrase.java
        Mention.java
        Sentence.java
        Token.java
        utils
        BlackboardUtils.java
        DocumentUtils.java
        Either.java
        FileSystem.java
        GramUtils.java
        ListUtils.java
        Pair.java
        SnowballStemmerSelector.java
        StageUtils.java
        WikipediaUtils.java
        wrappers
        external
        CybozuLanguageDetectorAnnotator.java
        OpenNlpBootstrapperAnnotator.java
        RCallerEvaluator.java
        StanfordBootstrapperAnnotator.java
        StanfordFastBootstrapperAnnotator.java
  - test
    - java
      - test.java

/*
 * Copyright (C) 2015 Artificial Intelligence
 * Laboratory @ University of Udine.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
package it.uniud.ailab.dcore.persistence;

import it.uniud.ailab.dcore.annotation.Annotable;
import it.uniud.ailab.dcore.annotation.Annotation;
import it.uniud.ailab.dcore.annotation.annotations.TextAnnotation;

import java.util.Objects;

/**
 * The smallest object of the Distiller, which represents a component of a sentence
 * (in most cases, a word).
 */
public class Token extends Annotable {

    /**
     * The stem of the word; {@code null} until {@link #setStem(String)} is
     * called.
     */
    private String stem;

    /**
     * The part-of-speech tag of the word; {@code null} until
     * {@link #setPoS(String)} is called.
     */
    private String PoS;

    /**
     * The lemmatized form of the token, that is obtained removing inflectional
     * endings only and returning the base or dictionary form of a word;
     * {@code null} until {@link #setLemma(String)} is called.
     */
    private String lemma;

    /**
     * Creates a token. The text is handed to the superclass constructor; the
     * stem, POS tag and lemma are left unset.
     *
     * @param text the text of the token.
     */
    public Token(String text) {
        super(text);
    }

    // <editor-fold desc="Id, stem and PoS">
    /**
     * Sets the stem of the token; if you don't have a stemmer for a certain
     * language, the lemmatized version of the word should work fine as well.
     *
     * @param stem the stemmed token
     */
    public void setStem(String stem) {
        this.stem = stem;
    }

    /**
     * Sets the POS tag of the token.
     *
     * @param PoS the POS tag.
     */
    public void setPoS(String PoS) {
        this.PoS = PoS;
    }

    /**
     * Sets the lemmatized form of the token.
     *
     * @param lemma the lemma for the token.
     */
    public void setLemma(String lemma) {
        this.lemma = lemma;
    }

    /**
     * Returns the text of the token, as reported by the superclass identifier.
     *
     * @return the text of the token.
     */
    public String getText() {
        return super.getIdentifier();
    }

    /**
     * Returns the stem of the token.
     *
     * @return the stem of the token, or {@code null} if it has not been set.
     */
    public String getStem() {
        return stem;
    }

    /**
     * Returns the POS tag of the token.
     *
     * @return the POS tag of the token, or {@code null} if it has not been set.
     */
    public String getPoS() {
        return PoS;
    }

    /**
     * Returns the lemmatized form of the token.
     *
     * @return the lemma of the token, or {@code null} if it has not been set.
     */
    public String getLemma() {
        return lemma;
    }
    // </editor-fold>

    /**
     * A full string representation of the token, which contains not only the
     * text, but also the POS tag, the lemma, the stem and every
     * {@link TextAnnotation} attached to the token. Annotations of other
     * types are skipped.
     *
     * @return a string of the form
     * {@code text {(POS:p), (Lemma: l), (Stem: s), (annotator:value)...}}
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder();
        out.append(getText())
                .append(" {(POS:").append(getPoS()).append("), ")
                .append("(Lemma: ").append(getLemma()).append("), ")
                .append("(Stem: ").append(getStem()).append(")");
        for (Annotation a : getAnnotations()) {
            if (a instanceof TextAnnotation) {
                out.append(", (").append(a.getAnnotator()).append(":")
                        .append(((TextAnnotation) a).getAnnotation())
                        .append(")");
            }
        }
        return out.append("}").toString();
    }

    /**
     * Two tokens are equal if they have the same text, stem and POS tag.
     * Tokens with different annotation may just refer to same word in different
     * sentences; while the annotations are different, the word is the same.
     *
     * For example, "Engineering" per se and the word "Engineering" in
     * "Software Engineering" should be treated as equal, even if they may
     * be annotated with different Wikipedia entities.
     *
     * The comparison is null-safe: a stem or POS tag that has not been set
     * only matches another unset value, instead of raising a
     * {@link NullPointerException}.
     *
     * @param obj the token to compare with
     * @return true if the tokens are equal, false otherwise
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final Token other = (Token) obj;
        return Objects.equals(getText(), other.getText())
                && Objects.equals(stem, other.stem)
                && Objects.equals(PoS, other.PoS);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: it is computed from
     * the text, the stem and the POS tag only, so that equal tokens always
     * share the same hash and can be used in hash-based collections.
     *
     * @return the hash code of the token.
     */
    @Override
    public int hashCode() {
        return Objects.hash(getText(), stem, PoS);
    }

    /**
     * Returns the identifier of the token, which is its text.
     *
     * @return the text of the token.
     */
    @Override
    public String getIdentifier() {
        return getText();
    }

}