package ecologylab.bigsemantics.model.text; import ecologylab.serialization.annotations.simpl_inherit; /*** * * @author rhema * Extends Term allowing one to detach the term from a vector * and change it's score arbitrarily. This is useful * in the context of normalizing terms. */ @simpl_inherit public class TermWithScore extends Term { private double score; private double tf; private double idf; public double getIdf() { return idf; } public double getTf() { return tf; } public double getScore() { return score; } public void setScore(double score) { this.score = score; } public TermWithScore() { super(); } public static int TF_SCORE_TYPE = 0; public static int IDF_SCORE_TYPE = 1; public static int TF_IDF_SCORE_TYPE = 2; public static int TF_IDF_DIST_SCORE_TYPE = 3; public static int IF_IDF_CLOSENESS_SCORE_TYPE = 4; double getScore(int scoreType, Term t, double tf, double farness)//dist is between 0 and 1 with 0 as close and 1 as far { double idf = TermDictionary.getTermForWord(t.getWord()).idf(); if(idf == 0) idf = TermDictionary.averageIDF; double returnValue = tf*Math.pow(idf, -20*farness + 10); if(returnValue == Double.NaN) { debug(" Nan value for idf "+idf+" and tf "+tf); return 0; } return returnValue; } public static int SCORE_TYPE = 0; public TermWithScore(Term t, double score) { this(t,score,OrderedNormalizedTermVectorCache.TF_ONLY); } public TermWithScore(Term t, double score, double dfBonus) { this.setWord(t.getWord()); tf = score; idf = TermDictionary.getTermForWord(t.getWord()).idf(); this.score = getScore(SCORE_TYPE, t, score, dfBonus); } @Override public String toString() { return this.getWord() + ":" + score; } @Override public int compareTo(Term o) { TermWithScore convertedTerm = (TermWithScore) o; double difference = this.getScore() - convertedTerm.getScore(); if (difference > 0) return -1; else return (difference == 0) ? this.getWord().compareTo(convertedTerm.getWord()) : 1; } }