package weka.deduping.metrics;
import java.util.*;
/**
 * Per-token record kept in an inverted index. It bundles a token's
 * (a.k.a. word's or term's) weighting factor together with the list of
 * places where the token occurs.
 *
 * <p>Both members are public, mutable fields; the class itself performs
 * no validation or computation on them.
 *
 * @author Ray Mooney
 */
public class TokenInfo {

    /**
     * Inverse document frequency (IDF) factor for this token, i.e. how
     * heavily a single occurrence should be weighted. A token that shows up
     * in many documents discriminates poorly between them and is therefore
     * weighted less. Starts out at 0.0 until a caller assigns a value.
     */
    public double idf = 0.0;

    /**
     * Occurrence entries describing the documents in which this token
     * appears. Starts out empty. The list is a raw type, so the element
     * type is not enforced by this class.
     */
    public ArrayList occList = new ArrayList();

    /**
     * Builds an initially empty record: no occurrences and an IDF of 0.0.
     * All state is set up by the field initializers above.
     */
    public TokenInfo() {
    }
}