/* * File: TermFrequencyLocalTermWeighter.java * Authors: Justin Basilico * Company: Sandia National Laboratories * Project: Cognitive Foundry * * Copyright April 20, 2009, Sandia Corporation. * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive * license for use of this work by or on behalf of the U.S. Government. Export * of this program may require a license from the United States Government. * See CopyrightHistory.txt for complete details. * */ package gov.sandia.cognition.text.term.vector.weighter.local; import gov.sandia.cognition.annotation.PublicationReference; import gov.sandia.cognition.annotation.PublicationType; import gov.sandia.cognition.math.matrix.SparseVectorFactory; import gov.sandia.cognition.math.matrix.Vector; import gov.sandia.cognition.math.matrix.VectorFactory; /** * Local weighting for term frequency. The input is assumed to be a vector of * the number of times a term appears in the document. If n_i,j is the number of * times term i appears in document j, the term frequency for term i in document * j is: * * tf_(i,j) = n_(i,j) / (sum_k n_(k, j) * * @author Justin Basilico * @since 3.0 */ @PublicationReference ( author="Wikipedia", title="tf-idf", type=PublicationType.WebPage, url="http://en.wikipedia.org/wiki/tf-idf", year=2009 ) public class TermFrequencyLocalTermWeighter extends AbstractLocalTermWeighter { /** * Creates a new {@code TermFrequencyLocalTermWeighter}. */ public TermFrequencyLocalTermWeighter() { this(SparseVectorFactory.getDefault()); } /** * Creates a new {@code LogLocalTermWeighter}. * * @param vectorFactory * The vector factory to use. */ public TermFrequencyLocalTermWeighter( final VectorFactory<? extends Vector> vectorFactory) { super(vectorFactory); } public Vector computeLocalWeights( final Vector counts) { // Since the counts are positive, the 1-norm of them is their sum. final Vector result = this.vectorFactory.copyVector(counts); final double countSum = counts.norm1(); if (countSum != 0.0) { result.scaleEquals(1.0 / countSum); } return result; } }