/* * File: LogLocalTermWeighter.java * Authors: Justin Basilico * Company: Sandia National Laboratories * Project: Cognitive Foundry * * Copyright April 20, 2009, Sandia Corporation. * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive * license for use of this work by or on behalf of the U.S. Government. Export * of this program may require a license from the United States Government. * See CopyrightHistory.txt for complete details. * */ package gov.sandia.cognition.text.term.vector.weighter.local; import gov.sandia.cognition.annotation.PublicationReference; import gov.sandia.cognition.annotation.PublicationType; import gov.sandia.cognition.math.matrix.SparseVectorFactory; import gov.sandia.cognition.math.matrix.Vector; import gov.sandia.cognition.math.matrix.VectorEntry; import gov.sandia.cognition.math.matrix.VectorFactory; /** * Implements the log-based local term weighting scheme. Takes in a vector of * term counts and for positive entries, it makes the weight log(1 + count). * Counts of zero (or less) are weighted as zero. * * @author Justin Basilico * @since 3.0 */ @PublicationReference( author={"Susan T. Dumais"}, title="Improving the retrieval of information from external sources", year=1991, type=PublicationType.Journal, publication="Behavior Research Methods, Instruments, and Computers", pages={229, 236}, url="http://www.google.com/url?sa=t&source=web&ct=res&cd=1&url=http%3A%2F%2Fwww.psychonomic.org%2Fsearch%2Fview.cgi%3Fid%3D5145&ei=o7joSdGEHY-itgPLre3tAQ&usg=AFQjCNEvm6PZEL6_Hk3XThI6DQ-gGx9EnQ&sig2=-gjFzNroJQirwGtwjaJvgQ" ) public class LogLocalTermWeighter extends AbstractLocalTermWeighter { /** * Creates a new {@code LogLocalTermWeighter}. */ public LogLocalTermWeighter() { this(SparseVectorFactory.getDefault()); } /** * Creates a new {@code LogLocalTermWeighter}. * * @param vectorFactory * The vector factory to use. */ public LogLocalTermWeighter( final VectorFactory<? extends Vector> vectorFactory) { super(vectorFactory); } public Vector computeLocalWeights( final Vector counts) { // Create the vector to hold the result. final Vector result = this.getVectorFactory().copyVector(counts); // Compute the local weight, which is log(1 + count) for each term. for (VectorEntry entry : result) { final double value = entry.getValue(); if (value > 0.0) { entry.setValue(Math.log(1.0 + value)); } else if (value != 0.0) { entry.setValue(0.0); } } return result; } }