/*
* File: NormalizedLogLocalTermWeighter.java
* Authors: Justin Basilico
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright April 22, 2009, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government. Export
* of this program may require a license from the United States Government.
* See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.text.term.vector.weighter.local;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.math.matrix.SparseVectorFactory;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.math.matrix.VectorFactory;
/**
 * Implements a normalized version of the log local weighter. It is similar
 * to the {@code LogLocalTermWeighter} except that it normalizes by the average
 * frequency. It takes in a vector of term counts and for positive entries, it
 * makes the weight log(1 + count) / log(1 + average) where average is the
 * average count across the whole document, computed as the 1-norm of the
 * counts divided by the dimensionality. Counts of zero (or less) are
 * weighted as zero.
 *
 * If the average is zero, the normalization step is skipped so that the
 * weights are not divided by log(1) = 0.
 *
 * @author Justin Basilico
 * @since 3.0
 */
@PublicationReference(
    author={"Erica Chisholm", "Tamara G. Kolda"},
    title="New Term Weighting Formulas for the Vector Space Method in Information Retrieval",
    type=PublicationType.TechnicalReport,
    url="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.40.3899",
    year=1999,
    notes="We use a slight modification of the formula for normalizing in this paper."
)
public class NormalizedLogLocalTermWeighter
    extends LogLocalTermWeighter
{

    /**
     * Creates a new {@code NormalizedLogLocalTermWeighter} that uses the
     * default sparse vector factory.
     */
    public NormalizedLogLocalTermWeighter()
    {
        this(SparseVectorFactory.getDefault());
    }

    /**
     * Creates a new {@code NormalizedLogLocalTermWeighter}.
     *
     * @param   vectorFactory
     *      The vector factory to use.
     */
    public NormalizedLogLocalTermWeighter(
        final VectorFactory<? extends Vector> vectorFactory)
    {
        super(vectorFactory);
    }

    /**
     * Computes the local weights using the superclass and then divides them
     * by log(1 + average), where average is the 1-norm of the given counts
     * divided by the dimensionality of the result.
     *
     * No normalization is applied when the dimensionality is zero or when
     * the divisor is zero (which happens when the average count is zero).
     *
     * @param   counts
     *      The vector of term counts.
     * @return
     *      The vector of normalized local weights.
     */
    @Override
    public Vector computeLocalWeights(
        final Vector counts)
    {
        // Compute the (unnormalized) local weights.
        final Vector result = super.computeLocalWeights(counts);

        final int dimensionality = result.getDimensionality();
        if (dimensionality != 0)
        {
            final double average = counts.norm1() / dimensionality;
            final double divisor = Math.log(1.0 + average);

            // A zero average gives log(1) = 0. Dividing by it would scale
            // by infinity and turn zero weights into NaN, so only normalize
            // when the divisor is non-zero.
            if (divisor != 0.0)
            {
                result.scaleEquals(1.0 / divisor);
            }
        }

        return result;
    }

}