/* * File: CommonTermWeighterFactory.java * Authors: Justin Basilico * Company: Sandia National Laboratories * Project: Cognitive Foundry * * Copyright April 20, 2009, Sandia Corporation. * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive * license for use of this work by or on behalf of the U.S. Government. Export * of this program may require a license from the United States Government. * See CopyrightHistory.txt for complete details. * */ package gov.sandia.cognition.text.term.vector.weighter; import gov.sandia.cognition.annotation.PublicationReference; import gov.sandia.cognition.annotation.PublicationType; import gov.sandia.cognition.text.term.vector.weighter.local.LogLocalTermWeighter; import gov.sandia.cognition.text.term.vector.weighter.local.TermFrequencyLocalTermWeighter; import gov.sandia.cognition.text.term.vector.weighter.global.DominanceGlobalTermWeighter; import gov.sandia.cognition.text.term.vector.weighter.global.EntropyGlobalTermWeighter; import gov.sandia.cognition.text.term.vector.weighter.global.InverseDocumentFrequencyGlobalTermWeighter; import gov.sandia.cognition.text.term.vector.weighter.normalize.UnitTermWeightNormalizer; /** * A factory for well-known weighting schemes. * * @author Justin Basilico * @since 3.0 */ public class CommonTermWeighterFactory extends Object { /** * Private constructor for utility class. */ private CommonTermWeighterFactory() { super(); } /** * Creates a term-frequency (TF) weighting scheme. No global weight or * normalizer is used. * * @return * A new TF-IDF weighter. */ public static CompositeLocalGlobalTermWeighter createTFWeighter() { return new CompositeLocalGlobalTermWeighter( new TermFrequencyLocalTermWeighter(), null, null); } /** * Creates a term-frequency inverse-document-frequency (TF-IDF) weighting * scheme but without any normalization. * * @return * A new TF-IDF weighter. */ @PublicationReference( author = "Wikipedia", title = "tf-idf", type = PublicationType.WebPage, url = "http://en.wikipedia.org/wiki/tf-idf", year = 2009 ) public static CompositeLocalGlobalTermWeighter createTFIDFWeighter() { return new CompositeLocalGlobalTermWeighter( new TermFrequencyLocalTermWeighter(), new InverseDocumentFrequencyGlobalTermWeighter(), null); } /** * Creates a term-frequency inverse-document-frequency (TF-IDF) weighting * scheme with unit vector normalization (2-norm). * * @return * A new TF-IDF weighter. */ public static CompositeLocalGlobalTermWeighter createTFIDFWeighterWithUnitNormalization() { return new CompositeLocalGlobalTermWeighter( new TermFrequencyLocalTermWeighter(), new InverseDocumentFrequencyGlobalTermWeighter(), new UnitTermWeightNormalizer()); } /** * Creates a log-entropy weighting scheme. * * @return * A new log-entropy weighter. */ @PublicationReference( author={"Susan T. Dumais"}, title="Improving the retrieval of information from external sources", year=1991, type=PublicationType.Journal, publication="Behavior Research Methods, Instruments, and Computers", pages={229, 236}, url="http://www.google.com/url?sa=t&source=web&ct=res&cd=1&url=http%3A%2F%2Fwww.psychonomic.org%2Fsearch%2Fview.cgi%3Fid%3D5145&ei=o7joSdGEHY-itgPLre3tAQ&usg=AFQjCNEvm6PZEL6_Hk3XThI6DQ-gGx9EnQ&sig2=-gjFzNroJQirwGtwjaJvgQ" ) public static CompositeLocalGlobalTermWeighter createLogEntropyWeighter() { return new CompositeLocalGlobalTermWeighter( new LogLocalTermWeighter(), new EntropyGlobalTermWeighter(), null); } /** * Creates a log-dominance weighting scheme. * * @return * A new log-dominance weighter. */ public static CompositeLocalGlobalTermWeighter createLogDominanceWeighter() { return new CompositeLocalGlobalTermWeighter( new LogLocalTermWeighter(), new DominanceGlobalTermWeighter(), null); } }