/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.text.preprocessing.filter;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.util.attribute.*;
import org.carrot2.util.attribute.constraint.DoubleRange;
/**
* A filter that removes "incomplete" labels.
* <p>
* See <a href="http://project.carrot2.org/publications/osinski-2003-lingo.pdf">this
* document</a>, page 31 for a definition of a complete phrase.
*/
@Bindable(prefix = "CompleteLabelFilter")
public class CompleteLabelFilter implements ILabelFilter
{
/**
* Remove truncated phrases. Tries to remove "incomplete" cluster labels. For example,
* in a collection of documents related to <i>Data Mining</i>, the phrase
* <i>Conference on Data</i> is incomplete in a sense that most likely it should be
* <i>Conference on Data Mining</i> or even <i>Conference on Data Mining in Large
* Databases</i>. When truncated phrase removal is enabled, the algorithm would try to
* remove the "incomplete" phrases like the former one and leave only the more
* informative variants.
*/
@Input
@Processing
@Attribute
@Label("Remove truncated phrases")
@Level(AttributeLevel.BASIC)
@Group(DefaultGroups.LABELS)
public boolean enabled = true;
/**
* Truncated label threshold. Determines the strength of the truncated label filter.
* The lowest value means strongest truncated labels elimination, which may lead to
* overlong cluster labels and many unclustered documents. The highest value
* effectively disables the filter, which may result in short or truncated labels.
*/
@Input
@Processing
@Attribute
@DoubleRange(min = 0.0, max = 1.0)
@Label("Truncated label threshold")
@Level(AttributeLevel.ADVANCED)
@Group(DefaultGroups.LABELS)
public double labelOverrideThreshold = 0.65;
/**
* Left complete label filter.
*/
private LeftCompleteLabelFilter leftCompleteLabelFilter = new LeftCompleteLabelFilter();
/**
* Right complete label filter.
*/
private RightCompleteLabelFilter rightCompleteLabelFilter = new RightCompleteLabelFilter();
/**
* Marks incomplete labels.
*/
public void filter(PreprocessingContext context, boolean [] acceptedStems,
boolean [] acceptedPhrases)
{
if (!enabled)
{
return;
}
leftCompleteLabelFilter.filter(context, acceptedStems, acceptedPhrases,
labelOverrideThreshold);
rightCompleteLabelFilter.filter(context, acceptedStems, acceptedPhrases,
labelOverrideThreshold);
}
public boolean isEnabled()
{
return enabled;
}
}