/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.text.preprocessing.filter;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.linguistic.ILexicalData;
import org.carrot2.text.preprocessing.LabelFormatter;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.util.attribute.*;
/**
* Accepts labels that are not declared as stop labels in the stoplabels.<lang>
* files.
*/
@Bindable(prefix = "StopLabelFilter")
public class StopLabelFilter extends SingleLabelFilterBase
{
/**
* Remove stop labels. Removes labels that are declared as stop labels in the
* stoplabels.<lang> files. Please note that adding a long list of regular
* expressions to the stoplabels file may result in a noticeable performance penalty.
*/
@Input
@Processing
@Attribute
@Label("Remove stop labels")
@Level(AttributeLevel.BASIC)
@Group(DefaultGroups.LABELS)
public boolean enabled = true;
/*
*
*/
public final LabelFormatter labelFormatter = new LabelFormatter();
/*
*
*/
public ILexicalData lexicalData;
@Override
public void filter(PreprocessingContext context, boolean [] acceptedStems,
boolean [] acceptedPhrases)
{
lexicalData = context.language.getLexicalData();
super.filter(context, acceptedStems, acceptedPhrases);
}
@Override
public boolean acceptPhrase(PreprocessingContext context, int phraseIndex)
{
final String formatedLabel = labelFormatter.format(context, phraseIndex
+ context.allWords.image.length);
return !lexicalData.isStopLabel(formatedLabel);
}
@Override
public boolean acceptWord(PreprocessingContext context, int wordIndex)
{
final String formattedLabel = labelFormatter.format(context, wordIndex);
return !lexicalData.isStopLabel(formattedLabel);
}
public boolean isEnabled()
{
return enabled;
}
}