/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.text.preprocessing.filter; import org.carrot2.core.attribute.Processing; import org.carrot2.text.analysis.ITokenizer; import org.carrot2.text.preprocessing.PreprocessingContext; import org.carrot2.util.attribute.Attribute; import org.carrot2.util.attribute.AttributeLevel; import org.carrot2.util.attribute.Bindable; import org.carrot2.util.attribute.DefaultGroups; import org.carrot2.util.attribute.Group; import org.carrot2.util.attribute.Input; import org.carrot2.util.attribute.Label; import org.carrot2.util.attribute.Level; /** * Accepts labels that do not consist only of query words. */ @Bindable(prefix = "QueryLabelFilter") public class QueryLabelFilter extends SingleLabelFilterBase { /** * Remove query words. Removes labels that consist only of words contained in the * query. */ @Input @Processing @Attribute @Label("Remove query words") @Level(AttributeLevel.BASIC) @Group(DefaultGroups.LABELS) public boolean enabled = true; @Override public boolean acceptPhrase(PreprocessingContext context, int phraseIndex) { final int [] wordIndices = context.allPhrases.wordIndices[phraseIndex]; final short [] flag = context.allWords.type; for (int i = 0; i < wordIndices.length; i++) { if (!isQueryWord(flag[wordIndices[i]])) { return true; } } return false; } @Override public boolean acceptWord(PreprocessingContext context, int wordIndex) { return !isQueryWord(context.allWords.type[wordIndex]); } private final boolean isQueryWord(short flag) { return (flag & ITokenizer.TF_QUERY_WORD) != 0; } public boolean isEnabled() { return enabled; } }