/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.text.preprocessing.filter; import org.carrot2.core.attribute.Processing; import org.carrot2.text.preprocessing.PreprocessingContext; import org.carrot2.util.attribute.*; /** * Accepts labels whose length in characters is greater or equal to the provided value. */ @Bindable(prefix = "MinLengthLabelFilter") public class MinLengthLabelFilter extends SingleLabelFilterBase { /** * Remove labels shorter than 3 characters. Removes labels whose total length in * characters, including spaces, is less than 3. */ @Input @Processing @Attribute @Label("Remove short labels") @Level(AttributeLevel.BASIC) @Group(DefaultGroups.LABELS) public boolean enabled = true; private final static int MIN_LENGTH = 3; @Override public boolean acceptPhrase(PreprocessingContext context, int phraseIndex) { final int [] wordIndices = context.allPhrases.wordIndices[phraseIndex]; char [][] wordImage = context.allWords.image; int wordIndex = 0; int length = wordImage[wordIndices[wordIndex++]].length; while (length < MIN_LENGTH && wordIndex < wordIndices.length) { length += wordImage[wordIndices[wordIndex]].length + 1 /* space */; wordIndex++; } return length >= MIN_LENGTH; } @Override public boolean acceptWord(PreprocessingContext context, int wordIndex) { return context.allWords.image[wordIndex].length >= MIN_LENGTH; } public boolean isEnabled() { return enabled; } }