/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.text.preprocessing.filter; import org.carrot2.core.attribute.Processing; import org.carrot2.text.preprocessing.PreprocessingContext; import org.carrot2.util.attribute.*; /** * Accepts labels that do not end in words in the Saxon Genitive form (e.g. "Threatening * the Country's"). */ @Bindable(prefix = "GenitiveLabelFilter") public class GenitiveLabelFilter extends SingleLabelFilterBase { /** * Remove labels ending in genitive form. Removes labels that do end in words * in the Saxon Genitive form (e.g. "Threatening the Country's"). */ @Input @Processing @Attribute @Label("Remove labels ending in genitive form") @Level(AttributeLevel.BASIC) @Group(DefaultGroups.LABELS) public boolean enabled = true; private final static char [][] ENDINGS = new char [] [] { "'s".toCharArray(), "`s".toCharArray(), "s'".toCharArray(), "s`".toCharArray() }; @Override public boolean acceptPhrase(PreprocessingContext context, int phraseIndex) { final int [] wordIndices = context.allPhrases.wordIndices[phraseIndex]; return isGenitive(context.allWords.image, wordIndices[wordIndices.length - 1]); } @Override public boolean acceptWord(PreprocessingContext context, int wordIndex) { return isGenitive(context.allWords.image, wordIndex); } private boolean isGenitive(char [][] wordImage, final int wordIndex) { char [] image = wordImage[wordIndex]; outer: for (char [] ending : ENDINGS) { if (image.length >= ending.length) { for (int i = 0; i < ending.length; i++) { if (image[image.length - ending.length + i] != ending[i]) { continue outer; } } return false; } } return true; } public boolean isEnabled() { return enabled; } }