/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.text.preprocessing; import org.carrot2.text.linguistic.ILexicalDataFactory; import org.carrot2.text.linguistic.IStemmerFactory; import org.carrot2.text.preprocessing.filter.CompleteLabelFilter; import org.junit.Before; /** * Test cases for {@link CompleteLabelFilter}. */ public class LabelFilterTestBase extends PreprocessingComponentTestBase { /** Filter processor under tests */ protected LabelFilterProcessor labelFilterProcessor; /** Other preprocessing components required for the test */ private Tokenizer tokenizer; private CaseNormalizer caseNormalizer; private LanguageModelStemmer languageModelStemmer; private PhraseExtractor phraseExtractor; private StopListMarker stopListMarker; @Before public void setUpPreprocessingComponents() { tokenizer = new Tokenizer(); caseNormalizer = new CaseNormalizer(); languageModelStemmer = new LanguageModelStemmer(); phraseExtractor = new PhraseExtractor(); stopListMarker = new StopListMarker(); labelFilterProcessor = new LabelFilterProcessor(); // Disable all filters by default. Tests will enable the filters they need. labelFilterProcessor.minLengthLabelFilter.enabled = false; labelFilterProcessor.queryLabelFilter.enabled = false; labelFilterProcessor.numericLabelFilter.enabled = false; labelFilterProcessor.stopWordLabelFilter.enabled = false; labelFilterProcessor.completeLabelFilter.enabled = false; initializeFilters(labelFilterProcessor); } protected void initializeFilters(LabelFilterProcessor filterProcessor) { } protected void check(int [] expectedLabelsFeatureIndex) { check(expectedLabelsFeatureIndex, -1); } protected void check(int [] expectedLabelsFeatureIndex, int expectedFirstPhraseIndex) { runPreprocessing(); assertThat(context.allLabels.featureIndex).as("allLabels.featureIndex") .isEqualTo(expectedLabelsFeatureIndex); assertThat(context.allLabels.firstPhraseIndex).as("allLabels.firstPhraseIndex") .isEqualTo(expectedFirstPhraseIndex); } protected void runPreprocessing() { tokenizer.tokenize(context); caseNormalizer.normalize(context); languageModelStemmer.stem(context); phraseExtractor.extractPhrases(context); stopListMarker.mark(context); labelFilterProcessor.process(context); } @Override protected ILexicalDataFactory createLexicalDataFactory() { return new TestLexicalDataFactory(); } @Override protected IStemmerFactory createStemmerFactory() { return new TestStemmerFactory(); } }