/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.text.preprocessing;
import org.carrot2.util.tests.CarrotTestCase;
import org.junit.Before;
import org.junit.Test;
/**
 * Test cases for {@link LanguageModelStemmer} that feed the English phrase
 * "data mining" through preprocessing in lower, upper and mixed case and verify
 * the resulting stems.
 */
public class StemmerEnglishTest extends CarrotTestCase
{
    /** Builder for the documents under test; replaced with a fresh instance before each test. */
    PreprocessingContextBuilder contextBuilder;

    // @formatter:off

    /**
     * Creates a new {@link PreprocessingContextBuilder} so that every test starts
     * from its own builder instance.
     */
    @Before
    public void prepareContextBuilder()
    {
        this.contextBuilder = new PreprocessingContextBuilder();
    }

    /**
     * All-lowercase input: both fields contain "data mining".
     */
    @Test
    public void testLowerCaseWords()
    {
        final PreprocessingContextAssert ctx = contextBuilder
            .newDoc("data mining", "data mining")
            .buildContextAssert();

        verifyDataMiningStems(ctx, 2,
            "data", "mine", null,
            "data", "mine", null);
    }

    /**
     * All-uppercase input: the expected stems are the same lowercase images as for
     * lowercase input.
     */
    @Test
    public void testUpperCaseWords()
    {
        final PreprocessingContextAssert ctx = contextBuilder
            .newDoc("DATA MINING", "DATA MINING")
            .buildContextAssert();

        verifyDataMiningStems(ctx, 2,
            "data", "mine", null,
            "data", "mine", null);
    }

    /**
     * Mixed-case input: differently-cased variants of a word are expected to be
     * counted under a single stem.
     */
    @Test
    public void testMixedCaseWords()
    {
        final PreprocessingContextAssert ctx = contextBuilder
            .newDoc("DATA MINING Data Mining", "Data Mining Data Mining")
            .buildContextAssert();

        verifyDataMiningStems(ctx, 4,
            "data", "mine", "data", "mine", null,
            "data", "mine", "data", "mine", null);
    }

    /**
     * Runs the assertions shared by all tests in this class: the stems "data" and
     * "mine" must each have the given term frequency (overall and in document 0)
     * and field indices 0 and 1, there must be exactly two stems in total, and the
     * tokens' {@code stemImage} property must match the given sequence.
     *
     * @param ctx assertion wrapper around the preprocessing context to check
     * @param expectedTf expected term frequency of each stem, used both for the
     *        overall count and for the count in document 0
     * @param expectedStemImages expected {@code stemImage} value of every token, in
     *        order; {@code null} entries are expected for tokens without a stem image
     */
    private void verifyDataMiningStems(PreprocessingContextAssert ctx, int expectedTf,
        Object... expectedStemImages)
    {
        for (String stem : new String [] {"data", "mine"})
        {
            ctx.constainsStem(stem)
                .withTf(expectedTf)
                .withDocumentTf(0, expectedTf)
                .withFieldIndices(0, 1);
        }

        final int stemCount = ctx.context.allStems.image.length;
        assertThat(stemCount).isEqualTo(2);

        assertThat(ctx.tokens())
            .onProperty("stemImage")
            .containsExactly(expectedStemImages);
    }

    // @formatter:on
}