/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.text.linguistic; import org.carrot2.core.LanguageCode; import org.carrot2.text.util.MutableCharArray; import org.carrot2.util.tests.CarrotTestCase; import org.junit.Before; import org.junit.Test; import static org.junit.Assert.*; /** * Superclass for testing {@link LanguageModel}s. */ public abstract class LanguageModelTestBase extends CarrotTestCase { /** * */ protected LanguageModel languageModel; /** * @return Returns language code for this test. */ protected abstract LanguageCode getLanguageCode(); /** * */ @Before public void setupLanguage() { this.languageModel = LanguageModel.create(getLanguageCode(), new DefaultStemmerFactory(), new DefaultTokenizerFactory(), new DefaultLexicalDataFactory()); } /** * */ @Test public void testStemmerAvailable() { assertNotNull(languageModel.getStemmer()); assertThat(languageModel.getStemmer().getClass()).as("Stemmer class for: " + languageModel.getLanguageCode()) .isNotEqualTo(IdentityStemmer.class); } /** * */ @Test public void testLanguageCode() { assertEquals(getLanguageCode(), languageModel.getLanguageCode()); } /** * */ @Test public void testStemming() { final String [][] testData = getStemmingTestData(); final IStemmer stemmer = languageModel.getStemmer(); for (String [] pair : testData) { CharSequence stemmed = stemmer.stem(pair[0]); assertEquals("Stemming difference: " + pair[0] + " should become " + pair[1] + " but was transformed into " + stemmed, pair[1], stemmed == null ? null : stemmed.toString()); } } /** * */ @Test public void testCommonWords() { final String [] testData = getCommonWordsTestData(); for (String word : testData) { assertTrue(languageModel.getLexicalData().isCommonWord( new MutableCharArray(word))); } } /** * Override and provide word pairs for {@link LanguageModel#getStemmer()} tests. * Sample data should follow this format: * * <pre> * return new String [] [] * { * { * "inflected", "base" * }, * { * "inflected", "base" * }, * }; * </pre> */ protected String [][] getStemmingTestData() { return new String [] [] { /* Empty by default. */ }; } /** * Override and provide words for testing against * {@link ILexicalData#isCommonWord(MutableCharArray)}). */ protected String [] getCommonWordsTestData() { return new String [] { /* Empty by default. */ }; } }