/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.text.linguistic;
import java.io.IOException;
import org.carrot2.core.LanguageCode;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.linguistic.lucene.HindiStemmerAdapter;
import org.junit.Test;
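/**
* Tokenization test cases for Hindi (related to {@link HindiStemmerAdapter}).
* NOTE(review): this comment previously attributed the test strings to Lucene's
* TestThaiAnalyzer, which looks like a copy-and-paste leftover because the strings
* are Hindi; presumably they come from Lucene's Hindi analyzer tests -- confirm.
*/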
public class HindiStemmerFactoryTest extends TokenizerTestBase
{
    /**
     * Supplies the tokenizer under test: the one {@link DefaultTokenizerFactory}
     * hands out for {@link LanguageCode#HINDI}.
     *
     * @return the tokenizer obtained from the default factory for Hindi
     * @throws IOException declared by the overridden method's signature
     */
    @Override
    protected ITokenizer createTokenStream() throws IOException
    {
        final DefaultTokenizerFactory tokenizerFactory = new DefaultTokenizerFactory();
        return tokenizerFactory.getTokenizer(LanguageCode.HINDI);
    }

    /**
     * Checks the token sequences expected for two Hindi inputs: one mixing
     * Devanagari words with a Latin-script word, and one containing a danda.
     */
    @Test
    public void testTokens()
    {
        // Devanagari words followed by a Latin-script word; all expected as terms.
        final String mixedScriptInput = "डाटा को कई जगह पर foobar";
        final TokenImage [] mixedScriptExpected = {
            term("डाटा"),
            term("को"),
            term("कई"),
            term("जगह"),
            term("पर"),
            term("foobar")
        };
        assertEqualTokens(mixedScriptInput, mixedScriptExpected);

        // The danda is expected to be reported as a sentence delimiter, not a term.
        final String dandaInput = "रिडनडेंसी कहलाता है । डाटा माइनिंग";
        final TokenImage [] dandaExpected = {
            term("रिडनडेंसी"),
            term("कहलाता"),
            term("है"),
            sentenceDelimiter("।"),
            term("डाटा"),
            term("माइनिंग")
        };
        assertEqualTokens(dandaInput, dandaExpected);
    }
}