/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.text.linguistic; import java.io.IOException; import org.carrot2.core.LanguageCode; import org.carrot2.core.Platform; import org.carrot2.text.analysis.ITokenizer; import org.carrot2.text.linguistic.lucene.ThaiTokenizerAdapter; import org.junit.Assume; import org.junit.Test; /** * Test cases for {@link ThaiTokenizerAdapter}. Test strings taken from Lucene's * TestThaiAnalyzer. */ public class ThaiTokenizerTest extends TokenizerTestBase { @Override protected ITokenizer createTokenStream() throws IOException { return new DefaultTokenizerFactory().getTokenizer(LanguageCode.THAI); } @Test public void testThaiTermTokens() { Assume.assumeTrue(Platform.getPlatform() != Platform.DOTNET); Assume.assumeTrue(ThaiTokenizerAdapter.platformSupportsThai()); assertEqualTokens( "การที่ได้ต้องแสดงว่างานดี", tokens(ITokenizer.TT_TERM, "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี")); } @Test public void testThaiEnglishTermTokens() { Assume.assumeTrue(Platform.getPlatform() != Platform.DOTNET); Assume.assumeTrue(ThaiTokenizerAdapter.platformSupportsThai()); assertEqualTokens("ประโยคว่า The quick brown", tokens(ITokenizer.TT_TERM, "ประโยค", "ว่า", "The", "quick", "brown")); } @Test public void testNumericTokens() { Assume.assumeTrue(Platform.getPlatform() != Platform.DOTNET); Assume.assumeTrue(ThaiTokenizerAdapter.platformSupportsThai()); assertEqualTokens("๑๒๓", tokens(ITokenizer.TT_TERM, "๑๒๓")); } }