package tv.dyndns.kishibe.qmaclone.server.relevance;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.when;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.runners.MockitoJUnitRunner;
import org.mockito.stubbing.Answer;
import tv.dyndns.kishibe.qmaclone.server.testing.QMACloneTestEnv;
import com.google.guiceberry.junit4.GuiceBerryRule;
import com.google.inject.Inject;
/**
 * Exercises {@link ViterbiTokenizer} through a {@link ViterbiAnalyzer}.
 *
 * <p>The tokenizer factory handed to the analyzer is a Mockito mock; the test stubs it so that
 * every {@code create(Reader)} call yields a real {@link ViterbiTokenizer} built from the
 * injected {@link WordSegmenter} and the reader passed to the factory.
 */
@RunWith(MockitoJUnitRunner.class)
public class ViterbiTokenizerTest {

  @Rule
  public final GuiceBerryRule rule = new GuiceBerryRule(QMACloneTestEnv.class);

  @Inject
  private WordSegmenter wordSegmenter;

  private ViterbiAnalyzer viterbiAnalyzer;

  @Mock
  private ViterbiTokenizer.Factory viterbiTokenizerFactory;

  /** Builds the analyzer under test on top of the mocked tokenizer factory. */
  @Before
  public void setUp() throws Exception {
    viterbiAnalyzer = new ViterbiAnalyzer(viterbiTokenizerFactory);
  }

  /**
   * Tokenizes a sample sentence and checks that a handful of expected terms are among the
   * produced tokens. Note that the expected form of "Google" is the lower-case "google".
   */
  @Test
  public final void testVitabiTokenizer() throws Exception {
    String sentence = "「Google」で2020年完成予定の 人工知能で会話しつつ検索などを行うサービスを 「Google ○○○○○」という? BRAIN";
    String[] expectedTerms = { "google", "人工知能", "検索", "会話", "サービス" };
    StringReader input = new StringReader(sentence);

    stubFactoryWithRealTokenizer();

    // The analyzer is listed first so that it is closed after the token stream.
    try (Analyzer analyzer = viterbiAnalyzer;
        TokenStream stream = analyzer.tokenStream("default", input)) {
      Set<String> terms = collectTerms(stream);

      // Assertions stay inside the try block: they run before the resources are closed.
      for (String expected : expectedTerms) {
        assertTrue(terms.contains(expected));
      }
    }
  }

  /**
   * Stubs the mocked factory so that {@code create(Reader)} returns a real tokenizer wrapping
   * the reader it was called with.
   */
  private void stubFactoryWithRealTokenizer() throws Exception {
    Answer<ViterbiTokenizer> realTokenizer = new Answer<ViterbiTokenizer>() {
      @Override
      public ViterbiTokenizer answer(InvocationOnMock invocation) throws Throwable {
        Object firstArgument = invocation.getArguments()[0];
        return new ViterbiTokenizer(wordSegmenter, (Reader) firstArgument);
      }
    };
    when(viterbiTokenizerFactory.create(any(Reader.class))).then(realTokenizer);
  }

  /**
   * Resets the stream, drains it, and signals end-of-stream.
   *
   * @param stream token stream to consume; closing it is left to the caller
   * @return the distinct term texts emitted by the stream
   */
  private Set<String> collectTerms(TokenStream stream) throws Exception {
    // The attribute is looked up before reset(), in the same order as the stream is prepared.
    CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
    Set<String> collected = new HashSet<>();

    stream.reset();
    while (stream.incrementToken()) {
      collected.add(term.toString());
    }
    stream.end();

    return collected;
  }
}