package com.tistory.devyongsik.analyzer.util; import java.io.IOException; import java.util.List; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import com.google.common.collect.Lists; import junit.framework.Assert; public class AnalyzerTestUtil { protected TestToken getToken(String term, int start, int end) { TestToken t = new TestToken(); t.setTerm(term); t.setStartOffset(start); t.setEndOffset(end); return t; } protected void verify(List<TestToken> expactedTokens, List<TestToken> extractedTokens) { for(TestToken testToken : expactedTokens) { Assert.assertTrue("[" + testToken + "] is expacted but not.", extractedTokens.contains(testToken)); } } protected List<TestToken> collectExtractedNouns(TokenStream stream) throws IOException { CharTermAttribute charTermAtt = stream.addAttribute(CharTermAttribute.class); OffsetAttribute offSetAtt = stream.addAttribute(OffsetAttribute.class); TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class); List<TestToken> extractedTokens = Lists.newArrayList(); while(stream.incrementToken()) { TestToken t = getToken(charTermAtt.toString(), offSetAtt.startOffset(), offSetAtt.endOffset()); System.out.println("termAtt.term() : " + charTermAtt.toString()); System.out.println("startoffSetAtt : " + offSetAtt.startOffset()); System.out.println("endoffSetAtt : " + offSetAtt.endOffset()); System.out.println("typeAttr : " + typeAttr.toString()); extractedTokens.add(t); } return extractedTokens; } }