package com.github.btpka3.lucene.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.Attribute;
import org.junit.Test;
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
/**
 * Manual smoke test for {@code PinyinAbbrTokenizer}: feeds a mixed
 * ASCII/Chinese string through the tokenizer and prints every emitted
 * token with its attributes to stdout for visual inspection.
 */
public class PinyinAbbrTokenizerTest {

    @Test
    public void test1() throws Exception {
        // try-with-resources: TokenStream is Closeable; guarantees the
        // tokenizer is released even if print(...) throws.
        try (PinyinAbbrTokenizer ts = new PinyinAbbrTokenizer(new StringReader("abc湖南省123"))) {
            print(ts);
        }
    }

    /**
     * Lists the attribute classes registered on the stream, then consumes it,
     * printing offsets, type, position increment/length, raw UTF-8 term bytes
     * and term text for every token.
     *
     * @param ts the stream to consume; the caller remains responsible for closing it
     * @throws IOException if the underlying reader fails
     */
    private void print(TokenStream ts) throws IOException {
        Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
        while (it.hasNext()) {
            System.out.println(it.next());
        }

        CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
        PositionLengthAttribute posLenAtt = ts.getAttribute(PositionLengthAttribute.class);
        TypeAttribute typeAtt = ts.getAttribute(TypeAttribute.class);
        OffsetAttribute offsetAtt = ts.getAttribute(OffsetAttribute.class);
        TermToBytesRefAttribute byteRefAtt = ts.getAttribute(TermToBytesRefAttribute.class);

        // Mandatory TokenStream workflow: reset() -> incrementToken()* -> end().
        ts.reset();
        while (ts.incrementToken()) {
            System.out.printf("%3d ~ %3d : %15s : %3d : %3d : '%s' - '%s' : %n",
                    offsetAtt.startOffset(),
                    offsetAtt.endOffset(),
                    typeAtt.type(),
                    posIncrAtt.getPositionIncrement(),
                    posLenAtt.getPositionLength(),
                    // utf8ToString() honors BytesRef offset/length; decoding the
                    // whole shared backing array with the platform charset (the
                    // old `new String(ref.bytes)`) printed stale/garbage bytes.
                    byteRefAtt.getBytesRef().utf8ToString(),
                    termAtt.toString()
            );
        }
        ts.end(); // required by the contract before close()
    }
}