package com.senseidb.clue.test;
import java.io.IOException;
import java.io.StringReader;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Locale;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.BytesRef;
/**
 * Tokenizer that splits a comma-separated string into lower-cased terms and attaches the same
 * 4-byte payload to every term.
 *
 * <p>The payload is the total number of terms, converted to a {@code float} and stored as its
 * IEEE-754 bit pattern in big-endian byte order.
 *
 * <p>The terms are taken from the constructor argument, not from the reader: the text is split
 * eagerly at construction time and the reader is only installed so that the base class has an
 * input.
 *
 * <p>As with any token stream, {@link #reset()} must be called before the first call to
 * {@link #incrementToken()}.
 */
public class PayloadTokenizer extends Tokenizer {

    /** Number of bytes needed to hold one {@code int}-encoded payload. */
    private static final int PAYLOAD_BYTES = 4;

    /** Lower-cased terms, in the order they appear in the input text. */
    private final String[] tokens;

    private final CharTermAttribute termAttr;
    private final PayloadAttribute payloadAttr;
    private final PositionIncrementAttribute positionAttr;
    private final OffsetAttribute offsetAttr;

    /** Scratch buffer reused for the payload of every emitted token. */
    private final BytesRef payload;

    /** Bit pattern of {@code (float) tokens.length}; identical for every token. */
    private final int payloadBits;

    /** Number of tokens emitted since the last {@link #reset()}. */
    private int count = 0;

    /** Cursor over {@link #tokens}; {@code null} until {@link #reset()} has been called. */
    private Iterator<String> iter = null;

    /**
     * Creates a tokenizer over the given comma-separated text.
     *
     * @param text comma-separated terms; lower-cased before being split on {@code ","}
     * @throws IOException declared for compatibility with existing callers
     */
    public PayloadTokenizer(String text)
            throws IOException {
        setReader(new StringReader(text));
        // Locale.ROOT keeps the produced terms independent of the JVM default locale
        // (a Turkish default locale, for example, lower-cases "I" to a dotless "ı").
        this.tokens = text.toLowerCase(Locale.ROOT).split(",");
        termAttr = addAttribute(CharTermAttribute.class);
        termAttr.resizeBuffer(text.length()); // maximum size necessary is the size of the input
        payloadAttr = addAttribute(PayloadAttribute.class);
        payload = new BytesRef(new byte[PAYLOAD_BYTES]);
        // The int token count is widened to float before its bits are taken.
        payloadBits = Float.floatToIntBits(this.tokens.length);
        positionAttr = addAttribute(PositionIncrementAttribute.class);
        offsetAttr = addAttribute(OffsetAttribute.class);
    }

    /**
     * Writes {@code value} as four big-endian bytes at the start of {@code reuse.bytes}.
     *
     * @param value the integer to encode
     * @param reuse destination; its backing array must hold at least four bytes
     * @return {@code reuse}, for call chaining
     */
    private static BytesRef intToByteArray(int value, BytesRef reuse) {
        ByteBuffer.wrap(reuse.bytes).putInt(value);
        return reuse;
    }

    /**
     * Emits the next term together with the shared payload.
     *
     * @return {@code true} if a token was produced, {@code false} once all terms are exhausted
     * @throws IllegalStateException if {@link #reset()} has not been called yet
     */
    @Override
    public final boolean incrementToken() throws IOException {
        if (iter == null) {
            // Fail with a descriptive error instead of a bare NullPointerException.
            throw new IllegalStateException(
                    "reset() must be called before incrementToken()");
        }
        if (!iter.hasNext()) {
            return false;
        }
        clearAttributes();
        // This is the dummy term.
        termAttr.setEmpty();
        termAttr.append(iter.next());
        // The payload is set on every token because clearAttributes() was just called.
        payloadAttr.setPayload(intToByteArray(payloadBits, payload));
        positionAttr.setPositionIncrement(1);
        // NOTE(review): the end offset is the token's index, not a character offset into the
        // text. Kept as-is because changing it would alter what is indexed; confirm intent.
        offsetAttr.setOffset(0, count);
        count++;
        return true;
    }

    /** Rewinds the stream to the first term and zeroes the emitted-token counter. */
    @Override
    public void reset() throws IOException {
        super.reset();
        iter = Arrays.asList(this.tokens).iterator();
        count = 0;
    }
}