package org.xbib.elasticsearch.index.analysis.concat;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
/**
 * Token filter that consumes the complete input stream and re-emits its terms
 * concatenated into phrases, with a single space between consecutive terms.
 *
 * <p>Input tokens with a position increment greater than zero start a new position;
 * tokens with a position increment of zero are collected as alternatives at the
 * position of the preceding token. One output token is produced for every
 * combination of alternatives across all positions, so the number of output tokens
 * is the product of the number of alternatives per position.
 *
 * <p>The ConcatTokenFilter is authored by
 * <a href="http://sujitpal.blogspot.de/2011/07/lucene-token-concatenating-tokenfilter_30.html">Sujit Pal</a>.
 */
public final class ConcatTokenFilter extends TokenFilter {

    private final CharTermAttribute termAttr;
    private final PositionIncrementAttribute posIncAttr;

    /** Buffered input terms; each inner list holds the alternative terms of one position. */
    private final LinkedList<List<String>> words;

    /** Phrases that have been built but not yet emitted, in emission order. */
    private final LinkedList<String> phrases;

    /** {@code true} once the input has been drained and the phrases have been built. */
    private boolean concat = false;

    /**
     * Creates a filter that concatenates the terms of the given stream.
     *
     * @param input the token stream whose terms are concatenated
     */
    protected ConcatTokenFilter(TokenStream input) {
        super(input);
        this.termAttr = addAttribute(CharTermAttribute.class);
        this.posIncAttr = addAttribute(PositionIncrementAttribute.class);
        this.words = new LinkedList<>();
        this.phrases = new LinkedList<>();
    }

    /**
     * Emits the next concatenated phrase.
     *
     * <p>The first call of a pass drains the wrapped stream and builds all phrases;
     * subsequent calls only hand out the buffered phrases. All attributes are cleared
     * before the term attribute is set to the phrase.
     *
     * @return {@code true} if a phrase was emitted, {@code false} once all phrases
     *         have been emitted (or the input produced no tokens)
     * @throws IOException if the wrapped stream fails while being drained
     */
    @Override
    public boolean incrementToken() throws IOException {
        if (!concat) {
            // Drain the input exactly once; polling it again after it has reported
            // exhaustion would violate the TokenStream consumer workflow.
            bufferInput();
            makePhrases(words, phrases);
            concat = true;
        }
        if (!phrases.isEmpty()) {
            clearAttributes();
            termAttr.setEmpty().append(phrases.removeFirst());
            return true;
        }
        // Everything has been emitted: drop the buffers so the instance is clean
        // even if the consumer does not call reset() before the next pass.
        clearBufferedState();
        return false;
    }

    /**
     * Resets the wrapped stream and discards any buffered terms and unemitted
     * phrases, so a reused instance never leaks state from a previous pass.
     *
     * @throws IOException if resetting the wrapped stream fails
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        clearBufferedState();
    }

    /** Reads every token from the input and groups the terms by position. */
    private void bufferInput() throws IOException {
        while (input.incrementToken()) {
            String term = termAttr.toString();
            // A positive increment opens a new position. The isEmpty() guard covers a
            // leading token with increment 0, which has no previous position to join.
            if (posIncAttr.getPositionIncrement() > 0 || words.isEmpty()) {
                List<String> alternatives = new LinkedList<>();
                words.add(alternatives);
            }
            words.getLast().add(term);
        }
    }

    /** Forgets all buffered terms and phrases and re-arms the filter for a new pass. */
    private void clearBufferedState() {
        concat = false;
        phrases.clear();
        words.clear();
    }

    /**
     * Fills {@code phrases} with the space-joined cross product of the alternatives
     * at each position, in the order the positions and alternatives were buffered.
     *
     * @param words   alternatives per position, in stream order
     * @param phrases target list; if it is empty the alternatives of the first
     *                position seed it, otherwise existing entries act as prefixes
     */
    private void makePhrases(List<List<String>> words, List<String> phrases) {
        for (List<String> alternatives : words) {
            if (phrases.isEmpty()) {
                phrases.addAll(alternatives);
                continue;
            }
            List<String> extended = new LinkedList<>();
            for (String prefix : phrases) {
                for (String alternative : alternatives) {
                    extended.add(prefix + " " + alternative);
                }
            }
            phrases.clear();
            phrases.addAll(extended);
        }
    }

    /**
     * Treats every {@code ConcatTokenFilter} as equal to every other one,
     * regardless of the wrapped stream or buffered state.
     *
     * @param object the object to compare with
     * @return {@code true} if {@code object} is a {@code ConcatTokenFilter}
     */
    @Override
    public boolean equals(Object object) {
        return object instanceof ConcatTokenFilter;
    }

    /**
     * Returns a constant hash code, consistent with {@link #equals(Object)}.
     *
     * @return always {@code 0}
     */
    @Override
    public int hashCode() {
        return 0;
    }
}