/**
*
*/
package querqy.lucene.contrib.rewrite;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.FST;
import querqy.SimpleComparableCharSequence;
import querqy.model.BooleanQuery;
import querqy.model.DisjunctionMaxClause;
import querqy.model.DisjunctionMaxQuery;
import querqy.model.Term;
import querqy.model.Clause.Occur;
/**
* @author rene
*
*/
class Sequence {
final FST.Arc<BytesRef> arc;
final List<Term> terms;
final BytesRef output;
public Sequence(FST.Arc<BytesRef> arc, List<Term> terms, BytesRef output) {
this.arc = arc;
this.terms = terms;
this.output = output;
}
public void addOutputs(Map<DisjunctionMaxQuery, Set<DisjunctionMaxClause>> addenda, SynonymMap map,
ByteArrayDataInput bytesReader) {
BytesRef finalOutput = map.fst.outputs.add(output, arc.nextFinalOutput);
bytesReader.reset(finalOutput.bytes, finalOutput.offset, finalOutput.length);
BytesRef scratchBytes = new BytesRef();
final int code = bytesReader.readVInt();
// final boolean keepOrig = (code & 0x1) == 0;
final int count = code >>> 1;
// iterate over all possible outputs
for (int outputIDX = 0; outputIDX < count; outputIDX++) {
map.words.get(bytesReader.readVInt(), scratchBytes);
char[] scratchChars = new char[scratchBytes.length];
UnicodeUtil.UTF8toUTF16(scratchBytes, scratchChars);
boolean replacementIsMultiTerm = false;
// ignore ' ' at beginning and end
for (int i = 1; i < scratchChars.length - 1 && !replacementIsMultiTerm; i++) {
replacementIsMultiTerm = scratchChars[i] == ' ';
}
// iterate through all input terms
for (Term term : terms) {
DisjunctionMaxQuery currentDmq = term.getParent();
BooleanQuery add = new BooleanQuery(currentDmq, Occur.SHOULD, true);
if (replacementIsMultiTerm) {
BooleanQuery replaceSeq = new BooleanQuery(add, Occur.MUST, true);
int start = 0;
for (int i = 0; i < scratchChars.length; i++) {
if (scratchChars[i] == ' ' && (i > start)) {
DisjunctionMaxQuery newDmq = new DisjunctionMaxQuery(replaceSeq, Occur.MUST, true);
newDmq.addClause(
new Term(newDmq,
new SimpleComparableCharSequence(scratchChars, start, i - start)));
replaceSeq.addClause(newDmq);
start = i + 1;
}
}
if (start < scratchChars.length) {
DisjunctionMaxQuery newDmq = new DisjunctionMaxQuery(replaceSeq, Occur.MUST, true);
newDmq.addClause(new Term(newDmq, new SimpleComparableCharSequence(scratchChars, start,
scratchChars.length - start)));
replaceSeq.addClause(newDmq);
}
add.addClause(replaceSeq);
} else {
DisjunctionMaxQuery replaceDmq = new DisjunctionMaxQuery(add, Occur.MUST, true);
replaceDmq.addClause(new Term(replaceDmq, new SimpleComparableCharSequence(scratchChars, 0,
scratchChars.length)));
add.addClause(replaceDmq);
}
BooleanQuery neq = new BooleanQuery(add, Occur.MUST_NOT, true);
for (Term negTerm : terms) {
DisjunctionMaxQuery neqDmq = new DisjunctionMaxQuery(neq, Occur.MUST, true);
neqDmq.addClause(negTerm.clone(neqDmq, true));
neq.addClause(neqDmq);
}
add.addClause(neq);
Set<DisjunctionMaxClause> adds = addenda.get(currentDmq);
if (adds == null) {
adds = new LinkedHashSet<>();
addenda.put(currentDmq, adds);
}
adds.add(add);
}
}
}
}