/**
*
*/
package querqy.rewrite.commonrules.model;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import querqy.CompoundCharSequence;
import querqy.model.InputSequenceElement;
import querqy.model.Term;
import querqy.trie.State;
import querqy.trie.States;
import querqy.trie.TrieMap;
/**
 * A {@link RulesCollection} whose rules are stored in a {@link TrieMap}. Each trie value is the
 * list of {@link Instructions} that belong to one rule input.
 * <p>
 * Lookup keys are built from {@link Term#toCharSequenceWithField(boolean)}; an
 * {@link InputBoundary} element is looked up as {@link #BOUNDARY_WORD}. When a match is continued
 * from one position to the next, the next element is looked up with a leading {@code " "}
 * starting from the trie state that was reached so far.
 *
 * @author René Kriegler, @renekrie
 *
 */
public class TrieMapRulesCollection implements RulesCollection {

    /** Lookup sequence used for {@link InputBoundary} elements. */
    public static final String BOUNDARY_WORD = "\u0002";

    final TrieMap<List<Instructions>> trieMap;
    final boolean ignoreCase;

    /**
     * @param trieMap the rules, keyed by rule input; must not be {@code null}
     * @param ignoreCase passed on to {@link Term#toCharSequenceWithField(boolean)} when lookup
     *                   keys are created
     * @throws IllegalArgumentException if {@code trieMap} is {@code null}
     */
    public TrieMapRulesCollection(TrieMap<List<Instructions>> trieMap, boolean ignoreCase) {
        if (trieMap == null) {
            throw new IllegalArgumentException("trieMap must not be null");
        }
        this.trieMap = trieMap;
        this.ignoreCase = ignoreCase;
    }

    /**
     * Looks up all combinations of the elements found at consecutive positions of the sequence
     * and returns an {@link Action} for every rule input that matches.
     *
     * @param sequence the input; every position holds a list of alternative elements
     * @return the actions for all matches in the order in which they were found; empty (never
     *         {@code null}) if the sequence is empty or nothing matches
     * @throws IllegalArgumentException if a sequence with more than one position contains an
     *         element that is neither a {@link Term} nor an {@link InputBoundary}
     * @see querqy.rewrite.commonrules.model.RulesCollection#getRewriteActions(querqy.rewrite.commonrules.model.PositionSequence)
     */
    @Override
    public List<Action> getRewriteActions(final PositionSequence<InputSequenceElement> sequence) {

        final List<Action> result = new ArrayList<>();
        if (sequence.isEmpty()) {
            return result;
        }

        // We have a list of terms (resulting from DisMax alternatives) per
        // position. We now find all the combinations of terms in different
        // positions and look them up as rules input in the dictionary
        if (sequence.size() == 1) {
            collectActionsForSinglePosition(sequence, result);
        } else {
            collectActionsForMultiplePositions(sequence, result);
        }

        return result;
    }

    /**
     * Handles a sequence with exactly one position: only the terms at that position are looked
     * up (other element types are filtered out), each on its own.
     */
    private void collectActionsForSinglePosition(final PositionSequence<InputSequenceElement> sequence,
                                                 final List<Action> result) {

        for (final Term term : new ClassFilter<>(sequence.getFirst(), Term.class)) {

            final States<List<Instructions>> states = trieMap.get(term.toCharSequenceWithField(ignoreCase));

            final State<List<Instructions>> stateExactMatch = states.getStateForCompleteSequence();
            if (stateExactMatch.isFinal() && stateExactMatch.value != null) {
                result.add(new Action(stateExactMatch.value, new TermMatches(new TermMatch(term)), 0, 1));
            }

            addWildcardActions(states, term, 0, result);
        }
    }

    /**
     * Handles a sequence with more than one position. Walks the positions from left to right
     * and keeps, per position, the partial matches ("prefixes") that can still be continued by
     * an element at the next position.
     */
    private void collectActionsForMultiplePositions(final PositionSequence<InputSequenceElement> sequence,
                                                    final List<Action> result) {

        // partial matches that ended at the previous position
        List<Prefix<List<Instructions>>> prefixes = new LinkedList<>();

        int pos = 0;

        for (final List<InputSequenceElement> position : sequence) {

            // partial matches that end at the current position
            final List<Prefix<List<Instructions>>> newPrefixes = new LinkedList<>();

            boolean anyTermAtPosition = false;

            for (final InputSequenceElement element : position) {

                final Term term; // null if the element is a boundary
                final CharSequence charSequenceForLookup;

                if (element instanceof Term) {
                    term = (Term) element;
                    charSequenceForLookup = term.toCharSequenceWithField(ignoreCase);
                    anyTermAtPosition = true;
                } else if (element instanceof InputBoundary) {
                    term = null;
                    charSequenceForLookup = BOUNDARY_WORD;
                } else {
                    throw new IllegalArgumentException("Cannot handle type of element in sequence " + element);
                }

                // combine term with prefixes (= sequences of terms) that brought us here
                for (final Prefix<List<Instructions>> prefix : prefixes) {
                    continuePrefix(prefix, term, charSequenceForLookup, pos, result, newPrefixes);
                }

                // now see whether the term matches on its own...
                startAtElement(term, charSequenceForLookup, pos, result, newPrefixes);
            }

            prefixes = newPrefixes;

            // a position that holds only boundaries does not advance the position counter
            if (anyTermAtPosition) {
                pos++;
            }
        }
    }

    /**
     * Tries to continue a partial match with the current element.
     *
     * @param prefix the partial match that ended at the previous position
     * @param term the current element if it is a term, {@code null} if it is a boundary
     * @param charSequenceForLookup the lookup sequence of the current element
     * @param pos the current position counter
     * @param result receives an action for every rule input that ends at the current element
     * @param newPrefixes receives the extended partial match if the trie knows the sequence
     */
    private void continuePrefix(final Prefix<List<Instructions>> prefix,
                                final Term term,
                                final CharSequence charSequenceForLookup,
                                final int pos,
                                final List<Action> result,
                                final List<Prefix<List<Instructions>>> newPrefixes) {

        final boolean isTerm = term != null;

        // A term is added to the matches and sits at position pos, a boundary is neither added
        // nor does it occupy a position - ofs compensates for that in the positions passed to
        // the Action.
        final int ofs = isTerm ? 1 : 0;

        final States<List<Instructions>> states = trieMap.get(
                new CompoundCharSequence(null, " ", charSequenceForLookup), prefix.stateInfo);

        // exact matches
        final State<List<Instructions>> stateExactMatch = states.getStateForCompleteSequence();
        if (stateExactMatch.isKnown()) {

            if (stateExactMatch.isFinal()) {
                final TermMatches matches = new TermMatches(prefix.matches);
                if (isTerm) {
                    matches.add(new TermMatch(term));
                }
                result.add(new Action(stateExactMatch.value, matches, pos - matches.size() + ofs, pos + ofs));
            }

            // the sequence is known to the trie, so it might be continued at the next position
            final Prefix<List<Instructions>> newPrefix;
            if (isTerm) {
                newPrefix = new Prefix<>(prefix, new TermMatch(term), stateExactMatch);
            } else {
                newPrefix = new Prefix<>(prefix, stateExactMatch);
            }
            newPrefixes.add(newPrefix);
        }

        // matches for prefixes (= beginnings of terms)
        final List<State<List<Instructions>>> statesForPrefixes = states.getPrefixes();
        if (statesForPrefixes != null) {
            for (final State<List<Instructions>> stateForPrefix : statesForPrefixes) {
                if (stateForPrefix.isFinal() && stateForPrefix.value != null) {
                    final TermMatches matches = new TermMatches(prefix.matches);
                    if (isTerm) {
                        // NOTE(review): the sequence that was looked up here is not the bare
                        // term (it was preceded by " " and created via
                        // toCharSequenceWithField) - confirm that index + 1 is the intended
                        // start of the wildcard part within the term.
                        matches.add(
                                new TermMatch(term,
                                        true,
                                        term.subSequence(stateForPrefix.index + 1, term.length())));
                    }
                    result.add(new Action(stateForPrefix.value, matches, pos - matches.size() + ofs, pos + ofs));
                }
                // TODO: continue with next match after prefix match
            }
        }
    }

    /**
     * Looks up the current element on its own, i.e. as the first element of a rule input.
     *
     * @param term the current element if it is a term, {@code null} if it is a boundary
     * @param charSequenceForLookup the lookup sequence of the current element
     * @param pos the current position counter
     * @param result receives the actions for rule inputs matched by the term alone
     * @param newPrefixes receives a new partial match if the trie knows the sequence
     */
    private void startAtElement(final Term term,
                                final CharSequence charSequenceForLookup,
                                final int pos,
                                final List<Action> result,
                                final List<Prefix<List<Instructions>>> newPrefixes) {

        final boolean isTerm = term != null;

        final States<List<Instructions>> states = trieMap.get(charSequenceForLookup);

        final State<List<Instructions>> stateExactMatch = states.getStateForCompleteSequence();
        if (stateExactMatch.isKnown()) {

            // we do not let match the boundary on its own:
            if (stateExactMatch.isFinal() && isTerm) {
                result.add(new Action(stateExactMatch.value, new TermMatches(new TermMatch(term)), pos, pos + 1));
            }

            // ... and save it as a prefix to the following term
            final Prefix<List<Instructions>> newPrefix;
            if (isTerm) {
                newPrefix = new Prefix<>(new TermMatch(term), stateExactMatch);
            } else {
                newPrefix = new Prefix<>(stateExactMatch);
            }
            newPrefixes.add(newPrefix);
        }

        // wildcard matches are only reported for terms, never for a boundary
        if (isTerm) {
            addWildcardActions(states, term, pos, result);
        }
    }

    /**
     * Adds an action for every final state with a value that the trie reports for a beginning
     * of the looked-up term. The remainder of the term, starting after the state's index, is
     * recorded in the {@link TermMatch}.
     *
     * @param states the lookup result for the term on its own
     * @param term the term that was looked up
     * @param pos the position of the term; the action is created with {@code pos} and
     *            {@code pos + 1}
     * @param result receives the actions
     */
    private static void addWildcardActions(final States<List<Instructions>> states,
                                           final Term term,
                                           final int pos,
                                           final List<Action> result) {

        final List<State<List<Instructions>>> statesForPrefixes = states.getPrefixes();
        if (statesForPrefixes == null) {
            return;
        }

        for (final State<List<Instructions>> stateForPrefix : statesForPrefixes) {
            if (stateForPrefix.isFinal() && stateForPrefix.value != null) {
                result.add(
                        new Action(stateForPrefix.value,
                                new TermMatches(
                                        new TermMatch(term,
                                                true,
                                                term.subSequence(stateForPrefix.index + 1, term.length()))),
                                pos, pos + 1));
                // TODO: continue with next match after prefix match
            }
        }
    }

    /**
     * Collects the individual instructions of all rules held by the trie.
     *
     * @return a new set containing every {@link Instruction} of every {@link Instructions}
     *         value in the trie
     */
    @Override
    public Set<Instruction> getInstructions() {
        final Set<Instruction> result = new HashSet<>();
        for (final List<Instructions> instructionsList : trieMap) {
            for (final Instructions instructions : instructionsList) {
                result.addAll(instructions);
            }
        }
        return result;
    }

    /**
     * A partial match: the term matches collected so far together with the trie state that was
     * reached by looking them up.
     *
     * @param <T> the value type of the trie
     */
    public static class Prefix<T> {

        /** The trie state from which the lookup of the next element continues. */
        final State<T> stateInfo;

        /** The term matches collected so far, in input order. */
        final List<TermMatch> matches;

        /**
         * Creates a prefix that continues {@code prefix} with a further term match. The matches
         * of {@code prefix} are copied, {@code prefix} itself is not modified.
         */
        public Prefix(final Prefix<T> prefix, final TermMatch match, final State<T> stateInfo) {
            matches = new LinkedList<>(prefix.matches);
            addTerm(match);
            this.stateInfo = stateInfo;
        }

        /**
         * Creates a prefix that has the same matches as {@code prefix} (copied) but a new state.
         */
        public Prefix(final Prefix<T> prefix, final State<T> stateInfo) {
            matches = new LinkedList<>(prefix.matches);
            this.stateInfo = stateInfo;
        }

        /** Creates a prefix that starts with a single term match. */
        public Prefix(final TermMatch match, final State<T> stateInfo) {
            matches = new LinkedList<>();
            matches.add(match);
            this.stateInfo = stateInfo;
        }

        /** Creates a prefix without any term match. */
        public Prefix(final State<T> stateInfo) {
            matches = new LinkedList<>();
            this.stateInfo = stateInfo;
        }

        private void addTerm(final TermMatch term) {
            matches.add(term);
        }
    }
}