package querqy.rewrite.contrib;

import querqy.CompoundCharSequence;
import querqy.model.*;
import querqy.rewrite.QueryRewriter;

import java.util.LinkedList;
import java.util.List;

/**
 * <p>A query rewriter that joins two adjacent query terms into a new term and adds this new term
 * to the query as a synonym to the two original terms. A query A B C thus becomes:</p>
 * <pre>
 * (A OR AB) (B OR AB OR BC) (C OR BC)
 * </pre>
 * <p>The resulting structure has the same number of clauses as the original query.</p>
 *
 * <p>An instance keeps per-rewrite state in mutable fields ({@link #previousTerm},
 * {@link #termsToAdd}) without any synchronization, so a single instance must not be used
 * by several threads at the same time.</p>
 *
 * @author muellenborn
 * @author René Kriegler, @renekrie
 */
public class ShingleRewriter extends AbstractNodeVisitor<Node> implements QueryRewriter {

    /** The term visited immediately before the current one, or {@code null} if there is none. */
    Term previousTerm = null;

    /** Shingle terms collected during traversal; attached to their parents after the visit. */
    List<Term> termsToAdd = null;

    /** Whether generated terms may take part in shingles. */
    final boolean acceptGeneratedTerms;

    /**
     * Creates a rewriter that does not accept generated terms.
     */
    public ShingleRewriter() {
        this(false);
    }

    /**
     * Creates a rewriter.
     *
     * @param acceptGeneratedTerms if {@code true}, generated terms are combined into shingles
     *                             as well; if {@code false}, only non-generated terms are used
     */
    public ShingleRewriter(boolean acceptGeneratedTerms) {
        this.acceptGeneratedTerms = acceptGeneratedTerms;
    }

    /**
     * Adds shingle terms to the user query of the given expanded query, modifying it in place.
     *
     * @param query the query to rewrite
     * @return the same {@code query} instance that was passed in
     * @throws IllegalArgumentException if a visited DMQ has a clause layout this rewriter
     *                                  cannot handle (see {@link #visit(DisjunctionMaxQuery)})
     */
    @Override
    public ExpandedQuery rewrite(ExpandedQuery query) {
        Query userQuery = query.getUserQuery();
        if (userQuery != null) {
            // Reset the traversal state so that the instance can be reused for sequential calls.
            previousTerm = null;
            termsToAdd = new LinkedList<>();
            visit(userQuery);
            // The shingles are attached only after the traversal has finished, so the
            // clause lists are not modified while they are being visited.
            for (Term term : termsToAdd) {
                term.getParent().addClause(term);
            }
        }
        return query;
    }

    /**
     * Visits the single clause of the DMQ that is relevant for shingling.
     *
     * <p>An empty DMQ is skipped. A DMQ with exactly one clause is traversed normally. For a DMQ
     * with several clauses, only the one non-generated clause is visited; if all clauses are
     * generated, nothing is visited.</p>
     *
     * @param dmq the disjunction max query to visit
     * @return always {@code null}
     * @throws IllegalArgumentException if the DMQ has more than one clause and generated terms
     *                                  are accepted, or if it has more than one non-generated
     *                                  clause
     */
    @Override
    public Node visit(DisjunctionMaxQuery dmq) {
        List<DisjunctionMaxClause> clauses = dmq.getClauses();
        if (clauses != null) {
            switch (clauses.size()) {
                case 0:
                    break;
                case 1:
                    super.visit(dmq);
                    break;
                default:
                    if (acceptGeneratedTerms) {
                        throw new IllegalArgumentException("cannot handle more then one DMQ clause");
                    }
                    DisjunctionMaxClause nonGeneratedClause = null;
                    for (DisjunctionMaxClause clause : clauses) {
                        if (!clause.isGenerated()) {
                            // second non-generated clause - cannot handle this
                            if (nonGeneratedClause != null) {
                                throw new IllegalArgumentException(
                                        "cannot handle more then one non-generated DMQ clause");
                            }
                            nonGeneratedClause = clause;
                        }
                    }
                    // All clauses may be generated; there is nothing to shingle in that case,
                    // and dereferencing the null clause would throw a NullPointerException.
                    if (nonGeneratedClause != null) {
                        nonGeneratedClause.accept(this);
                    }
                    break;
            }
        }
        return null;
    }

    /**
     * Records a shingle for the given term and its predecessor, provided both have equal fields
     * and both are eligible (see {@link #isEligible(Term)}). The given term then becomes the
     * predecessor for the next visited term, regardless of whether a shingle was created.
     *
     * @param term the term to visit
     * @return the given {@code term}
     */
    @Override
    public Node visit(Term term) {
        if (previousTerm != null
                && eq(previousTerm.getField(), term.getField())
                && isEligible(term)
                && isEligible(previousTerm)) {
            // No separator is passed: the shingle is the two terms joined directly (A B -> AB).
            CharSequence seq = new CompoundCharSequence(null, previousTerm, term);
            // The same shingle value is added as a sibling of both original terms.
            termsToAdd.add(buildShingle(previousTerm, seq));
            termsToAdd.add(buildShingle(term, seq));
        }
        previousTerm = term;
        return term;
    }

    /**
     * Resets the predecessor term before descending, so that no shingle is built from a term
     * visited before this boolean query and the first term inside it.
     *
     * @param bq the boolean query to visit
     * @return the result of the default traversal
     */
    @Override
    public Node visit(BooleanQuery bq) {
        previousTerm = null;
        return super.visit(bq);
    }

    /** Tells whether a term may be part of a shingle under the current configuration. */
    private boolean isEligible(Term term) {
        return acceptGeneratedTerms || !term.isGenerated();
    }

    /** Null-safe equality: two {@code null} values are considered equal. */
    private static <T> boolean eq(T value1, T value2) {
        return value1 == null && value2 == null || value1 != null && value1.equals(value2);
    }

    /**
     * Creates the shingle term as a sibling of {@code term}: same parent, same field, the
     * shingle character sequence as its value, and {@code true} as the last constructor
     * argument (presumably the "generated" flag - verify against {@code Term}).
     */
    private Term buildShingle(Term term, CharSequence seq) {
        return new Term(term.getParent(), term.getField(), seq, true);
    }
}