package querqy.rewrite.contrib;
import java.util.Arrays;
import org.junit.Test;
import querqy.model.*;
import querqy.rewrite.commonrules.AbstractCommonRulesTest;
import querqy.rewrite.commonrules.CommonRulesRewriter;
import querqy.rewrite.commonrules.LineParser;
import querqy.rewrite.commonrules.model.Input;
import querqy.rewrite.commonrules.model.Instruction;
import querqy.rewrite.commonrules.model.Instructions;
import querqy.rewrite.commonrules.model.RulesCollection;
import querqy.rewrite.commonrules.model.RulesCollectionBuilder;
import querqy.rewrite.commonrules.model.SynonymInstruction;
import querqy.rewrite.commonrules.model.TrieMapRulesCollectionBuilder;
import static org.hamcrest.MatcherAssert.assertThat;
import static querqy.QuerqyMatchers.*;
/**
 * Tests for {@link ShingleRewriter}.
 *
 * <p>Each test builds a {@link Query} whose clauses are {@link DisjunctionMaxQuery} instances holding
 * a single {@link Term}, runs the rewriter, and then checks the resulting user query with the
 * matchers from {@code QuerqyMatchers}. A "shingle" in these tests is the concatenation of the
 * values of two neighbouring terms (for example {@code "cde"} + {@code "ajk"} gives
 * {@code "cdeajk"}); the expectations show it being added to the clauses of both neighbours.
 */
public class ShingleRewriteTest extends AbstractCommonRulesTest {

    /**
     * Two field-less terms: both clauses are expected to receive the shingle {@code "cdeajk"}.
     */
    @Test
    public void testShinglingForTwoTokens() {
        Query query = new Query();
        addTerm(query, "cde");
        addTerm(query, "ajk");
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter();
        rewriter.rewrite(expandedQuery);

        assertThat(expandedQuery.getUserQuery(),
                bq(
                        dmq(
                                term("cde"),
                                term("cdeajk")
                        ),
                        dmq(
                                term("ajk"),
                                term("cdeajk")
                        )
                )
        );
    }

    /**
     * A query with a single term has no neighbour to shingle with: the rewrite must complete and
     * leave the lone term as it was.
     */
    @Test
    public void testThatShinglingDoesNotTriggerExceptionOnSingleTerm() throws Exception {
        Query query = new Query();
        addTerm(query, "t1");
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter();
        rewriter.rewrite(expandedQuery);

        assertThat(expandedQuery.getUserQuery(),
                bq(
                        dmq(
                                term("t1")
                        )
                )
        );
    }

    /**
     * Two terms on the same field {@code f1}: the shingle is expected on that same field.
     */
    @Test
    public void testShinglingForTwoTokensWithSameField() {
        Query query = new Query();
        addTerm(query, "f1", "cde");
        addTerm(query, "f1", "ajk");
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter();
        rewriter.rewrite(expandedQuery);

        assertThat(expandedQuery.getUserQuery(),
                bq(
                        dmq(
                                term("f1", "cde"),
                                term("f1", "cdeajk")
                        ),
                        dmq(
                                term("f1", "ajk"),
                                term("f1", "cdeajk")
                        )
                )
        );
    }

    /**
     * Two generated terms on the same field, with the rewriter constructed with {@code true}:
     * the generated terms are expected to be shingled like ordinary ones.
     */
    @Test
    public void testShinglingForTwoTokensWithSameFieldAndGeneratedFlag() {
        Query query = new Query();
        addTerm(query, "f1", "cde", true);
        addTerm(query, "f1", "ajk", true);
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter(true);
        rewriter.rewrite(expandedQuery);

        assertThat(expandedQuery.getUserQuery(),
                bq(
                        dmq(
                                term("f1", "cde"),
                                term("f1", "cdeajk")
                        ),
                        dmq(
                                term("f1", "ajk"),
                                term("f1", "cdeajk")
                        )
                )
        );
    }

    /**
     * Two terms on different fields ({@code f1} and {@code f2}): no shingle is expected and each
     * term must keep its own field.
     */
    @Test
    public void testShinglingForTwoTokensWithDifferentFieldsDontShingle() {
        Query query = new Query();
        addTerm(query, "f1", "cde");
        addTerm(query, "f2", "ajk");
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter();
        rewriter.rewrite(expandedQuery);

        // Assert the fields explicitly, as the sibling tests do for unshingled terms, so the
        // check covers the field of each term and not only its value.
        assertThat(expandedQuery.getUserQuery(),
                bq(
                        dmq(term("f1", "cde")),
                        dmq(term("f2", "ajk"))
                )
        );
    }

    /**
     * One term on field {@code f1} and one without a field: no shingle is expected.
     */
    @Test
    public void testShinglingForTwoTokensWithOnFieldNameNullDontShingle() {
        Query query = new Query();
        addTerm(query, "f1", "cde");
        addTerm(query, "ajk");
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter();
        rewriter.rewrite(expandedQuery);

        assertThat(expandedQuery.getUserQuery(),
                bq(
                        dmq(term("f1", "cde")),
                        dmq(term("ajk"))
                )
        );
    }

    /**
     * Three field-less terms: the middle clause is expected to receive both shingles, the outer
     * clauses one each.
     */
    @Test
    public void testShinglingForThreeTokens() {
        Query query = new Query();
        addTerm(query, "cde");
        addTerm(query, "ajk");
        addTerm(query, "xyz");
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter();
        rewriter.rewrite(expandedQuery);

        assertThat(expandedQuery.getUserQuery(),
                bq(
                        dmq(
                                term("cde"),
                                term("cdeajk")
                        ),
                        dmq(
                                term("ajk"),
                                term("cdeajk"),
                                term("ajkxyz")
                        ),
                        dmq(
                                term("xyz"),
                                term("ajkxyz")
                        )
                )
        );
    }

    /**
     * Three generated terms, with the rewriter constructed with {@code true}: the same shingles
     * as for three ordinary terms are expected.
     */
    @Test
    public void testShinglingForThreeTokensWithThreeTokenGenerated() {
        Query query = new Query();
        addTerm(query, "cde", true);
        addTerm(query, "ajk", true);
        addTerm(query, "xyz", true);
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter(true);
        rewriter.rewrite(expandedQuery);

        assertThat(expandedQuery.getUserQuery(),
                bq(
                        dmq(
                                term("cde"),
                                term("cdeajk")
                        ),
                        dmq(
                                term("ajk"),
                                term("cdeajk"),
                                term("ajkxyz")
                        ),
                        dmq(
                                term("xyz"),
                                term("ajkxyz")
                        )
                )
        );
    }

    /**
     * Two ordinary terms followed by one generated term, with the rewriter constructed with
     * {@code false}: only the two ordinary terms are expected to be shingled; the generated term
     * stays alone in its clause.
     */
    @Test
    public void testShinglingForThreeTokensWithOneTokenGeneratedIgnoringGenerated() {
        Query query = new Query();
        addTerm(query, "cde", false);
        addTerm(query, "ajk", false);
        addTerm(query, "xyz", true);
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter(false);
        rewriter.rewrite(expandedQuery);

        assertThat(expandedQuery.getUserQuery(),
                bq(
                        dmq(
                                term("cde"),
                                term("cdeajk")
                        ),
                        dmq(
                                term("ajk"),
                                term("cdeajk")
                        ),
                        dmq(
                                term("xyz")
                        )
                )
        );
    }

    /**
     * Two terms on field {@code f1} followed by one on field {@code f2}: only the two
     * {@code f1} terms are expected to be shingled; the {@code f2} term is left untouched.
     */
    @Test
    public void testShinglingForThreeTokensWithMixedFields() {
        Query query = new Query();
        addTerm(query, "f1", "cde");
        addTerm(query, "f1", "ajk");
        addTerm(query, "f2", "xyz");
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter();
        rewriter.rewrite(expandedQuery);

        assertThat(expandedQuery.getUserQuery(),
                bq(
                        dmq(
                                term("f1", "cde"),
                                term("f1", "cdeajk")
                        ),
                        dmq(
                                term("f1", "ajk"),
                                term("f1", "cdeajk")
                        ),
                        dmq(term("f2", "xyz"))
                )
        );
    }

    /**
     * Chains a {@link CommonRulesRewriter} with a {@link ShingleRewriter}.
     *
     * <p>The rule collection holds one rule with input {@code p1*} and a synonym instruction made
     * of the terms {@code p1} and {@code $1}. For the query {@code "p1xyz t2"} the expectation is
     * that the first clause contains the original term, the generated synonym sub-query
     * ({@code p1} and {@code xyz}, both mandatory) and the generated shingle {@code "p1xyzt2"},
     * while the second clause contains {@code t2} and the same shingle. No shingles built from
     * the generated synonym terms are expected.
     */
    @Test
    public void testChainingWithWildCard() throws Exception {
        RulesCollectionBuilder builder = new TrieMapRulesCollectionBuilder(false);
        SynonymInstruction synInstruction = new SynonymInstruction(Arrays.asList(mkTerm("p1"), mkTerm("$1")));
        builder.addRule((Input) LineParser.parseInput("p1*"),
                new Instructions(Arrays.asList((Instruction) synInstruction)));

        RulesCollection rules = builder.build();
        CommonRulesRewriter commonRulesRewriter = new CommonRulesRewriter(rules);
        ShingleRewriter shingleRewriter = new ShingleRewriter(false);

        ExpandedQuery query = makeQuery("p1xyz t2");
        query = commonRulesRewriter.rewrite(query, EMPTY_CONTEXT);
        query = shingleRewriter.rewrite(query);

        assertThat(query.getUserQuery(),
                bq(
                        dmq(
                                term("p1xyz", false),
                                bq(
                                        dmq(must(), term("p1", true)),
                                        dmq(must(), term("xyz", true))
                                ),
                                term("p1xyzt2", true)
                        ),
                        dmq(
                                term("t2", false),
                                term("p1xyzt2", true)
                        )
                )
        );
    }

    /**
     * Terms that contain, or consist only of, a hyphen are expected to be shingled like any other
     * term; the resulting shingles are expected to be flagged as generated.
     */
    @Test
    public void testShingleWithHyphens() throws Exception {
        Query query = new Query();
        addTerm(query, "cde-fgh", false);
        addTerm(query, "-", false);
        addTerm(query, "xyz", false);
        ExpandedQuery expandedQuery = new ExpandedQuery(query);
        ShingleRewriter rewriter = new ShingleRewriter(false);
        ExpandedQuery rewritten = rewriter.rewrite(expandedQuery);

        assertThat(rewritten.getUserQuery(),
                bq(
                        dmq(
                                term("cde-fgh", false),
                                term("cde-fgh-", true)
                        ),
                        dmq(
                                term("-", false),
                                term("cde-fgh-", true),
                                term("-xyz", true)
                        ),
                        dmq(
                                term("xyz", false),
                                term("-xyz", true)
                        )
                )
        );
    }

    /**
     * Appends a new clause holding a single field-less term to {@code query}.
     *
     * @param query the query to extend
     * @param value the term value
     */
    private void addTerm(Query query, String value) {
        addTerm(query, null, value);
    }

    /**
     * Appends a new {@code SHOULD} {@link DisjunctionMaxQuery} clause to {@code query} and puts a
     * single term into it.
     *
     * @param query the query to extend
     * @param field the field of the term; {@code null} for a term without a field
     * @param value the term value
     */
    private void addTerm(Query query, String field, String value) {
        DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(query, Clause.Occur.SHOULD, true);
        query.addClause(dmq);
        Term term = new Term(dmq, field, value);
        dmq.addClause(term);
    }

    /**
     * Appends a new clause holding a single field-less term with an explicit generated flag.
     *
     * @param query the query to extend
     * @param value the term value
     * @param isGenerated the generated flag passed to the {@link Term} constructor
     */
    private void addTerm(Query query, String value, boolean isGenerated) {
        addTerm(query, null, value, isGenerated);
    }

    /**
     * Appends a new {@code SHOULD} {@link DisjunctionMaxQuery} clause to {@code query} and puts a
     * single term with an explicit generated flag into it.
     *
     * @param query the query to extend
     * @param field the field of the term; {@code null} for a term without a field
     * @param value the term value
     * @param isGenerated the generated flag passed to the {@link Term} constructor
     */
    private void addTerm(Query query, String field, String value, boolean isGenerated) {
        DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(query, Clause.Occur.SHOULD, true);
        query.addClause(dmq);
        Term term = new Term(dmq, field, value, isGenerated);
        dmq.addClause(term);
    }
}