package org.apache.lucene.queryparser.spans;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
import org.apache.lucene.queryparser.tmpspans.util.QueryParserTestBase;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.junit.Ignore;
public class TestQPTestBaseSpanQuery extends QueryParserTestBase {
/**
 * Checks that a freshly built parser configuration reports OR as its default
 * operator, and that the AND/OR setters are reflected by
 * {@code getDefaultOperator()}.
 */
@Override
public void testDefaultOperator() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  SQPTestingConfig config = (SQPTestingConfig) getParserConfig(analyzer);

  // OR must be the operator right after construction.
  assertEquals(QueryParserBase.OR_OPERATOR, config.getDefaultOperator());

  // Flip to AND and back to OR, checking the getter after each change.
  setDefaultOperatorAND(config);
  assertEquals(QueryParserBase.AND_OPERATOR, config.getDefaultOperator());
  setDefaultOperatorOR(config);
  assertEquals(QueryParserBase.OR_OPERATOR, config.getDefaultOperator());
}
/**
 * Overrides the inherited test with an empty body, so nothing is exercised
 * here.
 */
@Override
public void testStarParsing() throws Exception {
// TODO: still the auto-generated stub; no span-parser version of this test has been written.
}
/**
 * Overrides the inherited test with an empty body, so nothing is exercised
 * here.
 */
@Override
public void testNewFieldQuery() throws Exception {
// TODO: still the auto-generated stub; no span-parser version of this test has been written.
}
/**
 * Builds a parser configuration for the given analyzer by delegating to
 * {@link #getParserConfig(Analyzer, Analyzer)} with no multi-term analyzer.
 *
 * @param a analyzer to use; may be {@code null}
 * @return the configuration produced by the two-argument overload
 */
@Override
public CommonQueryParserConfiguration getParserConfig(Analyzer a) throws Exception {
  // Passing null lets the two-argument overload pick its own multi-term analyzer.
  final Analyzer noMultiTermAnalyzer = null;
  return getParserConfig(a, noMultiTermAnalyzer);
}
/**
 * Builds an {@link SQPTestingConfig} over the default field.
 *
 * <p>The configuration is created with OR as the default operator and with
 * range-term analysis switched on.
 *
 * @param a analyzer to use; when {@code null} a lower-casing
 *     {@code MockTokenizer.SIMPLE} mock analyzer is substituted
 * @param mtAnalyzer multi-term analyzer; when {@code null} a lower-casing
 *     {@code MockTokenizer.KEYWORD} mock analyzer is substituted
 * @return the new configuration
 */
public CommonQueryParserConfiguration getParserConfig(Analyzer a, Analyzer mtAnalyzer)
    throws Exception {
  Analyzer analyzer =
      (a != null) ? a : new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  Analyzer multiTermAnalyzer =
      (mtAnalyzer != null) ? mtAnalyzer : new MockAnalyzer(random(), MockTokenizer.KEYWORD, true);

  SQPTestingConfig config =
      new SQPTestingConfig(getDefaultField(), analyzer, multiTermAnalyzer);
  config.setDefaultOperator(QueryParserBase.OR_OPERATOR);
  config.setAnalyzeRangeTerms(true);
  return config;
}
/**
 * Switches the default operator of the given configuration to OR.
 *
 * @param cqpC configuration to modify; must be an {@link SQPTestingConfig}
 */
@Override
public void setDefaultOperatorOR(CommonQueryParserConfiguration cqpC) {
  assert cqpC instanceof SQPTestingConfig;
  SQPTestingConfig config = (SQPTestingConfig) cqpC;
  config.setDefaultOperator(Operator.OR);
}
/**
 * Switches the default operator of the given configuration to AND.
 *
 * @param cqpC configuration to modify; must be an {@link SQPTestingConfig}
 */
@Override
public void setDefaultOperatorAND(CommonQueryParserConfiguration cqpC) {
  assert cqpC instanceof SQPTestingConfig;
  SQPTestingConfig config = (SQPTestingConfig) cqpC;
  config.setDefaultOperator(Operator.AND);
}
/**
 * Forwards the auto-generate-phrase-queries flag to the given configuration.
 *
 * @param cqpC configuration to modify; must be an {@link SQPTestingConfig}
 * @param value flag value to set
 */
@Override
public void setAutoGeneratePhraseQueries(CommonQueryParserConfiguration cqpC, boolean value) {
  assert (cqpC instanceof SQPTestingConfig);
  SQPTestingConfig config = (SQPTestingConfig) cqpC;
  config.setAutoGeneratePhraseQueries(value);
}
/**
 * Does nothing: the requested date resolution is ignored and none of the
 * arguments are read.
 *
 * @param cqpC ignored
 * @param field ignored
 * @param value ignored
 */
@Override
public void setDateResolution(CommonQueryParserConfiguration cqpC,
CharSequence field, Resolution value) {
//no-op
}
/**
 * Parses {@code query} with the {@link SpanQueryParser} configured by the
 * given configuration.
 *
 * @param query query string to parse
 * @param cqpC configuration to parse with; must be a non-null
 *     {@link SQPTestingConfig}
 * @return the parsed query
 * @throws Exception if parsing fails
 */
@Override
public Query getQuery(String query, CommonQueryParserConfiguration cqpC)
    throws Exception {
  assert cqpC != null : "Parameter must not be null";
  // Message previously read "instance ofSQPTestingConfig" (missing space).
  assert (cqpC instanceof SQPTestingConfig) : "Parameter must be instance of SQPTestingConfig";
  SpanQueryParser qp = ((SQPTestingConfig) cqpC).getConfiguredParser();
  return qp.parse(query);
}
/**
 * Parses {@code query} with a parser configured for the given analyzer.
 *
 * @param query query string to parse
 * @param a analyzer handed to {@link #getParserConfig(Analyzer)}
 * @return the parsed query
 * @throws Exception if the configuration cannot be built or parsing fails
 */
@Override
public Query getQuery(String query, Analyzer a) throws Exception {
  SpanQueryParser parser = ((SQPTestingConfig) getParserConfig(a)).getConfiguredParser();
  return parser.parse(query);
}
/**
 * Tells whether the given exception is a {@link ParseException}.
 *
 * @param exception exception to classify
 * @return {@code true} if {@code exception} is a {@code ParseException}
 */
@Override
public boolean isQueryParserException(Exception exception) {
  return ParseException.class.isInstance(exception);
}
/**
 * Parses {@code query} with the given configuration and asserts that its
 * string form, rendered against {@code field}, equals {@code result}.
 *
 * <p>A {@link SpanMultiTermQueryWrapper} is unwrapped first, so the string
 * compared is that of the wrapped query.
 *
 * @param cqpC configuration to parse with
 * @param field field passed to {@code Query.toString(String)}
 * @param query query string to parse
 * @param result expected string form
 */
@Override
public void assertQueryEquals(CommonQueryParserConfiguration cqpC, String field, String query, String result) throws Exception {
  Query parsed = getQuery(query, cqpC);
  Query comparable = parsed;
  if (parsed instanceof SpanMultiTermQueryWrapper) {
    //TODO: we used to set boost here
    comparable = ((SpanMultiTermQueryWrapper<?>) parsed).getWrappedQuery();
  }
  assertEquals(result, comparable.toString(field));
}
/**
 * Parses {@code query} with the given analyzer and asserts that its string
 * form, rendered against the field {@code "field"}, equals {@code result}.
 *
 * <p>Two normalizations are applied before comparing: a
 * {@link SpanMultiTermQueryWrapper} is replaced by the query it wraps, and a
 * {@link SpanOrQuery} without clauses is replaced by an empty
 * {@link BooleanQuery}.
 *
 * @param query query string to parse
 * @param a analyzer to parse with
 * @param result expected string form
 */
@Override
public void assertQueryEquals(String query, Analyzer a, String result) throws Exception {
  Query parsed = getQuery(query, a);
  final Query comparable;
  if (parsed instanceof SpanMultiTermQueryWrapper) {
    //TODO: we used to set boost here
    comparable = ((SpanMultiTermQueryWrapper<?>) parsed).getWrappedQuery();
  } else if (parsed instanceof SpanOrQuery && ((SpanOrQuery) parsed).getClauses().length == 0) {
    comparable = new BooleanQuery.Builder().build();
  } else {
    comparable = parsed;
  }
  assertEquals(result, comparable.toString("field"));
}
/**
 * Parses {@code query} with the given analyzer and asserts that its string
 * form, rendered against the field {@code "field"}, equals {@code result}.
 *
 * <p>The body used to be a line-for-line copy of
 * {@link #assertQueryEquals(String, Analyzer, String)}; it now delegates to
 * that method so the two cannot drift apart.
 *
 * @param query query string to parse
 * @param a analyzer to parse with
 * @param result expected string form
 */
public void assertQueryEqualsCMP(String query, Analyzer a, String result) throws Exception {
  assertQueryEquals(query, a, result);
}
/**
 * Compares an expected query with the query produced by the span parser,
 * tolerating the structural differences handled below.
 *
 * <ul>
 *   <li>Boosts: both queries must be {@link BoostQuery} with boosts equal
 *       within 0.0001, or neither may be.</li>
 *   <li>A {@link SpanMultiTermQueryWrapper} under test is replaced by its
 *       wrapped query before the inherited comparison.</li>
 *   <li>A clause-less {@link SpanOrQuery} under test is replaced by an empty
 *       {@link BooleanQuery}.</li>
 *   <li>Two {@link BooleanQuery}s are compared clause by clause (occur and,
 *       recursively, sub-query), in order.</li>
 *   <li>A {@link SpanNearQuery} under test is compared to an expected
 *       {@link PhraseQuery} by term strings and slop.</li>
 * </ul>
 *
 * Anything else falls through to the inherited comparison.
 *
 * @param expected the reference query
 * @param test the query produced by the parser under test
 */
@Override
public void assertQueryEquals(Query expected, Query test) {
  if (expected instanceof BoostQuery) {
    if (test instanceof BoostQuery) {
      assertEquals("boost", ((BoostQuery) expected).getBoost(),
          ((BoostQuery) test).getBoost(), 0.0001f);
    } else {
      fail("expected is boost, but test is not");
    }
  } else if (test instanceof BoostQuery) {
    fail("expected not boost, but test is");
  }

  if (test instanceof SpanMultiTermQueryWrapper) {
    //TODO we used to set boost
    test = ((SpanMultiTermQueryWrapper<?>) test).getWrappedQuery();
  } else if (test instanceof SpanOrQuery) {
    if (((SpanOrQuery) test).getClauses().length == 0) {
      test = new BooleanQuery.Builder().build();
    }
  } else if (test instanceof BooleanQuery && expected instanceof BooleanQuery) {
    //lots of reasons why this simple equivalence won't work
    //but it works well enough for current tests
    List<BooleanClause> exClause = ((BooleanQuery) expected).clauses();
    List<BooleanClause> testClause = ((BooleanQuery) test).clauses();
    assertEquals("boolean clause length =", exClause.size(), testClause.size());
    for (int i = 0; i < exClause.size(); i++) {
      // assertEquals (rather than assertTrue on equals) reports both values on failure.
      assertEquals("occur of clause " + i,
          exClause.get(i).getOccur(), testClause.get(i).getOccur());
      //recur
      assertQueryEquals(exClause.get(i).getQuery(), testClause.get(i).getQuery());
    }
    return;
  } else if (test instanceof SpanNearQuery && expected instanceof PhraseQuery) {
    //lots of reasons why this simple equivalence won't work
    //but it works well enough for current tests
    Term[] exTerms = ((PhraseQuery) expected).getTerms();
    SpanQuery[] testClauses = ((SpanNearQuery) test).getClauses();
    assertEquals("phrase clause length =", exTerms.length, testClauses.length);
    for (int i = 0; i < exTerms.length; i++) {
      assertEquals(exTerms[i].field() + ":" + exTerms[i].text(),
          testClauses[i].toString());
    }
    // Expected value first, so a failure message labels the two slops correctly.
    assertEquals("slop", ((PhraseQuery) expected).getSlop(), ((SpanNearQuery) test).getSlop());
    return;
  }
  super.assertQueryEquals(expected, test);
}
/*
@Override
public void assertFuzzyQueryEquals(String field, String term, int maxEdits, int prefixLen, Query query) {
assert(query instanceof SpanMultiTermQueryWrapper);
@SuppressWarnings("rawtypes")
Query wrapped = ((SpanMultiTermQueryWrapper)query).getWrappedQuery();
super.assertFuzzyQueryEquals(field, term, maxEdits, prefixLen, wrapped);
}
*/
/**
 * Parses {@code query} with a default configuration whose leading-wildcard
 * flag is set as requested, and asserts its string form against
 * {@code result} for the field {@code "field"}.
 *
 * @param query query string to parse
 * @param result expected string form
 * @param allowLeadingWildcard value passed to {@code setAllowLeadingWildcard}
 */
@Override
public void assertWildcardQueryEquals(String query, String result, boolean allowLeadingWildcard) throws Exception {
  CommonQueryParserConfiguration config = getParserConfig((Analyzer) null);
  config.setAllowLeadingWildcard(allowLeadingWildcard);
  assertQueryEquals(config, "field", query, result);
}
/**
 * Parses {@code query} with a default configuration and asserts its string
 * form against {@code result} for the field {@code "field"}.
 *
 * @param query query string to parse
 * @param result expected string form
 */
@Override
public void assertWildcardQueryEquals(String query, String result) throws Exception {
  CommonQueryParserConfiguration config = getParserConfig((Analyzer) null);
  assertQueryEquals(config, "field", query, result);
}
/**
 * Asserts that {@code q} is an instance of {@code other}, treating span
 * queries as stand-ins for their classic counterparts.
 *
 * <p>A {@link SpanMultiTermQueryWrapper} is unwrapped and the wrapped query is
 * checked by the inherited method. The pairs {@link SpanTermQuery} /
 * {@link TermQuery}, {@link SpanNearQuery} / {@link PhraseQuery} and
 * {@link SpanOrQuery} / {@link BooleanQuery} are accepted without further
 * checks. The earlier version signalled acceptance with
 * {@code assertTrue(msg, true)}, which can never fail; those calls are gone.
 *
 * @param q query to check
 * @param other class the query is expected to be an instance of
 */
@SuppressWarnings("rawtypes")
@Override
public void assertInstanceOf(Query q, Class other) {
  if (q instanceof SpanMultiTermQueryWrapper) {
    super.assertInstanceOf(((SpanMultiTermQueryWrapper) q).getWrappedQuery(), other);
    return;
  }
  boolean acceptedSpanEquivalent =
      (q instanceof SpanTermQuery && other.equals(TermQuery.class))
          || (q instanceof SpanNearQuery && other.equals(PhraseQuery.class))
          || (q instanceof SpanOrQuery && other.equals(BooleanQuery.class));
  if (acceptedSpanEquivalent) {
    return;
  }
  super.assertInstanceOf(q, other);
}
// ---- Overridden tests follow ----

/**
 * Parses {@code [abc TO def]} with {@code MockCollationAnalyzer} as both the
 * regular and the multi-term analyzer, and expects an inclusive string range
 * from {@code collatedabc} to {@code collateddef} on the default field.
 */
@Override
public void testCollatedRange() throws Exception {
  CommonQueryParserConfiguration config =
      getParserConfig(new MockCollationAnalyzer(), new MockCollationAnalyzer());
  Query wanted = TermRangeQuery.newStringRange(
      getDefaultField(), "collatedabc", "collateddef", true, true);
  Query parsed = getQuery("[abc TO def]", config);
  assertQueryEquals(wanted, parsed);
}
/**
 * A quoted two-character CJK phrase with slop 3 must parse to an unordered
 * {@link SpanNearQuery} with slop 3 over one {@link SpanTermQuery} per
 * character.
 */
@Override
public void testCJKSloppyPhrase() throws Exception {
  // individual CJK chars as terms
  SimpleCJKAnalyzer cjk = new SimpleCJKAnalyzer();
  SpanQuery[] perCharacter = {
      new SpanTermQuery(new Term("field", "中")),
      new SpanTermQuery(new Term("field", "国"))
  };
  SpanNearQuery wanted = new SpanNearQuery(perCharacter, 3, false);
  assertEquals(wanted, getQuery("\"中国\"~3", cjk));
}
/**
 * A quoted two-character CJK phrase must parse to an ordered
 * {@link SpanNearQuery} with slop 0 over one {@link SpanTermQuery} per
 * character.
 */
@Override
public void testCJKPhrase() throws Exception {
  // individual CJK chars as terms
  SimpleCJKAnalyzer cjk = new SimpleCJKAnalyzer();
  SpanQuery[] perCharacter = {
      new SpanTermQuery(new Term("field", "中")),
      new SpanTermQuery(new Term("field", "国"))
  };
  SpanNearQuery wanted = new SpanNearQuery(perCharacter, 0, true);
  assertEquals(wanted, getQuery("\"中国\"", cjk));
}
/**
 * A quoted two-character CJK phrase boosted by 0.5 must parse to a
 * {@link SpanBoostQuery} of 0.5 around an ordered, slop-0
 * {@link SpanNearQuery} over one {@link SpanTermQuery} per character.
 */
@Override
public void testCJKBoostedPhrase() throws Exception {
  // individual CJK chars as terms
  SimpleCJKAnalyzer cjk = new SimpleCJKAnalyzer();
  SpanQuery[] perCharacter = {
      new SpanTermQuery(new Term("field", "中")),
      new SpanTermQuery(new Term("field", "国"))
  };
  SpanQuery phrase = new SpanNearQuery(perCharacter, 0, true);
  SpanQuery wanted = new SpanBoostQuery(phrase, 0.5f);
  assertEquals(wanted, getQuery("\"中国\"^0.5", cjk));
}
/**
 * Overrides the inherited test with an empty body, so nothing is exercised
 * here.
 */
@Override
public void testPhraseQueryToString() throws Exception {
// Left empty: SpanNearQuery currently has no equivalent for phrases containing stop words.
}
/**
 * Checks that stop words are dropped from a quoted phrase.
 *
 * <p>For the span parser this only verifies which terms survive; position
 * increments are not available in span queries yet. The phrase is parsed with
 * a lower-casing {@code MockTokenizer.SIMPLE} analyzer using the English stop
 * set, and the resulting {@link SpanNearQuery} must consist of exactly the
 * five expected terms.
 *
 * <p>Earlier the expected term set was built but never compared with the
 * parsed clauses, so only the clause count was verified.
 */
@Override
public void testPositionIncrement() throws Exception {
  CommonQueryParserConfiguration qp = getParserConfig(
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
  //qp.setEnablePositionIncrements(true);
  String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
  // surviving positions: 0 2 5 7 8
  SpanNearQuery pq = (SpanNearQuery) getQuery(qtxt, qp);
  SpanQuery[] clauses = pq.getClauses();
  // Expected value goes first so a failure message reads correctly.
  assertEquals(5, clauses.length);

  Set<Term> expected = new HashSet<>();
  expected.add(new Term("field", "words"));
  expected.add(new Term("field", "poisitions"));
  expected.add(new Term("field", "pos"));
  expected.add(new Term("field", "stopped"));
  expected.add(new Term("field", "phrasequery"));

  // Collect the terms actually produced and compare them with the expectation.
  Set<Term> actual = new HashSet<>();
  for (SpanQuery clause : clauses) {
    assertTrue("expected a SpanTermQuery but got: " + clause, clause instanceof SpanTermQuery);
    actual.add(((SpanTermQuery) clause).getTerm());
  }
  assertEquals(expected, actual);
}
/**
 * Overrides the inherited test with an empty body, so nothing is exercised
 * here.
 */
@Override
public void testPositionIncrements() throws Exception {
// Does not apply: known issue with SpanQueries and stop words.
}
/**
 * Overrides the inherited test with an empty body, so nothing is exercised
 * here.
 */
@Override
public void testPhraseQueryPositionIncrements() throws Exception {
// Does not apply to this parser.
}
/**
 * Overrides the inherited test with an empty body, so nothing is exercised
 * here.
 */
@Override
public void testDateRange() throws Exception {
// No-op: dates are no longer parsed in range queries by SpanQueryParser.
}
//the following are added directly from TestQueryParser.
//should refactor so that getQuery etc is used.
/**
 * Analyzer whose token stream is a default {@link MockTokenizer} fed through a
 * {@code MockSynonymFilter}; per the original note, this adds "dog" as a
 * synonym of "dogs".
 */
static class MockSynonymAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    MockTokenizer source = new MockTokenizer();
    MockSynonymFilter withSynonyms = new MockSynonymFilter(source);
    return new TokenStreamComponents(source, withSynonyms);
  }
}
/**
 * Checks how a single term that has a synonym is parsed with
 * {@link MockSynonymAnalyzer}.
 *
 * <p>Unquoted {@code dogs} must give a {@link SynonymQuery} over "dog" and
 * "dogs"; quoted {@code "dogs"} must give a {@link SpanOrQuery} over the same
 * two terms. Both expectations are checked with the initial default operator,
 * again after switching to AND, and finally with a boost of 2.
 *
 * <p>An unused {@code BooleanQuery.Builder} local that was filled but never
 * compared against anything has been removed.
 */
public void testSynonyms() throws Exception {
  SpanQuery expectedSpan = new SpanOrQuery(
      new SpanQuery[]{
          new SpanTermQuery(new Term("field", "dog")),
          new SpanTermQuery(new Term("field", "dogs"))
      });
  Query expected = new SynonymQuery(
      new Term("field", "dog"),
      new Term("field", "dogs")
  );
  SpanQueryParser qp = new SpanQueryParser("field", new MockSynonymAnalyzer(), null);
  assertEquals(expected, qp.parse("dogs"));
  assertEquals(expectedSpan, qp.parse("\"dogs\""));

  // The default operator must not influence how synonyms are combined.
  qp.setDefaultOperator(Operator.AND);
  assertEquals(expected, qp.parse("dogs"));
  assertEquals(expectedSpan, qp.parse("\"dogs\""));

  // Boosts wrap the whole synonym group.
  expected = new BoostQuery(expected, 2f);
  expectedSpan = new SpanBoostQuery(expectedSpan, 2f);
  assertEquals(expected, qp.parse("dogs^2"));
  assertEquals(expectedSpan, qp.parse("\"dogs\"^2"));
}
/**
 * Checks phrase parsing when one phrase term has a synonym.
 *
 * <p>{@code "old dogs"} must become a {@link SpanNearQuery} whose second
 * clause is a {@link SpanOrQuery} over "dog" and "dogs". The expectation is
 * checked with the initial default operator, after switching to AND, with a
 * boost of 2, and with slop 3 plus a boost of 2 (unordered).
 *
 * @throws Exception if something goes wrong
 */
public void testSynonymsPhrase() throws Exception {
  SpanQuery old = new SpanTermQuery(new Term("field", "old"));
  SpanQuery dogOrDogs = new SpanOrQuery(
      new SpanTermQuery(new Term("field", "dog")),
      new SpanTermQuery(new Term("field", "dogs")));

  SpanQueryParser parser = new SpanQueryParser("field", new MockSynonymAnalyzer(), null);

  // Exact phrase: ordered, slop 0.
  SpanQuery exactPhrase = new SpanNearQuery(new SpanQuery[] {old, dogOrDogs}, 0, true);
  assertEquals(exactPhrase, parser.parse("\"old dogs\""));
  parser.setDefaultOperator(QueryParser.Operator.AND);
  assertEquals(exactPhrase, parser.parse("\"old dogs\""));

  SpanQuery boostedExact = new SpanBoostQuery(exactPhrase, 2.0f);
  assertEquals(boostedExact, parser.parse("\"old dogs\"^2"));

  // Sloppy phrase: unordered, slop 3, boosted.
  SpanQuery sloppyPhrase = new SpanNearQuery(new SpanQuery[] {old, dogOrDogs}, 3, false);
  SpanQuery boostedSloppy = new SpanBoostQuery(sloppyPhrase, 2.0f);
  assertEquals(boostedSloppy, parser.parse("\"old dogs\"~3^2"));
}
/**
 * Checks two slop edge cases: a single-term phrase with slop renders as the
 * bare term, and a blank phrase with slop disappears next to another term.
 */
@Override
public void testSlop() throws Exception {
  // A null analyzer makes getParserConfig substitute its default mock analyzer.
  final Analyzer defaultAnalyzer = null;
  assertQueryEquals("\"term\"~2", defaultAnalyzer, "term");
  assertQueryEquals("\" \"~2 germ", defaultAnalyzer, "germ");
}
//string query equality tests that have to be rewritten
//if parser is generating a SpanQuery
/**
 * String-equality checks whose expected output differs from the classic parser
 * because this parser produces span queries ({@code spanNear(...)}) for
 * phrases. Covers boosted and plain phrases, regex queries, single-quoted
 * terms, escaping, rejection of float fuzzy values, and slop.
 */
@Override
public void testParserSpecificSyntax() throws Exception {
  final Analyzer defaultAnalyzer = null;

  assertQueryEquals("\"germ term\"^2.0", defaultAnalyzer, "(spanNear([germ, term], 0, true))^2.0");

  // ---- testSimple ----
  assertQueryEquals("term AND \"phrase phrase\"", defaultAnalyzer,
      "+term +spanNear([phrase, phrase], 0, true)");
  assertQueryEquals("\"hello there\"", defaultAnalyzer, "spanNear([hello, there], 0, true)");
  assertQueryEquals("\"term germ\"^2", defaultAnalyzer, "(spanNear([term, germ], 0, true))^2.0");
  assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", defaultAnalyzer,
      "+(apple spanNear([steve, jobs], 0, true)) -(foo bar baz)");
  assertQueryEquals("+title:(dog cat) -author:\"bob dole\"", defaultAnalyzer,
      "+(title:dog title:cat) -spanNear([author:bob, author:dole], 0, true)");

  // ---- regexes ----
  CommonQueryParserConfiguration regexConfig =
      getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
  Query escapedSlash = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
  assertEquals(escapedSlash, getQuery("/[a-z]\\//[123]/", regexConfig));
  Query escapedStar = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
  assertEquals(escapedStar, getQuery("/[a-z]\\*[123]/", regexConfig));

  Query regexAndPaths = new BooleanQuery.Builder()
      .add(new RegexpQuery(new Term("field", "[a-z]/[123]")), BooleanClause.Occur.MUST)
      .add(new TermQuery(new Term("path", "/etc/init.d/")), BooleanClause.Occur.MUST)
      .add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), BooleanClause.Occur.SHOULD)
      .build();
  // then the simpler single quote
  assertEquals(regexAndPaths,
      getQuery("/[a-z]//[123]/ AND path:'/etc/init.d/' field:'/etc/init[.]d/lucene/'", regexConfig));
  assertEquals(new TermQuery(new Term("field", "/boo/")), getQuery("'/boo/'", regexConfig));

  // ---- testEscaped ----
  Analyzer whitespace = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  // double quotes are changed to single quotes for the span query parser
  assertQueryEquals("['c:\\temp\\~foo0.txt' TO 'c:\\temp\\~foo9.txt']", whitespace,
      "[c:\\temp\\~foo0.txt TO c:\\temp\\~foo9.txt]");
  assertQueryEquals("\"a \\\"b c\\\" d\"", whitespace, "spanNear([a, \"b, c\", d], 0, true)");
  assertQueryEquals("\"a \\+b c d\"", whitespace, "spanNear([a, +b, c, d], 0, true)");
  assertQueryEquals("\"a \\\\\\u0028\\u0062\\\" c\"", whitespace, "spanNear([a, \\(b\", c], 0, true)");

  // ---- testWildcard ----
  // SpanQueryParser requires the fuzzy marker before the boost, so these stay disabled:
  //assertQueryEquals("term^3~", null, "term~2^3.0");
  // assertParseException("term^3~");
  // Float fuzzy values are no longer available.
  assertParseException("term~0.7");
  // SpanQueryParser doesn't handle ! || && syntax yet.

  // ---- testSlop ----
  assertQueryEquals("\"term germ\"~2 flork", defaultAnalyzer, "spanNear([term, germ], 2, false) flork");
  assertQueryEquals("\"term germ\"~2^2", defaultAnalyzer, "(spanNear([term, germ], 2, false))^2.0");
}
/**
 * Collects assertions that the span query parser is known not to satisfy; the
 * method carries {@code @Ignore}.
 *
 * <p>testQPA cases: synonym handling currently occurs in the lower-level span
 * parsing component rather than the higher-level boolean component. The lower
 * level can only return a SpanQuery, not a BooleanQuery. This could probably
 * be fixed.
 *
 * <p>testSimple cases: the {@code !}, {@code &&} and {@code ||} syntax is not
 * planned unless there is interest.
 */
@Ignore
public void testSpanQueryParserFail() throws Exception {
  // ---- testQPA ----
  CommonQueryParserConfiguration andConfig = getParserConfig(qpAnalyzer);
  setDefaultOperatorAND(andConfig);
  assertQueryEquals(andConfig, "field", "term phrase term",
      "+term +(+phrase1 +phrase2) +term");
  assertQueryEquals(andConfig, "field", "phrase",
      "+phrase1 +phrase2");

  // ---- testSimple ----
  final Analyzer defaultAnalyzer = null;
  assertQueryEquals("a AND !b", defaultAnalyzer, "+a -b");
  assertQueryEquals("a && b", defaultAnalyzer, "+a +b");
  assertQueryEquals("a || b", defaultAnalyzer, "a b");
  assertQueryEquals("a OR !b", defaultAnalyzer, "a -b");
}
/**
 * Asserts that the float fuzzy value in {@code term~0.7} raises a parse
 * exception, then runs the inherited exception checks.
 */
@Override
public void testException() throws Exception {
assertParseException("term~0.7");
super.testException();
}
/**
 * Asserts that {@code q} is an empty query: either a {@link BooleanQuery}
 * without clauses or a {@link SpanOrQuery} without clauses. Any other query
 * fails the assertion.
 *
 * @param q query to check
 */
@Override
public void assertEmpty(Query q) {
  final boolean empty;
  if (q instanceof BooleanQuery) {
    empty = ((BooleanQuery) q).clauses().isEmpty();
  } else if (q instanceof SpanOrQuery) {
    empty = ((SpanOrQuery) q).getClauses().length == 0;
  } else {
    empty = false;
  }
  assertTrue("Empty: " + q.toString(), empty);
}
/**
 * Checks range endpoints that are an escaped or single-quoted {@code *}: both
 * forms render as an escaped star, while a bare {@code *} upper bound stays
 * unescaped.
 */
@Override
public void testRangeWithPhrase() throws Exception {
  // A null analyzer makes getParserConfig substitute its default mock analyzer.
  final Analyzer defaultAnalyzer = null;
  assertQueryEquals("[\\* TO '*']", defaultAnalyzer, "[\\* TO \\*]");
  assertQueryEquals("['*' TO *]", defaultAnalyzer, "[\\* TO *]");
}
}