package org.apache.lucene.queryparser.flexible.aqp;
import java.util.Map.Entry;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.queryparser.flexible.aqp.AqpAdsabsQueryParser;
import org.apache.lucene.queryparser.flexible.aqp.AqpQueryParser;
import org.apache.lucene.queryparser.flexible.aqp.AqpTestAbstractCase;
import org.apache.lucene.queryparser.flexible.aqp.config.AqpAdsabsQueryConfigHandler;
import org.apache.lucene.sandbox.queries.SlowFuzzyQuery;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
public class TestAqpAdsabs extends AqpTestAbstractCase {
public void setUp() throws Exception {
setGrammarName("ADS");
super.setUp();
}
public AqpQueryParser getParser() throws Exception {
//AqpQueryParser qp = AqpAdsabsQueryParser.init(getGrammarName());
AqpAdsabsQueryConfigHandler config = new AqpAdsabsQueryConfigHandler();
config.set(AqpAdsabsQueryConfigHandler.ConfigurationKeys.SOLR_READY, false);
AqpSyntaxParser parser = new AqpSyntaxParserLoadableImpl().initializeGrammar(grammarName);
AqpAdsabsNodeProcessorPipeline processor = new AqpAdsabsNodeProcessorPipeline(config);
AqpAdsabsQueryTreeBuilder builder = new AqpAdsabsQueryTreeBuilder();
AqpQueryParser qp = new AqpAdsabsQueryParser(config, parser, processor, builder);
for (Entry<String, String> e: parserArgs.entrySet()) {
qp.setNamedParameter(e.getKey(), e.getValue());
}
qp.setDebug(this.debugParser);
return qp;
}
public void testAnalyzers() throws Exception {
Analyzer pa = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
PatternTokenizer filter;
filter = new PatternTokenizer(Pattern.compile("\\|"), -1);
return new TokenStreamComponents(filter);
}
};
assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2");
assertQueryEquals("\"this\" AND that", null, "+this +that", BooleanQuery.class);
assertQueryEquals("\"this\"", null, "this");
assertQueryEquals("word:\"this \"", pa, "word:this ");
assertQueryEquals("\"this \" ", pa, "this ");
assertQueryEquals("\" this \"", pa, " this ");
}
public void testAuthorField() throws Exception {
// note: nothing too much exciting here - the real tests must be done with the
// ADS author query, and for that we will need solr unittests - so for now, just basic stuff
WhitespaceAnalyzer wsa = new WhitespaceAnalyzer();
assertQueryEquals("author:\"A Einstein\"", null, "author:\"a einstein\"", PhraseQuery.class);
// probably, this should construct a different query (a phrase perhaps)
assertQueryEquals("=author:\"A Einstein\"", null, "author:A Einstein", TermQuery.class);
assertQueryEquals("author:\"M. J. Kurtz\" author:\"G. Eichhorn\" 2004", wsa, "+author:\"M. J. Kurtz\" +author:\"G. Eichhorn\" +2004");
assertQueryEquals("author:\"M. J. Kurtz\" =author:\"G. Eichhorn\" 2004", null, "+author:\"m j kurtz\" +author:G. Eichhorn");
assertQueryEquals("author:\"huchra, j\"", wsa, "author:\"huchra, j\"");
assertQueryEquals("author:\"huchra, j\"", null, "author:\"huchra j\"");
assertQueryEquals("=author:\"huchra, j\"", wsa, "author:huchra, j", TermQuery.class);
assertQueryEquals("author:\"huchra, j.*\"", wsa, "author:huchra, j*", PrefixQuery.class);
}
public void testAcronyms() throws Exception {
assertQueryEquals("\"dark matter\" -LHC", null, "+\"dark matter\" -lhc");
}
/**
* OK, done 17Apr
*
*/
public void testIdentifiers() throws Exception {
WhitespaceAnalyzer wsa = new WhitespaceAnalyzer();
Query q = null;
assertQueryEquals("arXiv:1012.5859", wsa, "arxiv:1012.5859");
assertQueryEquals("xfield:10.1086/345794", wsa, "xfield:10.1086/345794");
assertQueryEquals("xfield:doi:10.1086/345794", wsa, "xfield:10.1086/345794");
assertQueryEquals("arXiv:astro-ph/0601223", wsa, "arxiv:astro-ph/0601223");
q = assertQueryEquals("xfield:arXiv:0711.2886", wsa, "xfield:0711.2886");
assertQueryEquals("foo AND bar AND 2003AJ....125..525J", wsa, "+foo +bar +2003aj....125..525j");
assertQueryEquals("2003AJ….125..525J", wsa, "2003aj....125..525j");
assertQueryEquals("one x:doi:word/word doi:word/123", wsa, "+one +x:word/word +doi:word/123");
assertQueryEquals("doi:hey/156-8569", wsa, "doi:hey/156-8569");
q = assertQueryEquals("doi:10.1000/182", wsa, "doi:10.1000/182");
// pretend we are sending bibcode (this should be handled as a normal token)
assertQueryEquals("200xAJ....125..525J", wsa, "200xAJ....125..525J");
}
/**
* OK, Apr19
*
*/
public void testDateRanges() throws Exception {
WhitespaceAnalyzer wsa = new WhitespaceAnalyzer();
assertQueryEquals("intitle:\"QSO\" 1995-2000", null, "+intitle:qso +date:[1995 TO 2000]");
assertQueryEquals("2011-2012", null, "date:[2011 TO 2012]");
assertQueryEquals("xf:2011-2012", null, "xf:[2011 TO 2012]");
assertQueryEquals("one 2009-2012", null, "+one +date:[2009 TO 2012]");
assertQueryEquals("notdate 09-12", wsa, "+notdate +09-12");
assertQueryEquals("notdate 09-2012", wsa, "+notdate +09-2012");
//TODO - also test that warning messages were generated
//TODO - throw syntax error?
//TODO - lucene4.0 shows \* but it also has * when the value is null, the TermRanqeQueryNodeBuilder may be wrong
assertQueryEquals("2011-", null, "date:[2011 TO \\*]");
assertQueryEquals("-2011", null, "date:[\\* TO 2011]");
assertQueryEquals("-2009", null, "date:[\\* TO 2009]");
assertQueryEquals("2009-", null, "date:[2009 TO \\*]");
assertQueryEquals("year:2000-", null, "year:[2000 TO \\*]");
assertQueryEquals("2000-", null, "date:[2000 TO \\*]");
// i don't think we should try to guess this as a date
assertQueryEquals("2011", null, "MatchNoDocsQuery(\"\")");
assertQueryEquals("2011", wsa, "2011");
}
/**
* OK, 17Apr
*
*/
public void testRanges() throws Exception {
WhitespaceAnalyzer wsa = new WhitespaceAnalyzer();
assertQueryEquals("[20020101 TO 20030101]", null, "[20020101 TO 20030101]");
assertQueryEquals("[20020101 TO 20030101]^0.5", null, "([20020101 TO 20030101])^0.5");
assertQueryNodeException("[20020101 TO 20030101]^0.5~");
assertQueryNodeException("[20020101 TO 20030101]^0.5~");
assertQueryEquals("title:[20020101 TO 20030101]", null, "title:[20020101 TO 20030101]");
assertQueryEquals("title:[20020101 TO 20030101]^0.5", null, "(title:[20020101 TO 20030101])^0.5");
assertQueryNodeException("title:[20020101 TO 20030101]^0.5~");
assertQueryNodeException("title:[20020101 TO 20030101]^0.5~");
assertQueryEquals("[* TO 20030101]", null, "[\\* TO 20030101]");
assertQueryEquals("[20020101 TO *]^0.5", null, "([20020101 TO \\*])^0.5");
assertQueryNodeException("[* 20030101]^0.5~");
assertQueryNodeException("[20020101 *]^0.5~");
assertQueryEquals("[this TO that]", null, "[this TO that]");
assertQueryEquals("[this that]", null, "[this TO that]");
assertQueryEquals("[this TO *]", null, "[this TO \\*]");
assertQueryEquals("[this]", null, "[this TO \\*]");
assertQueryEquals("[* this]", null, "[\\* TO this]");
assertQueryEquals("[* TO this]", null, "[\\* TO this]");
assertQueryEquals("[\"this\" TO \"that*\"]", null, "[this TO that*]");
// TODO: verify this is correct (this phrase is not a phrase inside a range query)
assertQueryEquals("[\"this phrase\" TO \"that phrase*\"]", null, "[this phrase TO that phrase*]");
assertQueryEquals("[\"#$%^&\" TO \"&*()\"]", wsa, "[#$%^& TO &*()]");
assertQueryEquals("+a:[this TO that]", null, "a:[this TO that]");
assertQueryEquals("+a:[ this TO that ]", null, "a:[this TO that]");
assertQueryEquals("year:[2000 TO *]", null, "year:[2000 TO \\*]");
}
public void testModifiers() throws Exception {
WhitespaceAnalyzer wsa = new WhitespaceAnalyzer();
assertQueryEquals("jakarta^4 apache", null, "+(jakarta)^4.0 +apache");
assertQueryEquals("\"jakarta apache\"^4 \"Apache Lucene\"", null, "+(\"jakarta apache\")^4.0 +\"apache lucene\"");
assertQueryEquals("this +(that thus)^7", null, "+this +(+that +thus)^7.0");
assertQueryEquals("this (+(that)^7)", null, "+this +(that)^7.0");
assertQueryEquals("roam~", null, "roam~2", FuzzyQuery.class);
assertQueryEquals("roam~0.8", null, "roam~0.8", SlowFuzzyQuery.class);
assertQueryEquals("roam~0.899999999", null, "roam~0.9");
assertQueryEquals("roam^", null, "(roam)^1.0");
assertQueryEquals("roam^0.8", null, "(roam)^0.8");
assertQueryEquals("roam^0.899999999", null, "(roam)^0.9");
assertQueryEquals("roam^8", null, "(roam)^8.0");
// this should fail
assertQueryNodeException("roam^~");
assertQueryEquals("roam^0.8~", null, "(roam~2)^0.8");
assertQueryEquals("roam^0.899999999~0.5", null, "(roam~0.5)^0.9");
// should this fail?
assertQueryEquals("roam~^", null, "(roam~2)^1.0");
assertQueryEquals("roam~0.8^", null, "(roam~0.8)^1.0");
assertQueryEquals("roam~0.899999999^0.5", null, "(roam~0.9)^0.5");
// with wsa analyzer the 5 is retained as a token
assertQueryEquals("this^ 5", wsa, "+(this)^1.0 +5");
// with standard tokenizer, it goes away
assertQueryEquals("this^ 5", null, "(this)^1.0");
assertQueryEquals("this^0. 5", wsa, "+this +5");
assertQueryEquals("/this^0. 5/", wsa, "/this^0. 5/");
assertQueryEquals("this^0.4 5", wsa, "+(this)^0.4 +5");
assertQueryEquals("this^5~ 9", null, "(this~2)^5.0");
assertQueryEquals("this^5~ 9", wsa, "+(this~2)^5.0 +9");
assertQueryEquals("9999", wsa, "9999");
assertQueryEquals("9999.1", wsa, "9999.1");
assertQueryEquals("0.9999", wsa, "0.9999");
assertQueryEquals("00000000.9999", wsa, "00000000.9999");
// tilda used for phrases has a different meaning (it is not a fuzzy paramater)
// but a proximity operator, thus it can be >= 1.0
assertQueryEquals("\"weak lensing\"~", null, "\"weak lensing\"~2");
assertQueryEquals("\"jakarta apache\"~10", null, "\"jakarta apache\"~10");
assertQueryEquals("\"jakarta apache\"^10", null, "(\"jakarta apache\")^10.0");
assertQueryEquals("\"jakarta apache\"~10^", null, "(\"jakarta apache\"~10)^1.0");
assertQueryEquals("\"jakarta apache\"^10~", null, "(\"jakarta apache\"~2)^10.0");
assertQueryEquals("\"jakarta apache\"~10^0.6", null, "(\"jakarta apache\"~10)^0.6");
assertQueryEquals("\"jakarta apache\"^10~0.6", null, "(\"jakarta apache\")^10.0");
assertQueryEquals("\"jakarta apache\"^10~2.4", null, "(\"jakarta apache\"~2)^10.0");
// switching-off analysis for individual tokens:
// this is an example of how complex the query parsing can be, and impossible
// without a powerful builder (this would just be unthinkable with the standard
// lucene parser and impossible with the invenio parser)
assertQueryEquals("#5", null, "MatchNoDocsQuery(\"\")");
assertQueryEquals("#(request synonyms 5)", null, "+request +synonyms");
assertQueryEquals("this and (one #5)", null, "+this +(+one)");
assertQueryEquals("this and (one #5)", wsa, "+this +(+one +5)");
assertQueryEquals("=5", null, "5");
assertQueryEquals("=(request synonyms 5)", null, "+request +synonyms +5");
assertQueryEquals("this and (one =5)", null, "+this +(+one +5)");
assertQueryEquals("this and (one =5)", wsa, "+this +(+one +5)");
}
public void testExceptions() throws Exception {
assertQueryNodeException("this (+(((+(that))))");
assertQueryNodeException("this (++(((+(that)))))");
assertQueryNodeException("this (+(((+(that))))");
assertQueryNodeException("this (++(((+(that)))))");
//assertQueryNodeException("escape:(\\+\\-\\&\\&\\|\\|\\!\\(\\)\\{\\}\\[\\]\\^\\\"\\~\\*\\?\\:\\\\)");
assertQueryNodeException("[]");
assertQueryNodeException("+field:");
assertQueryNodeException("=");
assertQueryNodeException("one ^\"author phrase\"");
// this parses well, shall we consider it mistake?
//assertQueryNodeException("one ^\"author phrase\"$");
assertQueryNodeException("this =and that");
assertQueryNodeException("(doi:tricky:01235)");
}
public void testWildCards() throws Exception {
Query q = null;
q = assertQueryEquals("te?t", null, "te?t", WildcardQuery.class);
q = assertQueryEquals("test*", null, "test*", WildcardQuery.class);
assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
q = assertQueryEquals("test?", null, "test?", WildcardQuery.class);
assertEquals(MultiTermQuery.CONSTANT_SCORE_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
assertQueryEquals("te*t", null, "te*t", WildcardQuery.class);
assertQueryEquals("*te*t", null, "*te*t", WildcardQuery.class);
assertQueryEquals("*te*t*", null, "*te*t*", WildcardQuery.class);
assertQueryEquals("?te*t?", null, "?te*t?", WildcardQuery.class);
assertQueryEquals("te?t", null, "te?t", WildcardQuery.class);
assertQueryEquals("te??t", null, "te??t", WildcardQuery.class);
assertQueryNodeException("te*?t");
assertQueryNodeException("te?*t");
// as I am discovering, there is no such a thing as a quoted wildcard
// query, it just turns into a regular wildcard query, well...
assertQueryEquals("\"te*t phrase\"", null, "te*t phrase", WildcardQuery.class);
assertQueryEquals("\"test* phrase\"", null, "test* phrase", WildcardQuery.class);
assertQueryEquals("\"te*t phrase\"", null, "te*t phrase", WildcardQuery.class);
assertQueryEquals("\"*te*t phrase\"", null, "*te*t phrase", WildcardQuery.class);
assertQueryEquals("\"*te*t* phrase\"", null, "*te*t* phrase", WildcardQuery.class);
assertQueryEquals("\"?te*t? phrase\"", null, "?te*t? phrase", WildcardQuery.class);
assertQueryEquals("\"te?t phrase\"", null, "te?t phrase", WildcardQuery.class);
assertQueryEquals("\"te??t phrase\"", null, "te??t phrase", WildcardQuery.class);
assertQueryEquals("\"te*?t phrase\"", null, "te*?t phrase", WildcardQuery.class);
assertQueryEquals("*", null, "*:*", MatchAllDocsQuery.class);
assertQueryEquals("*:*", null, "*:*", MatchAllDocsQuery.class);
assertQueryEquals("?", null, "?", WildcardQuery.class);
// XXX: in fact, in the WildcardQuery, even escaped start \* will become *
// so it is not possible to search for words that contain * as a literal
// character, to have it differently, WildcardTermEnum class would have
// to think of skipping \* and \?
q = assertQueryEquals("*t\\*a", null, "*t*a", WildcardQuery.class);
assertQueryEquals("*t*a\\*", null, "*t*a*", WildcardQuery.class);
assertQueryEquals("*t*a\\?", null, "*t*a?", WildcardQuery.class);
assertQueryEquals("*t*\\a", null, "*t*a", WildcardQuery.class);
assertQueryEquals("title:*", null, "title:*", PrefixQuery.class);
assertQueryEquals("doi:*", null, "doi:*", PrefixQuery.class);
}
public void testEscaped() throws Exception {
WhitespaceAnalyzer wsa = new WhitespaceAnalyzer();
assertQueryEquals("\\(1\\+1\\)\\:2", wsa, "(1+1):2", TermQuery.class);
assertQueryEquals("th\\*is", wsa, "th*is", TermQuery.class);
assertQueryEquals("a\\\\\\\\+b", wsa, "a\\\\+b", TermQuery.class);
assertQueryEquals("a\\u0062c", wsa, "abc", TermQuery.class);
assertQueryEquals("\\*t", wsa, "*t", TermQuery.class);
}
/**
* TODO: x NEAR/2 y
* x:four -field:(-one +two x:three)
* "\"func(*) AND that\"" (should not be analyzed; AND becomes and)
*
*/
public void testBasics() throws Exception{
WhitespaceAnalyzer wsa = new WhitespaceAnalyzer();
KeywordAnalyzer kwa = new KeywordAnalyzer();
assertQueryEquals("keyword:\"planets and satellites\"", wsa, "keyword:\"planets and satellites\"", PhraseQuery.class);
assertQueryEquals("full:*", null, "full:*", PrefixQuery.class);
assertQueryEquals("weak lensing", null, "+weak +lensing");
assertQueryEquals("+contact +binaries -eclipsing", null, "+contact +binaries -eclipsing");
assertQueryEquals("+contact +foo:binaries -eclipsing", null, "+contact +foo:binaries -eclipsing");
assertQueryEquals("intitle:\"yellow symbiotic\"", null, "intitle:\"yellow symbiotic\"");
assertQueryEquals("\"galactic rotation\"", null, "\"galactic rotation\"", PhraseQuery.class);
assertQueryEquals("title:\"X x\" AND text:go title:\"x y\" AND A", null, "+title:\"x x\" +text:go +title:\"x y\" +a");
assertQueryEquals("title:\"X x\" OR text:go title:\"x y\" OR A", null, "+(title:\"x x\" text:go) +(title:\"x y\" a)");
assertQueryEquals("title:X Y Z", null, "+title:x +y +z");
assertQueryEquals("title:(X Y Z)", null, "+title:x +title:y +title:z");
assertQueryEquals("\"jakarta apache\" OR jakarta", null, "\"jakarta apache\" jakarta");
assertQueryEquals("\"jakarta apache\" AND \"Apache Lucene\"", null, "+\"jakarta apache\" +\"apache lucene\"");
assertQueryEquals("\"jakarta apache\" NOT \"Apache Lucene\"", null, "+\"jakarta apache\" -\"apache lucene\"");
assertQueryEquals("(jakarta OR apache) AND website", null, "+(jakarta apache) +website");
assertQueryEquals("weak NEAR lensing", null, "spanNear([weak, lensing], 5, true)");
assertQueryEquals("weka NEAR2 lensing", null, "spanNear([weka, lensing], 2, true)");
assertQueryEquals("a -b", null, "+a -b");
assertQueryEquals("a +b", null, "+a +b");
assertQueryEquals("A–b", null, "+a +b"); // em dash is not an operator
assertQueryEquals("A + b", null, "+a +b");
assertQueryEquals("+jakarta lucene", null, "+jakarta +lucene");
assertQueryEquals("+jakarta OR lucene", null, "+jakarta lucene");
//setDebug(true);
assertQueryEquals("this (that)", null, "+this +that");
assertQueryEquals("this ((that))", null, "+this +that");
assertQueryEquals("(this) ((((((that))))))", null, "+this +that");
assertQueryEquals("(this) (that)", null, "+this +that");
assertQueryEquals("this +(that)", null, "+this +that");
assertQueryEquals("this OR +(that)", null, "this +that");
assertQueryEquals("this ((((+(that)))))", null, "+this +that");
assertQueryEquals("this (+(((+(that)))))", null, "+this +that");
assertQueryEquals("this +((((+(that)))))", null, "+this +that");
assertQueryEquals("this +(+((((that)))))", null, "+this +that");
assertQueryEquals("(this that)", null, "+this +that");
assertQueryEquals("(foo:this that)", null, "+foo:this +that");
assertQueryEquals("title:(+return +\"pink panther\")", null, "+title:return +title:\"pink panther\"");
assertQueryEquals("field:(one two three)", null, "+one +two +three");
assertQueryEquals("field:(one OR two OR three)", null, "one two three");
assertQueryEquals("fieldx:(one +two -three)", null, "+fieldx:one +fieldx:two -fieldx:three");
assertQueryEquals("+field:(-one +two three)", null, "-one +two +three");
assertQueryEquals("-field:(-one +two three)", null, "-one +two +three");
assertQueryEquals("+field:(-one +two three) x:four", null, "+(-one +two +three) +x:four");
assertQueryEquals("x:four -field:(-one +two three)", null, "+x:four -(-one +two +three)");
//TODO: the last x: field is overwritten, a bug, a feature?
assertQueryEquals("x:four -foo:(-one +two x:three)", null, "+x:four -(-foo:one +foo:two +foo:three)");
//XXX: I know about this bug, but having no time to fix, higher priorities...
assertQueryEquals("x:four -foo:(-one two x:three)", null, "+x:four -(-foo:one +foo:two +foo:three)");
assertQueryEquals("x:a -f:(-b c x:z)", null, "+x:a -(-f:b +f:c +f:z)");
assertQueryEquals("a test:(one)", null, "+a +test:one");
assertQueryEquals("a test:(a)", null, "+a +test:a");
assertQueryEquals("test:(one)", null, "test:one");
assertQueryEquals("field: (one)", null, "one");
assertQueryEquals("field:( one )", null, "one");
assertQueryEquals("+value", null, "value");
assertQueryEquals("-value", null, "value"); //? should we allow - at the beginning?
assertQueryEquals("m:(a b c)", null, "+m:a +m:b +m:c");
assertQueryEquals("+m:(a b c)", null, "+m:a +m:b +m:c");
assertQueryEquals("+m:(a b c) +x:d", null, "+(+m:a +m:b +m:c) +x:d");
assertQueryEquals("+m:(a b c) x:d", null, "+(+m:a +m:b +m:c) +x:d");
assertQueryEquals("+m:(a b c) OR x:d", null, "+(+m:a +m:b +m:c) x:d");
assertQueryEquals("+m:(a b c) -x:d", null, "+(+m:a +m:b +m:c) -x:d");
assertQueryEquals("+x:d +m:(a b c)", null, "+x:d +(+m:a +m:b +m:c)");
assertQueryEquals("x:d +m:(a b c)", null, "+x:d +(+m:a +m:b +m:c)");
assertQueryEquals("x:d OR +m:(a b c)", null, "x:d +(+m:a +m:b +m:c)");
assertQueryEquals("-x:d +m:(a b c)", null, "-x:d +(+m:a +m:b +m:c)");
assertQueryEquals("+x:d +m:(a b c) +y:e", null, "+x:d +(+m:a +m:b +m:c) +y:e");
assertQueryEquals("x:d +m:(a b c) y:e", null, "+x:d +(+m:a +m:b +m:c) +y:e");
assertQueryEquals("x:d OR +m:(a b c) OR y:e", null, "x:d +(+m:a +m:b +m:c) y:e");
assertQueryEquals("-x:d +m:(a b c) -y:e", null, "-x:d +(+m:a +m:b +m:c) -y:e");
assertQueryEquals("+m:(a OR b OR c) x:d", null, "+(m:a m:b m:c) +x:d");
assertQueryEquals("m:(+a b c)", null, "+m:a +m:b +m:c");
assertQueryEquals("m:(+a b OR c)", null, "+m:a +(m:b m:c)");
assertQueryEquals("m:(-a +b c)^0.6", null, "(-m:a +m:b +m:c)^0.6");
assertQueryEquals("m:(a b c or d)", null, "+m:a +m:b +(m:c m:d)");
assertQueryEquals("m:(a b c OR d)", null, "+m:a +m:b +(m:c m:d)");
assertQueryEquals("m:(a b c AND d)", null, "+m:a +m:b +(+m:c +m:d)");
assertQueryEquals("m:(a b c OR d NOT e)", null, "+m:a +m:b +(m:c (+m:d -m:e))");
assertQueryEquals("m:(a b NEAR c)", null, "+m:a +spanNear([m:b, m:c], 5, true)");
assertQueryEquals("m:(a b NEAR c d AND e)", null, "+m:a +spanNear([m:b, m:c], 5, true) +(+m:d +m:e)");
assertQueryEquals("-m:(a b NEAR c d AND e)", null, "+m:a +spanNear([m:b, m:c], 5, true) +(+m:d +m:e)"); //? should we allow - at the beginning?
assertQueryEquals("m:(a b NEAR2 c)", null, "+m:a +spanNear([m:b, m:c], 2, true)");
assertQueryEquals("m:(a b NEAR3 c d AND e)", null, "+m:a +spanNear([m:b, m:c], 3, true) +(+m:d +m:e)");
assertQueryEquals("-m:(a b NEAR4 c d AND e)", null, "+m:a +spanNear([m:b, m:c], 4, true) +(+m:d +m:e)");
assertQueryNodeException("m:(a b NEAR7 c)"); // by default, only range 1-5 is allowed (in configuration)
assertQueryEquals("author:(muench-nashrallah)", wsa, "author:muench-nashrallah");
assertQueryEquals("\"dark matter\" OR (dark matter -LHC)", null, "\"dark matter\" (+dark +matter -lhc)");
assertQueryEquals("this999", wsa, "this999");
assertQueryEquals("this0.9", wsa, "this0.9");
assertQueryEquals("\"a \\\"b c\\\" d\"", wsa, "\"a \"b c\" d\"", PhraseQuery.class);
assertQueryEquals("\"a \\\"b c\\\" d\"", wsa, "\"a \"b c\" d\"", PhraseQuery.class);
assertQueryEquals("\"a \\+b c d\"", wsa, "\"a +b c d\"");
assertQueryEquals("\"+() AND that\"", wsa, "\"+() AND that\"");
assertQueryEquals("\"func(a) AND that\"", wsa, "\"func(a) AND that\"");
// TODO: something funny happens with quoted-truncated (it is analyzed)
//assertQueryEquals("\"func(*) AND that\"", wsa, "\"func(*) AND that\"");
assertQueryEquals("CO2+", wsa, "CO2+", TermQuery.class);
}
public void _testMultiToken() throws Exception{
KeywordAnalyzer a = new KeywordAnalyzer();
//setDebug(true);
assertQueryEquals("weak lensing", null, "weak lensing");
assertQueryEquals("all:weak lensing", null, "all:weak lensing");
assertQueryEquals("weak all:lensing", null, "weak all:lensing");
// the grammar parses this as: A B D -E (which is actually correct)
assertQueryEquals("A B (D -E)", null, "A B (D -E)");
assertQueryEquals("A B +(D -E)", null, "A B (D -E)");
assertQueryEquals("weak lensing", null, "+weak lensing");
assertQueryEquals("weak lensing", null, "+weak lensing");
assertQueryEquals("weak lensing", null, "+weak lensing");
}
public void testRegex() throws Exception{
WhitespaceAnalyzer wsa = new WhitespaceAnalyzer();
assertQueryEquals("/foo$/", wsa, "/foo$/", RegexpQuery.class);
assertQueryEquals("keyword:/foo$/", wsa, "keyword:/foo$/", RegexpQuery.class);
assertQueryEquals("keyword:/^foo$/", wsa, "keyword:/^foo$/", RegexpQuery.class);
assertQueryEquals("keyword:/^foo$/ AND \"foo bar\"", wsa, "+keyword:/^foo$/ +\"foo bar\"", BooleanQuery.class);
}
public void testDelimiters() throws Exception {
KeywordAnalyzer wsa = new KeywordAnalyzer();
assertQueryEquals("What , happens,with commas ,,",
null,
"+what +happens +with +commas",
BooleanQuery.class);
// this instructs parser to concatenate unfielded values
parserArgs.put("aqp.unfielded.tokens.strategy", "join");
assertQueryEquals("What , happens,with commas ,,",
null,
"+what +happens +with +commas",
BooleanQuery.class);
assertQueryEquals("What ; happens;with semicolons ;;",
null,
"+what +happens +with +semicolons",
BooleanQuery.class);
// now using different analyzer
assertQueryEquals("What , happens,with commas ,,",
wsa,
"What , happens,with commas ,,",
TermQuery.class);
assertQueryEquals("What ; happens;with semicolons ;;",
wsa,
"What ; happens;with semicolons ;;",
TermQuery.class);
// deactivate joining
parserArgs.clear();
assertQueryEquals("What , happens,with commas ,,",
null,
"+what +happens +with +commas",
BooleanQuery.class);
assertQueryEquals("What ; happens;with semicolons ;;",
null,
"+what +happens +with +semicolons",
BooleanQuery.class);
// now using different analyzer
assertQueryEquals("What , happens,with commas ,,",
wsa,
"+What +happens +with +commas",
BooleanQuery.class);
assertQueryEquals("What ; happens;with semicolons ;;",
wsa,
"+What +happens +with +semicolons",
BooleanQuery.class);
}
public void testMultipleTokenConcatenation() throws Exception {
KeywordAnalyzer kwa = new KeywordAnalyzer();
// this should be concatenated into one phrase
parserArgs.put("aqp.unfielded.tokens.strategy", "join");
parserArgs.put("aqp.unfielded.tokens.new.type", "phrase");
assertQueryEquals("foo:(A)", null, "foo:a");
assertQueryEquals("foo:(A -B)", null, "+foo:a -foo:b");
assertQueryEquals("foo:(A B D E)", null, "+foo:a +foo:b +foo:d +foo:e"); // but this is fielded
assertQueryEquals("A B D E", null, "\"a b d e\"");
assertQueryEquals("+A B D E", null, "\"a b d e\"");
assertQueryEquals("A +B D E", null, "+a +\"b d e\"");
assertQueryEquals("+(A B D E)", null, "\"a b d e\"");
//setDebug(true);
assertQueryEquals("=(A B D E)", null, "+A +B +D +E"); // '=' modifier makes it reject concatenation
assertQueryEquals("+foo:z +A B D E", null, "+foo:z +\"a b d e\"");
assertQueryEquals("+A B D E +foo:z", null, "+\"a b d e\" +foo:z");
// this should add new phrase
parserArgs.put("aqp.unfielded.tokens.strategy", "add");
parserArgs.put("aqp.unfielded.tokens.new.type", "phrase");
assertQueryEquals("foo:(A)", null, "foo:a");
assertQueryEquals("foo:(A -B)", null, "+foo:a -foo:b");
assertQueryEquals("foo:(A B D E)", null, "+foo:a +foo:b +foo:d +foo:e"); // fielded
assertQueryEquals("A B D E", null, "(+a +b +d +e) \"a b d e\"");
assertQueryEquals("+A B D E", null, "(+a +b +d +e) \"a b d e\"");
assertQueryEquals("A +B D E", null, "+a +((+b +d +e) \"b d e\")");
assertQueryEquals("+foo:z +A B D E", null, "+foo:z +((+a +b +d +e) \"a b d e\")");
assertQueryEquals("+A B D E +foo:z", null, "+((+a +b +d +e) \"a b d e\") +foo:z");
// this should add new token (which will be analyzed by analyzer and can produce st different)
parserArgs.put("aqp.unfielded.tokens.strategy", "add");
parserArgs.put("aqp.unfielded.tokens.new.type", "normal");
assertQueryEquals("foo:(A)", null, "foo:a");
assertQueryEquals("foo:(A -B)", null, "+foo:a -foo:b");
assertQueryEquals("foo:(A B D E)", kwa, "+foo:A +foo:B +foo:D +foo:E"); // fielded
assertQueryEquals("A B D E", kwa, "(+A +B +D +E) A B D E");
assertQueryEquals("+A B D E", kwa, "(+A +B +D +E) A B D E");
assertQueryEquals("A +B D E", kwa, "+A +((+B +D +E) B D E)");
assertQueryEquals("+foo:z +A B D E", null, "+foo:z +((+a +b +d +e) (+a +b +d +e))");
assertQueryEquals("+A B D E +foo:z", null, "+((+a +b +d +e) (+a +b +d +e)) +foo:z");
parserArgs.put("aqp.unfielded.tokens.strategy", "add");
parserArgs.put("aqp.unfielded.tokens.new.type", "phrase");
assertQueryEquals("author:(huchra)", null, "author:huchra");
// without field specific logic, we don't understand this as elements of the same name
assertQueryEquals("author:(huchra, j)", null, "+author:huchra +author:j");
assertQueryEquals("author:(kurtz; -eichhorn, g)", kwa, "+author:kurtz -author:eichhorn +author:g");
assertQueryEquals("author:(kurtz; -\"eichhorn, g\")", null, "+author:kurtz -author:\"eichhorn g\"");
// here the stakes are higher - we don't understand that the next value
// is not author
parserArgs.put("aqp.unfielded.tokens.strategy", "add");
parserArgs.put("aqp.unfielded.tokens.new.type", "phrase");
assertQueryEquals("author:huchra nasa", null, "(+author:huchra +nasa) author:\"huchra nasa\"");
assertQueryEquals("author:accomazzi property:refereed apj", null, "+author:accomazzi +((+property:refereed +apj) property:\"refereed apj\")");
// this is the best what we can do - it is similar to the strategy
// above, only that in SOLR the values can be analyzed properly by edismax
// in this test, lucene cant handle it well...
parserArgs.put("aqp.unfielded.tokens.strategy", "multiply");
parserArgs.put("aqp.unfielded.tokens.new.type", "phrase");
assertQueryEquals("author:huchra nasa", kwa, "author:huchra nasa author:\"huchra nasa\"");
assertQueryEquals("author:accomazzi property:refereed apj", kwa, "+author:accomazzi +(property:refereed apj property:\"refereed apj\")");
}
public static junit.framework.Test suite() {
return new junit.framework.JUnit4TestAdapter(TestAqpAdsabs.class);
}
}