package org.apache.solr.search; import java.io.File; import java.io.IOException; import monty.solr.util.MontySolrQueryTestCase; import monty.solr.util.MontySolrSetup; import org.apache.lucene.queryparser.flexible.aqp.TestAqpAdsabs; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.SecondOrderQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanPositionRangeQuery; import org.junit.BeforeClass; /** * This unittest is for queries that require solr core * * @author rchyla * <p> * XXX: I was uneasy about the family of these tests * because they depend on the settings from the other * project (contrib/examples) - on the other hand, I * don't want to duplicate the code/config files. So, * for now I resigned, and I think of contrib/examples * as a dependency for adsabs * <p> * contrib/examples should contain only a code for the * live site (setup), but we are developing components * for it here and we'll test it here * (inside contrib/adsabs) * <p> * Let's do it pragmatically (not as code puritans) * @see TestAqpAdsabs for the other tests */ public class TestAqpAdsabsSolrSearch extends MontySolrQueryTestCase { @BeforeClass public static void beforeClass() throws Exception { makeResourcesVisible(Thread.currentThread().getContextClassLoader(), new String[]{MontySolrSetup.getMontySolrHome() + "/contrib/examples/adsabs/server/solr/collection1", MontySolrSetup.getSolrHome() + "/example/solr/collection1" }); System.setProperty("solr.allow.unsafe.resourceloading", "true"); schemaString = getSchemaFile(); configString = MontySolrSetup.getMontySolrHome() + "/contrib/examples/adsabs/server/solr/collection1/solrconfig.xml"; initCore(configString, schemaString, MontySolrSetup.getSolrHome() + "/example/solr"); } public static String getSchemaFile() { /* * For purposes of the test, we make a copy of the schema.xml, * and create our own synonym files */ String configFile = MontySolrSetup.getMontySolrHome() + "/contrib/examples/adsabs/server/solr/collection1/schema.xml"; File newConfig; try { newConfig = duplicateFile(new File(configFile)); File multiSynonymsFile = createTempFile(new String[]{ "hubble\0space\0telescope, HST", "r\0s\0t, RST", "dark\0energy, DE" }); replaceInFile(newConfig, "synonyms=\"ads_text_multi.synonyms\"", "synonyms=\"" + multiSynonymsFile.getAbsolutePath() + "\""); File synonymsFile = createTempFile(new String[]{ "weak => lightweak", "lensing => mikrolinseneffekt", "pink => pinkish", "stephen, stephens => stephen", "bremßtrahlung => brehmen" }); replaceInFile(newConfig, "synonyms=\"ads_text_simple.synonyms\"", "synonyms=\"" + synonymsFile.getAbsolutePath() + "\""); // hand-curated synonyms File curatedSynonyms = createTempFile(new String[]{ "JONES, CHRISTINE;FORMAN, CHRISTINE" // the famous post-synonym expansion }); replaceInFile(newConfig, "synonyms=\"author_curated.synonyms\"", "synonyms=\"" + curatedSynonyms.getAbsolutePath() + "\""); } catch (IOException e) { e.printStackTrace(); throw new IllegalStateException(e.getMessage()); } return newConfig.getAbsolutePath(); } public void testUnfieldedSearch() throws Exception { /* * Unfielded search should be expanded automatically by edismax * * However, edismax is not smart enough to deal properly with boolean clauses * and default operators, so I have decided to use the edismax on the "value" * level only. First, we parse the query, then we pass it to the 'adismax' * query parser (a modified edismax) to expand it; adismax will use aqp * to build the individual queries - so it is best of both worlds * */ // first the individual elements explicitly (notice edismax differs from adismax) assertQueryEquals(req("defType", "aqp", "q", "adismax(MÜLLER)", "qf", "author^2.3 title abstract^0.4"), "(" + "((abstract:acr::müller abstract:acr::muller))^0.4 | " + "((author:müller, author:müller,* author:mueller, author:mueller,* author:muller, author:muller,*))^2.3 | " + "((title:acr::müller title:acr::muller))" + ")", DisjunctionMaxQuery.class); assertQueryEquals(req("defType", "aqp", "q", "edismax(MÜLLER)", "qf", "author^2.3 title abstract^0.4"), "(" + "(Synonym(abstract:acr::muller abstract:acr::müller))^0.4 | " + "Synonym(title:acr::muller title:acr::müller) | " + "(Synonym(author:mueller, author:muller, author:müller,))^2.3" + ")", DisjunctionMaxQuery.class); // unfielded search should handle authors like adismax (with expansions) assertQueryEquals(req("defType", "aqp", "q", "MÜLLER", "qf", "author^2.3 title abstract^0.4"), "(" + "((abstract:acr::müller abstract:acr::muller))^0.4 | " + "((author:müller, author:müller,* author:mueller, author:mueller,* author:muller, author:muller,*))^2.3 | " + "((title:acr::müller title:acr::muller))" + ")", DisjunctionMaxQuery.class); assertQueryEquals(req("defType", "aqp", "q", "\"forman, c\"", "qf", "author^2.3 title abstract^0.4"), "((abstract:\"forman c\")^0.4 | ((author:forman, c author:forman, christine author:jones, c author:jones, christine author:forman, c* author:forman,))^2.3 | title:\"forman c\")", DisjunctionMaxQuery.class); // now add a normal element assertQueryEquals(req("defType", "aqp", "q", "title:foo or MÜLLER", "qf", "author^2.3 title abstract^0.4"), "title:foo (((abstract:acr::müller abstract:acr::muller))^0.4 | ((author:müller, author:müller,* author:mueller, author:mueller,* author:muller, author:muller,*))^2.3 | ((title:acr::müller title:acr::muller)))", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "title:foo or \"forman, c\"", "qf", "author^2.3 title abstract^0.4"), "title:foo ((abstract:\"forman c\")^0.4 | ((author:forman, c author:forman, christine author:jones, c author:jones, christine author:forman, c* author:forman,))^2.3 | title:\"forman c\")", BooleanQuery.class); // this should not call edismax (because qf is missing) assertQueryEquals(req("defType", "aqp", "q", "accomazzi", "df", "author"), "author:accomazzi, author:accomazzi,*", BooleanQuery.class); /* * Now various cases of multi-token unfielded searches (incl multi-token synonyms) * and full author parse */ // this is default behaviour, if you see 'all:' it means edismax didn't parse it assertQueryEquals(req("defType", "aqp", "q", "author:accomazzi, alberto property:refereed apj"), "+(author:accomazzi, author:accomazzi,*) +all:alberto +property:refereed +all:apj", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "author:huchra supernova"), "+(author:huchra, author:huchra,*) +all:supernova", BooleanQuery.class); // smarter handling of missing parentheses/brackets with the special strategy // i expect following: // edismax receives: 'author:accomazzi, alberto' and also 'author:"accomazzi, alberto" // "" : 'property:refereed r s t' and 'property:"refereed r s t"' assertQueryEquals(req("defType", "aqp", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "qf", "title keyword", "q", "author:accomazzi, alberto property:refereed r s t"), "+(" + "(+((author:accomazzi, author:accomazzi,*)) +(keyword:alberto | title:alberto)) " + "(((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1)" + ") " + "+(" + "(+property:refereed +(keyword:r | title:r) +(keyword:s | title:s) +(keyword:t | title:t)) property:refereedrst" + ")", BooleanQuery.class); // the same as above + enhanced by multisynonym // i expect to see syn::r s t, syn::acr::rst assertQueryEquals(req("defType", "aqp", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "aqp.unfielded.phrase.edismax.synonym.workaround", "true", "q", "author:accomazzi, alberto property:refereed r s t", "qf", "title keyword^0.5"), "+((+((author:accomazzi, author:accomazzi,*)) +((keyword:alberto)^0.5 | title:alberto)) " + "(((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1)) " +"+((+property:refereed +((keyword:r)^0.5 | title:r) +((keyword:s)^0.5 | title:s) +((keyword:t)^0.5 | title:t)) property:refereedrst)", BooleanQuery.class); //#238 - single synonyms were caught by the multi-synonym component // also note: // the 'qf' is not set, but still edismax is responsible for parsing this query // and since edismax is using default OR (that is hardcoded!), we cannot change // that, we would have to parse the query ourselves; but after a long discussion // it was decided that we'll use OR for the unfielded searches, so it should be // OK - if not, I have to rewrite edismax parsing logic myself // 22/10/13 - I've introduced a new strategy that emits both the // original query string and the phrase query, this is a workaround // for edismax // 30/09/16 - edismax is using default AND assertQueryEquals(req("defType", "aqp", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "q", "pink elephant"), "(+(((all:pink all:syn::pinkish))) +(all:elephant)) all:\"(pink syn::pinkish) elephant\"", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "pink elephant", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "qf", "title keyword"), "(+(((keyword:pink keyword:syn::pinkish)) | ((title:pink title:syn::pinkish))) +(keyword:elephant | title:elephant)) (keyword:\"(pink syn::pinkish) elephant\" | title:\"(pink syn::pinkish) elephant\")", BooleanQuery.class); // when combined, the ADS's default AND operator should be visible +foo assertQueryEquals(req("defType", "aqp", "q", "pink elephant title:foo", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "qf", "title keyword"), "+((+(((keyword:pink keyword:syn::pinkish)) | ((title:pink title:syn::pinkish))) +(keyword:elephant | title:elephant)) (keyword:\"(pink syn::pinkish) elephant\" | title:\"(pink syn::pinkish) elephant\")) +title:foo", BooleanQuery.class); // multi-token combined with single token // the unfielded search should be exapnded with the phrase "x r s t" // and "r s t" should be properly analyzed into: "x rst" OR "x r s t" assertQueryEquals(req("defType", "aqp", "q", "r s t", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "aqp.unfielded.phrase.edismax.synonym.workaround", "true", "qf", "title^0.9 keyword^0.7"), "(+((keyword:r)^0.7 | (title:r)^0.9) +((keyword:s)^0.7 | (title:s)^0.9) +((keyword:t)^0.7 | (title:t)^0.9)) (((keyword:\"r s t\" keyword:syn::r s t keyword:syn::acr::rst))^0.7 | ((title:\"r s t\" title:syn::r s t title:syn::acr::rst))^0.9)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "x r s t y", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "aqp.unfielded.phrase.edismax.synonym.workaround", "true", "qf", "title^0.9 keyword_norm^0.7"), "(+((keyword_norm:x)^0.7 | (title:x)^0.9) +((keyword_norm:r)^0.7 | (title:r)^0.9) +((keyword_norm:s)^0.7 | (title:s)^0.9) +((keyword_norm:t)^0.7 | (title:t)^0.9) +((keyword_norm:y)^0.7 | (title:y)^0.9)) ((keyword_norm:\"x r s t y\")^0.7 | ((title:\"x r s t y\" title:\"x (syn::r s t syn::acr::rst) ? ? y\"~2))^0.9)", BooleanQuery.class); // author search, unfielded (which looks as one token) - it looks like that to // adismax, but aqp will see two tokens... // the result is crazy because of recursive parsing // 1: accomazzi,alberto // 2: author:accomazzi,alberto -> author:accomazzi AND adismax(alberto) // 3: adismax(alberto) -> title:alberto OR author:alberto^2.3 assertQueryEquals(req("defType", "aqp", "q", "accomazzi,alberto", "qf", "author^2.3 title", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple" ), "(" + "((+(author:accomazzi, author:accomazzi,*) +(((author:alberto, author:alberto,*))^2.3 | title:alberto)))^2.3 " + "| ((+title:accomazzi +(((author:alberto, author:alberto,*))^2.3 | title:alberto)))) " + "(((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))^2.3 | ((title:\"accomazzi alberto\" title:accomazzialberto)))", BooleanQuery.class); // see what happens during normal parsing // author search, unfielded (which looks as one token) assertQueryEquals(req("defType", "aqp", "q", "author:accomazzi,alberto" ), "+(author:accomazzi, author:accomazzi,*) +all:alberto", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "author:accomazzi,alberto", "qf", "author^2.3 title", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple" ), "(((+(author:accomazzi, author:accomazzi,*) +(((author:alberto, author:alberto,*))^2.3 | title:alberto)))~1) (((author:accomazzi, alberto author:accomazzi, alberto * author:accomazzi, a author:accomazzi, a * author:accomazzi,))~1)", BooleanQuery.class); } public void testSpecialCases() throws Exception { // inconsistency disabling synonyms: #39 assertQueryEquals(req("defType", "aqp", "q", "full:bremßtrahlung"), "(ack:bremßtrahlung ack:bremsstrahlung ack:syn::brehmen) " + "(abstract:bremßtrahlung abstract:bremsstrahlung abstract:syn::brehmen)^2.0 " + "(title:bremßtrahlung title:bremsstrahlung title:syn::brehmen)^2.0 " + "(body:bremßtrahlung body:bremsstrahlung body:syn::brehmen) " + "(keyword:bremßtrahlung keyword:bremsstrahlung keyword:syn::brehmen)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "=full:bremßtrahlung"), "(ack:bremßtrahlung ack:bremsstrahlung) " + "(abstract:bremßtrahlung abstract:bremsstrahlung)^2.0 " + "(title:bremßtrahlung title:bremsstrahlung)^2.0 " + "(body:bremßtrahlung body:bremsstrahlung) " + "(keyword:bremßtrahlung keyword:bremsstrahlung)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "body:bremßtrahlung"), "body:bremßtrahlung body:bremsstrahlung body:syn::brehmen", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "=body:bremßtrahlung"), "body:bremßtrahlung body:bremsstrahlung", BooleanQuery.class); // disable synonyms (also for virtual fiels) - #36 assertQueryEquals(req("defType", "aqp", "q", "abs:\"dark energy\""), "(abstract:\"dark energy\" abstract:syn::dark energy abstract:syn::acr::de) " + "(title:\"dark energy\" title:syn::dark energy title:syn::acr::de) " + "(keyword:\"dark energy\" keyword:syn::dark energy keyword:syn::acr::de)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "=abs:\"dark energy\""), "abstract:\"dark energy\" title:\"dark energy\" keyword:\"dark energy\"", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "=abs:(\"dark energy\")"), "abstract:\"dark energy\" title:\"dark energy\" keyword:\"dark energy\"", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "abs:(weak)"), "(abstract:weak abstract:syn::lightweak) (title:weak title:syn::lightweak) (keyword:weak keyword:syn::lightweak)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "=abs:(weak)"), "abstract:weak title:weak keyword:weak", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "abs:(=weak weak)"), "+(abstract:weak title:weak keyword:weak) +((abstract:weak abstract:syn::lightweak) (title:weak title:syn::lightweak) (keyword:weak keyword:syn::lightweak))", BooleanQuery.class); // full - virtual field with wrong date assertQueryEquals(req("defType", "aqp", "q", "full:(\"15-52-15050\" OR \"15-32-21062\")"), "((ack:\"15 52 15050\" ack:155215050) (abstract:\"15 52 15050\" abstract:155215050)^2.0 (title:\"15 52 15050\" title:155215050)^2.0 (body:\"15 52 15050\" body:155215050) (keyword:\"15 52 15050\" keyword:155215050)) ((ack:\"15 32 21062\" ack:153221062) (abstract:\"15 32 21062\" abstract:153221062)^2.0 (title:\"15 32 21062\" title:153221062)^2.0 (body:\"15 32 21062\" body:153221062) (keyword:\"15 32 21062\" keyword:153221062))", BooleanQuery.class); // nested functions should parse well: citations(author:"^kurtz") assertQueryEquals(req("defType", "aqp", "q", "citations(author:\"^kurtz\")"), "SecondOrderQuery(spanPosRange(spanOr([author:kurtz,, SpanMultiTermQueryWrapper(author:kurtz,*)]), 0, 1), collector=SecondOrderCollectorCitedBy(cache:citations-cache))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "citations(citations(author:\"^kurtz\"))"), "SecondOrderQuery(SecondOrderQuery(spanPosRange(spanOr([author:kurtz,, SpanMultiTermQueryWrapper(author:kurtz,*)]), 0, 1), collector=SecondOrderCollectorCitedBy(cache:citations-cache)), collector=SecondOrderCollectorCitedBy(cache:citations-cache))", SecondOrderQuery.class); // #30 - first_author and author:"^fooo" give diff results assertQueryEquals(req("defType", "aqp", "q", "first_author:\"kurtz, m j\"" ), "first_author:kurtz, m j first_author:kurtz, m j* first_author:/kurtz, m[^\\s]+ j.*/ first_author:kurtz, m first_author:kurtz,", BooleanQuery.class ); assertQueryEquals(req("defType", "aqp", "q", "author:\"^kurtz, m j\"" ), "spanPosRange(spanOr([author:kurtz, m j, SpanMultiTermQueryWrapper(author:kurtz, m j*), SpanMultiTermQueryWrapper(author:/kurtz, m[^\\s]+ j.*/), author:kurtz, m, author:kurtz,]), 0, 1)", SpanPositionRangeQuery.class ); // strange effect of paranthesis - github #23; we want to see this even (inside brackets) // +( // ( // ( // DisjunctionMaxQuery((((author:stephen, author:stephen,*)) | ((title:stephen title:syn::stephen)))) // DisjunctionMaxQuery((((author:murray, author:murray, margaret a author:murray, m a author:hanson, m m author:hanson, margaret m author:murray,*)) | title:murray)) // )~2 // ) // DisjunctionMaxQuery(( // ((author:stephen murray, author:stephen murray,* author:murray, stephen author:murray, stephen * author:murray, stephen * author:murray, s author:murray, s * author:murray, s * author:murray, author:murray,*)) // |title:\"(stephen syn::stephen) murray\" // )) // ) // +author_facet_hier:0/Murray, S //setDebug(true); assertQueryEquals(req("defType", "aqp", "q", "stephen murray author_facet_hier:\"0/Murray, S\"", "qf", "abstract title", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "aqp.unfielded.tokens.function.name", "edismax_combined_aqp" ), "+(" + "(+(((abstract:stephen abstract:syn::stephen)) | ((title:stephen title:syn::stephen))) " + "+(abstract:murray | title:murray)) " + "(abstract:\"(stephen syn::stephen) murray\" | title:\"(stephen syn::stephen) murray\")" + ") " + "+author_facet_hier:0/Murray, S", BooleanQuery.class ); assertQueryEquals(req("defType", "aqp", "q", "((stephen murray)) author_facet_hier:\"0/Murray, S\"", "qf", "title abstract", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "aqp.unfielded.tokens.function.name", "edismax_combined_aqp" ), //"+((((((abstract:stephen abstract:syn::stephen)) | ((title:stephen title:syn::stephen))) (abstract:murray | title:murray))~2) (abstract:\"(stephen syn::stephen) murray\" | title:\"(stephen syn::stephen) murray\")) +author_facet_hier:0/Murray, S", "+(" + "(+(((abstract:stephen abstract:syn::stephen)) | ((title:stephen title:syn::stephen))) " + "+(abstract:murray | title:murray)) " + "(abstract:\"(stephen syn::stephen) murray\" | title:\"(stephen syn::stephen) murray\")" + ") " + "+author_facet_hier:0/Murray, S", BooleanQuery.class ); assertQueryEquals(req("defType", "aqp", "q", "=(stephen murray) author_facet_hier:\"0/Murray, S\"", "qf", "title abstract", "aqp.unfielded.tokens.strategy", "multiply", "aqp.unfielded.tokens.new.type", "simple", "aqp.unfielded.tokens.function.name", "edismax_combined_aqp" ), "+(+(abstract:stephen | title:stephen) +(abstract:murray | title:murray)) +author_facet_hier:0/Murray, S", BooleanQuery.class ); // virtual fields (their definition is in the solrconfig.xml) assertQueryEquals(req("defType", "aqp", "q", "full:foo"), "ack:foo (abstract:foo)^2.0 (title:foo)^2.0 body:foo keyword:foo", BooleanQuery.class ); assertQueryEquals(req("defType", "aqp", "q", "full:\"foo phrase\""), "ack:\"foo phrase\" (abstract:\"foo phrase\")^2.0 (title:\"foo phrase\")^2.0 body:\"foo phrase\" keyword:\"foo phrase\"", BooleanQuery.class ); assertQueryEquals(req("defType", "aqp", "q", "abs:foo"), "abstract:foo title:foo keyword:foo", BooleanQuery.class ); // unbalanced brackets for functions assertQueryEquals(req("defType", "aqp", "q", "topn(201, ((\"foo bar\") AND database:astronomy), date asc)"), "SecondOrderQuery(+all:\"foo bar\" +database:astronomy, collector=SecondOrderCollectorTopN(201, info=date asc))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "topn(201, ((\"foo bar\") AND database:astronomy), date asc )"), "SecondOrderQuery(+all:\"foo bar\" +database:astronomy, collector=SecondOrderCollectorTopN(201, info=date asc))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "topn(201,( ((\"foo bar\") AND database:astronomy)),date asc)"), "SecondOrderQuery(+all:\"foo bar\" +database:astronomy, collector=SecondOrderCollectorTopN(201, info=date asc))", SecondOrderQuery.class); // added ability to interactively tweak queries assertQueryEquals(req("defType", "aqp", "q", "tweak(collector_final_value=ARITHM_MEAN, citations(author:foo))"), "SecondOrderQuery(author:foo, author:foo,*, collector=SecondOrderCollectorCitedBy(cache:citations-cache))", SecondOrderQuery.class); // # 389 // make sure the functional parsing is handling things well assertQueryEquals(req("defType", "aqp", "q", "topn(200, ((title:foo OR topn(10, title:bar OR title:baz))))"), "SecondOrderQuery(title:foo SecondOrderQuery(title:bar title:baz, collector=SecondOrderCollectorTopN(10)), collector=SecondOrderCollectorTopN(200))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "topn(200, ((title:foo AND topn(10, title:bar OR title:baz))))"), "SecondOrderQuery(+title:foo +SecondOrderQuery(title:bar title:baz, collector=SecondOrderCollectorTopN(10)), collector=SecondOrderCollectorTopN(200))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "topn(200, title:foo, date desc)"), "SecondOrderQuery(title:foo, collector=SecondOrderCollectorTopN(200, info=date desc))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "topn(200, (title:foo), date desc)"), "SecondOrderQuery(title:foo, collector=SecondOrderCollectorTopN(200, info=date desc))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "topn(200, \"foo bar\", \"date desc\")"), "SecondOrderQuery(all:\"foo bar\", collector=SecondOrderCollectorTopN(200, info=date desc))", SecondOrderQuery.class); // trendy() - what people read, it reads data from index assertU(addDocs("author", "muller", "reader", "bibcode1", "reader", "bibcode2")); assertU(addDocs("author", "muller", "reader", "bibcode2", "reader", "bibcode4")); assertU(addDocs("author", "muller", "reader", "bibcode5", "reader", "bibcode2")); assertU(commit()); assertQueryEquals(req("defType", "aqp", "q", "trending(author:muller)"), "(like:bibcode1 bibcode2 bibcode2 bibcode4 bibcode5 bibcode2)^2.0", BoostQuery.class); // pos() operator assertQueryEquals(req("defType", "aqp", "q", "pos(author:\"Accomazzi, A\", 1, 100)"), "spanPosRange(spanOr([author:accomazzi, a, SpanMultiTermQueryWrapper(author:accomazzi, a*), author:accomazzi,]), 0, 100)", SpanPositionRangeQuery.class); // notice the use of modifier '=' (if it is lowercased, it means _nosyn analyzer // was used) assertQueryEquals(req("defType", "aqp", "q", "pos(=author:\"Accomazzi, A\", 1)"), "spanPosRange(author:accomazzi, a, 0, 1)", SpanPositionRangeQuery.class); assertQueryEquals(req("defType", "aqp", "q", "pos(+author:\"Accomazzi, A\", 1, 1)"), "spanPosRange(spanOr([author:accomazzi, a, SpanMultiTermQueryWrapper(author:accomazzi, a*), author:accomazzi,]), 0, 1)", SpanPositionRangeQuery.class); assertQueryParseException(req("defType", "aqp", "q", "pos(author:\"Accomazzi, A\", 1, -1)")); assertQueryParseException(req("defType", "aqp", "q", "pos(author:\"Accomazzi, A\", 1, 1, 1)")); assertQueryParseException(req("defType", "aqp", "q", "pos(author:\"Accomazzi, A\")")); assertQueryParseException(req("defType", "aqp", "q", "^two$")); assertQueryParseException(req("defType", "aqp", "q", "two$")); assertQueryParseException(req("defType", "aqp", "q", "\"two phrase$\"")); // old positional search // TODO: check for the generated warnings assertQueryEquals(req("defType", "aqp", "q", "^two"), "spanPosRange(spanOr([author:two,, SpanMultiTermQueryWrapper(author:two,*)]), 0, 1)", SpanPositionRangeQuery.class); assertQueryEquals(req("defType", "aqp", "q", "one ^two, j k"), "+all:one +spanPosRange(spanOr([author:two, j k, SpanMultiTermQueryWrapper(author:two, j k*), SpanMultiTermQueryWrapper(author:/two, j[^\\s]+ k.*/), author:two, j, author:two,]), 0, 1)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "one \"^phrase, author\"", "qf", "title author"), "+(((author:one, author:one,*)) | title:one) +spanPosRange(spanOr([author:phrase, author, SpanMultiTermQueryWrapper(author:phrase, author *), author:phrase, a, SpanMultiTermQueryWrapper(author:phrase, a *), author:phrase,]), 0, 1)", BooleanQuery.class); // author expansion can generate regexes, so we should deal with them (actually we ignore them) assertQueryEquals(req("defType", "aqp", "q", "pos(author:\"Accomazzi, A. K. B.\", 1)"), "spanPosRange(spanOr([author:accomazzi, a k b, SpanMultiTermQueryWrapper(author:accomazzi, a k b*), SpanMultiTermQueryWrapper(author:/accomazzi, a[^\\s]+ k[^\\s]+ b.*/), author:accomazzi, a, author:accomazzi,]), 0, 1)", SpanPositionRangeQuery.class); //#322 - trailing comma assertQueryEquals(req("defType", "aqp", "q", "author:\"^roberts\", author:\"ables\""), "+spanPosRange(spanOr([author:roberts,, SpanMultiTermQueryWrapper(author:roberts,*)]), 0, 1) +(author:ables, author:ables,*)", BooleanQuery.class); /* TODO: i don't yet have the implementations for these assertQueryEquals("funcA(funcB(funcC(value, \"phrase value\", nestedFunc(0, 2))))", null, ""); assertQueryEquals("simbad(20 54 05.689 +37 01 17.38)", null, ""); assertQueryEquals("simbad(10:12:45.3-45:17:50)", null, ""); assertQueryEquals("simbad(15h17m-11d10m)", null, ""); assertQueryEquals("simbad(15h17+89d15)", null, ""); assertQueryEquals("simbad(275d11m15.6954s+17d59m59.876s)", null, ""); assertQueryEquals("simbad(12.34567h-17.87654d)", null, ""); assertQueryEquals("simbad(350.123456d-17.33333d <=> 350.123456-17.33333)", null, ""); */ //topn sorted - added 15Aug2013 assertQueryEquals(req("defType", "aqp", "q", "topn(5, *:*, date desc)"), "SecondOrderQuery(*:*, collector=SecondOrderCollectorTopN(5, info=date desc))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "topn(5, author:civano, \"date desc\")"), "SecondOrderQuery(author:civano, author:civano,*, collector=SecondOrderCollectorTopN(5, info=date desc))", SecondOrderQuery.class); // topN - added Aug2013 assertQueryEquals(req("defType", "aqp", "q", "topn(5, *:*)"), "SecondOrderQuery(*:*, collector=SecondOrderCollectorTopN(5))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "topn(5, (foo bar))"), "SecondOrderQuery(+all:foo +all:bar, collector=SecondOrderCollectorTopN(5))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "topn(5, edismax(dog OR cat))", "qf", "title^1 abstract^0.5"), "SecondOrderQuery(((abstract:dog)^0.5 | title:dog) ((abstract:cat)^0.5 | title:cat), collector=SecondOrderCollectorTopN(5))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "topn(5, author:accomazzi)"), "SecondOrderQuery(author:accomazzi, author:accomazzi,*, collector=SecondOrderCollectorTopN(5))", SecondOrderQuery.class); /* * It is different if Aqp handles the boolean operations or if * edismax() does it. * * Aqp has more control, see: https://issues.apache.org/jira/browse/SOLR-4141 */ assertQueryEquals(req("defType", "aqp", "q", "edismax(dog OR cat)", "qf", "title^1 abstract^0.5"), //edismax "((abstract:dog)^0.5 | title:dog) ((abstract:cat)^0.5 | title:cat)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "dog OR cat", "qf", "title^1 abstract^0.5"), //aqp "((abstract:dog)^0.5 | title:dog) ((abstract:cat)^0.5 | title:cat)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "edismax(dog AND cat)", "qf", "title^1 abstract^0.5"), //edismax "+((abstract:dog)^0.5 | title:dog) +((abstract:cat)^0.5 | title:cat)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "dog AND cat", "qf", "title^1 abstract^0.5"), //aqp "+((abstract:dog)^0.5 | title:dog) +((abstract:cat)^0.5 | title:cat)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "edismax(dog OR cat)", "qf", "title^1 abstract^0.5"), //edismax "((abstract:dog)^0.5 | title:dog) ((abstract:cat)^0.5 | title:cat)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "dog OR cat", "qf", "title^1 abstract^0.5"), //aqp "((abstract:dog)^0.5 | title:dog) ((abstract:cat)^0.5 | title:cat)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "edismax(dog cat)", "qf", "title^1 abstract^0.5"), //edismax "+((abstract:dog)^0.5 | title:dog) +((abstract:cat)^0.5 | title:cat)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "dog cat", "qf", "title^1 abstract^0.5", "q.op", "OR"), //aqp "((abstract:dog)^0.5 | title:dog) ((abstract:cat)^0.5 | title:cat)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "dog cat", "qf", "title^1 abstract^0.5"), //aqp "+((abstract:dog)^0.5 | title:dog) +((abstract:cat)^0.5 | title:cat)", BooleanQuery.class); // make sure the *:* query is not parsed by edismax assertQueryEquals(req("defType", "aqp", "q", "*", "qf", "author^2.3 title abstract^0.4"), "*:*", MatchAllDocsQuery.class); assertQueryEquals(req("defType", "aqp", "q", "*:*", "qf", "author^2.3 title abstract^0.4"), "*:*", MatchAllDocsQuery.class); /* * raw() function operator */ // TODO: #234 // need to add a processor which puts these local values into a request object // {!raw f=myfield}Foo Bar creates TermQuery(Term("myfield","Foo Bar")) // <astLOCAL_PARAMS value="{!f=myfield}" start="4" end="15" name="LOCAL_PARAMS" type="27" /> // assertQueryEquals(req("defType", "aqp", "f", "myfield", "q", "raw({!f=myfield}Foo Bar)"), "myfield:Foo Bar", TermQuery.class); // assertQueryEquals(req("defType", "aqp", "f", "myfield", "q", "raw({!f=x}\"Foo Bar\")"), "x:\"Foo Bar\"", TermQuery.class); assertQueryParseException(req("defType", "aqp", "f", "myfield", "q", "raw(Foo Bar)")); // if we use the solr analyzer to parse the query, all is configured to remove stopwords assertQueryEquals(req("defType", "aqp", "q", "edismax(dog OR cat) OR title:bat all:but"), "((all:dog) (all:cat)) title:bat", BooleanQuery.class); // but pub is normalized_string with a different analyzer and should retain 'but' assertQueryEquals(req("defType", "aqp", "q", "edismax(dog OR cat) OR title:bat OR pub:but"), "((all:dog) (all:cat)) title:bat pub:but", BooleanQuery.class); /** * new function queries, the 2nd order citation operators */ // references() assertQueryEquals(req("defType", "aqp", "q", "references(author:foo)"), "SecondOrderQuery(author:foo, author:foo,*, collector=SecondOrderCollectorCitesRAM(cache:citations-cache))", SecondOrderQuery.class); // various searches assertQueryEquals(req("defType", "aqp", "q", "all:x OR all:z references(author:foo OR title:body)"), "+(all:x all:z) +SecondOrderQuery((author:foo, author:foo,*) title:body, collector=SecondOrderCollectorCitesRAM(cache:citations-cache))", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "citations((title:(lectures physics) and author:Feynman))"), "SecondOrderQuery(+(+title:lectures +title:physics) +(author:feynman, author:feynman,*), collector=SecondOrderCollectorCitedBy(cache:citations-cache))", SecondOrderQuery.class); // citations() assertQueryEquals(req("defType", "aqp", "q", "citations(author:foo)"), "SecondOrderQuery(author:foo, author:foo,*, collector=SecondOrderCollectorCitedBy(cache:citations-cache))", SecondOrderQuery.class); // useful() - ads classic implementation assertQueryEquals(req("defType", "aqp", "q", "useful(author:foo)"), "SecondOrderQuery(SecondOrderQuery(SecondOrderQuery(author:foo, author:foo,*, collector=SecondOrderCollectorAdsClassicScoringFormula(cache=citations-cache, boost=float[] cite_read_boost, lucene=0.5, adsPart=0.5)), collector=SecondOrderCollectorTopN(200)), collector=SecondOrderCollectorCitesRAM(cache:citations-cache))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "all:(x OR z) useful(author:foo OR title:body)"), "+(all:x all:z) +SecondOrderQuery(SecondOrderQuery(SecondOrderQuery((author:foo, author:foo,*) title:body, collector=SecondOrderCollectorAdsClassicScoringFormula(cache=citations-cache, boost=float[] cite_read_boost, lucene=0.5, adsPart=0.5)), collector=SecondOrderCollectorTopN(200)), collector=SecondOrderCollectorCitesRAM(cache:citations-cache))", BooleanQuery.class); // useful2() - original implementation assertQueryEquals(req("defType", "aqp", "q", "useful2(author:foo)"), "SecondOrderQuery(author:foo, author:foo,*, collector=SecondOrderCollectorOperatorExpertsCiting(cache=citations-cache, boost=float[] cite_read_boost))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "all:(x OR z) useful2(author:foo OR title:body)"), "+(all:x all:z) +SecondOrderQuery((author:foo, author:foo,*) title:body, collector=SecondOrderCollectorOperatorExpertsCiting(cache=citations-cache, boost=float[] cite_read_boost))", BooleanQuery.class); // reviews() - ADS classic impl assertQueryEquals(req("defType", "aqp", "q", "reviews(author:foo)"), "SecondOrderQuery(SecondOrderQuery(SecondOrderQuery(author:foo, author:foo,*, collector=SecondOrderCollectorAdsClassicScoringFormula(cache=citations-cache, boost=float[] cite_read_boost, lucene=0.5, adsPart=0.5)), collector=SecondOrderCollectorTopN(200)), collector=SecondOrderCollectorCitedBy(cache:citations-cache))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "all:(x OR z) reviews(author:foo OR title:body)"), "+(all:x all:z) +SecondOrderQuery(SecondOrderQuery(SecondOrderQuery((author:foo, author:foo,*) title:body, collector=SecondOrderCollectorAdsClassicScoringFormula(cache=citations-cache, boost=float[] cite_read_boost, lucene=0.5, adsPart=0.5)), collector=SecondOrderCollectorTopN(200)), collector=SecondOrderCollectorCitedBy(cache:citations-cache))", BooleanQuery.class); // reviews2() - original impl assertQueryEquals(req("defType", "aqp", "q", "reviews2(author:foo)"), "SecondOrderQuery(author:foo, author:foo,*, collector=SecondOrderCollectorCitingTheMostCited(cache=citations-cache, boost=float[] cite_read_boost))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "all:(x OR z) reviews2(author:foo OR title:body)"), "+(all:x all:z) +SecondOrderQuery((author:foo, author:foo,*) title:body, collector=SecondOrderCollectorCitingTheMostCited(cache=citations-cache, boost=float[] cite_read_boost))", BooleanQuery.class); // classic_relevance() - cr() assertQueryEquals(req("defType", "aqp", "q", "classic_relevance(title:foo)"), "SecondOrderQuery(title:foo, collector=SecondOrderCollectorAdsClassicScoringFormula(cache=citations-cache, boost=float[] cite_read_boost, lucene=0.5, adsPart=0.5))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "cr(title:foo)"), "SecondOrderQuery(title:foo, collector=SecondOrderCollectorAdsClassicScoringFormula(cache=citations-cache, boost=float[] cite_read_boost, lucene=0.5, adsPart=0.5))", SecondOrderQuery.class); assertQueryEquals(req("defType", "aqp", "q", "cr(title:foo, 0.4)"), "SecondOrderQuery(title:foo, collector=SecondOrderCollectorAdsClassicScoringFormula(cache=citations-cache, boost=float[] cite_read_boost, lucene=0.4, adsPart=0.6))", SecondOrderQuery.class); } public void test() throws Exception { // search for all docs with a field assertQueryEquals(req("defType", "aqp", "q", "title:*"), "title:*", PrefixQuery.class); assertQueryEquals(req("defType", "aqp", "q", "title:?"), "title:?", WildcardQuery.class); // fun test of a crazy span query assertQueryEquals(req("defType", "aqp", "q", "(consult* or advis*) NEAR4 (fee or retainer or salary or bonus)"), "spanNear([spanOr([SpanMultiTermQueryWrapper(all:consult*), SpanMultiTermQueryWrapper(all:advis*)]), spanOr([all:fee, all:retainer, all:salary, all:bonus])], 4, true)", SpanNearQuery.class); assertQueryEquals(req("defType", "aqp", "q", "(consult* and advis*) NEAR4 (fee or retainer or salary or bonus)"), "spanNear([spanNear([SpanMultiTermQueryWrapper(all:consult*), SpanMultiTermQueryWrapper(all:advis*)], 4, true), spanOr([all:fee, all:retainer, all:salary, all:bonus])], 4, true)", SpanNearQuery.class); // #375 assertQueryEquals(req("defType", "aqp", "q", "author:\"Civano, F\" -author_facet_hier:(\"Civano, Fa\" OR \"Civano, Da\")"), "+(author:civano, f author:civano, f* author:civano,) -(author_facet_hier:Civano, Fa author_facet_hier:Civano, Da)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "author:\"Civano, F\" +author_facet_hier:(\"Civano, Fa\" OR \"Civano, Da\")"), "+(author:civano, f author:civano, f* author:civano,) +(author_facet_hier:Civano, Fa author_facet_hier:Civano, Da)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "title:xxx -title:(foo OR bar)"), "+title:xxx -(title:foo title:bar)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "title:xxx +title:(foo OR bar)"), "+title:xxx +(title:foo title:bar)", BooleanQuery.class); assertQueryEquals(req("defType", "aqp", "q", "title:xxx +title:(-foo OR bar)"), "+title:xxx +(-title:foo title:bar)", BooleanQuery.class); // TO FINISH, it will cause build failure // assertQueryEquals(req("defType", "aqp", "q", "title:xxx -title:(foo bar)"), // "+title:xxx -title:foo -title:bar", // BooleanQuery.class); // assertQueryEquals(req("defType", "aqp", "q", "title:xxx +title:(foo bar)"), // "+title:xxx +title:foo +title:bar", // BooleanQuery.class); // assertQueryEquals(req("defType", "aqp", "q", "title:xxx +title:(-foo bar)"), // "+title:xxx -title:foo +title:bar", // BooleanQuery.class); // regex assertQueryEquals(req("defType", "aqp", "q", "author:/^Kurtz,\\WM./"), "author:/^Kurtz,\\WM./", RegexpQuery.class); assertQueryEquals(req("defType", "aqp", "q", "author:/^kurtz,\\Wm./"), "author:/^kurtz,\\Wm./", RegexpQuery.class); // this is treated as regex, but because it is unfielded search // it ends up in the unfielded_search field. Feature or a bug? assertQueryEquals(req("defType", "aqp", "q", "/^Kurtz, M./"), "unfielded_search:/^Kurtz, M./", RegexpQuery.class); // regex ignores unfielded queries even when qf is set assertQueryEquals(req("defType", "aqp", "q", "/^Kurtz, M./", "qf", "title^0.5 author^0.8"), "unfielded_search:/^Kurtz, M./", RegexpQuery.class); assertQueryEquals(req("defType", "aqp", "q", "author:/kurtz, m.*/"), "author:/kurtz, m.*/", RegexpQuery.class); assertQueryEquals(req("defType", "aqp", "q", "author:(/kurtz, m.*/)"), "author:/kurtz, m.*/", RegexpQuery.class); assertQueryEquals(req("defType", "aqp", "q", "abstract:/nas\\S+/"), "abstract:/nas\\S+/", RegexpQuery.class); // NEAR queries are little bit too crazy and will need taming // and *more* unittest examples assertQueryEquals(req("defType", "aqp", "q", "author:(accomazzi NEAR5 kurtz)"), "spanNear([spanOr([author:accomazzi,, SpanMultiTermQueryWrapper(author:accomazzi,*)]), " + "spanOr([author:kurtz,, SpanMultiTermQueryWrapper(author:kurtz,*)])], 5, true)", SpanNearQuery.class); assertQueryEquals(req("defType", "aqp", "q", "\"NASA grant\"~3 NEAR N*"), "spanNear([spanNear([all:acr::nasa, all:grant], 3, true), SpanMultiTermQueryWrapper(all:n*)], 5, true)", SpanNearQuery.class); assertQueryEquals(req("defType", "aqp", "q", "\"NASA grant\"^0.9 NEAR N*"), "spanNear([(spanNear([all:acr::nasa, all:grant], 1, true))^0.9, SpanMultiTermQueryWrapper(all:n*)], 5, true)", SpanNearQuery.class); assertQueryEquals(req("defType", "aqp", "q", "\"NASA grant\"~3^0.9 NEAR N*"), "spanNear([(spanNear([all:acr::nasa, all:grant], 3, true))^0.9, SpanMultiTermQueryWrapper(all:n*)], 5, true)", SpanNearQuery.class); // identifiers assertQueryEquals(req("defType", "aqp", "q", "identifier:2011A&A...536A..89G"), "identifier:2011a&a...536a..89g", TermQuery.class); assertQueryEquals(req("defType", "aqp", "q", "identifier:2011A&A" + "\u2026" + "536A..89G"), "identifier:2011a&a...536a..89g", TermQuery.class); /* * translation of the fields (on the fly) */ // field_map is set to translate arxiv->identifier assertQueryEquals(req("defType", "aqp", "q", "arxiv:1002.1524"), "identifier:1002.1524", TermQuery.class); assertQueryParseException(req("defType", "aqp", "q", "arxivvvv:1002.1524")); } public static junit.framework.Test suite() { return new junit.framework.JUnit4TestAdapter(TestAqpAdsabsSolrSearch.class); } }