package org.adsabs; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import monty.solr.util.MontySolrQueryTestCase; import monty.solr.util.MontySolrSetup; import org.apache.lucene.search.TermRangeQuery; import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.TrieIntField; import org.apache.solr.servlet.DirectSolrConnection; import org.junit.BeforeClass; /** * This test verifies all indexes are in place and the search against * them works. This is the main test for the whole ADS search. * * Exercesis both indexing and searching, as configured * for the ADS. The test does not need a working solr installation, * it is using both solr example config and the specific ads config. * * KEEP IT FREE FROM DEPENDENCIES!!! * **/ public class TestAdsAllFields extends MontySolrQueryTestCase { @BeforeClass public static void beforeClass() throws Exception { makeResourcesVisible(Thread.currentThread().getContextClassLoader(), new String[] { MontySolrSetup.getMontySolrHome() + "/contrib/examples/adsabs/server/solr/collection1", MontySolrSetup.getSolrHome() + "/example/solr/collection1" }); System.setProperty("solr.allow.unsafe.resourceloading", "true"); schemaString = MontySolrSetup.getMontySolrHome() + "/contrib/examples/adsabs/server/solr/collection1/schema.xml"; configString = MontySolrSetup.getMontySolrHome() + "/contrib/examples/adsabs/server/solr/collection1/solrconfig.xml"; initCore(configString, schemaString, MontySolrSetup.getSolrHome() + "/example/solr"); } public void test() throws Exception { DirectSolrConnection direct = getDirectServer(); EmbeddedSolrServer embedded = getEmbeddedServer(); // checking the schema IndexSchema schema = h.getCore().getLatestSchema(); SchemaField field = schema.getField("id"); assertTrue(field.indexed() == true && field.stored() == true && field.isRequired() == true && field.multiValued() == false); field = schema.getUniqueKeyField(); field.getName().equals("id"); field = schema.getField("bibcode"); assertTrue(field.indexed() == true && field.stored() == true && field.isRequired() == true && field.multiValued() == false); field.checkSortability(); field = schema.getField("recid"); assertTrue(field.indexed() == true && field.stored() == true && field.isRequired() == false && field.multiValued() == false); field.checkSortability(); assertTrue(field.getType().getClass().isAssignableFrom(TrieIntField.class)); // check field ID is copied to field RECID // List<CopyField> copyFields = schema.getCopyFieldsList("id"); // assertTrue(copyFields.size() == 1); // CopyField cField = copyFields.get(0); // cField.getSource().getName().equals("id"); // cField.getDestination().getName().equals(F.RECID); // field = cField.getDestination(); // check authors are correctly indexed/searched assertU(adoc("id", "0", "bibcode", "b1", "author", "Dall'oglio, Antonella")); assertU(adoc("id", "1", "bibcode", "b2", "author", "VAN DER KAMP, A; Von Accomazzi, Alberto, III, Dr.;Kao, P'ing-Tzu")); assertU(adoc("id", "2", "bibcode", "b3", "author", "'t Hooft, Furst Middle")); assertU(adoc("id", "3", "bibcode", "b4", "author", "O, Paul S.; Last, Furst Middle More")); assertU(adoc("id", "4", "bibcode", "b5", "author", "O, Paul S.", "author", "Last, Furst Middle More")); assertU(adoc("id", "5", "bibcode", "b6", "author", "van Tiggelen, Bart A., Jr.")); assertU(adoc("id", "6", "bibcode", "b7", "author", "Łuczak, Andrzej;John Doe Jr;Mac Low, Furst Middle;'t Hooft, Furst Middle")); assertU(adoc("id", "7", "bibcode", "b8", "author", "Łuczak, Andrzej", "author", "John Doe Jr", "author", "Mac Low, Furst Middle", "author", "'t Hooft, Furst Middle")); // this one JSON document shows our fields and their values (what is sent to /solr/update) String json = "{\"add\": {" + "\"doc\": {" + "\"id\": 100" + // not needed; it will be taken from 'id' //", \"recid\": 100" + ", \"bibcode\": \"2014JNuM..455...10B\"" + ", \"alternate_bibcode\": [\"2014JNuM..455...1a1\", \"2014JNuM..455...1a2\"]" + ", \"doi\": \"doi:ŽŠČŘĎŤŇ:123456789\"" + ", \"identifier\": [\"arxiv:1234.5678\", \"ARXIV:hep-ph/1234\"]" + /* * Bibstem is derived from bibcode, it is either the bibcode[4:9] OR * bibcode[4:13] when the volume information is NOT present * * So this bibcode: 2012yCat..35a09143M * has bibstem: yCat, yCat..35a * * But this bicode: 2012yCat..35009143M * has bibstem: yCat * * Bibstem is not case sensitive (at least for now, so the above values * are lowercased) * */ ", \"bibstem\": [\"JNuM\", \"JNuM..455\"]" + // order and length must be the same for author,aff, email // missing value must be indicated by '-' ", \"author\": [\"t' Hooft, van X\", \"Anders, John Michael\", \"Einstein, A\"]" + // in the future, this can contain normalized author names ", \"author_norm\": [\"t' Hooft, van X\", \"Anders, John Michael\", \"Einstein, A\"]" + ", \"aff\": [\"-\", \"NASA Kavli space center, Cambridge, MA 02138, USA\", \"Einstein institute, Zurych, Switzerland\"]" + ", \"email\": [\"-\", \"anders@email.com\", \"-\"]" + // author_facet_hier must be generated (solr doesn't modify it) ", \"author_facet_hier\": [\"0/T Hooft, V\", \"1/T Hooft, V/T Hooft, Van X\", \"0/Anders, J M\", \"1/Anders, J M/Anders, John Michael\", \"0/Einstein, A\"]" + ", \"author_count\": 5" + // must be: "yyyy-MM-dd (metadata often is just: yyyy-MM|yyyy) ", \"pubdate\": \"2013-08-05\"" + ", \"year\": \"2013\"" + // it is solr format for the pubdate, must be in the right format // we need to add 30 minutes to every day; this allows us to search // for ranges effectively; thus: // 2013-08-5 -> 2013-08-05T00:30:00Z // 2013-08 -> 2013-08-01T00:30:00Z // 2013 -> 2013-01-01T00:30:00Z ", \"date\": \"2013-08-05T00:30:00Z\"" + // Field that contains both grant ids and grant agencies. ", \"grant\": [\"NASA\", \"123456-78\", \"NSF-AST\", \"0618398\"]" + // grant_agency/grant_id ", \"grant_facet_hier\": [\"0/NASA\", \"1/NASA/123456-78\"]" + ", \"read_count\": 50" + ", \"cite_read_boost\": 0.52" + ", \"classic_factor\": 5002" + ", \"citation_count\": 10" + ", \"simbid\": [5, 3000001]" + ", \"reader\": [\"abaesrwersdlfkjsd\", \"asfasdflkjsdfsldj\"]" + ", \"citation\": [\"2014JNuM..455...10C\", \"2014JNuM..455...10D\"]" + ", \"reference\": [\"2014JNuM..455...10R\", \"2014JNuM..455...10T\"]" + // we actually index only the first token '2056' ", \"page\": [\"2056-2078\", \"55\"]" + ", \"eid\": \"00001\"" + ", \"volume\": \"l24\"" + ", \"issue\": \"24i\"" + // this list should contain normalized values ", \"property\": [\"Catalog\", \"Nonarticle\"]" + ", \"bibgroup\": [\"Cfa\"]" + ", \"bibgroup_facet\": [\"Cfa\"]" + ", \"database\": [\"ASTRONOMY\", \"PHYSICS\"]" + ", \"comment\": [\"comment1 commentFoo\", \"comment2\"]" + ", \"pubnote\": [\"pubnote1 pubnoteFoo\", \"pubnote2\"]" + ", \"caption\": [\"caption1 captionFoo\", \"caption2\"]" + ", \"body\": \"Some fulltext hashimoto\"" + ", \"title\": \"This is of the title\"" + ", \"alternate_title\": \"This is of the alternate\"" + ", \"abstract\": \"all no-sky survey q'i quotient\"" + ", \"keyword\": [\"Classical statistical mechanics\", \"foo bar\"]" + ", \"keyword_norm\": [\"angular momentum\", \"89.20.Hh\"]" + ", \"keyword_schema\": [\"ADS\", \"PACS Codes\"]" + ", \"keyword_facet\": [\"angular momentum kw\"]" + // ["{whatever: here there MAST}", // {"foo": ["bar", "baz"], "one": {"two": "three"}} ", \"links_data\": [\"{whatever: here there MAST}\"," + "\"{\\\"foo\\\": [\\\"bar\\\", \\\"baz\\\"], \\\"one\\\": {\\\"two\\\": \\\"three\\\"}}\"]" + ", \"ids_data\": [\"{whatever: here there MAST}\"]" + ", \"simbid\": [9000000, 1]" + ", \"simbtype\": [\"Galaxy\", \"HII Region\"]" + ", \"orcid_pub\": [\"1111-2222-3333-4444\", \"-\", \"0000-0002-4110-3511\"]" + ", \"orcid_user\": [\"-\", \"-\", \"0000-0002-4110-3511\"]" + ", \"orcid_other\": [\"1111-2222-3333-4444\", \"1111-2222-3333-5555\", \"-\"]" + ", \"simbad_object_facet_hier\": [\"0/HII Region\", \"1/HII Region/9000000\"]" + ", \"doctype\": \"article\"" + ", \"doctype_facet_hier\": [\"0/Article\", \"1/Article/Book chapter\"]" + ", \"update_timestamp\": \"2010-03-04T22:01:32.809Z\"" + "}" + "}}"; updateJ(json, null); assertU(adoc("id", "101", "bibcode", "2014JNuM..455...10C", "title", "citation 1", "read_count", "0", "cite_read_boost", "0.4649", "classic_factor", "5000", "citation", "2014JNuM..455...10B", "reader", "0xeeeeeeee", "reader", "1xeeeeeeee", "reader", "2xeeeeeeee")); assertU(adoc("id", "102", "bibcode", "2014JNuM..455...10D", "title", "citation 2", "read_count", "1", "cite_read_boost", "0.373", "classic_factor", "1500", "citation", "2014JNuM..455...10B")); assertU(adoc("id", "103", "bibcode", "2014JNuM..455...10R", "title", "reference 1", "read_count", "19", "cite_read_boost", "0.2416", "classic_factor", "0", "reader", "4xeeeeeeee", "reader", "1xeeeeeeee")); assertU(adoc("id", "104", "bibcode", "2014JNuM..455...10T", "title", "reference 2", "read_count", "15", "cite_read_boost", "0.4104")); assertU(commit()); assertU(adoc("id", "20", "bibcode", "b20", "title", "datetest", "pubdate", "1976-01-01", "date", "1976-01-01T00:30:00Z")); assertU(adoc("id", "21", "bibcode", "b21", "title", "datetest", "pubdate", "1976-01-02", "date", "1976-01-02T00:30:00Z")); assertU(adoc("id", "22", "bibcode", "b22", "title", "datetest", "pubdate", "1976-02-01", "date", "1976-02-01T00:30:00Z")); assertU(adoc("id", "23", "bibcode", "b23", "title", "datetest", "pubdate", "1976-01-02", "date", "1976-01-02T00:30:00Z")); assertU(adoc("id", "24", "bibcode", "b24", "title", "datetest", "pubdate", "1976-30-12", "date", "1976-12-30T00:30:00Z")); // year 76 had only 30 days in Dec assertU(adoc("id", "25", "bibcode", "b25", "title", "datetest", "pubdate", "1977-01-01", "date", "1977-01-01T00:30:00Z")); assertU(adoc("id", "50", "bibcode", "b50", "author", "Bond, E J")); assertU(adoc("id", "51", "bibcode", "b51", "author", "Bond, Edwin James")); assertU(adoc("id", "52", "bibcode", "b52", "author", "Bond, E James")); assertU(adoc("id", "53", "bibcode", "b53", "author", "Bond, Edwin J")); assertU(adoc("id", "54", "bibcode", "b54", "author", "Bond, EJames")); assertU(adoc("id", "55", "bibcode", "b55", "author", "Bond, E")); assertU(adoc("id", "56", "bibcode", "b56", "author", "Bond, J")); assertU(adoc("id", "57", "bibcode", "b57", "author", "Bond,")); assertU(commit("waitSearcher", "true")); assertQ(req("q", "*:*"), "//*[@numFound>='19']" ); assertQ(req("q", "id:100"), "//*[@numFound='1']" ); /* * id - str type, the unique id key, we do no processing */ assertQ(req("q", "id:100"), "//*[@numFound='1']"); assertQ(req("q", "id:0100"), "//*[@numFound='0']"); /* * recid - recid is a int field */ assertQ(req("q", "recid:100"), "//*[@numFound='1']"); assertQ(req("q", "recid:0100"), "//*[@numFound='1']"); assertQ(req("q", "recid:[99 TO 100]"), "//*[@numFound='1']"); assertQ(req("q", "recid:[99 TO *]"), "//*[@numFound>='1']"); /* * bibcodes */ assertQ(req("q", "bibcode:2014JNuM..455...10B"), "//*[@numFound='1']"); assertQ(req("q", "bibcode:2014Jnum..455...10b"), "//*[@numFound='1']"); assertQ(req("q", "bibcode:2014JNuM..*"), "//*[@numFound='5']"); assertQ(req("q", "bibcode:2014JnUm..*"), "//*[@numFound='5']"); assertQ(req("q", "bibcode:2014JNu?..455...10B"), "//*[@numFound='1']"); /* * alternate_bibcode */ assertQ(req("q", "alternate_bibcode:2014JNuM..455...1a2"), "//*[@numFound='1']"); assertQ(req("q", "identifier:2014JNuM..455...1a2"), "//*[@numFound='1']"); /* * bibstem */ assertQ(req("q", "bibstem:JNUM"), "//*[@numFound='1']"); assertQ(req("q", "bibstem:jnum"), "//*[@numFound='1']"); assertQ(req("q", "bibstem:jnum..455"), "//*[@numFound='1']"); assertQ(req("q", "bibstem:jnum..45*"), "//*[@numFound='1']"); assertQ(req("q", "bibstem:jnum..45?"), "//*[@numFound='1']"); //XXX: this has changed, the last dot gets removed when we try to guess regex query // need a better solution for this ambiguity yCat..* becomes 'yCat.*' assertQ(req("q", "bibstem:jnum..*"), "//*[@numFound='1']"); assertQ(req("q", "bibstem:jnum.*"), "//*[@numFound='1']"); assertQ(req("q", "bibstem:jnum*"), "//*[@numFound='1']"); /* * caption */ assertQ(req("q", "caption:caption1"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assertQ(req("q", "caption:captionfoo"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); /* * comment */ assertQ(req("q", "comment:comment1"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assertQ(req("q", "comment:commentfoo"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); /* * doi: * * According to the standard, doi can contain almost any utf-8 * char */ assertQ(req("q", "doi:\"doi:ŽŠČŘĎŤŇ:123456789\""), "//*[@numFound='1']"); assertQ(req("q", "doi:ŽŠČŘĎŤŇ\\:123456789"), "//*[@numFound='1']"); assertQ(req("q", "doi:\"doi:žščřďťň:123456789\""), "//*[@numFound='1']"); //assertQ(req("q", "doi:\"doi:žščŘĎŤŇ?123456789\""), "//*[@numFound='1']"); assertQ(req("q", "doi:\"doi:žščŘĎŤŇ\\?123456789\""), "//*[@numFound='0']"); /* * author * * here we really test only the import mechanism, the order of authors * and duplication. The parsing logic has its own unittest */ assertQ(req("q", "author:\"Einstein, A\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assertQ(req("q", "author:\"Einstein, A\" AND author:\"Anders\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assert h.query(req("q", "author:\"Einstein, A\"")) .contains("<arr name=\"author_norm\">" + "<str>t' Hooft, van X</str>" + "<str>Anders, John Michael</str>" + "<str>Einstein, A</str></arr>"); /* * author_count */ assertQ(req("q", "author_count:5"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assertQ(req("q", "author_count:[0 TO 6]"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); /* * pos() testing on the author search */ assertQ(req("q", "pos(author:\"Anders, John Michael\", 2)"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "pos(author:\"Anders, John Michael\", 1, 2)"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "pos(author:\"Einstein, A\", 1, 2)"), "//*[@numFound='0']" ); /* * author facets */ assertQ(req("q", "author_facet_hier:\"0/Anders, J M\""), "//*[@numFound='1']"); assertQ(req("q", "author_facet_hier:\"1/Anders, J M/Anders, John Michael\""), "//*[@numFound='1']"); assertQ(req("q", "author_facet_hier:\"1/Einstein, A\""), "//*[@numFound='0']"); /* * aff - must be the same order as authors */ assertQ(req("q", "aff:NASA"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "aff:NASA AND author:\"Anders\""), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "aff:SPACE"), "//*[@numFound='0']"); // be case sensitive with uppercased query terms assertQ(req("q", "aff:KAVLI"), "//*[@numFound='0']"); // same here assertQ(req("q", "aff:kavli"), // otherwise case-insensitive "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "aff:Kavli"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "aff:\"kavli space\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); //the order/gaps need to be preserved assert h.query(req("q", "recid:100")) .contains("<arr name=\"aff\">" + "<str>-</str>" + "<str>NASA Kavli space center, Cambridge, MA 02138, USA</str>" + "<str>Einstein institute, Zurych, Switzerland</str></arr>" ); assertQ(req("q", "pos(aff:kavli, 2) AND recid:100"), "//*[@numFound='1']" ); assertQ(req("q", "=aff:\"acr::nasa\" AND recid:100"), "//*[@numFound='1']" ); /* * email */ assertQ(req("q", "email:anders@email.com"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "pos(email:anders@email.com, 2)"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "pos(email:anders@email.com, 1)"), "//*[@numFound='0']" ); assertQ(req("q", "email:anders@*"), "//*[@numFound='1']"); // one has to use pos() to combine author and email assertQ(req("q", "email:anders@email.com AND author:\"Einstein, A\""), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "pos(email:anders@email.com, 2) AND pos(author:\"Anders\", 2)"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); // order/gaps are important assert h.query(req("q", "recid:100")) .contains("<arr name=\"email\">" + "<str>-</str>" + "<str>anders@email.com</str>" + "<str>-</str></arr>" ); /* * orcid, added 30/12/14; they must correspond to the author array * - updated 13/11/15 - orcid field is now a virtual one; and we have * orcid_pub,_user,_other */ assertQ(req("q", "orcid_pub:1111-2222-3333-4444"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "orcid_pub:1111*"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assert h.query(req("q", "recid:100")) .contains("<arr name=\"orcid_pub\">" + "<str>1111-2222-3333-4444</str>" + "<str>-</str>" + "<str>0000-0002-4110-3511</str></arr>" ); // this is only present in orcid_other assertQ(req("q", "orcid:1111-2222-3333-5555"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "orcid_other:1111-2222-3333-5555"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); /* * page */ assertQ(req("q", "page:2056"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assertQ(req("q", "page:2056-xxxxx"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assertQ(req("q", "page:2056 AND page:55"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); /* * eid */ assertQ(req("q", "eid:00001"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); /* * volume */ assertQ(req("q", "volume:l24"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assertQ(req("q", "volume:24"), "//*[@numFound='0']"); /* * issue */ assertQ(req("q", "issue:24i"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); /* * database & bibgroup */ assertQ(req("q", "database:astronomy"), "//*[@numFound='1']"); assertQ(req("q", "database:ASTRONOMY"), "//*[@numFound='1']"); assertQ(req("q", "database:ASTRONOM*"), "//*[@numFound='1']"); assertQ(req("q", "bibgroup:cfa"), "//*[@numFound='1']"); assertQ(req("q", "bibgroup:CFA"), "//*[@numFound='1']"); assertQ(req("q", "bibgroup:cf*"), "//*[@numFound='1']"); assertQ(req("q", "bibgroup:CF*"), "//*[@numFound='1']"); assertQ(req("q", "bibgroup:?FA"), "//*[@numFound='1']"); // facets are case sensitive and you must get the exact wording // TODO: shall we be consistent and turn *everything* to lowercase? assertQ(req("q", "bibgroup_facet:Cfa"), "//*[@numFound='1']"); assertQ(req("q", "bibgroup_facet:cfa"), "//*[@numFound='0']"); /* * property */ assertQ(req("q", "property:catalog AND property:nonarticle"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assertQ(req("q", "property:CATALOG AND property:NONARTICLE"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); /* * keywords */ assertQ(req("q", "keyword:\"classical statistical mechanics\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "keyword:\"foo bar\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "keyword:\"Classical Statistical Mechanics\""), // should be case-insensitive "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "keyword_norm:\"89.20.Hh\""), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "keyword_norm:\"89.20.Hh\" AND keyword_schema:\"PACS Codes\""), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "keyword_norm:classical"), "//*[@numFound='0']"); // should not contain keywords assertQ(req("q", "keyword:89.20.Hh"), "//*[@numFound='0']"); // should not contain keywords_norm /* * keyword_facet (in marc used to be 695__b) */ assertQ(req("q", "keyword_facet:\"angular momentum kw\""), "//*[@numFound='1']"); assertQ(req("q", "keyword_facet:\"angular momentum\""), "//*[@numFound='0']"); assertQ(req("q", "keyword_facet:angular"), "//*[@numFound='0']"); /* * identifier * * should be translated into the correct field (currently, the grammar * understands only arxiv: and doi: (and doi gets handled separately) * */ assertQ(req("q", "arxiv:1234.5678"), "//*[@numFound='1']"); assertQ(req("q", "arxiv:\"arXiv:1234.5678\""), "//*[@numFound='1']"); assertQ(req("q", "arXiv:1234.5678"), "//*[@numFound='1']"); assertQ(req("q", "identifier:1234.5678"), "//*[@numFound='1']"); assertQ(req("q", "arXiv:hep-ph/1234"), "//*[@numFound='1']"); assertQ(req("q", "arxiv:\"ARXIV:hep-ph/1234\""), "//*[@numFound='1']"); assertQ(req("q", "arxiv:hep-ph/1234"), "//*[@numFound='1']"); assertQ(req("q", "identifier:hep-ph/1234"), "//*[@numFound='1']"); assertQ(req("q", "identifier:2014JNuM..455...10B"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); /* * grants * */ assertQ(req("q", "grant:\"NSF-AST 0618398\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "grant:(NSF-AST 0618398)"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "grant:0618398"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "grant:NSF-AST"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); /* * grant_facet_hier */ assertQ(req("q", "grant_facet_hier:\"0/NASA\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "grant_facet_hier:1/NASA/123456-78"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "grant_facet_hier:NASA"), "//*[@numFound='0']" ); /* * title * * just basics here, the parsing tests are inside TestAdstypeFulltextParsing * */ assertQ(req("q", "title:\"this title\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "title:\"this is of the title\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); /* * alternate_title * * should be copied into main title field */ assertQ(req("q", "alternate_title:\"this alternate\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "alternate_title:\"this is of the alternate\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "title:\"this alternate\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); /* * abstract */ assertQ(req("q", "abstract:no-sky"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "abstract:nosky"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); // tokens with special characters inside must be searched as a phrase, otherwise it // becomes: abstract:q'i abstract:q abstract:i abstract:qi // but even as a phrase, it will search for: "q (i qi)" assertQ(req("q", "abstract:\"q\\'i\"", "fl", "recid,abstract,title"), "//*[@numFound='1']"); assertQ(req("q", "abstract:\"q'i\"", "fl", "recid,abstract,title"), "//*[@numFound='1']"); assertQ(req("q", "abstract:\"q\\\\'i\"", "fl", "recid,abstract,title"), "//*[@numFound='1']"); /* * pubnote */ assertQ(req("q", "pubnote:pubnotefoo"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "pubnote:pubnote2"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); /* * reference */ assertQ(req("q", "reference:2014JNuM..455...10R"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); /* * unfielded search * * test we get records without specifying the field (depends on the current * solrconfig.xml setup) * * author^2 title^1.4 abstract^1.3 keyword^1.4 keyword_norm^1.4 all full^0.1 */ String qf = "author^2 title^1.4 abstract^1.3 keyword^1.4 keyword_norm^1.4 all full^0.1"; // author assertQ(req("q", "einstein", "qf", qf), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); // title assertQ(req("q", "title", "qf", qf), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); // abstract assertQ(req("q", "\"q'i\"", "qf", qf), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); /* * body */ assertQ(req("q", "body:hashimoto"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); /* * citations()/references() queries (use special dummy records) */ // XXX:rca - to activate after fixing citation search /*assertQ(req("q", "recid:[101 TO 104]"), "//*[@numFound='4']"); assertQ(req("q", "citations(recid:100)"), "//*[@numFound='2']", "//doc/int[@name='recid'][.='101']", "//doc/int[@name='recid'][.='102']" ); assertQ(req("q", "references(recid:100)"), "//*[@numFound='2']", "//doc/int[@name='recid'][.='103']", "//doc/int[@name='recid'][.='104']" );*/ /* * read_count (int type) */ assertQ(req("q", "read_count:[0 TO 19]", "fl", "recid,bibcode,title,read_count"), "//doc/int[@name='recid'][.='101']", "//doc/int[@name='recid'][.='102']", "//doc/int[@name='recid'][.='103']", "//doc/int[@name='recid'][.='104']", "//*[@numFound='4']"); assertQ(req("q", "read_count:19"), "//doc/int[@name='recid'][.='103']", "//*[@numFound='1']"); assertQ(req("q", "read_count:15.0"), "//doc/int[@name='recid'][.='104']", "//*[@numFound='1']"); assertQ(req("q", "read_count:1.0"), "//doc/int[@name='recid'][.='102']", "//*[@numFound='1']"); assertQ(req("q", "read_count:0.0"), "//doc/int[@name='recid'][.='101']", "//*[@numFound='1']"); assertQ(req("q", "read_count:[0.0 TO *]"), "//doc/int[@name='recid'][.='101']", "//*[@numFound>='4']"); /* * cite_read_boost */ //dumpDoc(null, "recid", "read_count", "cite_read_boost"); assertQ(req("q", "cite_read_boost:[0.0 TO 1.0]"), "//doc/int[@name='recid'][.='100']", "//doc/int[@name='recid'][.='101']", "//doc/int[@name='recid'][.='102']", "//doc/int[@name='recid'][.='103']", "//doc/int[@name='recid'][.='104']", "//*[@numFound='5']"); assertQ(req("q", "cite_read_boost:0.4649"), "//doc/int[@name='recid'][.='101']", "//*[@numFound='1']"); assertQ(req("q", "cite_read_boost:0.373"), "//doc/int[@name='recid'][.='102']", "//*[@numFound='1']"); assertQ(req("q", "cite_read_boost:0.2416"), "//doc/int[@name='recid'][.='103']", "//*[@numFound='1']"); assertQ(req("q", "cite_read_boost:0.4104"), "//doc/int[@name='recid'][.='104']", "//*[@numFound='1']"); assertQ(req("q", "cite_read_boost:[0.1 TO 0.373]"), "//doc/int[@name='recid'][.='102']", "//doc/int[@name='recid'][.='103']", "//*[@numFound='2']"); assertQ(req("q", "cite_read_boost:[0.4103 TO 0.410399999999]"), "//doc/int[@name='recid'][.='104']", "//*[@numFound='1']"); assertQ(req("q", "cite_read_boost:[0.41039999 TO 0.4648999999]"), "//doc/int[@name='recid'][.='104']", "//doc/int[@name='recid'][.='101']", "//*[@numFound='2']"); assertQ(req("q", "cite_read_boost:[0.0 TO *]"), "//*[@numFound>='5']"); /* * classic_factor */ assertQ(req("q", "classic_factor:5000"), "//doc/int[@name='recid'][.='101']", "//*[@numFound='1']" ); assertQ(req("q", "classic_factor:1500"), "//doc/int[@name='recid'][.='102']", "//*[@numFound='1']" ); assertQ(req("q", "classic_factor:0"), "//doc/int[@name='recid'][.='103']", "//*[@numFound='1']" ); assertQ(req("q", "classic_factor:[0 TO 5001]", "indent", "true"), "//doc/int[@name='recid'][.='101']", "//doc/int[@name='recid'][.='102']", "//doc/int[@name='recid'][.='103']", "//*[@numFound='3']" ); assertQ(req("q", "classic_factor:[0 TO *]", "indent", "true"), "//*[@numFound>='3']" ); /* * simbid - simbad_object_ids */ //dumpDoc(null, "bibcode", "simbid"); assertQ(req("q", "simbid:5 AND simbid:3000001"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "simbid:[0 TO 9000001]"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "simbid:[0 TO *]"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); /* * simbtype - simbad object types, added 30/12/14 */ assertQ(req("q", "simbtype:HII"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "simbtype:hii"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "simbtype:\"HiI Region\""), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); /* * simbad_object_facet_hier, added 30/12/14 */ assertQ(req("q", "simbad_object_facet_hier:\"0/HII Region\""), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "simbad_object_facet_hier:\"1/HII Region/9000000\""), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); /* * citations - added 10/12/13 */ assertQ(req("q", "citation:2014JNuM..455...10C"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); /* * citation_count */ assertQ(req("q", "citation_count:[0 TO 10]"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); assertQ(req("q", "citation_count:[0 TO *]"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); /* * indexstamp */ assertQ(req("q", "indexstamp:[\"2012-10-01T00:00:00\" TO *]"), "//doc/int[@name='recid'][.='100']", "//*[@numFound>='20']" ); /* * date */ assertQ(req("q", "date:[\"2012-10-01T00:00:00\" TO *]"), "//doc/int[@name='recid'][.='100']", "//*[@numFound>='1']" ); /* * reference */ assertQ(req("q", "reference:2014JNuM..455...10R"), "//doc/int[@name='recid'][.='100']", "//*[@numFound='1']" ); /* * pubdate - 17/12/2012 changed to be the date type * * we have records with these dates: * 20: 1976-01-01 * 21: 1976-01-02 * 22: 1976-02-01 * 23: 1976-01-02 * 24: 1976-31-12 * 25: 1977-01-01 * * for more complete tests, look at: TestAdsabsTypeDateParsing */ assertQ(req("q", "title:datetest"), "//*[@numFound='6']"); assertQ(req("q", "pubdate:[1976 TO 1977]"), "//*[@numFound='6']"); assertQ(req("q", "pubdate:1976"), "//*[@numFound='5']", "//doc/int[@name='recid'][.='20']", "//doc/int[@name='recid'][.='21']", "//doc/int[@name='recid'][.='22']", "//doc/int[@name='recid'][.='23']", "//doc/int[@name='recid'][.='24']" ); /* * update_timestamp */ assertQ(req("q", "update_timestamp:[\"2010-01-04T22:01:32\" TO \"2010-08-04T22:01:32\"]"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); assertQ(req("q", "update_timestamp:[\"2010-03-04T22:01:32.109Z\" TO \"2010-03-04T22:01:33.099Z\"]"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']" ); /* * year */ assertQ(req("q", "year:2013"), "//doc[1]/int[@name='recid'][.='100']" ); assertQ(req("q", "year:[2011 TO 2014]"), "//doc[1]/int[@name='recid'][.='100']" ); assertQ(req("q", "year:1995-2013"), "//doc[1]/int[@name='recid'][.='100']" ); assertQueryEquals(req("q", "year:1995-1996", "debugQuery", "true"), "year:[1995 TO 1996]", TermRangeQuery.class); /* * links_data (generated and stored as JSON for display purposes) * ids_data (generated and stored as JSON for display purposes) */ assertQ(req("q", "id:100"), "//doc/arr[@name='links_data']/str[contains(text(),'MAST')]", "//doc/arr[@name='links_data']/str[contains(text(),'{\"foo\": [\"bar\", \"baz\"], \"one\": {\"two\": \"three\"}}')]" ); /* * 2nd order queries */ // references/citations() - see TestSolrCitationQuery // what other papers we cite assertQ(req("q", "references(*:*)"), "//*[@numFound='3']"); assertQ(req("q", "references(id:100)"), "//*[@numFound='2']", "//doc/int[@name='recid'][.='101']", "//doc/int[@name='recid'][.='102']"); // make sure the citation search optimization (short clause first) // works well assertQ(req("q", "citations(*:*) id:100"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assertQ(req("q", "references(id:100) *:*"), "//*[@numFound='2']", "//doc/int[@name='recid'][.='101']", "//doc/int[@name='recid'][.='102']"); // who cites us assertQ(req("q", "citations(*:*)"), "//*[@numFound='3']"); assertQ(req("q", "citations(id:101)"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); // just check they are working assertQ(req("q", "useful(*:*)"), "//*[@numFound='3']"); assertQ(req("q", "reviews(*:*)"), "//*[@numFound='3']"); // cut only the first n results assertQ(req("q", "topn(2, reviews(*:*))"), "//*[@numFound='2']"); //dumpDoc(null, "id", "recid", "title"); assertQ(req("q", "topn(5, recid:[1 TO 10], id asc)"), "//*[@numFound='5']", "//doc[1]/int[@name='recid'][.='1']", "//doc[2]/int[@name='recid'][.='2']", "//doc[3]/int[@name='recid'][.='3']", "//doc[4]/int[@name='recid'][.='4']"); // TODO: I am too tired now to find out why the sorting is weird // but found it must be! //assertQ(req("q", "topn(5, recid:[1 TO 10], \"recid desc\")", "fl", "recid"), // "//*[@numFound='5']", // "//doc[1]/int[@name='recid'][.='7']", // "//doc[2]/int[@name='recid'][.='6']", // "//doc[3]/int[@name='recid'][.='5']", // "//doc[4]/int[@name='recid'][.='4']"); // trending() - what people read assertQ(req("q", "trending(*:*)"), "//*[@numFound>='2']", "//doc[1]/int[@name='recid'][.='101']", "//doc[2]/int[@name='recid'][.='103']" ); // test we can search for all docs that have certain field assertQ(req("q", "reference:*"), "//doc[1]/int[@name='recid'][.='100']" ); assertQ(req("q", "id:?"), // but works only for text fields "//*[@numFound='8']" ); /** * doctype */ assertQ(req("q", "doctype:article"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); /** * doctype_facet_hier */ assertQ(req("q", "doctype_facet_hier:\"0/Article\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); assertQ(req("q", "doctype_facet_hier:\"1/Article/Book chapter\""), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']"); /** * Author search must give the same results if we use * pos() or ^author */ assertQ(req("q", "author:bond"), "//*[@numFound='8']" ); assertQ(req("q", "author:\"^bond\""), "//*[@numFound='8']" ); assertQ(req("q", "author:\"bond, edwin james\""), "//*[@numFound='6']", "//doc/int[@name='recid'][.='50']", "//doc/int[@name='recid'][.='51']", "//doc/int[@name='recid'][.='52']", "//doc/int[@name='recid'][.='53']", "//doc/int[@name='recid'][.='55']", "//doc/int[@name='recid'][.='57']" ); assertQ(req("q", "author:\"^bond, edwin james\""), "//*[@numFound='6']", "//doc/int[@name='recid'][.='50']", "//doc/int[@name='recid'][.='51']", "//doc/int[@name='recid'][.='52']", "//doc/int[@name='recid'][.='53']", "//doc/int[@name='recid'][.='55']", "//doc/int[@name='recid'][.='57']" ); assertQ(req("q", "pos(author:\"bond, edwin james\", 1, 2)"), "//*[@numFound='6']", "//doc/int[@name='recid'][.='50']", "//doc/int[@name='recid'][.='51']", "//doc/int[@name='recid'][.='52']", "//doc/int[@name='recid'][.='53']", "//doc/int[@name='recid'][.='55']", "//doc/int[@name='recid'][.='57']" ); } }