package org.apache.lucene.search; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; import org.apache.lucene.document.Document; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.search.SecondOrderCollector.FinalValueType; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.CitationLRUCache; import org.apache.solr.search.QParser; import org.apache.solr.search.QParserPlugin; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SyntaxError; import org.apache.solr.uninverting.UninvertingReader; import org.junit.BeforeClass; import monty.solr.util.MontySolrAbstractTestCase; import monty.solr.util.MontySolrSetup; @SuppressCodecs("SimpleText") @SuppressWarnings({"rawtypes", "unchecked"}) public class TestSecondOrderQueryTypesAds extends MontySolrAbstractTestCase { @BeforeClass public static void beforeClass() throws Exception { System.setProperty("solr.allow.unsafe.resourceloading", "true"); schemaString = MontySolrSetup.getMontySolrHome() + "/contrib/adsabs/src/test-files/solr/collection1/conf/" + "schema-citations-transformer.xml"; configString = MontySolrSetup.getMontySolrHome() + "/contrib/adsabs/src/test-files/solr/collection1/conf/" + "citation-cache-solrconfig.xml"; initCore(configString, schemaString, MontySolrSetup.getSolrHome() + "/example/solr"); } private SolrQueryRequest tempReq; @Override public void setUp() throws Exception { super.setUp(); addDocs(); tempReq = req("test"); } @Override public void tearDown() throws Exception { if (tempReq != null) { tempReq.close(); } tempReq = null; super.tearDown(); } private void addDocs() throws IOException { assertU(adoc("id", "0", "bibcode", "b0", "boost_const", "1.0f", "boost_2", "0.5f", "boost_1", "0.1f", "date", "1966-01-01T00:00:00Z")); assertU(adoc("id", "1", "bibcode", "b1", "boost_const", "1.0f", "boost_2", "0.5f", "reference", "b2", "reference", "b3", "reference", "b4", "reference", "b5", "boost_1", "0.1f", "date", "1966-01-02T00:00:00Z")); assertU(adoc("id", "2", "bibcode", "b2", "boost_const", "1.0f", "boost_2", "0.2f", "boost_1", "0.1f", "date", "1966-01-03T00:00:00Z")); assertU(adoc("id", "3", "bibcode", "b3", "boost_const", "1.0f", "boost_2", "0.3f", "boost_1", "0.9f", "date", "1966-01-03T01:00:00Z")); assertU(adoc("id", "4", "bibcode", "b4", "boost_const", "1.0f", "boost_2", "0.1f", "boost_1", "0.1f", "reference", "b100", "date", "1966-01-03T01:01:00Z")); assertU(adoc("id", "5", "bibcode", "b5", "boost_const", "1.0f", "boost_2", "0.8f", "boost_1", "0.0f", "date", "1966-01-03T01:01:01Z")); assertU(commit()); assertU(adoc("id", "6", "bibcode", "b6", "boost_const", "1.0f", "boost_2", "0.1f", "boost_1", "0.5f", "reference", "b5")); assertU(adoc("id", "7", "bibcode", "b7", "boost_const", "1.0f", "boost_2", "0.1f", "boost_1", "0.9f")); assertU(adoc("id", "8", "bibcode", "b8", "boost_const", "1.0f", "boost_2", "0.1f", "boost_1", "0.9f", "reference", "b9")); assertU(adoc("id", "9", "bibcode", "b9", "boost_const", "1.0f", "boost_2", "0.1f", "reference", "b10", "boost_1", "0.9f")); assertU(adoc("id", "10", "bibcode", "b10", "boost_const", "1.0f", "boost_2", "0.5f", "boost_1", "0.1f", "reference", "b9")); assertU(adoc("id", "11", "bibcode", "b11", "boost_const", "1.0f", "boost_2", "0.5f", "boost_1", "0.1f", "reference", "b9", "reference", "b6")); assertU(commit()); // close the writer, create a new segment assertU(adoc("xkw", "x", "xka", "b", "id", "16", "bibcode", "b16", "reference", "b17", "reference", "b18", "reference", "b20", "boost_1", "0.9f")); // links: 1 assertU(adoc("xkw", "x", "xka", "a", "id", "17", "bibcode", "b17", "reference", "b16", "reference", "b18", "reference", "b20", "boost_1", "0.7f")); // links: 2 assertU(adoc("xkw", "x", "xka", "b", "id", "18", "bibcode", "b18", "reference", "b20", "boost_1", "0.5f")); // links: 3 assertU(adoc("xkw", "x", "xka", "b", "id", "19", "bibcode", "b19", "reference", "b17", "reference", "b18", "reference", "b20", "boost_1", "0.3f")); // links: 0 assertU(adoc("xkw", "x", "xka", "b", "id", "20", "bibcode", "b20", "reference", "b20", "boost_1", "0.1f")); // links: 5 assertU(commit()); } public void testADSOperators() throws Exception { final CitationLRUCache cache = (CitationLRUCache) tempReq.getSearcher().getCache("citations-cache-from-references"); assert cache != null; SolrCacheWrapper citationsWrapper = new SolrCacheWrapper.CitationsCache(cache); SolrCacheWrapper referencesWrapper = new SolrCacheWrapper.ReferencesCache(cache); LuceneCacheWrapper<NumericDocValues> boostConstant = LuceneCacheWrapper.getFloatCache( "boost_const", UninvertingReader.Type.SORTED_SET_FLOAT, tempReq.getSearcher().getSlowAtomicReader()); LuceneCacheWrapper<NumericDocValues> boostOne = LuceneCacheWrapper.getFloatCache( "boost_1", UninvertingReader.Type.SORTED_SET_FLOAT, tempReq.getSearcher().getSlowAtomicReader()); LuceneCacheWrapper<NumericDocValues> boostTwo = LuceneCacheWrapper.getFloatCache( "boost_2", UninvertingReader.Type.SORTED_SET_FLOAT, tempReq.getSearcher().getSlowAtomicReader()); // expecting 4 results with various order, simply based on the boost factor testQ2("id:1", new SecondOrderCollectorOperatorExpertsCiting(referencesWrapper, boostConstant), Arrays.asList(2, 3, 4, 5)); testQ2("id:1", new SecondOrderCollectorOperatorExpertsCiting(referencesWrapper, boostOne), Arrays.asList(3, 2, 4, 5)); testQ2("id:1", new SecondOrderCollectorOperatorExpertsCiting(referencesWrapper, boostTwo), Arrays.asList(5, 3, 2, 4)); // 5 is referenced from two docs, the rest only by one doc testQ2("id:1 OR id:6", new SecondOrderCollectorOperatorExpertsCiting(referencesWrapper, boostConstant), Arrays.asList(5,2,3,4)); testQ2("id:1 OR id:6", new SecondOrderCollectorOperatorExpertsCiting(referencesWrapper, boostOne), Arrays.asList(5,3,2,4)); testQ2("id:1 OR id:6", new SecondOrderCollectorOperatorExpertsCiting(referencesWrapper, boostTwo), Arrays.asList(5,3,2,4)); // the most cited papers (the score comes from the source that cites them; so there is no // change in order testQ2("id:9", new SecondOrderCollectorCitingTheMostCited(citationsWrapper, boostConstant), Arrays.asList(8,10,11)); testQ2("id:9", new SecondOrderCollectorCitingTheMostCited(citationsWrapper, boostOne), Arrays.asList(8,10,11)); testQ2("id:9", new SecondOrderCollectorCitingTheMostCited(citationsWrapper, boostTwo), Arrays.asList(8,10,11)); // 11 is referenced twice (but we should see no change in order) testQ2("id:6 OR id:9", new SecondOrderCollectorCitingTheMostCited(citationsWrapper, boostConstant), Arrays.asList(8,10,11)); testQ2("id:6 OR id:9", new SecondOrderCollectorCitingTheMostCited(citationsWrapper, boostOne), Arrays.asList(8,10,11)); testQ2("id:6 OR id:9", new SecondOrderCollectorCitingTheMostCited(citationsWrapper, boostTwo), Arrays.asList(8,10,11)); // ADS Classic scoring formula testQ2("*:*", new SecondOrderCollectorAdsClassicScoringFormula(citationsWrapper, boostConstant), Arrays.asList(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20)); testQ2("*:*", new SecondOrderCollectorAdsClassicScoringFormula(citationsWrapper, boostOne), Arrays.asList(3, 7, 8, 9, 16, 17, 6, 18, 19, 0, 1, 2, 4, 10, 11, 20, 5)); testQ2("*:*", new SecondOrderCollectorAdsClassicScoringFormula(citationsWrapper, boostTwo), Arrays.asList(5, 0, 1, 10, 11, 3, 2, 4, 6, 7, 8, 9, 16, 17, 18, 19, 20)); // topN() testQ2((Query) new SecondOrderQuery(new MatchAllDocsQuery(), new SecondOrderCollectorAdsClassicScoringFormula(citationsWrapper, boostTwo)), new SecondOrderCollectorTopN(2), Arrays.asList(5,0)); testQ2("*:*", new SecondOrderCollectorTopN(2), Arrays.asList(0,1)); } @SuppressWarnings("serial") public void testFinalValueAlgorithms() throws Exception { // various algorithms for compacting the hits testQ3(FinalValueType.ABS_COUNT, new HashMap() {{ put(0, new Float[] {1.0f, 1.0f}); put(1, new Float[] {5.0f}); }}, Arrays.asList(0,1), Arrays.asList(2.0f, 1f) ); testQ3(FinalValueType.ABS_COUNT_NORM, new HashMap() {{ put(0, new Float[] {1.0f, 1.0f}); put(1, new Float[] {5.0f}); }}, Arrays.asList(0,1), Arrays.asList(1.0f, 0.5f) ); testQ3(FinalValueType.MIN_VALUE, new HashMap() {{ put(0, new Float[] {1.0f, 1.0f, 0.01f}); put(1, new Float[] {5.0f, 0.1f}); }}, Arrays.asList(1, 0), Arrays.asList(0.1f, 0.01f) ); testQ3(FinalValueType.MAX_VALUE, new HashMap() {{ put(0, new Float[] {1.0f, 1.0f, 0.01f}); put(1, new Float[] {5.0f, 0.1f}); }}, Arrays.asList(1, 0), Arrays.asList(5f, 1f) ); testQ3(FinalValueType.GEOM_MEAN, new HashMap() {{ put(0, new Float[] {1.0f, 1.0f, 0.01f}); put(1, new Float[] {1.0f, 1.0f, 0.011f}); }}, Arrays.asList(1, 0), Arrays.asList(0.222f, 0.215f) ); testQ3(FinalValueType.GEOM_MEAN_NORM, new HashMap() {{ put(0, new Float[] {1.0f, 1.0f, 0.01f}); put(1, new Float[] {1.0f, 1.0f, 0.011f}); }}, Arrays.asList(1, 0), Arrays.asList(1.0f, 0.969f) ); testQ3(FinalValueType.ARITHM_MEAN, new HashMap() {{ put(0, new Float[] {1.0f, 1.0f, 0.01f}); put(1, new Float[] {1.0f, 1.0f, 0.011f}); }}, Arrays.asList(1, 0), Arrays.asList(0.6703f, 0.67f) ); testQ3(FinalValueType.ARITHM_MEAN_NORM, new HashMap() {{ put(0, new Float[] {1.0f, 1.0f, 0.01f}); put(1, new Float[] {1.0f, 1.0f, 0.011f}); }}, Arrays.asList(1, 0), Arrays.asList(1.0f, 0.9995f) ); testQ3(FinalValueType.AGRESTI_COULL, new HashMap() {{ put(0, new Float[] {2f}); put(1, new Float[] {1.0f, 1.0f}); }}, Arrays.asList(1, 0), Arrays.asList(-0.082f, -0.399f) ); } private void testQ2(Object firstQuery, SecondOrderCollector collector, List<Integer> expectedIds) throws IOException, SyntaxError { SolrQueryRequest r = req("test"); try { SolrIndexSearcher searcher = r.getSearcher(); Query q; if (firstQuery instanceof String) { QParserPlugin qParser = r.getCore().getQueryPlugin("lucene"); QParser qp = qParser.createParser((String) firstQuery, null, r.getParams(), r); q = qp.parse(); } else { q = (Query) firstQuery; } final ArrayList<ScoreDoc> results = new ArrayList<ScoreDoc>(); searcher.search(new SecondOrderQuery(q, collector), new SimpleCollector() { private Scorer scorer; private LeafReader reader; @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } @Override public void doSetNextReader(LeafReaderContext context) throws IOException { reader = context.reader(); } @Override public void collect(int doc) throws IOException { Document d = reader.document(doc); String idValue = d.get("id"); // store 'id' instead of docid results.add(new ScoreDoc(Integer.parseInt(idValue), scorer .score())); } @Override public boolean needsScores() { return false; } }); Collections.sort(results, new Comparator() { @Override public int compare(Object o1, Object o2) { float a = ((ScoreDoc) o1).score; float b = ((ScoreDoc) o2).score; return a < b ? 1 : a > b ? -1 : 0; } }); int[] resultIds = new int[results.size()]; int i = 0; for (ScoreDoc sd : results) { resultIds[i] = sd.doc; i++; } int[] arrExpected = new int[expectedIds.size()]; i = 0; for (Integer u : expectedIds) { arrExpected[i] = u; i++; } //System.out.println("expected:" + Arrays.toString(arrExpected)); ; //System.out.println("results:" + Arrays.toString(resultIds)); ; //System.out.println(results); assertArrayEquals(arrExpected, resultIds); } finally { r.close(); } } private void testQ3(FinalValueType finalType, Map<Integer, Float[]> mockValues, List<Integer> expectedIds, List<Float> expectedScores ) throws ParseException, IOException { SolrQueryRequest r = req("test"); try { final FinalValueType testFinalType = finalType; final Iterator<Entry<Integer, Float[]>> es = mockValues.entrySet().iterator(); SolrIndexSearcher searcher = r.getSearcher(); final ArrayList<ScoreDoc> results = new ArrayList<ScoreDoc>(); searcher.search(new SecondOrderQuery(new MatchAllDocsQuery(), new AbstractSecondOrderCollector() { @Override public void doSetNextReader(LeafReaderContext context) throws IOException { setFinalValueType(testFinalType); } @Override public void collect(int doc) throws IOException { if (es.hasNext()) { Entry<Integer, Float[]> nextItem = es.next(); Integer docid = nextItem.getKey(); for (Float f: nextItem.getValue()) { hits.add(new CollectorDoc(docid, f, -1, 1)); } } } @Override public boolean needsScores() { return false; } }), new SimpleCollector() { // this one collects results private int docBase; private Scorer scorer; private LeafReader reader; @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } @Override public void doSetNextReader(LeafReaderContext context) throws IOException { docBase = context.docBase; reader = context.reader(); } @Override public void collect(int doc) throws IOException { Document d = reader.document(doc+docBase); String idValue = d.get("id"); // store 'id' instead of docid results.add(new ScoreDoc(Integer.parseInt(idValue), scorer.score())); } @Override public boolean needsScores() { return false; } }); Collections.sort(results, new Comparator() { @Override public int compare(Object o1, Object o2) { float a = ((ScoreDoc) o1).score; float b = ((ScoreDoc) o2).score; return a < b ? 1 : a > b ? -1 : 0; } }); int[] resultIds = new int[results.size()]; int i = 0; for (ScoreDoc sd : results) { resultIds[i] = sd.doc; i++; } int[] arrExpected = new int[expectedIds.size()]; i = 0; for (Integer v : expectedIds) { arrExpected[i] = v; i++; } float[] resultScores = new float[results.size()]; i = 0; for (ScoreDoc sd: results) { resultScores[i] = sd.score; i++; } float[] scoresExpected = new float[expectedScores.size()]; i = 0; for (Float x: expectedScores) { scoresExpected[i] = x; i++; } //System.out.println("expected (ids):" + Arrays.toString(arrExpected)); //System.out.println("results:" + Arrays.toString(resultIds)); //System.out.println("expected (scores):" + Arrays.toString(scoresExpected));; //System.out.println("results:" + Arrays.toString(resultScores));; //System.out.println(results); assertArrayEquals(scoresExpected , resultScores, 0.001f); assertArrayEquals(arrExpected, resultIds); } finally { r.close(); } } // Uniquely for Junit 3 public static junit.framework.Test suite() { return new junit.framework.JUnit4TestAdapter( TestSecondOrderQueryTypesAds.class); } }