/** * Copyright 2014 National University of Ireland, Galway. * * This file is part of the SIREn project. Project and contact information: * * https://github.com/rdelbru/SIREn * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.sindice.siren.search.node; import static org.sindice.siren.analysis.MockSirenToken.node; import static org.sindice.siren.search.AbstractTestSirenScorer.BooleanClauseBuilder.must; import static org.sindice.siren.search.AbstractTestSirenScorer.NodeBooleanQueryBuilder.nbq; import java.io.IOException; import java.util.ArrayList; import org.junit.Test; import org.sindice.siren.analysis.AnyURIAnalyzer; import org.sindice.siren.analysis.TupleAnalyzer; import org.sindice.siren.analysis.AnyURIAnalyzer.URINormalisation; import org.sindice.siren.index.DocsAndNodesIterator; import org.sindice.siren.index.codecs.RandomSirenCodec.PostingsFormatType; import org.sindice.siren.search.AbstractTestSirenScorer; import org.sindice.siren.util.XSDDatatype; public class TestNodeConjunctionScorer extends AbstractTestSirenScorer { @Override protected void configure() throws IOException { this.setAnalyzer(AnalyzerType.TUPLE); // TODO: remove when TupleAnalyzer is no more used final AnyURIAnalyzer uriAnalyzer = new AnyURIAnalyzer(TEST_VERSION_CURRENT); uriAnalyzer.setUriNormalisation(URINormalisation.FULL); ((TupleAnalyzer) analyzer).registerDatatype(XSDDatatype.XSD_ANY_URI.toCharArray(), uriAnalyzer); this.setPostingsFormat(PostingsFormatType.RANDOM); } @Test public void testNextWithTermConjunction() throws Exception { this.addDocuments(new String[] { "<http://renaud.delbru.fr/> . ", "<http://sindice.com/test/name> \"Renaud Delbru\" . ", "<http://sindice.com/test/type> <http://sindice.com/test/Person> . " + "<http://sindice.com/test/name> \"Renaud Delbru\" . " }); final NodeScorer scorer = this.getScorer( nbq(must("renaud"), must("renaud")) ); assertTrue(scorer.nextCandidateDocument()); assertEquals(0, scorer.doc()); assertEquals(node(-1), scorer.node()); assertTrue(scorer.nextNode()); assertEquals(node(0,0), scorer.node()); assertFalse(scorer.nextNode()); assertEquals(DocsAndNodesIterator.NO_MORE_NOD, scorer.node()); assertTrue(scorer.nextCandidateDocument()); assertEquals(1, scorer.doc()); assertEquals(node(-1), scorer.node()); assertTrue(scorer.nextNode()); assertEquals(node(0,1), scorer.node()); assertFalse(scorer.nextNode()); assertEquals(DocsAndNodesIterator.NO_MORE_NOD, scorer.node()); assertTrue(scorer.nextCandidateDocument()); assertEquals(2, scorer.doc()); assertEquals(node(-1), scorer.node()); assertTrue(scorer.nextNode()); assertEquals(node(1,1), scorer.node()); assertFalse(scorer.nextNode()); assertEquals(DocsAndNodesIterator.NO_MORE_NOD, scorer.node()); assertEndOfStream(scorer); } @Test public void testNoNode() throws IOException { this.addDocument("\"eee\" . \"ddd\" . "); final NodeScorer scorer = this.getScorer( nbq(must("ddd"), must("eee")) ); assertTrue(scorer.nextCandidateDocument()); assertFalse(scorer.nextNode()); assertEquals(DocsAndNodesIterator.NO_MORE_NOD, scorer.node()); assertEndOfStream(scorer); } @Test public void testNoNextCandidate() throws IOException { this.addDocument("\"eee\" . \"ddd\" . "); this.addDocument("\"eee\" . \"fff\" . "); final NodeScorer scorer = this.getScorer( nbq(must("ddd"), must("fff")) ); assertEndOfStream(scorer); } // TODO: To update when phrase query implemented // @Test // public void testNextWithPhraseConjunction() // throws Exception { // this.deleteAll(writer); // this.addDocumentsWithIterator(new String[] { "\"aaa bbb aaa\". ", // "\"aaa bbb aba\" \"aaa ccc bbb aaa\" . ", // "\"aaa bbb ccc\" \"aaa ccc aaa aaa ccc\" . " + // "\" bbb ccc aaa \" \"aaa bbb bbb ccc aaa ccc\" . "}); // // final NodeBooleanScorer scorer = // this.getConjunctionScorer(new String[][] {{"aaa", "bbb"}, {"aaa", "ccc"}}); // // assertFalse(scorer.nextDocument() == DocIdSetIterator.NO_MORE_DOCS); // assertEquals(2, scorer.doc()); // assertEquals(1, scorer.node()[0]); // assertEquals(1, scorer.node()[1]); // assertTrue(scorer.nextDocument() == DocIdSetIterator.NO_MORE_DOCS); // } @Test public void testSkipToCandidate() throws Exception { final ArrayList<String> docs = new ArrayList<String>(); for (int i = 0; i < 32; i++) { docs.add("<http://sindice.com/test/name> \"Renaud Delbru\" . "); docs.add("<http://sindice.com/test/type> <http://sindice.com/test/Person> . "); } this.addDocuments(docs); final NodeScorer scorer = this.getScorer( nbq(must("renaud"), must("delbru")) ); assertTrue(scorer.skipToCandidate(16)); assertEquals(16, scorer.doc()); assertEquals(node(-1), scorer.node()); assertTrue(scorer.nextNode()); assertEquals(node(0,1), scorer.node()); assertFalse(scorer.nextNode()); assertEquals(DocsAndNodesIterator.NO_MORE_NOD, scorer.node()); assertTrue(scorer.skipToCandidate(41)); // should jump to next candidate doc 42 assertEquals(42, scorer.doc()); assertEquals(node(-1), scorer.node()); assertTrue(scorer.skipToCandidate(42)); // should stay at the same position assertEquals(42, scorer.doc()); assertEquals(node(-1), scorer.node()); assertTrue(scorer.nextNode()); assertEquals(node(0,1), scorer.node()); assertFalse(scorer.nextNode()); assertEquals(DocsAndNodesIterator.NO_MORE_NOD, scorer.node()); assertFalse(scorer.skipToCandidate(75)); assertEndOfStream(scorer); } /** * The score increases, even though the frequency of each term remains the same. * This is due to the length of the document which gets longer. */ @Test public void testScoreWithTermConjunction() throws Exception { final String[] docs = new String[] { "<http://renaud.delbru.fr/> . ", "<http://sindice.com/test/name> \"Renaud Delbru\" . ", "<http://sindice.com/test/type> <http://sindice.com/test/Person> . " + "<http://sindice.com/test/name> \"Renaud Delbru\" . ", "<http://sindice.com/test/type> <http://sindice.com/test/Person> . " + "<http://sindice.com/test/homepage> <http://renaud.delbru.fr/> . " + "<http://sindice.com/test/name> \"Renaud Delbru\" ." }; this.addDocuments(docs); final LuceneProxyNodeScorer scorer = new LuceneProxyNodeScorer(this.getScorer(nbq(must("renaud"), must("delbru")))); float lastLastScore = 0; float lastScore = 0; assertTrue(scorer.nextDoc() != DocsAndNodesIterator.NO_MORE_DOC); lastLastScore = scorer.score(); assertTrue(scorer.nextDoc() != DocsAndNodesIterator.NO_MORE_DOC); lastScore = scorer.score(); assertTrue("doc=" + scorer.docID() + " lastScore=" + lastLastScore + " score=" + lastScore, lastLastScore > lastScore); assertTrue(scorer.nextDoc() != DocsAndNodesIterator.NO_MORE_DOC); lastLastScore = lastScore; lastScore = scorer.score(); assertTrue("lastScore=" + lastLastScore + " score=" + lastScore, lastLastScore > lastScore); lastLastScore = scorer.score(); assertTrue(scorer.nextDoc() != DocsAndNodesIterator.NO_MORE_DOC); lastLastScore = lastScore; lastScore = scorer.score(); // score() sums the score of both nodes assertTrue("lastScore=" + lastLastScore + " score=" + lastScore, lastLastScore < lastScore); assertFalse(scorer.nextDoc() != DocsAndNodesIterator.NO_MORE_DOC); } }