/**
 * Copyright 2014 National University of Ireland, Galway.
 *
 * This file is part of the SIREn project. Project and contact information:
 *
 *  https://github.com/rdelbru/SIREn
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sindice.siren.search.node;

import static org.sindice.siren.analysis.MockSirenToken.node;
import static org.sindice.siren.search.AbstractTestSirenScorer.BooleanClauseBuilder.must;
import static org.sindice.siren.search.AbstractTestSirenScorer.BooleanClauseBuilder.not;
import static org.sindice.siren.search.AbstractTestSirenScorer.BooleanClauseBuilder.should;
import static org.sindice.siren.search.AbstractTestSirenScorer.NodeBooleanQueryBuilder.nbq;
import static org.sindice.siren.search.AbstractTestSirenScorer.TupleQueryBuilder.tuple;

import java.io.IOException;

import org.apache.lucene.index.CorruptIndexException;
import org.junit.Test;
import org.sindice.siren.analysis.AnyURIAnalyzer;
import org.sindice.siren.analysis.TupleAnalyzer;
import org.sindice.siren.analysis.AnyURIAnalyzer.URINormalisation;
import org.sindice.siren.index.codecs.RandomSirenCodec.PostingsFormatType;
import org.sindice.siren.search.AbstractTestSirenScorer;
import org.sindice.siren.util.XSDDatatype;

/**
 * Tests for the scorer obtained from tuple queries: each test indexes a few
 * tuple documents, builds a tuple query out of node boolean clauses, and then
 * walks the scorer checking the candidate documents and the matching nodes.
 */
public class TestTupleScorer extends AbstractTestSirenScorer {

  /**
   * Selects the tuple analyzer, registers a fully-normalising URI analyzer for
   * the <code>xsd:anyURI</code> datatype and selects the random postings
   * format.
   */
  @Override
  protected void configure() throws IOException {
    this.setAnalyzer(AnalyzerType.TUPLE);
    // TODO: remove when TupleAnalyzer is no longer used
    final AnyURIAnalyzer uriAnalyzer = new AnyURIAnalyzer(TEST_VERSION_CURRENT);
    uriAnalyzer.setUriNormalisation(URINormalisation.FULL);
    ((TupleAnalyzer) analyzer).registerDatatype(XSDDatatype.XSD_ANY_URI.toCharArray(), uriAnalyzer);
    this.setPostingsFormat(PostingsFormatType.RANDOM);
  }

  /**
   * Advances the scorer to its next candidate document and checks that it is
   * the expected one.
   *
   * @param scorer the scorer to advance
   * @param expectedDoc the identifier the next candidate document must have
   */
  private void assertNextCandidateIs(final NodeScorer scorer, final int expectedDoc)
  throws IOException {
    assertTrue(scorer.nextCandidateDocument());
    assertEquals(expectedDoc, scorer.doc());
  }

  /**
   * Advances the scorer to its next candidate document, checks that it is the
   * expected one, and checks that it contains no matching node.
   *
   * @param scorer the scorer to advance
   * @param expectedDoc the identifier the next candidate document must have
   */
  private void assertNextCandidateHasNoNode(final NodeScorer scorer, final int expectedDoc)
  throws IOException {
    this.assertNextCandidateIs(scorer, expectedDoc);
    assertFalse(scorer.nextNode());
  }

  /**
   * A tuple query with a single optional clause, run against one document
   * containing two tuples.
   */
  @Test
  public void testUnaryClause() throws IOException {
    this.addDocument("\"aaa ccc\" \"bbb ccc\" . \"aaa bbb\" \"ccc eee\" . ");

    NodeScorer scorer = this.getScorer(
      tuple().optional(nbq(must("aaa"), must("ccc")))
    );

    this.assertNextCandidateIs(scorer, 0);
    assertTrue(scorer.nextNode());
    assertEquals(node(0), scorer.node());
    assertEndOfStream(scorer);

    scorer = this.getScorer(
      tuple().optional(nbq(must("aaa"), must("bbb")))
    );

    this.assertNextCandidateIs(scorer, 0);
    assertTrue(scorer.nextNode());
    assertEquals(node(1), scorer.node());
    assertEndOfStream(scorer);

    scorer = this.getScorer(
      tuple().optional(nbq(must("aaa"), must("eee")))
    );

    // the document is a candidate, but no single cell holds both terms
    this.assertNextCandidateHasNoNode(scorer, 0);
    assertEndOfStream(scorer);
  }

  /**
   * A tuple query with two required clauses, run against one document
   * containing two tuples.
   */
  @Test
  public void testMoreThanOneClause() throws IOException {
    this.addDocument("\"aaa ccc\" \"bbb ccc\" . \"aaa bbb\" \"ccc eee\" . ");

    NodeScorer scorer = this.getScorer(
      tuple().with(nbq(must("aaa"), must("ccc")))
             .with(nbq(must("aaa"), must("bbb")))
    );

    // each clause is satisfied, but in a different tuple
    this.assertNextCandidateHasNoNode(scorer, 0);
    assertEndOfStream(scorer);

    scorer = this.getScorer(
      tuple().with(nbq(must("aaa"), must("ccc")))
             .with(nbq(must("bbb"), must("ccc")))
    );

    this.assertNextCandidateIs(scorer, 0);
    assertTrue(scorer.nextNode());
    assertEquals(node(0), scorer.node());
    assertEndOfStream(scorer);
  }

  /**
   * <code>{+[ddd] +[eee]}</code>
   */
  @Test
  public void testMust() throws IOException {
    this.addDocument("\"eee\" . \"ddd\" . ");
    this.addDocument("\"bbb\" . \"ddd eee\" . ");

    final NodeScorer scorer = this.getScorer(
      tuple().with(nbq(should("ddd")))
             .with(nbq(should("eee")))
    );

    // first candidate document does not match
    this.assertNextCandidateHasNoNode(scorer, 0);

    // second candidate document is matching
    this.assertNextCandidateIs(scorer, 1);
    assertTrue(scorer.nextNode());
    assertEquals(node(1), scorer.node());

    assertEndOfStream(scorer);
  }

  /**
   * <code>{+[ddd] [eee]}</code>
   */
  @Test
  public void testMustShould() throws IOException {
    this.addDocument("\"eee\" \"ddd\" . ");
    this.addDocument("\"bbb\" . \"ddd\" . ");
    this.addDocument("\"bbb\" . \"eee\" . ");

    final NodeScorer scorer = this.getScorer(
      tuple().with(nbq(should("ddd")))
             .optional(nbq(should("eee")))
    );

    // first candidate is matching
    this.assertNextCandidateIs(scorer, 0);
    assertTrue(scorer.nextNode());
    assertEquals(node(0), scorer.node());

    // second candidate is matching
    this.assertNextCandidateIs(scorer, 1);
    assertTrue(scorer.nextNode());
    assertEquals(node(1), scorer.node());

    // third document is not a candidate
    assertFalse(scorer.nextCandidateDocument());

    assertEndOfStream(scorer);
  }

  /**
   * <code>{+[ddd] -[eee]}</code>
   */
  @Test
  public void testMustMustNot() throws IOException {
    this.addDocument("\"eee\" \"ddd aaa\" . ");
    this.addDocument("\"bbb\" \"ddd eee\" . ");
    this.addDocument("\"bbb\" \"ddd\" . ");

    final NodeScorer scorer = this.getScorer(
      tuple().with(nbq(should("ddd")))
             .without(nbq(should("eee")))
    );

    // first and second candidate documents do not match
    this.assertNextCandidateHasNoNode(scorer, 0);
    this.assertNextCandidateHasNoNode(scorer, 1);

    // third candidate document matches
    this.assertNextCandidateIs(scorer, 2);
    assertTrue(scorer.nextNode());
    assertEquals(node(0), scorer.node());

    assertEndOfStream(scorer);
  }

  /**
   * <code>{[ddd] [eee]}</code>
   */
  @Test
  public void testShould() throws IOException {
    this.addDocument("\"eee\" \"ddd\" . ");
    this.addDocument("\"bbb\" \"ddd\" . ");

    final NodeScorer scorer = this.getScorer(
      tuple().optional(nbq(should("ddd")))
             .optional(nbq(should("eee")))
    );

    // the two documents match
    this.assertNextCandidateIs(scorer, 0);
    assertTrue(scorer.nextNode());
    assertEquals(node(0), scorer.node());

    this.assertNextCandidateIs(scorer, 1);
    assertTrue(scorer.nextNode());
    assertEquals(node(0), scorer.node());

    assertEndOfStream(scorer);
  }

  /**
   * <code>{[ddd] -[eee]}</code>
   */
  @Test
  public void testShouldMustNot() throws IOException {
    this.addDocument("\"eee\" . \"ddd\" . ");
    this.addDocument("\"bbb\" . \"ddd eee\" . ");

    final NodeScorer scorer = this.getScorer(
      tuple().optional(nbq(should("ddd")))
             .without(nbq(should("eee")))
    );

    // first document matches
    this.assertNextCandidateIs(scorer, 0);
    assertTrue(scorer.nextNode());
    assertEquals(node(1), scorer.node());

    // second candidate document does not match
    this.assertNextCandidateHasNoNode(scorer, 1);

    assertEndOfStream(scorer);
  }

  /**
   * A single required clause combined with a <code>bound(1, 1)</code>
   * constraint on the tuple itself.
   */
  @Test
  public void testTupleConstraintOneClause() throws IOException {
    this.addDocument("<aaa> <bbb> . <ccc> <ddd> . ");
    this.addDocument("<ccc> . <aaa> <bbb> <ddd> . ");

    final NodeScorer scorer = this.getScorer(
      tuple().with(nbq(must("ccc")))
             .bound(1, 1)
    );

    // first document matches
    this.assertNextCandidateIs(scorer, 0);
    assertTrue(scorer.nextNode());
    assertEquals(node(1), scorer.node());

    // second candidate document does not match
    this.assertNextCandidateHasNoNode(scorer, 1);

    assertEndOfStream(scorer);
  }

  /**
   * Two required clauses, each with its own bound, combined with a
   * <code>bound(1, 1)</code> constraint on the tuple itself.
   */
  @Test
  public void testTupleConstraintTwoClauses() throws IOException {
    this.addDocument("<aaa> <bbb> . <ccc> <ddd> . ");
    this.addDocument("<ccc> <ddd> . <aaa> <bbb> <ddd> . ");

    final NodeScorer scorer = this.getScorer(
      tuple().with(nbq(must("ccc")).bound(0,0))
             .with(nbq(must("ddd")).bound(1,1))
             .bound(1, 1)
    );

    // first document matches
    this.assertNextCandidateIs(scorer, 0);
    assertTrue(scorer.nextNode());
    assertEquals(node(1), scorer.node());

    // second candidate document does not match
    this.assertNextCandidateHasNoNode(scorer, 1);

    assertEndOfStream(scorer);
  }

  /**
   * Test conjunction with exhausted scorer.
   * The scorer of ddd got exhausted, and
   * {@link SirenCellConjunctionScorer#doNext()} was trying to retrieve the
   * entity id from the exhausted scorer.
   */
  @Test
  public void testConjunctionWithExhaustedScorer() throws IOException {
    this.addDocument("\"ccc\" . <aaa> \"ddd\" . ");
    this.addDocument("\"ccc\" . <aaa> \"ddd eee\" . ");

    final NodeScorer scorer = this.getScorer(
      tuple().with(nbq(must("aaa")).bound(0,0))
             .with(nbq(must("ddd"), not("eee")).bound(1,Integer.MAX_VALUE))
    );

    // first candidate document matches
    this.assertNextCandidateIs(scorer, 0);
    assertTrue(scorer.nextNode());
    assertEquals(node(1), scorer.node());

    // second candidate document does not match
    this.assertNextCandidateHasNoNode(scorer, 1);

    assertEndOfStream(scorer);
  }

  /**
   * A required clause bound to the first cell combined with a required
   * two-term clause that may match in any later cell.
   */
  @Test
  public void testMultiValuedPredicate() throws CorruptIndexException, IOException {
    this.addDocument("<aaa> \"ddd eee\" \"ddd ccc\" \"ccc eee\" \"eee bbb\" . ");
    this.addDocument("<aaa> \"ddd bbb\" \"ddd bbb\" \"eee bbb\" \"eee ccc\" . ");
    this.addDocument("<aaa> \"ddd ccc\" \"ddd bbb eee\" \"eee ccc bbb\" \"eee ccc\" . ");
    this.addDocument("<aaa> \"ddd eee\" \"ddd eee\" \"eee ccc bbb\" \"eee ccc\" . ");
    this.addDocument("<bbb> \"ddd eee\" \"ddd eee\" \"eee ccc ddd\" \"eee ccc\" . ");
    this.addDocument("<aaa> \"ddd eee\" \"ddd eee\" \"eee ccc bbb\" \"eee ccc\" . \n" +
                     "<bbb> \"ddd ccc\" \"ddd bbb eee\" \"eee ccc bbb\" \"eee ccc\" .\n" +
                     "<ccc> \"aaa eee ccc\" \"bbb eee ccc\" . ");

    final NodeScorer scorer = this.getScorer(
      tuple().with(nbq(must("aaa")).bound(0,0))
             .with(nbq(must("ddd"), must("ccc")).bound(1,Integer.MAX_VALUE))
    );

    // first candidate document matches
    this.assertNextCandidateIs(scorer, 0);
    assertTrue(scorer.nextNode());
    assertEquals(node(0), scorer.node());

    // second candidate document does not match
    this.assertNextCandidateHasNoNode(scorer, 1);

    // third candidate document matches
    this.assertNextCandidateIs(scorer, 2);
    assertTrue(scorer.nextNode());
    assertEquals(node(0), scorer.node());

    // fourth candidate document does not match
    this.assertNextCandidateHasNoNode(scorer, 3);

    // fifth document (id 4) should be skipped: checking the doc id of the
    // next candidate is what actually verifies the skip
    // sixth document does not match
    this.assertNextCandidateHasNoNode(scorer, 5);

    assertEndOfStream(scorer);
  }

  /**
   * Two required clauses and one excluded clause, the last two being allowed
   * to match in any cell after the first one.
   */
  @Test
  public void testTuple2ReqCell1Excl() throws CorruptIndexException, IOException {
    this.addDocument("<aaa> <bbb> <ddd> <eee> . ");
    this.addDocument("<aaa> <ccc> <eee> . ");
    this.addDocument("<aaa> <ccc> <ddd> . ");
    this.addDocument("<aaa> <ccc> <eee> <ddd> . ");

    final NodeScorer scorer = this.getScorer(
      tuple().with(nbq(must("aaa")).bound(0,0))
             .with(nbq(must("eee")).bound(1,Integer.MAX_VALUE))
             .without(nbq(must("ddd")).bound(1,Integer.MAX_VALUE))
    );

    // first candidate document does not match
    this.assertNextCandidateHasNoNode(scorer, 0);

    // second candidate document matches
    this.assertNextCandidateIs(scorer, 1);
    assertTrue(scorer.nextNode());
    assertEquals(node(0), scorer.node());

    // third document (id 2) should be skipped: checking the doc id of the
    // next candidate is what actually verifies the skip
    // fourth document does not match
    this.assertNextCandidateHasNoNode(scorer, 3);

    assertEndOfStream(scorer);
  }

  /**
   * Same query shape as {@link #testMultiValuedPredicate()}, run against 300
   * documents made of a repeated group of three, of which only the third
   * contains both queried terms.
   */
  @Test
  public void testMultiValuedPredicate2() throws CorruptIndexException, IOException {
    final String[] docs = new String[300];
    for (int i = 0; i < 100; i++) {
      docs[i * 3] = "<http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#publicationTag> \"data data figure obtained\" \"belief tln parameters graphical\" \"incorrect rose proportions feature\" .";
      docs[i * 3 + 1] = "<http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#publicationTag> \"statistical determining data ylx\" \"presented assumed mit factors\" \"jolla developed positive functions\" .";
      docs[i * 3 + 2] = "<http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#publicationTag> \"data accuracy minutes applying\" \"focus perceive em parameterization\" \"yield learning separation rule\" .";
    }
    this.addDocuments(docs);

    final NodeScorer scorer = this.getScorer(
      tuple().with(nbq(must("http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#publicationtag")).bound(0,0))
             .with(nbq(must("data"), must("accuracy")).bound(1,Integer.MAX_VALUE))
    );

    for (int i = 0; i < 100; i++) {
      // first and second documents should be skipped
      // third candidate document matches
      assertTrue(scorer.nextCandidateDocument());
      assertTrue(scorer.nextNode());
      assertEquals(node(0), scorer.node());
    }

    assertEndOfStream(scorer);
  }

}