/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.search.node;
import static org.sindice.siren.analysis.MockSirenToken.node;
import static org.sindice.siren.search.AbstractTestSirenScorer.NodePhraseQueryBuilder.npq;
import java.io.IOException;
import java.util.ArrayList;
import org.junit.Test;
import org.sindice.siren.analysis.AnyURIAnalyzer;
import org.sindice.siren.analysis.AnyURIAnalyzer.URINormalisation;
import org.sindice.siren.analysis.TupleAnalyzer;
import org.sindice.siren.index.codecs.RandomSirenCodec.PostingsFormatType;
import org.sindice.siren.search.AbstractTestSirenScorer;
import org.sindice.siren.util.XSDDatatype;
/**
 * Exercises phrase scoring at node level. Every scorer is obtained through
 * {@code getScorer(...)} from a query built with {@code npq(...)} and is cast
 * to {@link NodePhraseScorer}.
 */
public class TestNodeExactPhraseScorer extends AbstractTestSirenScorer {

  /** Tuple made of two literals: "word1 word2 word3" and "word4 word5". */
  private static final String TWO_LITERALS_TUPLE =
    "\"word1 word2 word3\" \"word4 word5\" . ";

  /** Tuple made of a single URI. */
  private static final String URI_TUPLE = "<http://renaud.delbru.fr/> . ";

  /** Tuple made of a URI followed by a literal repeating "renaud" and "delbru". */
  private static final String REPEATED_TERMS_TUPLE =
    "<http://renaud.delbru.fr/> \"renaud delbru delbru renaud renaud delbru\" . ";

  /**
   * Selects the tuple analyzer, registers a fully-normalising URI analyzer for
   * the xsd:anyURI datatype, and picks the random postings format.
   */
  @Override
  protected void configure() throws IOException {
    this.setAnalyzer(AnalyzerType.TUPLE);
    // TODO: remove once TupleAnalyzer is no longer used
    final AnyURIAnalyzer anyUri = new AnyURIAnalyzer(TEST_VERSION_CURRENT);
    anyUri.setUriNormalisation(URINormalisation.FULL);
    final TupleAnalyzer tuple = (TupleAnalyzer) analyzer;
    tuple.registerDatatype(XSDDatatype.XSD_ANY_URI.toCharArray(), anyUri);
    this.setPostingsFormat(PostingsFormatType.RANDOM);
  }

  /**
   * Builds a list holding {@code count} copies of {@link #URI_TUPLE}.
   */
  private static ArrayList<String> uriTuples(final int count) {
    final ArrayList<String> tuples = new ArrayList<String>();
    while (tuples.size() < count) {
      tuples.add(URI_TUPLE);
    }
    return tuples;
  }

  /**
   * Asserts that the scorer moves to a candidate document and then finds a
   * matching node in it.
   */
  private void assertCandidateWithNode(final NodePhraseScorer phrase)
  throws Exception {
    assertTrue(phrase.nextCandidateDocument());
    assertTrue(phrase.nextNode());
  }

  /**
   * Asserts that the scorer moves to a candidate document but finds no
   * matching node in it.
   */
  private void assertCandidateWithoutNode(final NodePhraseScorer phrase)
  throws Exception {
    assertTrue(phrase.nextCandidateDocument());
    assertFalse(phrase.nextNode());
  }

  /**
   * Two terms that live in separate nodes must not be matched as a phrase.
   */
  @Test
  public void testEmptyResult1() throws Exception {
    this.addDocument(TWO_LITERALS_TUPLE);

    final NodePhraseScorer phrase =
      (NodePhraseScorer) this.getScorer(npq("word1", "word4"));

    this.assertCandidateWithoutNode(phrase);
  }

  /**
   * A phrase query with a gap of 1 between its two terms must not match
   * terms that are adjacent in the document.
   */
  @Test
  public void testEmptyResult2() throws Exception {
    this.addDocument(TWO_LITERALS_TUPLE);

    final NodePhraseScorer phrase =
      (NodePhraseScorer) this.getScorer(npq("word4", "", "word5"));

    this.assertCandidateWithoutNode(phrase);
  }

  /**
   * Checks the effect of level and bound constraints on the phrase
   * "word4 word5": unconstrained, level 2 and bound [0,1] match, whereas
   * level 1 and bound [0,0] do not.
   */
  @Test
  public void testNodeConstraint() throws Exception {
    this.addDocument(TWO_LITERALS_TUPLE);

    final NodePhraseScorer unconstrained =
      (NodePhraseScorer) this.getScorer(npq("word4", "word5"));
    this.assertCandidateWithNode(unconstrained);

    final NodePhraseScorer levelTwo =
      (NodePhraseScorer) this.getScorer(npq("word4", "word5").level(2));
    this.assertCandidateWithNode(levelTwo);

    final NodePhraseScorer levelOne =
      (NodePhraseScorer) this.getScorer(npq("word4", "word5").level(1));
    this.assertCandidateWithoutNode(levelOne);

    final NodePhraseScorer boundZeroZero =
      (NodePhraseScorer) this.getScorer(npq("word4", "word5").bound(0,0));
    this.assertCandidateWithoutNode(boundZeroZero);

    final NodePhraseScorer boundZeroOne =
      (NodePhraseScorer) this.getScorer(npq("word4", "word5").bound(0,1));
    this.assertCandidateWithNode(boundZeroOne);
  }

  /**
   * Checks the matching nodes and the in-node frequencies when the phrase
   * occurs several times, first with adjacent terms and then with a gap of 1.
   */
  @Test
  public void testMultipleOccurrences() throws Exception {
    this.addDocument(REPEATED_TERMS_TUPLE);

    // adjacent terms: one occurrence in node (0,0), two in node (0,1)
    final NodeQuery adjacentQuery = npq("renaud", "delbru").getNodeQuery();
    final NodePhraseScorer adjacent =
      (NodePhraseScorer) this.getScorer(adjacentQuery);

    assertTrue(adjacent.nextCandidateDocument());
    assertEquals(0, adjacent.doc());

    assertTrue(adjacent.nextNode());
    assertEquals(node(0,0), adjacent.node());
    assertEquals(1.0f, adjacent.freqInNode(), 0);

    assertTrue(adjacent.nextNode());
    assertEquals(node(0,1), adjacent.node());
    assertEquals(2.0f, adjacent.freqInNode(), 0);

    assertFalse(adjacent.nextNode());
    assertFalse(adjacent.nextCandidateDocument());

    // gap of 1 between the terms: only node (0,1) matches, twice
    final NodeQuery gappedQuery = npq("renaud", "", "delbru").getNodeQuery();
    final NodePhraseScorer gapped =
      (NodePhraseScorer) this.getScorer(gappedQuery);

    assertTrue(gapped.nextCandidateDocument());
    assertEquals(0, gapped.doc());

    assertTrue(gapped.nextNode());
    assertEquals(node(0,1), gapped.node());
    assertEquals(2.0f, gapped.freqInNode(), 0);

    assertFalse(gapped.nextNode());
    assertFalse(gapped.nextCandidateDocument());
  }

  /**
   * Skips straight to document 16 out of 32 identical documents and checks
   * that the first matching node is found there.
   */
  @Test
  public void testSkipToCandidate() throws Exception {
    this.addDocuments(uriTuples(32));

    final NodePhraseScorer phrase =
      (NodePhraseScorer) this.getScorer(npq("renaud", "delbru"));

    assertTrue(phrase.skipToCandidate(16));
    assertEquals(16, phrase.doc());
    assertTrue(phrase.nextNode());
    assertEquals(node(0,0), phrase.node());
  }

  /**
   * Mixes {@code nextCandidateDocument} and {@code skipToCandidate}: advances
   * once, skips to document 16, then advances again to document 17.
   */
  @Test
  public void testSkipToCandidateNext() throws Exception {
    this.addDocuments(uriTuples(32));

    final NodePhraseScorer phrase =
      (NodePhraseScorer) this.getScorer(npq("renaud", "delbru"));

    assertTrue(phrase.nextCandidateDocument());

    assertTrue(phrase.skipToCandidate(16));
    assertEquals(16, phrase.doc());
    assertTrue(phrase.nextNode());
    assertEquals(node(0,0), phrase.node());

    assertTrue(phrase.nextCandidateDocument());
    assertEquals(17, phrase.doc());
    assertTrue(phrase.nextNode());
    assertEquals(node(0,0), phrase.node());
  }

  // NOTE(review): the two tests below are disabled. They go through `_helper`
  // and `QueryTestingHelper`, which none of the active tests in this class
  // use. TODO: confirm whether they should be ported or deleted.
  //
  // @Test(expected=InvalidCallException.class)
  // public void testInvalidScoreCall() throws IOException {
  // _helper.addDocument("\"Renaud Delbru\" . ");
  //
  // final Term t1 = new Term(QueryTestingHelper.DEFAULT_FIELD, "renaud");
  // final Term t2 = new Term(QueryTestingHelper.DEFAULT_FIELD, "delbru");
  // final NodePhraseQuery query = new NodePhraseQuery();
  // query.add(t1); query.add(t2);
  // final Weight w = query.createWeight(_helper.getIndexSearcher());
  //
  // final IndexReader reader = _helper.getIndexReader();
  // final DocsAndPositionsEnum[] tps = new DocsAndPositionsEnum[2];
  // tps[0] = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), t1.field(), t1.bytes());
  // tps[1] = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), t2.field(), t2.bytes());
  //
  // final NodePhraseScorer scorer = new NodeExactPhraseScorer(w, tps, new int[] {0, 1},
  // _helper.getIndexSearcher().getSimilarityProvider().get(QueryTestingHelper.DEFAULT_FIELD),
  // MultiNorms.norms(reader, QueryTestingHelper.DEFAULT_FIELD));
  // assertNotNull("ts is null and it shouldn't be", scorer);
  //
  // // Invalid call
  // scorer.score();
  // }
  //
  // @Test
  // public void testScore() throws IOException {
  // _helper.addDocument("\"Renaud Delbru\" . <http://renaud.delbru.fr> . ");
  //
  // final Term t1 = new Term(QueryTestingHelper.DEFAULT_FIELD, "renaud");
  // final Term t2 = new Term(QueryTestingHelper.DEFAULT_FIELD, "delbru");
  // final NodePhraseQuery query = new NodePhraseQuery();
  // query.add(t1); query.add(t2);
  //
  // final IndexReader reader = _helper.getIndexReader();
  // final DocsAndPositionsEnum[] tps = new DocsAndPositionsEnum[2];
  // tps[0] = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), t1.field(), t1.bytes());
  // tps[1] = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), t2.field(), t2.bytes());
  //
  // final NodePhraseScorer scorer = new NodeExactPhraseScorer(
  // new ConstantWeight(), tps, new int[] {0, 1},
  // _helper.getIndexSearcher().getSimilarityProvider().get(QueryTestingHelper.DEFAULT_FIELD),
  // MultiNorms.norms(reader, QueryTestingHelper.DEFAULT_FIELD));
  // assertNotNull("ts is null and it shouldn't be", scorer);
  //
  // assertFalse("no doc returned", scorer.nextDocument() == DocIdSetIterator.NO_MORE_DOCS);
  // assertEquals(0, scorer.doc());
  // assertEquals(0.70, scorer.score(), 0.01);
  // }

}