/**
 * Copyright 2014 National University of Ireland, Galway.
 *
 * This file is part of the SIREn project. Project and contact information:
 *
 *  https://github.com/rdelbru/SIREn
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sindice.siren.search.node;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import static org.sindice.siren.search.AbstractTestSirenScorer.dq;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermRangeTermsEnum;
import org.sindice.siren.analysis.AnyURIAnalyzer;
import org.sindice.siren.analysis.TupleAnalyzer;
import org.sindice.siren.index.codecs.RandomSirenCodec.PostingsFormatType;
import org.sindice.siren.util.BasicSirenTestCase;
import org.sindice.siren.util.XSDDatatype;

/**
 * Tests for {@link NodeTermRangeQuery}: inclusive and exclusive bounds,
 * open-ended ranges, top-terms boolean rewriting, and the
 * {@code equals}/{@code hashCode} contract.
 */
public class TestNodeTermRangeQuery extends BasicSirenTestCase {

  /**
   * Configures the test case with a {@link TupleAnalyzer} built from a
   * {@link WhitespaceAnalyzer} and an {@link AnyURIAnalyzer}, registers the
   * URI analyzer for the {@code XSD_ANY_URI} datatype, and selects the
   * {@code RANDOM} postings format.
   */
  @Override
  protected void configure() throws IOException {
    final AnyURIAnalyzer uriAnalyzer = new AnyURIAnalyzer(TEST_VERSION_CURRENT);
    final TupleAnalyzer tupleAnalyzer = new TupleAnalyzer(TEST_VERSION_CURRENT,
      new WhitespaceAnalyzer(TEST_VERSION_CURRENT), uriAnalyzer);
    tupleAnalyzer.registerDatatype(XSDDatatype.XSD_ANY_URI.toCharArray(), uriAnalyzer);
    this.setAnalyzer(tupleAnalyzer);
    this.setPostingsFormat(PostingsFormatType.RANDOM);
  }

  /**
   * With a, b, c and d indexed, the exclusive range (a, c) must match only b.
   */
  public void testExclusive1() throws Exception {
    this.addDocument("</computera>");
    this.addDocument("</computerb>");
    this.addDocument("</computerc>");
    this.addDocument("</computerd>");

    final NodePrimitiveQuery q = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computera", "/computerc", false, false);

    final ScoreDoc[] hits = searcher.search(dq(q), null, 1000).scoreDocs;
    assertEquals("A,B,C,D, only B in range", 1, hits.length);
  }

  /**
   * With a, b and d indexed, the exclusive range (a, c) must match only b,
   * and must still match only b once the upper bound term c is added.
   */
  public void testExclusive2() throws Exception {
    this.addDocument("</computera>");
    this.addDocument("</computerb>");
    // The initial index is A, B, D (as the assertion message states and as
    // testInclusive2 does), so that adding C below actually exercises the
    // exclusion of a newly indexed upper bound term.
    this.addDocument("</computerd>");

    final NodePrimitiveQuery q = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computera", "/computerc", false, false);

    ScoreDoc[] hits = searcher.search(dq(q), null, 1000).scoreDocs;
    assertEquals("A,B,D, only B in range", 1, hits.length);

    this.addDocument("</computerc>");
    hits = searcher.search(dq(q), null, 1000).scoreDocs;
    assertEquals("C added, still only B in range", 1, hits.length);
  }

  /**
   * With a, b, c and d indexed, the inclusive range [a, c] must match a, b
   * and c.
   */
  public void testInclusive1() throws Exception {
    this.addDocument("</computera>");
    this.addDocument("</computerb>");
    this.addDocument("</computerc>");
    this.addDocument("</computerd>");

    final NodePrimitiveQuery q = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computera", "/computerc", true, true);

    final ScoreDoc[] hits = searcher.search(dq(q), null, 1000).scoreDocs;
    assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
  }

  /**
   * With a, b and d indexed, the inclusive range [a, c] must match a and b,
   * and must additionally match c once it is added.
   */
  public void testInclusive2() throws Exception {
    this.addDocument("</computera>");
    this.addDocument("</computerb>");
    this.addDocument("</computerd>");

    final NodePrimitiveQuery q = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computera", "/computerc", true, true);

    ScoreDoc[] hits = searcher.search(dq(q), null, 1000).scoreDocs;
    assertEquals("A,B,D - A and B in range", 2, hits.length);

    this.addDocument("</computerc>");
    hits = searcher.search(dq(q), null, 1000).scoreDocs;
    assertEquals("C added - A, B, C in range", 3, hits.length);
  }

  /**
   * Checks ranges that are open on one or both ends. Ranges with no lower
   * bound (or an empty one) and no upper bound must match all four documents
   * and must not be served by a {@link TermRangeTermsEnum}; a range with a
   * non-empty lower bound must use a {@link TermRangeTermsEnum} and match
   * only the documents from that bound upwards.
   */
  public void testAllDocs() throws Exception {
    this.addDocuments(new String[]{"</computera>", "</computerb>", "</computerc>", "</computerd>"});

    NodeTermRangeQuery query = new NodeTermRangeQuery(DEFAULT_TEST_FIELD, null, null, true, true);
    final Terms terms = MultiFields.getTerms(searcher.getIndexReader(), DEFAULT_TEST_FIELD);
    assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(dq(query), null, 1000).scoreDocs.length);

    query = new NodeTermRangeQuery(DEFAULT_TEST_FIELD, null, null, false, false);
    assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(dq(query), null, 1000).scoreDocs.length);

    query = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD, "", null, true, false);
    assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(dq(query), null, 1000).scoreDocs.length);

    // and now another one, with a real lower bound
    query = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD, "/computerb", null, true, false);
    assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(3, searcher.search(dq(query), null, 1000).scoreDocs.length);

    // NOTE(review): the reader is closed explicitly here; confirm that this
    // does not conflict with the clean-up performed by BasicSirenTestCase.
    reader.close();
  }

  /**
   * This test should not be here, but it tests the fuzzy query rewrite mode
   * (TOP_TERMS_SCORING_BOOLEAN_REWRITE) with constant score, and checks that
   * only the lower end of the terms is put into the range.
   * <p>
   * The range [b, j] is first rewritten with the current maximum clause count
   * and must yield the nine terms b to j; the maximum clause count is then
   * lowered to 3 and the rewrite must yield only b, c and d. The previous
   * maximum clause count is always restored.
   */
  public void testTopTermsRewrite() throws Exception {
    this.addDocuments(new String[]{"</computera>", "</computerb>", "</computerc>",
                                   "</computerd>", "</computere>", "</computerf>",
                                   "</computerg>", "</computerh>", "</computeri>",
                                   "</computerj>", "</computerk>"});

    final NodeTermRangeQuery query = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computerb", "/computerj", true, true);
    this.checkBooleanTerms(query, "/computerb", "/computerc", "/computerd",
      "/computere", "/computerf", "/computerg", "/computerh", "/computeri",
      "/computerj");

    final int savedClauseCount = NodeBooleanQuery.getMaxClauseCount();
    try {
      NodeBooleanQuery.setMaxClauseCount(3);
      this.checkBooleanTerms(query, "/computerb", "/computerc", "/computerd");
    }
    finally {
      // the clause limit is static state: restore it for the other tests
      NodeBooleanQuery.setMaxClauseCount(savedClauseCount);
    }
  }

  /**
   * Rewrites the given query with a
   * {@link MultiNodeTermQuery.TopTermsScoringNodeBooleanQueryRewrite} of size
   * 50 and asserts that the resulting {@link NodeBooleanQuery} consists of
   * exactly one {@link NodeTermQuery} clause per expected term, with no
   * missing, unexpected or duplicated term.
   *
   * @param query the range query to rewrite; its rewrite method is modified
   * @param terms the term texts expected in the rewritten boolean query
   * @throws IOException if the rewrite fails
   */
  private void checkBooleanTerms(final NodeTermRangeQuery query, final String... terms)
  throws IOException {
    query.setRewriteMethod(new MultiNodeTermQuery.TopTermsScoringNodeBooleanQueryRewrite(50));
    final NodeBooleanQuery bq = (NodeBooleanQuery) searcher.rewrite(query);
    final Set<String> allowedTerms = new HashSet<String>(Arrays.asList(terms));
    assertEquals(allowedTerms.size(), bq.clauses().size());
    for (final NodeBooleanClause c : bq.clauses()) {
      assertTrue(c.getQuery() instanceof NodeTermQuery);
      final NodeTermQuery tq = (NodeTermQuery) c.getQuery();
      final String term = tq.getTerm().text();
      assertTrue("invalid term: "+ term, allowedTerms.contains(term));
      allowedTerms.remove(term); // remove to fail on double terms
    }
    assertEquals(0, allowedTerms.size());
  }

  /**
   * Checks {@code equals} and {@code hashCode} of range queries: equal
   * queries must be equal and share a hash code, while queries differing in
   * boost, field, lower term, upper term, null bounds or inclusiveness must
   * not be equal.
   */
  public void testEqualsHashcode() {
    Query query = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computera", "/computerc", true, true);

    query.setBoost(1.0f);
    Query other = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computera", "/computerc", true, true);
    other.setBoost(1.0f);

    assertEquals("query equals itself is true", query, query);
    assertEquals("equivalent queries are equal", query, other);
    assertEquals("hashcode must return same value when equals is true",
      query.hashCode(), other.hashCode());

    other.setBoost(2.0f);
    assertFalse("Different boost queries are not equal", query.equals(other));

    other = NodeTermRangeQuery.newStringRange("notcontent",
      "/computera", "/computerc", true, true);
    assertFalse("Different fields are not equal", query.equals(other));

    other = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computerx", "/computerc", true, true);
    assertFalse("Different lower terms are not equal", query.equals(other));

    other = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computera", "/computerz", true, true);
    assertFalse("Different upper terms are not equal", query.equals(other));

    query = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      null, "/computerc", true, true);
    other = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      null, "/computerc", true, true);
    assertEquals("equivalent queries with null lowerterms are equal()", query, other);
    assertEquals("hashcode must return same value when equals is true",
      query.hashCode(), other.hashCode());

    query = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computerc", null, true, true);
    other = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computerc", null, true, true);
    assertEquals("equivalent queries with null upperterms are equal()", query, other);
    assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());

    query = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      null, "/computerc", true, true);
    other = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computerc", null, true, true);
    assertFalse("queries with different upper and lower terms are not equal",
      query.equals(other));

    query = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computera", "/computerc", false, false);
    other = NodeTermRangeQuery.newStringRange(DEFAULT_TEST_FIELD,
      "/computera", "/computerc", true, true);
    assertFalse("queries with different inclusive are not equal", query.equals(other));
  }

}