/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.search; import java.io.IOException; import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; public class TestTermRangeQuery extends LuceneTestCase { private int docCount = 0; private Directory dir; @Override public void setUp() throws Exception { super.setUp(); dir = newDirectory(); } @Override public void tearDown() throws Exception { dir.close(); super.tearDown(); } public void testExclusive() throws Exception { Query query = TermRangeQuery.newStringRange("content", "A", "C", false, false); initializeIndex(new String[] {"A", "B", "C", "D"}); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; assertEquals("A,B,C,D, only B in range", 1, hits.length); reader.close(); initializeIndex(new String[] {"A", "B", "D"}); reader = DirectoryReader.open(dir); searcher = newSearcher(reader); hits = searcher.search(query, 1000).scoreDocs; assertEquals("A,B,D, only B in range", 1, hits.length); reader.close(); addDoc("C"); reader = DirectoryReader.open(dir); searcher = newSearcher(reader); hits = searcher.search(query, 1000).scoreDocs; assertEquals("C added, still only B in range", 1, hits.length); reader.close(); } public void testInclusive() throws Exception { Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true); initializeIndex(new String[]{"A", "B", "C", "D"}); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; assertEquals("A,B,C,D - A,B,C in range", 3, hits.length); reader.close(); initializeIndex(new String[]{"A", "B", "D"}); reader = DirectoryReader.open(dir); searcher = newSearcher(reader); hits = searcher.search(query, 1000).scoreDocs; assertEquals("A,B,D - A and B in range", 2, hits.length); reader.close(); addDoc("C"); reader = DirectoryReader.open(dir); searcher = newSearcher(reader); hits = searcher.search(query, 1000).scoreDocs; assertEquals("C added - A, B, C in range", 3, hits.length); reader.close(); } public void testAllDocs() throws Exception { initializeIndex(new String[]{"A", "B", "C", "D"}); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); TermRangeQuery query = new TermRangeQuery("content", null, null, true, true); assertEquals(4, searcher.search(query, 1000).scoreDocs.length); query = TermRangeQuery.newStringRange("content", "", null, true, true); assertEquals(4, searcher.search(query, 1000).scoreDocs.length); query = TermRangeQuery.newStringRange("content", "", null, true, false); assertEquals(4, searcher.search(query, 1000).scoreDocs.length); // and now another one query = TermRangeQuery.newStringRange("content", "B", null, true, true); assertEquals(3, searcher.search(query, 1000).scoreDocs.length); reader.close(); } /** This test should not be here, but it tests the fuzzy query rewrite mode (TOP_TERMS_SCORING_BOOLEAN_REWRITE) * with constant score and checks, that only the lower end of terms is put into the range */ public void testTopTermsRewrite() throws Exception { initializeIndex(new String[]{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K"}); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); TermRangeQuery query = TermRangeQuery.newStringRange("content", "B", "J", true, true); checkBooleanTerms(searcher, query, "B", "C", "D", "E", "F", "G", "H", "I", "J"); final int savedClauseCount = BooleanQuery.getMaxClauseCount(); try { BooleanQuery.setMaxClauseCount(3); checkBooleanTerms(searcher, query, "B", "C", "D"); } finally { BooleanQuery.setMaxClauseCount(savedClauseCount); } reader.close(); } private void checkBooleanTerms(IndexSearcher searcher, TermRangeQuery query, String... terms) throws IOException { query.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50)); final BooleanQuery bq = (BooleanQuery) searcher.rewrite(query); final Set<String> allowedTerms = asSet(terms); assertEquals(allowedTerms.size(), bq.clauses().size()); for (BooleanClause c : bq.clauses()) { assertTrue(c.getQuery() instanceof TermQuery); final TermQuery tq = (TermQuery) c.getQuery(); final String term = tq.getTerm().text(); assertTrue("invalid term: "+ term, allowedTerms.contains(term)); allowedTerms.remove(term); // remove to fail on double terms } assertEquals(0, allowedTerms.size()); } public void testEqualsHashcode() { Query query = TermRangeQuery.newStringRange("content", "A", "C", true, true); Query other = TermRangeQuery.newStringRange("content", "A", "C", true, true); assertEquals("query equals itself is true", query, query); assertEquals("equivalent queries are equal", query, other); assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode()); other = TermRangeQuery.newStringRange("notcontent", "A", "C", true, true); assertFalse("Different fields are not equal", query.equals(other)); other = TermRangeQuery.newStringRange("content", "X", "C", true, true); assertFalse("Different lower terms are not equal", query.equals(other)); other = TermRangeQuery.newStringRange("content", "A", "Z", true, true); assertFalse("Different upper terms are not equal", query.equals(other)); query = TermRangeQuery.newStringRange("content", null, "C", true, true); other = TermRangeQuery.newStringRange("content", null, "C", true, true); assertEquals("equivalent queries with null lowerterms are equal()", query, other); assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode()); query = TermRangeQuery.newStringRange("content", "C", null, true, true); other = TermRangeQuery.newStringRange("content", "C", null, true, true); assertEquals("equivalent queries with null upperterms are equal()", query, other); assertEquals("hashcode returns same value", query.hashCode(), other.hashCode()); query = TermRangeQuery.newStringRange("content", null, "C", true, true); other = TermRangeQuery.newStringRange("content", "C", null, true, true); assertFalse("queries with different upper and lower terms are not equal", query.equals(other)); query = TermRangeQuery.newStringRange("content", "A", "C", false, false); other = TermRangeQuery.newStringRange("content", "A", "C", true, true); assertFalse("queries with different inclusive are not equal", query.equals(other)); } private static class SingleCharAnalyzer extends Analyzer { private static class SingleCharTokenizer extends Tokenizer { char[] buffer = new char[1]; boolean done = false; CharTermAttribute termAtt; public SingleCharTokenizer() { super(); termAtt = addAttribute(CharTermAttribute.class); } @Override public boolean incrementToken() throws IOException { if (done) return false; else { int count = input.read(buffer); clearAttributes(); done = true; if (count == 1) { termAtt.copyBuffer(buffer, 0, 1); } return true; } } @Override public void reset() throws IOException { super.reset(); done = false; } } @Override public TokenStreamComponents createComponents(String fieldName) { return new TokenStreamComponents(new SingleCharTokenizer()); } } private void initializeIndex(String[] values) throws IOException { initializeIndex(values, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)); } private void initializeIndex(String[] values, Analyzer analyzer) throws IOException { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzer).setOpenMode(OpenMode.CREATE)); for (int i = 0; i < values.length; i++) { insertDoc(writer, values[i]); } writer.close(); } // shouldnt create an analyzer for every doc? private void addDoc(String content) throws IOException { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND)); insertDoc(writer, content); writer.close(); } private void insertDoc(IndexWriter writer, String content) throws IOException { Document doc = new Document(); doc.add(newStringField("id", "id" + docCount, Field.Store.YES)); doc.add(newTextField("content", content, Field.Store.NO)); writer.addDocument(doc); docCount++; } // LUCENE-38 public void testExclusiveLowerNull() throws Exception { Analyzer analyzer = new SingleCharAnalyzer(); //http://issues.apache.org/jira/browse/LUCENE-38 Query query = TermRangeQuery.newStringRange("content", null, "C", false, false); initializeIndex(new String[] {"A", "B", "", "C", "D"}, analyzer); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); int numHits = searcher.search(query, 1000).totalHits; // When Lucene-38 is fixed, use the assert on the next line: assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 3, numHits); // until Lucene-38 is fixed, use this assert: //assertEquals("A,B,<empty string>,C,D => A, B & <empty string> are in range", 2, hits.length()); reader.close(); initializeIndex(new String[] {"A", "B", "", "D"}, analyzer); reader = DirectoryReader.open(dir); searcher = newSearcher(reader); numHits = searcher.search(query, 1000).totalHits; // When Lucene-38 is fixed, use the assert on the next line: assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 3, numHits); // until Lucene-38 is fixed, use this assert: //assertEquals("A,B,<empty string>,D => A, B & <empty string> are in range", 2, hits.length()); reader.close(); addDoc("C"); reader = DirectoryReader.open(dir); searcher = newSearcher(reader); numHits = searcher.search(query, 1000).totalHits; // When Lucene-38 is fixed, use the assert on the next line: assertEquals("C added, still A, B & <empty string> are in range", 3, numHits); // until Lucene-38 is fixed, use this assert //assertEquals("C added, still A, B & <empty string> are in range", 2, hits.length()); reader.close(); } // LUCENE-38 public void testInclusiveLowerNull() throws Exception { //http://issues.apache.org/jira/browse/LUCENE-38 Analyzer analyzer = new SingleCharAnalyzer(); Query query = TermRangeQuery.newStringRange("content", null, "C", true, true); initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = newSearcher(reader); int numHits = searcher.search(query, 1000).totalHits; // When Lucene-38 is fixed, use the assert on the next line: assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, numHits); // until Lucene-38 is fixed, use this assert //assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 3, hits.length()); reader.close(); initializeIndex(new String[]{"A", "B", "", "D"}, analyzer); reader = DirectoryReader.open(dir); searcher = newSearcher(reader); numHits = searcher.search(query, 1000).totalHits; // When Lucene-38 is fixed, use the assert on the next line: assertEquals("A,B,<empty string>,D - A, B and <empty string> in range", 3, numHits); // until Lucene-38 is fixed, use this assert //assertEquals("A,B,<empty string>,D => A, B and <empty string> in range", 2, hits.length()); reader.close(); addDoc("C"); reader = DirectoryReader.open(dir); searcher = newSearcher(reader); numHits = searcher.search(query, 1000).totalHits; // When Lucene-38 is fixed, use the assert on the next line: assertEquals("C added => A,B,<empty string>,C in range", 4, numHits); // until Lucene-38 is fixed, use this assert //assertEquals("C added => A,B,<empty string>,C in range", 3, hits.length()); reader.close(); } }