/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.index; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map.Entry; import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.CountDownLatch; import java.util.function.LongSupplier; import java.util.function.Supplier; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import 
org.apache.lucene.index.CheckIndex.Status.DocValuesStatus; import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.TestUtil; import com.carrotsearch.randomizedtesting.generators.RandomPicks; import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.RegExp; import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; /** * Abstract class to do basic tests for a docvalues format. * NOTE: This test focuses on the docvalues impl, nothing else. * The [stretch] goal is for this test to be * so thorough in testing a new DocValuesFormat that if this * test passes, then all Lucene/Solr tests should also pass. Ie, * if there is some bug in a given DocValuesFormat that this * test fails to catch then this test needs to be improved! 
*/ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTestCase { @Override protected void addRandomFields(Document doc) { if (usually()) { doc.add(new NumericDocValuesField("ndv", random().nextInt(1 << 12))); doc.add(new BinaryDocValuesField("bdv", new BytesRef(TestUtil.randomSimpleString(random())))); doc.add(new SortedDocValuesField("sdv", new BytesRef(TestUtil.randomSimpleString(random(), 2)))); } int numValues = random().nextInt(5); for (int i = 0; i < numValues; ++i) { doc.add(new SortedSetDocValuesField("ssdv", new BytesRef(TestUtil.randomSimpleString(random(), 2)))); } numValues = random().nextInt(5); for (int i = 0; i < numValues; ++i) { doc.add(new SortedNumericDocValuesField("sndv", TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE))); } } public void testOneNumber() throws IOException { Directory directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the text to be indexed. 
" + longTerm; doc.add(newTextField("fieldname", text, Field.Store.YES)); doc.add(new NumericDocValuesField("dv", 5)); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true IndexSearcher isearcher = new IndexSearcher(ireader); assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.search(query, 1); assertEquals(1, hits.totalHits); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc); assertEquals(text, hitDoc.get("fieldname")); assert ireader.leaves().size() == 1; NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv"); int docID = hits.scoreDocs[i].doc; assertEquals(docID, dv.advance(docID)); assertEquals(5, dv.longValue()); } ireader.close(); directory.close(); } public void testOneFloat() throws IOException { Directory directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the text to be indexed. 
" + longTerm; doc.add(newTextField("fieldname", text, Field.Store.YES)); doc.add(new FloatDocValuesField("dv", 5.7f)); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true IndexSearcher isearcher = new IndexSearcher(ireader); assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.search(query, 1); assertEquals(1, hits.totalHits); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { int docID = hits.scoreDocs[i].doc; Document hitDoc = isearcher.doc(docID); assertEquals(text, hitDoc.get("fieldname")); assert ireader.leaves().size() == 1; NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv"); assertEquals(docID, dv.advance(docID)); assertEquals(Float.floatToRawIntBits(5.7f), dv.longValue()); } ireader.close(); directory.close(); } public void testTwoNumbers() throws IOException { Directory directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the text to be indexed. 
" + longTerm; doc.add(newTextField("fieldname", text, Field.Store.YES)); doc.add(new NumericDocValuesField("dv1", 5)); doc.add(new NumericDocValuesField("dv2", 17)); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true IndexSearcher isearcher = new IndexSearcher(ireader); assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.search(query, 1); assertEquals(1, hits.totalHits); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { int docID = hits.scoreDocs[i].doc; Document hitDoc = isearcher.doc(docID); assertEquals(text, hitDoc.get("fieldname")); assert ireader.leaves().size() == 1; NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1"); assertEquals(docID, dv.advance(docID)); assertEquals(5, dv.longValue()); dv = ireader.leaves().get(0).reader().getNumericDocValues("dv2"); assertEquals(docID, dv.advance(docID)); assertEquals(17, dv.longValue()); } ireader.close(); directory.close(); } public void testTwoBinaryValues() throws IOException { Directory directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the text to be indexed. 
" + longTerm; doc.add(newTextField("fieldname", text, Field.Store.YES)); doc.add(new BinaryDocValuesField("dv1", new BytesRef(longTerm))); doc.add(new BinaryDocValuesField("dv2", new BytesRef(text))); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true IndexSearcher isearcher = new IndexSearcher(ireader); assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.search(query, 1); assertEquals(1, hits.totalHits); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { int hitDocID = hits.scoreDocs[i].doc; Document hitDoc = isearcher.doc(hitDocID); assertEquals(text, hitDoc.get("fieldname")); assert ireader.leaves().size() == 1; BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv1"); assertEquals(hitDocID, dv.advance(hitDocID)); BytesRef scratch = dv.binaryValue(); assertEquals(new BytesRef(longTerm), scratch); dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv2"); assertEquals(hitDocID, dv.advance(hitDocID)); scratch = dv.binaryValue(); assertEquals(new BytesRef(text), scratch); } ireader.close(); directory.close(); } public void testTwoFieldsMixed() throws IOException { Directory directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the text to be indexed. 
" + longTerm; doc.add(newTextField("fieldname", text, Field.Store.YES)); doc.add(new NumericDocValuesField("dv1", 5)); doc.add(new BinaryDocValuesField("dv2", new BytesRef("hello world"))); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true IndexSearcher isearcher = new IndexSearcher(ireader); assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.search(query, 1); assertEquals(1, hits.totalHits); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { int docID = hits.scoreDocs[i].doc; Document hitDoc = isearcher.doc(docID); assertEquals(text, hitDoc.get("fieldname")); assert ireader.leaves().size() == 1; NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv1"); assertEquals(docID, dv.advance(docID)); assertEquals(5, dv.longValue()); BinaryDocValues dv2 = ireader.leaves().get(0).reader().getBinaryDocValues("dv2"); assertEquals(docID, dv2.advance(docID)); assertEquals(new BytesRef("hello world"), dv2.binaryValue()); } ireader.close(); directory.close(); } public void testThreeFieldsMixed() throws IOException { Directory directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the text to be indexed. 
" + longTerm; doc.add(newTextField("fieldname", text, Field.Store.YES)); doc.add(new SortedDocValuesField("dv1", new BytesRef("hello hello"))); doc.add(new NumericDocValuesField("dv2", 5)); doc.add(new BinaryDocValuesField("dv3", new BytesRef("hello world"))); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true IndexSearcher isearcher = new IndexSearcher(ireader); assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.search(query, 1); assertEquals(1, hits.totalHits); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { int docID = hits.scoreDocs[i].doc; Document hitDoc = isearcher.doc(docID); assertEquals(text, hitDoc.get("fieldname")); assert ireader.leaves().size() == 1; SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv1"); assertEquals(docID, dv.advance(docID)); int ord = dv.ordValue(); BytesRef scratch = dv.lookupOrd(ord); assertEquals(new BytesRef("hello hello"), scratch); NumericDocValues dv2 = ireader.leaves().get(0).reader().getNumericDocValues("dv2"); assertEquals(docID, dv2.advance(docID)); assertEquals(5, dv2.longValue()); BinaryDocValues dv3 = ireader.leaves().get(0).reader().getBinaryDocValues("dv3"); assertEquals(docID, dv3.advance(docID)); assertEquals(new BytesRef("hello world"), dv3.binaryValue()); } ireader.close(); directory.close(); } public void testThreeFieldsMixed2() throws IOException { Directory directory = newDirectory(); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory); Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the text to be indexed. 
" + longTerm; doc.add(newTextField("fieldname", text, Field.Store.YES)); doc.add(new BinaryDocValuesField("dv1", new BytesRef("hello world"))); doc.add(new SortedDocValuesField("dv2", new BytesRef("hello hello"))); doc.add(new NumericDocValuesField("dv3", 5)); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true IndexSearcher isearcher = new IndexSearcher(ireader); assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.search(query, 1); assertEquals(1, hits.totalHits); BytesRef scratch = new BytesRef(); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { int docID = hits.scoreDocs[i].doc; Document hitDoc = isearcher.doc(docID); assertEquals(text, hitDoc.get("fieldname")); assert ireader.leaves().size() == 1; SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv2"); assertEquals(docID, dv.advance(docID)); int ord = dv.ordValue(); scratch = dv.lookupOrd(ord); assertEquals(new BytesRef("hello hello"), scratch); NumericDocValues dv2 = ireader.leaves().get(0).reader().getNumericDocValues("dv3"); assertEquals(docID, dv2.advance(docID)); assertEquals(5, dv2.longValue()); BinaryDocValues dv3 = ireader.leaves().get(0).reader().getBinaryDocValues("dv1"); assertEquals(docID, dv3.advance(docID)); assertEquals(new BytesRef("hello world"), dv3.binaryValue()); } ireader.close(); directory.close(); } public void testTwoDocumentsNumeric() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); conf.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); Document doc = new Document(); doc.add(new NumericDocValuesField("dv", 1)); iwriter.addDocument(doc); 
doc = new Document(); doc.add(new NumericDocValuesField("dv", 2)); iwriter.addDocument(doc); iwriter.forceMerge(1); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true assert ireader.leaves().size() == 1; NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv"); assertEquals(0, dv.nextDoc()); assertEquals(1, dv.longValue()); assertEquals(1, dv.nextDoc()); assertEquals(2, dv.longValue()); ireader.close(); directory.close(); } public void testTwoDocumentsMerged() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); conf.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); Document doc = new Document(); doc.add(newField("id", "0", StringField.TYPE_STORED)); doc.add(new NumericDocValuesField("dv", -10)); iwriter.addDocument(doc); iwriter.commit(); doc = new Document(); doc.add(newField("id", "1", StringField.TYPE_STORED)); doc.add(new NumericDocValuesField("dv", 99)); iwriter.addDocument(doc); iwriter.forceMerge(1); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true assert ireader.leaves().size() == 1; NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv"); for(int i=0;i<2;i++) { Document doc2 = ireader.leaves().get(0).reader().document(i); long expected; if (doc2.get("id").equals("0")) { expected = -10; } else { expected = 99; } assertEquals(i, dv.nextDoc()); assertEquals(expected, dv.longValue()); } ireader.close(); directory.close(); } public void testBigNumericRange() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); conf.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new 
RandomIndexWriter(random(), directory, conf); Document doc = new Document(); doc.add(new NumericDocValuesField("dv", Long.MIN_VALUE)); iwriter.addDocument(doc); doc = new Document(); doc.add(new NumericDocValuesField("dv", Long.MAX_VALUE)); iwriter.addDocument(doc); iwriter.forceMerge(1); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true assert ireader.leaves().size() == 1; NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv"); assertEquals(0, dv.nextDoc()); assertEquals(Long.MIN_VALUE, dv.longValue()); assertEquals(1, dv.nextDoc()); assertEquals(Long.MAX_VALUE, dv.longValue()); ireader.close(); directory.close(); } public void testBigNumericRange2() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); conf.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); Document doc = new Document(); doc.add(new NumericDocValuesField("dv", -8841491950446638677L)); iwriter.addDocument(doc); doc = new Document(); doc.add(new NumericDocValuesField("dv", 9062230939892376225L)); iwriter.addDocument(doc); iwriter.forceMerge(1); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true assert ireader.leaves().size() == 1; NumericDocValues dv = ireader.leaves().get(0).reader().getNumericDocValues("dv"); assertEquals(0, dv.nextDoc()); assertEquals(-8841491950446638677L, dv.longValue()); assertEquals(1, dv.nextDoc()); assertEquals(9062230939892376225L, dv.longValue()); ireader.close(); directory.close(); } public void testBytes() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); 
Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the text to be indexed. " + longTerm; doc.add(newTextField("fieldname", text, Field.Store.YES)); doc.add(new BinaryDocValuesField("dv", new BytesRef("hello world"))); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true IndexSearcher isearcher = new IndexSearcher(ireader); assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.search(query, 1); assertEquals(1, hits.totalHits); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { int hitDocID = hits.scoreDocs[i].doc; Document hitDoc = isearcher.doc(hitDocID); assertEquals(text, hitDoc.get("fieldname")); assert ireader.leaves().size() == 1; BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv"); assertEquals(hitDocID, dv.advance(hitDocID)); assertEquals(new BytesRef("hello world"), dv.binaryValue()); } ireader.close(); directory.close(); } public void testBytesTwoDocumentsMerged() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); conf.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); Document doc = new Document(); doc.add(newField("id", "0", StringField.TYPE_STORED)); doc.add(new BinaryDocValuesField("dv", new BytesRef("hello world 1"))); iwriter.addDocument(doc); iwriter.commit(); doc = new Document(); doc.add(newField("id", "1", StringField.TYPE_STORED)); doc.add(new BinaryDocValuesField("dv", new BytesRef("hello 2"))); iwriter.addDocument(doc); 
iwriter.forceMerge(1); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true assert ireader.leaves().size() == 1; BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv"); for(int i=0;i<2;i++) { Document doc2 = ireader.leaves().get(0).reader().document(i); String expected; if (doc2.get("id").equals("0")) { expected = "hello world 1"; } else { expected = "hello 2"; } assertEquals(i, dv.nextDoc()); assertEquals(expected, dv.binaryValue().utf8ToString()); } ireader.close(); directory.close(); } public void testBytesMergeAwayAllValues() throws IOException { Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); iwconfig.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig); Document doc = new Document(); doc.add(new StringField("id", "0", Field.Store.NO)); iwriter.addDocument(doc); doc = new Document(); doc.add(new StringField("id", "1", Field.Store.NO)); doc.add(new BinaryDocValuesField("field", new BytesRef("hi"))); iwriter.addDocument(doc); iwriter.commit(); iwriter.deleteDocuments(new Term("id", "1")); iwriter.forceMerge(1); DirectoryReader ireader = iwriter.getReader(); iwriter.close(); BinaryDocValues dv = getOnlyLeafReader(ireader).getBinaryDocValues("field"); assertEquals(NO_MORE_DOCS, dv.nextDoc()); ireader.close(); directory.close(); } public void testSortedBytes() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the 
text to be indexed. " + longTerm; doc.add(newTextField("fieldname", text, Field.Store.YES)); doc.add(new SortedDocValuesField("dv", new BytesRef("hello world"))); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true IndexSearcher isearcher = new IndexSearcher(ireader); assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.search(query, 1); assertEquals(1, hits.totalHits); BytesRef scratch = new BytesRef(); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { int docID = hits.scoreDocs[i].doc; Document hitDoc = isearcher.doc(docID); assertEquals(text, hitDoc.get("fieldname")); assert ireader.leaves().size() == 1; SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv"); assertEquals(docID, dv.advance(docID)); scratch = dv.lookupOrd(dv.ordValue()); assertEquals(new BytesRef("hello world"), scratch); } ireader.close(); directory.close(); } public void testSortedBytesTwoDocuments() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); conf.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); Document doc = new Document(); doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 1"))); iwriter.addDocument(doc); doc = new Document(); doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 2"))); iwriter.addDocument(doc); iwriter.forceMerge(1); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true assert ireader.leaves().size() == 1; SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv"); BytesRef scratch = new BytesRef(); assertEquals(0, 
dv.nextDoc()); scratch = dv.lookupOrd(dv.ordValue()); assertEquals("hello world 1", scratch.utf8ToString()); assertEquals(1, dv.nextDoc()); scratch = dv.lookupOrd(dv.ordValue()); assertEquals("hello world 2", scratch.utf8ToString()); ireader.close(); directory.close(); } public void testSortedBytesThreeDocuments() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); conf.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); Document doc = new Document(); doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 1"))); iwriter.addDocument(doc); doc = new Document(); doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 2"))); iwriter.addDocument(doc); doc = new Document(); doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 1"))); iwriter.addDocument(doc); iwriter.forceMerge(1); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true assert ireader.leaves().size() == 1; SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv"); assertEquals(2, dv.getValueCount()); assertEquals(0, dv.nextDoc()); assertEquals(0, dv.ordValue()); BytesRef scratch = dv.lookupOrd(0); assertEquals("hello world 1", scratch.utf8ToString()); assertEquals(1, dv.nextDoc()); assertEquals(1, dv.ordValue()); scratch = dv.lookupOrd(1); assertEquals("hello world 2", scratch.utf8ToString()); assertEquals(2, dv.nextDoc()); assertEquals(0, dv.ordValue()); ireader.close(); directory.close(); } public void testSortedBytesTwoDocumentsMerged() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); conf.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); 
Document doc = new Document(); doc.add(newField("id", "0", StringField.TYPE_STORED)); doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 1"))); iwriter.addDocument(doc); iwriter.commit(); doc = new Document(); doc.add(newField("id", "1", StringField.TYPE_STORED)); doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 2"))); iwriter.addDocument(doc); iwriter.forceMerge(1); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true assert ireader.leaves().size() == 1; SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv"); assertEquals(2, dv.getValueCount()); // 2 ords assertEquals(0, dv.nextDoc()); BytesRef scratch = dv.binaryValue(); assertEquals(new BytesRef("hello world 1"), scratch); scratch = dv.lookupOrd(1); assertEquals(new BytesRef("hello world 2"), scratch); for(int i=0;i<2;i++) { Document doc2 = ireader.leaves().get(0).reader().document(i); String expected; if (doc2.get("id").equals("0")) { expected = "hello world 1"; } else { expected = "hello world 2"; } if (dv.docID() < i) { assertEquals(i, dv.nextDoc()); } scratch = dv.lookupOrd(dv.ordValue()); assertEquals(expected, scratch.utf8ToString()); } ireader.close(); directory.close(); } public void testSortedMergeAwayAllValues() throws IOException { Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); iwconfig.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig); Document doc = new Document(); doc.add(new StringField("id", "0", Field.Store.NO)); iwriter.addDocument(doc); doc = new Document(); doc.add(new StringField("id", "1", Field.Store.NO)); doc.add(new SortedDocValuesField("field", new BytesRef("hello"))); iwriter.addDocument(doc); iwriter.commit(); iwriter.deleteDocuments(new Term("id", "1")); iwriter.forceMerge(1); DirectoryReader ireader = 
iwriter.getReader(); iwriter.close(); SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field"); assertEquals(NO_MORE_DOCS, dv.nextDoc()); ireader.close(); directory.close(); } public void testBytesWithNewline() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); conf.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); Document doc = new Document(); doc.add(new BinaryDocValuesField("dv", new BytesRef("hello\nworld\r1"))); iwriter.addDocument(doc); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true assert ireader.leaves().size() == 1; BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv"); assertEquals(0, dv.nextDoc()); assertEquals(new BytesRef("hello\nworld\r1"), dv.binaryValue()); ireader.close(); directory.close(); } public void testMissingSortedBytes() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); IndexWriterConfig conf = newIndexWriterConfig(analyzer); conf.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf); Document doc = new Document(); doc.add(new SortedDocValuesField("dv", new BytesRef("hello world 2"))); iwriter.addDocument(doc); // 2nd doc missing the DV field iwriter.addDocument(new Document()); iwriter.close(); // Now search the index: IndexReader ireader = DirectoryReader.open(directory); // read-only=true assert ireader.leaves().size() == 1; SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv"); assertEquals(0, dv.nextDoc()); BytesRef scratch = dv.lookupOrd(dv.ordValue()); assertEquals(new BytesRef("hello world 2"), scratch); assertEquals(NO_MORE_DOCS, dv.nextDoc()); ireader.close(); directory.close(); } public void 
// Exercises the TermsEnum exposed over a SORTED field's ord/value dictionary:
// next(), seekCeil(), seekExact(BytesRef), seekExact(ord), and intersect().
// (the "public void" of this signature is at the end of the previous chunk)
testSortedTermsEnum() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);
  doc = new Document();
  doc.add(new SortedDocValuesField("field", new BytesRef("world")));
  iwriter.addDocument(doc);
  doc = new Document();
  doc.add(new SortedDocValuesField("field", new BytesRef("beer")));
  iwriter.addDocument(doc);
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
  assertEquals(3, dv.getValueCount());

  TermsEnum termsEnum = dv.termsEnum();

  // next(): terms come back in sorted order with increasing ords
  assertEquals("beer", termsEnum.next().utf8ToString());
  assertEquals(0, termsEnum.ord());
  assertEquals("hello", termsEnum.next().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertEquals("world", termsEnum.next().utf8ToString());
  assertEquals(2, termsEnum.ord());

  // seekCeil()
  assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
  assertEquals("hello", termsEnum.term().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
  assertEquals("beer", termsEnum.term().utf8ToString());
  assertEquals(0, termsEnum.ord());
  assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
  assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("aba")));
  assertEquals(0, termsEnum.ord());

  // seekExact()
  assertTrue(termsEnum.seekExact(new BytesRef("beer")));
  assertEquals("beer", termsEnum.term().utf8ToString());
  assertEquals(0, termsEnum.ord());
  assertTrue(termsEnum.seekExact(new BytesRef("hello")));
  // message includes the codec name so failures identify the format under test
  assertEquals(Codec.getDefault().toString(), "hello", termsEnum.term().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertTrue(termsEnum.seekExact(new BytesRef("world")));
  assertEquals("world", termsEnum.term().utf8ToString());
  assertEquals(2, termsEnum.ord());
  assertFalse(termsEnum.seekExact(new BytesRef("bogus")));

  // seek(ord)
  termsEnum.seekExact(0);
  assertEquals("beer", termsEnum.term().utf8ToString());
  assertEquals(0, termsEnum.ord());
  termsEnum.seekExact(1);
  assertEquals("hello", termsEnum.term().utf8ToString());
  assertEquals(1, termsEnum.ord());
  termsEnum.seekExact(2);
  assertEquals("world", termsEnum.term().utf8ToString());
  assertEquals(2, termsEnum.ord());

  // NORMAL automaton
  termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
  assertEquals("hello", termsEnum.next().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertEquals("world", termsEnum.next().utf8ToString());
  assertEquals(2, termsEnum.ord());
  assertNull(termsEnum.next());

  // SINGLE automaton
  termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton()));
  assertEquals("hello", termsEnum.next().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertNull(termsEnum.next());

  ireader.close();
  directory.close();
}

/** Tests the empty string as a legal SORTED value shared by two docs. */
public void testEmptySortedBytes() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  conf.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
  Document doc = new Document();
  doc.add(new SortedDocValuesField("dv", new BytesRef("")));
  iwriter.addDocument(doc);
  doc = new Document();
  doc.add(new SortedDocValuesField("dv", new BytesRef("")));
  iwriter.addDocument(doc);
  iwriter.forceMerge(1);
  iwriter.close();

  // Now search the index:
  IndexReader ireader = DirectoryReader.open(directory); // read-only=true
  assert ireader.leaves().size() == 1;
  SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
// (tail of testEmptySortedBytes: both docs share ord 0 = the empty string)
assertEquals(0, dv.nextDoc());
assertEquals(0, dv.ordValue());
assertEquals(1, dv.nextDoc());
assertEquals(0, dv.ordValue());
BytesRef scratch = dv.lookupOrd(0);
assertEquals("", scratch.utf8ToString());
ireader.close();
directory.close();
}

/** Tests the empty byte[] as a legal BINARY value. */
public void testEmptyBytes() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  conf.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
  Document doc = new Document();
  doc.add(new BinaryDocValuesField("dv", new BytesRef("")));
  iwriter.addDocument(doc);
  doc = new Document();
  doc.add(new BinaryDocValuesField("dv", new BytesRef("")));
  iwriter.addDocument(doc);
  iwriter.forceMerge(1);
  iwriter.close();

  // Now search the index:
  IndexReader ireader = DirectoryReader.open(directory); // read-only=true
  assert ireader.leaves().size() == 1;
  BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
  assertEquals(0, dv.nextDoc());
  assertEquals("", dv.binaryValue().utf8ToString());
  assertEquals(1, dv.nextDoc());
  assertEquals("", dv.binaryValue().utf8ToString());
  ireader.close();
  directory.close();
}

/** Tests a BINARY value of 32766 bytes (the maximum legal term/value length). */
public void testVeryLargeButLegalBytes() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  conf.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
  Document doc = new Document();
  byte bytes[] = new byte[32766];
  BytesRef b = new BytesRef(bytes);
  random().nextBytes(bytes);
  doc.add(new BinaryDocValuesField("dv", b));
  iwriter.addDocument(doc);
  iwriter.close();

  // Now search the index:
  IndexReader ireader = DirectoryReader.open(directory); // read-only=true
  assert ireader.leaves().size() == 1;
  BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
  assertEquals(0, dv.nextDoc());
  assertEquals(new BytesRef(bytes), dv.binaryValue());
  ireader.close();
  directory.close();
}

/** Tests a SORTED value of 32766 bytes (the maximum legal length). */
public void testVeryLargeButLegalSortedBytes() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  conf.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
  Document doc = new Document();
  byte bytes[] = new byte[32766];
  BytesRef b = new BytesRef(bytes);
  random().nextBytes(bytes);
  doc.add(new SortedDocValuesField("dv", b));
  iwriter.addDocument(doc);
  iwriter.close();

  // Now search the index:
  IndexReader ireader = DirectoryReader.open(directory); // read-only=true
  assert ireader.leaves().size() == 1;
  // read the SORTED field through the binary view
  BinaryDocValues dv = DocValues.getBinary(ireader.leaves().get(0).reader(), "dv");
  assertEquals(0, dv.nextDoc());
  assertEquals(new BytesRef(bytes), dv.binaryValue());
  ireader.close();
  directory.close();
}

/** Tests that a returned BINARY value round-trips intact through the codec. */
public void testCodecUsesOwnBytes() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  conf.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
  Document doc = new Document();
  doc.add(new BinaryDocValuesField("dv", new BytesRef("boo!")));
  iwriter.addDocument(doc);
  iwriter.close();

  // Now search the index:
  IndexReader ireader = DirectoryReader.open(directory); // read-only=true
  assert ireader.leaves().size() == 1;
  BinaryDocValues dv = ireader.leaves().get(0).reader().getBinaryDocValues("dv");
  assertEquals(0, dv.nextDoc());
  assertEquals("boo!", dv.binaryValue().utf8ToString());
  ireader.close();
  directory.close();
}

/** Tests that SORTED values come from codec-owned storage, not caller buffers. */
public void testCodecUsesOwnSortedBytes() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = newDirectory();
  IndexWriterConfig conf =
// (continuation of testCodecUsesOwnSortedBytes)
newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, conf);
Document doc = new Document();
doc.add(new SortedDocValuesField("dv", new BytesRef("boo!")));
iwriter.addDocument(doc);
iwriter.close();

// Now search the index:
IndexReader ireader = DirectoryReader.open(directory); // read-only=true
assert ireader.leaves().size() == 1;
BinaryDocValues dv = DocValues.getBinary(ireader.leaves().get(0).reader(), "dv");
byte mybytes[] = new byte[20];
assertEquals(0, dv.nextDoc());
assertEquals("boo!", dv.binaryValue().utf8ToString());
// the returned BytesRef must not be backed by a caller-supplied buffer
assertFalse(dv.binaryValue().bytes == mybytes);
ireader.close();
directory.close();
}

/*
 * Simple test case to show how to use the API
 */
public void testDocValuesSimple() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  conf.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, conf);
  // "docId" is both an indexed text field and a numeric DV field
  for (int i = 0; i < 5; i++) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("docId", i));
    doc.add(new TextField("docId", "" + i, Field.Store.NO));
    writer.addDocument(doc);
  }
  writer.commit();
  writer.forceMerge(1, true);
  writer.close();

  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.leaves().size());

  IndexSearcher searcher = new IndexSearcher(reader);
  BooleanQuery.Builder query = new BooleanQuery.Builder();
  query.add(new TermQuery(new Term("docId", "0")), BooleanClause.Occur.SHOULD);
  query.add(new TermQuery(new Term("docId", "1")), BooleanClause.Occur.SHOULD);
  query.add(new TermQuery(new Term("docId", "2")), BooleanClause.Occur.SHOULD);
  query.add(new TermQuery(new Term("docId", "3")), BooleanClause.Occur.SHOULD);
  query.add(new TermQuery(new Term("docId", "4")), BooleanClause.Occur.SHOULD);

  TopDocs search = searcher.search(query.build(), 10);
  assertEquals(5, search.totalHits);
  ScoreDoc[] scoreDocs = search.scoreDocs;
  NumericDocValues docValues = getOnlyLeafReader(reader).getNumericDocValues("docId");
  for (int i = 0; i < scoreDocs.length; i++) {
    assertEquals(i, scoreDocs[i].doc);
    assertEquals(i, docValues.advance(i));
    assertEquals(i, docValues.longValue());
  }
  reader.close();
  dir.close();
}

/**
 * Randomized duel: indexes random unicode strings as SORTED values (tracking
 * them in a BytesRefHash and an id->string map), then checks ord order,
 * lookupOrd/lookupTerm round-trips, and per-document values via pk lookup.
 */
public void testRandomSortedBytes() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, cfg);
  int numDocs = atLeast(100);
  BytesRefHash hash = new BytesRefHash();
  Map<String, String> docToString = new HashMap<>();
  int maxLength = TestUtil.nextInt(random(), 1, 50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(newTextField("id", "" + i, Field.Store.YES));
    String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
    BytesRef br = new BytesRef(string);
    doc.add(new SortedDocValuesField("field", br));
    hash.add(br);
    docToString.put("" + i, string);
    w.addDocument(doc);
  }
  if (rarely()) {
    w.commit();
  }
  // a batch of docs with no value for the field
  int numDocsNoValue = atLeast(10);
  for (int i = 0; i < numDocsNoValue; i++) {
    Document doc = new Document();
    doc.add(newTextField("id", "noValue", Field.Store.YES));
    w.addDocument(doc);
  }
  if (rarely()) {
    w.commit();
  }
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    // string concatenation: e.g. i=5, numDocs=100 -> "5100", disjoint from
    // the first batch's ids
    String id = "" + i + numDocs;
    doc.add(newTextField("id", id, Field.Store.YES));
    String string = TestUtil.randomRealisticUnicodeString(random(), 1, maxLength);
    BytesRef br = new BytesRef(string);
    hash.add(br);
    docToString.put(id, string);
    doc.add(new SortedDocValuesField("field", br));
    w.addDocument(doc);
  }
  w.commit();
  IndexReader reader = w.getReader();
  SortedDocValues docValues = MultiDocValues.getSortedValues(reader, "field");
  int[] sort = hash.sort();
  BytesRef expected = new BytesRef();
  assertEquals(hash.size(), docValues.getValueCount());
  for (int i = 0; i < hash.size(); i++) {
    hash.get(sort[i], expected);
    final BytesRef
// (continuation: verify ord order and lookupTerm round-trip)
actual = docValues.lookupOrd(i);
    assertEquals(expected.utf8ToString(), actual.utf8ToString());
    int ord = docValues.lookupTerm(expected);
    assertEquals(i, ord);
  }
  Set<Entry<String, String>> entrySet = docToString.entrySet();
  for (Entry<String, String> entry : entrySet) {
    // pk lookup
    PostingsEnum termPostingsEnum = TestUtil.docs(random(), reader, "id", new BytesRef(entry.getKey()), null, 0);
    int docId = termPostingsEnum.nextDoc();
    expected = new BytesRef(entry.getValue());
    docValues = MultiDocValues.getSortedValues(reader, "field");
    assertEquals(docId, docValues.advance(docId));
    final BytesRef actual = docValues.binaryValue();
    assertEquals(expected, actual);
  }
  reader.close();
  w.close();
  dir.close();
}

/**
 * Duel: indexes random longs (from {@code longs}) both as NUMERIC doc values
 * and as stored fields, with the given density of docs carrying a value,
 * then verifies the doc-values iterator matches the stored fields.
 */
private void doTestNumericsVsStoredFields(double density, LongSupplier longs) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  Document doc = new Document();
  Field idField = new StringField("id", "", Field.Store.NO);
  Field storedField = newStringField("stored", "", Field.Store.YES);
  Field dvField = new NumericDocValuesField("dv", 0);
  doc.add(idField);
  doc.add(storedField);
  doc.add(dvField);

  // index some docs
  int numDocs = atLeast(300);
  // numDocs should be always > 256 so that in case of a codec that optimizes
  // for numbers of values <= 256, all storage layouts are tested
  assert numDocs > 256;
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      // empty doc: no value for this field
      writer.addDocument(new Document());
      continue;
    }
    idField.setStringValue(Integer.toString(i));
    long value = longs.getAsLong();
    storedField.setStringValue(Long.toString(value));
    dvField.setLongValue(value);
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }

  // delete some docs
  int numDeletions = random().nextInt(numDocs/10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }

  // merge some segments and ensure that at least one of them has more than
  // 256 values
  writer.forceMerge(numDocs / 256);
  writer.close();

  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    NumericDocValues docValues = DocValues.getNumeric(r, "dv");
    docValues.nextDoc();
    for (int i = 0; i < r.maxDoc(); i++) {
      String storedValue = r.document(i).get("stored");
      if (storedValue == null) {
        // doc i has no value: the iterator must already be past it
        assertTrue(docValues.docID() > i);
      } else {
        assertEquals(i, docValues.docID());
        assertEquals(Long.parseLong(storedValue), docValues.longValue());
        docValues.nextDoc();
      }
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
  }
  ir.close();
  dir.close();
}

/**
 * Duel: per doc, draws a count from {@code counts} and that many values from
 * {@code values}, indexing them both as SORTED_NUMERIC doc values and as
 * sorted stored fields, then verifies they match.
 */
private void doTestSortedNumericsVsStoredFields(LongSupplier counts, LongSupplier values) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

  // index some docs
  int numDocs = atLeast(300);
  // numDocs should be always > 256 so that in case of a codec that optimizes
  // for numbers of values <= 256, all storage layouts are tested
  assert numDocs > 256;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new StringField("id", Integer.toString(i), Field.Store.NO));

    int valueCount = (int) counts.getAsLong();
    long valueArray[] = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    // stored fields are sorted so they line up with SORTED_NUMERIC order
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }

  // delete some docs
  int numDeletions = random().nextInt(numDocs/10);
  for (int i = 0; i < numDeletions; i++) {
    int id
// (continuation of doTestSortedNumericsVsStoredFields: deletions + compare)
= random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }

  // merge some segments and ensure that at least one of them has more than
  // 256 values
  writer.forceMerge(numDocs / 256);
  writer.close();

  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String expected[] = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        // iterator skipped doc i: it must have had no values
        assertEquals(0, expected.length);
      } else {
        String actual[] = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}

/** Dense 0/1 values. */
public void testBooleanNumericsVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestNumericsVsStoredFields(1, () -> random().nextInt(2));
  }
}

/** Sparse 0/1 values. */
public void testSparseBooleanNumericsVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestNumericsVsStoredFields(random().nextDouble(), () -> random().nextInt(2));
  }
}

/** Dense byte-range values. */
public void testByteNumericsVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestNumericsVsStoredFields(1, () -> TestUtil.nextInt(random(), Byte.MIN_VALUE, Byte.MAX_VALUE));
  }
}

/** Sparse byte-range values. */
public void testSparseByteNumericsVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestNumericsVsStoredFields(random().nextDouble(), () -> TestUtil.nextInt(random(), Byte.MIN_VALUE, Byte.MAX_VALUE));
  }
}

/** Dense short-range values. */
public void testShortNumericsVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestNumericsVsStoredFields(1, () -> TestUtil.nextInt(random(), Short.MIN_VALUE, Short.MAX_VALUE));
  }
}

/** Sparse short-range values. */
public void testSparseShortNumericsVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestNumericsVsStoredFields(random().nextDouble(), () -> TestUtil.nextInt(random(), Short.MIN_VALUE, Short.MAX_VALUE));
  }
}

/** Dense int values. */
public void testIntNumericsVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestNumericsVsStoredFields(1, random()::nextInt);
  }
}

/** Sparse int values. */
public void testSparseIntNumericsVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestNumericsVsStoredFields(random().nextDouble(), random()::nextInt);
  }
}

/** Dense long values. */
public void testLongNumericsVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestNumericsVsStoredFields(1, random()::nextLong);
  }
}

/** Sparse long values. */
public void testSparseLongNumericsVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestNumericsVsStoredFields(random().nextDouble(), random()::nextLong);
  }
}

/**
 * Duel: indexes random byte[] (from {@code bytes}) both as BINARY doc values
 * and as stored fields at the given density, then verifies the doc-values
 * iterator against the stored fields, before and after a forceMerge.
 */
private void doTestBinaryVsStoredFields(double density, Supplier<byte[]> bytes) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  Document doc = new Document();
  Field idField = new StringField("id", "", Field.Store.NO);
  Field storedField = new StoredField("stored", new byte[0]);
  Field dvField = new BinaryDocValuesField("dv", new BytesRef());
  doc.add(idField);
  doc.add(storedField);
  doc.add(dvField);

  // index some docs
  int numDocs = atLeast(300);
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      // empty doc: no value for this field
      writer.addDocument(new Document());
      continue;
    }
    idField.setStringValue(Integer.toString(i));
    byte[] buffer = bytes.get();
// (continuation of doTestBinaryVsStoredFields: same bytes into both fields)
storedField.setBytesValue(buffer);
    dvField.setBytesValue(buffer);
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }

  // delete some docs
  int numDeletions = random().nextInt(numDocs/10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }

  // compare
  DirectoryReader ir = writer.getReader();
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    BinaryDocValues docValues = DocValues.getBinary(r, "dv");
    docValues.nextDoc();
    for (int i = 0; i < r.maxDoc(); i++) {
      BytesRef binaryValue = r.document(i).getBinaryValue("stored");
      if (binaryValue == null) {
        // doc i has no value: the iterator must already be past it
        assertTrue(docValues.docID() > i);
      } else {
        assertEquals(i, docValues.docID());
        assertEquals(binaryValue, docValues.binaryValue());
        docValues.nextDoc();
      }
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
  }
  ir.close();

  // compare again
  writer.forceMerge(1);
  ir = writer.getReader();
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    BinaryDocValues docValues = DocValues.getBinary(r, "dv");
    docValues.nextDoc();
    for (int i = 0; i < r.maxDoc(); i++) {
      BytesRef binaryValue = r.document(i).getBinaryValue("stored");
      if (binaryValue == null) {
        assertTrue(docValues.docID() > i);
      } else {
        assertEquals(i, docValues.docID());
        assertEquals(binaryValue, docValues.binaryValue());
        docValues.nextDoc();
      }
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
  }
  ir.close();
  writer.close();
  dir.close();
}

/** Dense fixed-length binary duel. */
public void testBinaryFixedLengthVsStoredFields() throws Exception {
  doTestBinaryFixedLengthVsStoredFields(1);
}

/** Sparse fixed-length binary duel. */
public void testSparseBinaryFixedLengthVsStoredFields() throws Exception {
  doTestBinaryFixedLengthVsStoredFields(random().nextDouble());
}

/** Runs the binary duel with one random fixed length (0..10) per iteration. */
private void doTestBinaryFixedLengthVsStoredFields(double density) throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    int fixedLength = TestUtil.nextInt(random(), 0, 10);
    doTestBinaryVsStoredFields(density, () -> {
      byte buffer[] = new byte[fixedLength];
      random().nextBytes(buffer);
      return buffer;
    });
  }
}

/** Dense variable-length binary duel. */
public void testBinaryVariableLengthVsStoredFields() throws Exception {
  doTestBinaryVariableLengthVsStoredFields(1);
}

/** Sparse variable-length binary duel. */
public void testSparseBinaryVariableLengthVsStoredFields() throws Exception {
  doTestBinaryVariableLengthVsStoredFields(random().nextDouble());
}

/** Runs the binary duel with values of random length 0..9. */
public void doTestBinaryVariableLengthVsStoredFields(double density) throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestBinaryVsStoredFields(density, () -> {
      final int length = random().nextInt(10);
      byte buffer[] = new byte[length];
      random().nextBytes(buffer);
      return buffer;
    });
  }
}

/**
 * Duel: indexes random byte[] both as SORTED doc values and as stored
 * fields (on a filesystem directory), then verifies the binary view of the
 * doc values against the stored fields, before and after forceMerge.
 */
protected void doTestSortedVsStoredFields(int numDocs, double density, Supplier<byte[]> bytes) throws Exception {
  Directory dir = newFSDirectory(createTempDir("dvduel"));
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
  Document doc = new Document();
  Field idField = new StringField("id", "", Field.Store.NO);
  Field storedField = new StoredField("stored", new byte[0]);
  Field dvField = new SortedDocValuesField("dv", new BytesRef());
  doc.add(idField);
  doc.add(storedField);
  doc.add(dvField);

  // index some docs
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      // empty doc: no value for this field
      writer.addDocument(new Document());
      continue;
    }
    idField.setStringValue(Integer.toString(i));
    byte[] buffer = bytes.get();
    storedField.setBytesValue(buffer);
    dvField.setBytesValue(buffer);
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }

  // delete some docs
  int numDeletions = random().nextInt(numDocs/10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }

  // compare
  DirectoryReader ir =
// (continuation of doTestSortedVsStoredFields: compare via the binary view)
writer.getReader();
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    BinaryDocValues docValues = DocValues.getBinary(r, "dv");
    docValues.nextDoc();
    for (int i = 0; i < r.maxDoc(); i++) {
      BytesRef binaryValue = r.document(i).getBinaryValue("stored");
      if (binaryValue == null) {
        // doc i has no value: the iterator must already be past it
        assertTrue(docValues.docID() > i);
      } else {
        assertEquals(i, docValues.docID());
        assertEquals(binaryValue, docValues.binaryValue());
        docValues.nextDoc();
      }
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
  }
  ir.close();
  writer.forceMerge(1);

  // compare again
  ir = writer.getReader();
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    BinaryDocValues docValues = DocValues.getBinary(r, "dv");
    docValues.nextDoc();
    for (int i = 0; i < r.maxDoc(); i++) {
      BytesRef binaryValue = r.document(i).getBinaryValue("stored");
      if (binaryValue == null) {
        assertTrue(docValues.docID() > i);
      } else {
        assertEquals(i, docValues.docID());
        assertEquals(binaryValue, docValues.binaryValue());
        docValues.nextDoc();
      }
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
  }
  ir.close();
  writer.close();
  dir.close();
}

/** Dense fixed-length sorted duel. */
public void testSortedFixedLengthVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    int fixedLength = TestUtil.nextInt(random(), 1, 10);
    doTestSortedVsStoredFields(atLeast(300), 1, fixedLength, fixedLength);
  }
}

/** Sparse fixed-length sorted duel. */
public void testSparseSortedFixedLengthVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    int fixedLength = TestUtil.nextInt(random(), 1, 10);
    doTestSortedVsStoredFields(atLeast(300), random().nextDouble(), fixedLength, fixedLength);
  }
}

/** Dense variable-length sorted duel. */
public void testSortedVariableLengthVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestSortedVsStoredFields(atLeast(300), 1, 1, 10);
  }
}

/** Sparse variable-length sorted duel. */
public void
testSparseSortedVariableLengthVsStoredFields() throws Exception {
  int numIterations = atLeast(1);
  for (int i = 0; i < numIterations; i++) {
    doTestSortedVsStoredFields(atLeast(300), random().nextDouble(), 1, 10);
  }
}

/** Convenience overload: draws value lengths uniformly in [minLength, maxLength]. */
protected void doTestSortedVsStoredFields(int numDocs, double density, int minLength, int maxLength) throws Exception {
  doTestSortedVsStoredFields(numDocs, density, () -> {
    int length = TestUtil.nextInt(random(), minLength, maxLength);
    byte[] buffer = new byte[length];
    random().nextBytes(buffer);
    return buffer;
  });
}

/** Tests SORTED_SET with a single doc carrying a single value. */
public void testSortedSetOneValue() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);

  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(0, dv.nextDoc());
  assertEquals(0, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());

  BytesRef bytes = dv.lookupOrd(0);
  assertEquals(new BytesRef("hello"), bytes);

  ireader.close();
  directory.close();
}

/** Tests two independent SORTED_SET fields on the same doc. */
public void testSortedSetTwoFields() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);

  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  doc.add(new SortedSetDocValuesField("field2", new BytesRef("world")));
  iwriter.addDocument(doc);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(0, dv.nextDoc());
  assertEquals(0, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());

  BytesRef bytes = dv.lookupOrd(0);
  assertEquals(new BytesRef("hello"), bytes);

  dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field2");
  assertEquals(0, dv.nextDoc());
// (tail of testSortedSetTwoFields: check the second field independently)
assertEquals(0, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());

  bytes = dv.lookupOrd(0);
  assertEquals(new BytesRef("world"), bytes);

  ireader.close();
  directory.close();
}

/** Tests SORTED_SET across two segments that get merged into one. */
public void testSortedSetTwoDocumentsMerged() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);
  iwriter.commit();

  doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
  iwriter.addDocument(doc);
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(2, dv.getValueCount());

  assertEquals(0, dv.nextDoc());
  assertEquals(0, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());

  BytesRef bytes = dv.lookupOrd(0);
  assertEquals(new BytesRef("hello"), bytes);

  assertEquals(1, dv.nextDoc());
  assertEquals(1, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());

  bytes = dv.lookupOrd(1);
  assertEquals(new BytesRef("world"), bytes);

  ireader.close();
  directory.close();
}

/** Tests SORTED_SET with two values on one doc, added in sorted order. */
public void testSortedSetTwoValues() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);

  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
  iwriter.addDocument(doc);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(0, dv.nextDoc());
  // ords are returned in increasing order within a doc
  assertEquals(0, dv.nextOrd());
  assertEquals(1, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());

  BytesRef bytes = dv.lookupOrd(0);
  assertEquals(new BytesRef("hello"), bytes);

  bytes = dv.lookupOrd(1);
  assertEquals(new BytesRef("world"), bytes);

  ireader.close();
  directory.close();
}

/** Tests SORTED_SET with two values added out of order: ords still sorted. */
public void testSortedSetTwoValuesUnordered() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory);

  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(0, dv.nextDoc());
  assertEquals(0, dv.nextOrd());
  assertEquals(1, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());

  BytesRef bytes = dv.lookupOrd(0);
  assertEquals(new BytesRef("hello"), bytes);

  bytes = dv.lookupOrd(1);
  assertEquals(new BytesRef("world"), bytes);

  ireader.close();
  directory.close();
}

/** Tests SORTED_SET with three distinct values spread over two merged docs. */
public void testSortedSetThreeValuesTwoDocs() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
  iwriter.addDocument(doc);
  iwriter.commit();

  doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  doc.add(new SortedSetDocValuesField("field", new BytesRef("beer")));
  iwriter.addDocument(doc);
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(3,
// (tail of testSortedSetThreeValuesTwoDocs: global ords are beer=0, hello=1, world=2)
dv.getValueCount());

assertEquals(0, dv.nextDoc());
assertEquals(1, dv.nextOrd());
assertEquals(2, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());

assertEquals(1, dv.nextDoc());
assertEquals(0, dv.nextOrd());
assertEquals(1, dv.nextOrd());
assertEquals(NO_MORE_ORDS, dv.nextOrd());

BytesRef bytes = dv.lookupOrd(0);
assertEquals(new BytesRef("beer"), bytes);

bytes = dv.lookupOrd(1);
assertEquals(new BytesRef("hello"), bytes);

bytes = dv.lookupOrd(2);
assertEquals(new BytesRef("world"), bytes);

ireader.close();
directory.close();
}

/** Tests SORTED_SET where the second doc (same segment) has no value. */
public void testSortedSetTwoDocumentsLastMissing() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);

  doc = new Document();
  iwriter.addDocument(doc);
  iwriter.forceMerge(1);
  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(1, dv.getValueCount());

  assertEquals(0, dv.nextDoc());
  assertEquals(0, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());

  BytesRef bytes = dv.lookupOrd(0);
  assertEquals(new BytesRef("hello"), bytes);

  ireader.close();
  directory.close();
}

/** Like testSortedSetTwoDocumentsLastMissing, but the docs start in separate segments. */
public void testSortedSetTwoDocumentsLastMissingMerge() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);
  iwriter.commit();

  doc = new Document();
  iwriter.addDocument(doc);
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(1, dv.getValueCount());

  assertEquals(0, dv.nextDoc());
  assertEquals(0, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());

  BytesRef bytes = dv.lookupOrd(0);
  assertEquals(new BytesRef("hello"), bytes);

  ireader.close();
  directory.close();
}

/** Tests SORTED_SET where the first doc (same segment) has no value. */
public void testSortedSetTwoDocumentsFirstMissing() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  iwriter.addDocument(doc);

  doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(1, dv.getValueCount());

  // the iterator starts at doc 1, the only doc with a value
  assertEquals(1, dv.nextDoc());
  assertEquals(0, dv.nextOrd());
  assertEquals(NO_MORE_ORDS, dv.nextOrd());

  BytesRef bytes = dv.lookupOrd(0);
  assertEquals(new BytesRef("hello"), bytes);

  ireader.close();
  directory.close();
}

/** Like testSortedSetTwoDocumentsFirstMissing, but the docs start in separate segments. */
public void testSortedSetTwoDocumentsFirstMissingMerge() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  iwriter.addDocument(doc);
  iwriter.commit();

  doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);
  iwriter.forceMerge(1);

  // (method continues past the end of this chunk)
  DirectoryReader
ireader = iwriter.getReader(); iwriter.close(); SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field"); assertEquals(1, dv.getValueCount()); assertEquals(1, dv.nextDoc()); assertEquals(0, dv.nextOrd()); assertEquals(NO_MORE_ORDS, dv.nextOrd()); BytesRef bytes = dv.lookupOrd(0); assertEquals(new BytesRef("hello"), bytes); ireader.close(); directory.close(); } public void testSortedSetMergeAwayAllValues() throws IOException { Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); iwconfig.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig); Document doc = new Document(); doc.add(new StringField("id", "0", Field.Store.NO)); iwriter.addDocument(doc); doc = new Document(); doc.add(new StringField("id", "1", Field.Store.NO)); doc.add(new SortedSetDocValuesField("field", new BytesRef("hello"))); iwriter.addDocument(doc); iwriter.commit(); iwriter.deleteDocuments(new Term("id", "1")); iwriter.forceMerge(1); DirectoryReader ireader = iwriter.getReader(); iwriter.close(); SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field"); assertEquals(0, dv.getValueCount()); ireader.close(); directory.close(); } public void testSortedSetTermsEnum() throws IOException { Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); iwconfig.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig); Document doc = new Document(); doc.add(new SortedSetDocValuesField("field", new BytesRef("hello"))); doc.add(new SortedSetDocValuesField("field", new BytesRef("world"))); doc.add(new SortedSetDocValuesField("field", new BytesRef("beer"))); iwriter.addDocument(doc); DirectoryReader ireader = iwriter.getReader(); iwriter.close(); 
    SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
    assertEquals(3, dv.getValueCount());

    TermsEnum termsEnum = dv.termsEnum();

    // next(): terms come back in sorted order with increasing ords
    assertEquals("beer", termsEnum.next().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals("hello", termsEnum.next().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals("world", termsEnum.next().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // seekCeil(): NOT_FOUND positions on the next term >= target
    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));

    // seekExact()
    assertTrue(termsEnum.seekExact(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("hello")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("world")));
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    assertFalse(termsEnum.seekExact(new BytesRef("bogus")));

    // seek(ord)
    termsEnum.seekExact(0);
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    termsEnum.seekExact(1);
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    termsEnum.seekExact(2);
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // NORMAL automaton: matches "hello" and "world" (terms containing 'l')
    termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
    assertEquals("hello", termsEnum.next().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals("world", termsEnum.next().utf8ToString());
    assertEquals(2, termsEnum.ord());
    assertNull(termsEnum.next());

    // SINGLE automaton: matches exactly one term
    termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton()));
    assertEquals("hello", termsEnum.next().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertNull(termsEnum.next());

    ireader.close();
    directory.close();
  }

  /**
   * Indexes random docs that carry the same random string values both as stored
   * fields (added in sorted order) and as a SortedSet dv field (added shuffled),
   * then verifies that the dv ords decode back to exactly the stored values.
   *
   * @param numDocs         number of documents to index
   * @param minLength       minimum length of a random value
   * @param maxLength       maximum length of a random value
   * @param maxValuesPerDoc maximum values per document (a doc may get zero)
   * @param maxUniqueValues cap on the size of the shared random value pool
   */
  protected void doTestSortedSetVsStoredFields(int numDocs, int minLength, int maxLength, int maxValuesPerDoc, int maxUniqueValues) throws Exception {
    Directory dir = newFSDirectory(createTempDir("dvduel"));
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

    // build the pool of unique values; bounded attempts in case the space is small
    Set<String> valueSet = new HashSet<String>();
    for (int i = 0; i < 10000 && valueSet.size() < maxUniqueValues; ++i) {
      final int length = TestUtil.nextInt(random(), minLength, maxLength);
      valueSet.add(TestUtil.randomSimpleString(random(), length));
    }
    String[] uniqueValues = valueSet.toArray(new String[0]);

    // index some docs
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
      doc.add(idField);
      int numValues = TestUtil.nextInt(random(), 0, maxValuesPerDoc);
      // create a random set of strings
      Set<String> values = new TreeSet<>();
      for (int v = 0; v < numValues; v++) {
        values.add(RandomPicks.randomFrom(random(), uniqueValues));
      }

      // add ordered to the stored field
      for (String v : values) {
        doc.add(new StoredField("stored", v));
      }
      // add in any order to the dv field
      ArrayList<String> unordered = new ArrayList<>(values);
      Collections.shuffle(unordered, random());
      for (String v : unordered) {
        doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
      }
      writer.addDocument(doc);
      if (random().nextInt(31) == 0) {
        writer.commit();
      }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs/10);
    for (int i = 0; i < numDeletions; i++) {
      int id = random().nextInt(numDocs);
      writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }

    // compare
DirectoryReader ir = writer.getReader(); TestUtil.checkReader(ir); for (LeafReaderContext context : ir.leaves()) { LeafReader r = context.reader(); SortedSetDocValues docValues = r.getSortedSetDocValues("dv"); for (int i = 0; i < r.maxDoc(); i++) { String stringValues[] = r.document(i).getValues("stored"); if (docValues != null) { if (docValues.docID() < i) { docValues.nextDoc(); } } if (docValues != null && stringValues.length > 0) { assertEquals(i, docValues.docID()); for (int j = 0; j < stringValues.length; j++) { assert docValues != null; long ord = docValues.nextOrd(); assert ord != NO_MORE_ORDS; BytesRef scratch = docValues.lookupOrd(ord); assertEquals(stringValues[j], scratch.utf8ToString()); } assertEquals(NO_MORE_ORDS, docValues.nextOrd()); } } } ir.close(); writer.forceMerge(1); // compare again ir = writer.getReader(); TestUtil.checkReader(ir); for (LeafReaderContext context : ir.leaves()) { LeafReader r = context.reader(); SortedSetDocValues docValues = r.getSortedSetDocValues("dv"); for (int i = 0; i < r.maxDoc(); i++) { String stringValues[] = r.document(i).getValues("stored"); if (docValues.docID() < i) { docValues.nextDoc(); } if (docValues != null && stringValues.length > 0) { assertEquals(i, docValues.docID()); for (int j = 0; j < stringValues.length; j++) { assert docValues != null; long ord = docValues.nextOrd(); assert ord != NO_MORE_ORDS; BytesRef scratch = docValues.lookupOrd(ord); assertEquals(stringValues[j], scratch.utf8ToString()); } assertEquals(NO_MORE_ORDS, docValues.nextOrd()); } } } ir.close(); writer.close(); dir.close(); } public void testSortedSetFixedLengthVsStoredFields() throws Exception { int numIterations = atLeast(1); for (int i = 0; i < numIterations; i++) { int fixedLength = TestUtil.nextInt(random(), 1, 10); doTestSortedSetVsStoredFields(atLeast(300), fixedLength, fixedLength, 16, 100); } } public void testSortedNumericsSingleValuedVsStoredFields() throws Exception { int numIterations = atLeast(1); for (int i = 0; i < 
numIterations; i++) {
      doTestSortedNumericsVsStoredFields( () -> 1, random()::nextLong );
    }
  }

  /** Duel: sorted-numeric field where each doc randomly has 0 or 1 values. */
  public void testSortedNumericsSingleValuedMissingVsStoredFields() throws Exception {
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      doTestSortedNumericsVsStoredFields( () -> random().nextBoolean() ? 0 : 1, random()::nextLong );
    }
  }

  /** Duel: sorted-numeric field with 0..50 random values per doc. */
  public void testSortedNumericsMultipleValuesVsStoredFields() throws Exception {
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      doTestSortedNumericsVsStoredFields( () -> TestUtil.nextLong(random(), 0, 50), random()::nextLong );
    }
  }

  /** Duel: sorted-numeric values drawn from a tiny fixed pool, so many docs share
   *  identical value sets. */
  public void testSortedNumericsFewUniqueSetsVsStoredFields() throws Exception {
    final long[] values = new long[TestUtil.nextInt(random(), 2, 6)];
    for (int i = 0; i < values.length; ++i) {
      values[i] = random().nextLong();
    }
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      doTestSortedNumericsVsStoredFields( () -> TestUtil.nextLong(random(), 0, 6), () -> values[random().nextInt(values.length)] );
    }
  }

  /** Duel: variable-length values, up to 16 values per doc from a pool of 100. */
  public void testSortedSetVariableLengthVsStoredFields() throws Exception {
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 16, 100);
    }
  }

  /** Duel: fixed-length values, at most one value per doc. */
  public void testSortedSetFixedLengthSingleValuedVsStoredFields() throws Exception {
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      int fixedLength = TestUtil.nextInt(random(), 1, 10);
      doTestSortedSetVsStoredFields(atLeast(300), fixedLength, fixedLength, 1, 100);
    }
  }

  /** Duel: variable-length values, at most one value per doc. */
  public void testSortedSetVariableLengthSingleValuedVsStoredFields() throws Exception {
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 1, 100);
    }
  }

  /** Duel: fixed-length values drawn from a pool of only 6 uniques. */
  public void testSortedSetFixedLengthFewUniqueSetsVsStoredFields() throws Exception {
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      doTestSortedSetVsStoredFields(atLeast(300), 10, 10, 6, 6);
    }
  }

  /** Duel: variable-length values drawn from a pool of only 6 uniques. */
  public void testSortedSetVariableLengthFewUniqueSetsVsStoredFields() throws Exception {
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      doTestSortedSetVsStoredFields(atLeast(300), 1, 10, 6, 6);
    }
  }

  /** Duel: variable-length values with very many (up to 500) values per doc. */
  public void testSortedSetVariableLengthManyValuesPerDocVsStoredFields() throws Exception {
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      doTestSortedSetVsStoredFields(atLeast(20), 1, 10, 500, 1000);
    }
  }

  /** Duel: fixed-length values with very many (up to 500) values per doc. */
  public void testSortedSetFixedLengthManyValuesPerDocVsStoredFields() throws Exception {
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      doTestSortedSetVsStoredFields(atLeast(20), 10, 10, 500, 1000);
    }
  }

  /** GCD compression with a value on every doc. */
  public void testGCDCompression() throws Exception {
    doTestGCDCompression(1);
  }

  /** GCD compression with a random fraction of docs carrying a value. */
  public void testSparseGCDCompression() throws Exception {
    doTestGCDCompression(random().nextDouble());
  }

  /** Generates values of the form min + mul * k so the format can GCD-compress them.
   *  @param density fraction of docs that receive a value */
  private void doTestGCDCompression(double density) throws Exception {
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      final long min = - (((long) random().nextInt(1 << 30)) << 32);
      final long mul = random().nextInt() & 0xFFFFFFFFL;
      final LongSupplier longs = () -> {
        return min + mul * random().nextInt(1 << 20);
      };
      doTestNumericsVsStoredFields(density, longs);
    }
  }

  /** All-zero values on every doc. */
  public void testZeros() throws Exception {
    doTestNumericsVsStoredFields(1, () -> 0);
  }

  /** All-zero values on a random fraction of docs. */
  public void testSparseZeros() throws Exception {
    doTestNumericsVsStoredFields(random().nextDouble(), () -> 0);
  }

  public void testZeroOrMin() throws Exception {
    // try to make GCD compression fail if the format did not anticipate that
    // the GCD of 0 and MIN_VALUE is negative
    int numIterations = atLeast(1);
    for (int i = 0; i < numIterations; i++) {
      final LongSupplier longs = () -> {
        return random().nextBoolean() ?
0 : Long.MIN_VALUE; };
      doTestNumericsVsStoredFields(1, longs);
    }
  }

  /** Two docs, one numeric value: the iterator must expose only doc 0. */
  public void testTwoNumbersOneMissing() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(null);
    conf.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
    Document doc = new Document();
    doc.add(new StringField("id", "0", Field.Store.YES));
    doc.add(new NumericDocValuesField("dv1", 0));
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.YES));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();

    IndexReader ir = DirectoryReader.open(directory);
    assertEquals(1, ir.leaves().size());
    LeafReader ar = ir.leaves().get(0).reader();
    NumericDocValues dv = ar.getNumericDocValues("dv1");
    assertEquals(0, dv.nextDoc());
    assertEquals(0, dv.longValue());
    assertEquals(NO_MORE_DOCS, dv.nextDoc());
    ir.close();
    directory.close();
  }

  /** Like {@link #testTwoNumbersOneMissing}, but the missing doc starts in a second
   *  segment (commit in between) before the forced merge. */
  public void testTwoNumbersOneMissingWithMerging() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(null);
    conf.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
    Document doc = new Document();
    doc.add(new StringField("id", "0", Field.Store.YES));
    doc.add(new NumericDocValuesField("dv1", 0));
    iw.addDocument(doc);
    iw.commit();
    doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.YES));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();

    IndexReader ir = DirectoryReader.open(directory);
    assertEquals(1, ir.leaves().size());
    LeafReader ar = ir.leaves().get(0).reader();
    NumericDocValues dv = ar.getNumericDocValues("dv1");
    assertEquals(0, dv.nextDoc());
    assertEquals(0, dv.longValue());
    assertEquals(NO_MORE_DOCS, dv.nextDoc());
    ir.close();
    directory.close();
  }

  /** Three docs across two segments with the middle doc missing its value; after
   *  the merge the iterator must visit docs 0 and 2 only. */
  public void testThreeNumbersOneMissingWithMerging() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(null);
    conf.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
    Document doc = new Document();
    doc.add(new StringField("id", "0", Field.Store.YES));
    doc.add(new NumericDocValuesField("dv1", 0));
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.YES));
    iw.addDocument(doc);
    iw.commit();
    doc = new Document();
    doc.add(new StringField("id", "2", Field.Store.YES));
    doc.add(new NumericDocValuesField("dv1", 5));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();

    IndexReader ir = DirectoryReader.open(directory);
    assertEquals(1, ir.leaves().size());
    LeafReader ar = ir.leaves().get(0).reader();
    NumericDocValues dv = ar.getNumericDocValues("dv1");
    assertEquals(0, dv.nextDoc());
    assertEquals(0, dv.longValue());
    assertEquals(2, dv.nextDoc());
    assertEquals(5, dv.longValue());
    ir.close();
    directory.close();
  }

  /** Binary analog of {@link #testTwoNumbersOneMissing}: one doc with an (empty)
   *  binary value, one without. */
  public void testTwoBytesOneMissing() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(null);
    conf.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
    Document doc = new Document();
    doc.add(new StringField("id", "0", Field.Store.YES));
    doc.add(new BinaryDocValuesField("dv1", new BytesRef()));
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.YES));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();

    IndexReader ir = DirectoryReader.open(directory);
    assertEquals(1, ir.leaves().size());
    LeafReader ar = ir.leaves().get(0).reader();
    BinaryDocValues dv = ar.getBinaryDocValues("dv1");
    assertEquals(0, dv.nextDoc());
    assertEquals(new BytesRef(), dv.binaryValue());
    assertEquals(NO_MORE_DOCS, dv.nextDoc());
    ir.close();
    directory.close();
  }

  /** Like {@link #testTwoBytesOneMissing}, but the two docs start in separate
   *  segments before the forced merge. */
  public void testTwoBytesOneMissingWithMerging() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(null);
    conf.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
    Document doc = new Document();
    doc.add(new StringField("id", "0", Field.Store.YES));
    doc.add(new BinaryDocValuesField("dv1", new BytesRef()));
    iw.addDocument(doc);
    iw.commit();
    doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.YES));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();

    IndexReader ir = DirectoryReader.open(directory);
    assertEquals(1, ir.leaves().size());
    LeafReader ar = ir.leaves().get(0).reader();
    BinaryDocValues dv = ar.getBinaryDocValues("dv1");
    assertEquals(0, dv.nextDoc());
    assertEquals(new BytesRef(), dv.binaryValue());
    assertEquals(NO_MORE_DOCS, dv.nextDoc());
    ir.close();
    directory.close();
  }

  /** Binary analog of {@link #testThreeNumbersOneMissingWithMerging}. */
  public void testThreeBytesOneMissingWithMerging() throws IOException {
    Directory directory = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(null);
    conf.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
    Document doc = new Document();
    doc.add(new StringField("id", "0", Field.Store.YES));
    doc.add(new BinaryDocValuesField("dv1", new BytesRef()));
    iw.addDocument(doc);
    doc = new Document();
    doc.add(new StringField("id", "1", Field.Store.YES));
    iw.addDocument(doc);
    iw.commit();
    doc = new Document();
    doc.add(new StringField("id", "2", Field.Store.YES));
    doc.add(new BinaryDocValuesField("dv1", new BytesRef("boo")));
    iw.addDocument(doc);
    iw.forceMerge(1);
    iw.close();

    IndexReader ir = DirectoryReader.open(directory);
    assertEquals(1, ir.leaves().size());
    LeafReader ar = ir.leaves().get(0).reader();
    BinaryDocValues dv = ar.getBinaryDocValues("dv1");
    assertEquals(0, dv.nextDoc());
    assertEquals(new BytesRef(), dv.binaryValue());
    assertEquals(2, dv.nextDoc());
    assertEquals(new BytesRef("boo"), dv.binaryValue());
    assertEquals(NO_MORE_DOCS, dv.nextDoc());
    ir.close();
    directory.close();
  }

  /** Tests dv against stored fields with threads (binary/numeric/sorted, no missing) */
  public void testThreads() throws
Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    // a single reusable Document; field instances are mutated per iteration
    Document doc = new Document();
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedBinField = new StoredField("storedBin", new byte[0]);
    Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
    Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
    Field storedNumericField = new StoredField("storedNum", "");
    Field dvNumericField = new NumericDocValuesField("dvNum", 0);
    doc.add(idField);
    doc.add(storedBinField);
    doc.add(dvBinField);
    doc.add(dvSortedField);
    doc.add(storedNumericField);
    doc.add(dvNumericField);

    // index some docs
    int numDocs = atLeast(300);
    for (int i = 0; i < numDocs; i++) {
      idField.setStringValue(Integer.toString(i));
      int length = TestUtil.nextInt(random(), 0, 8);
      byte buffer[] = new byte[length];
      random().nextBytes(buffer);
      // same random bytes go to the stored field and both dv fields
      storedBinField.setBytesValue(buffer);
      dvBinField.setBytesValue(buffer);
      dvSortedField.setBytesValue(buffer);
      long numericValue = random().nextLong();
      storedNumericField.setStringValue(Long.toString(numericValue));
      dvNumericField.setLongValue(numericValue);
      writer.addDocument(doc);
      if (random().nextInt(31) == 0) {
        writer.commit();
      }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs/10);
    for (int i = 0; i < numDeletions; i++) {
      int id = random().nextInt(numDocs);
      writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    writer.close();

    // compare: each thread iterates the whole reader independently
    final DirectoryReader ir = DirectoryReader.open(dir);
    int numThreads = TestUtil.nextInt(random(), 2, 7);
    Thread threads[] = new Thread[numThreads];
    final CountDownLatch startingGun = new CountDownLatch(1);
    for (int i = 0; i < threads.length; i++) {
      threads[i] = new Thread() {
        @Override
        public void run() {
          try {
            startingGun.await();
            for (LeafReaderContext context : ir.leaves()) {
              LeafReader r = context.reader();
              BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
              SortedDocValues sorted = r.getSortedDocValues("dvSorted");
              NumericDocValues numerics = r.getNumericDocValues("dvNum");
              // every doc has every field here, so nextDoc() must land on each doc
              for (int j = 0; j < r.maxDoc(); j++) {
                BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
                assertEquals(j, binaries.nextDoc());
                BytesRef scratch = binaries.binaryValue();
                assertEquals(binaryValue, scratch);
                assertEquals(j, sorted.nextDoc());
                scratch = sorted.binaryValue();
                assertEquals(binaryValue, scratch);
                String expected = r.document(j).get("storedNum");
                assertEquals(j, numerics.nextDoc());
                assertEquals(Long.parseLong(expected), numerics.longValue());
              }
            }
            TestUtil.checkReader(ir);
          } catch (Exception e) {
            throw new RuntimeException(e);
          }
        }
      };
      threads[i].start();
    }
    startingGun.countDown();
    for (Thread t : threads) {
      t.join();
    }
    ir.close();
    dir.close();
  }

  /** Tests dv against stored fields with threads (all types + missing) */
  @Slow
  public void testThreads2() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedBinField = new StoredField("storedBin", new byte[0]);
    Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
    Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
    Field storedNumericField = new StoredField("storedNum", "");
    Field dvNumericField = new NumericDocValuesField("dvNum", 0);

    // index some docs
    int numDocs = TestUtil.nextInt(random(), 1025, 2047);
    for (int i = 0; i < numDocs; i++) {
      idField.setStringValue(Integer.toString(i));
      int length = TestUtil.nextInt(random(), 0, 8);
      byte buffer[] = new byte[length];
      random().nextBytes(buffer);
      storedBinField.setBytesValue(buffer);
      dvBinField.setBytesValue(buffer);
      dvSortedField.setBytesValue(buffer);
      long numericValue = random().nextLong();
storedNumericField.setStringValue(Long.toString(numericValue));
      dvNumericField.setLongValue(numericValue);
      Document doc = new Document();
      doc.add(idField);
      // each field group is present on only ~3/4 of the docs, to exercise "missing"
      if (random().nextInt(4) > 0) {
        doc.add(storedBinField);
        doc.add(dvBinField);
        doc.add(dvSortedField);
      }
      if (random().nextInt(4) > 0) {
        doc.add(storedNumericField);
        doc.add(dvNumericField);
      }
      int numSortedSetFields = random().nextInt(3);
      // TreeSet keeps the values sorted, matching SortedSet ord order
      Set<String> values = new TreeSet<>();
      for (int j = 0; j < numSortedSetFields; j++) {
        values.add(TestUtil.randomSimpleString(random()));
      }
      for (String v : values) {
        doc.add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v)));
        doc.add(new StoredField("storedSortedSet", v));
      }
      int numSortedNumericFields = random().nextInt(3);
      Set<Long> numValues = new TreeSet<>();
      for (int j = 0; j < numSortedNumericFields; j++) {
        numValues.add(TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE));
      }
      for (Long l : numValues) {
        doc.add(new SortedNumericDocValuesField("dvSortedNumeric", l));
        doc.add(new StoredField("storedSortedNumeric", Long.toString(l)));
      }
      writer.addDocument(doc);
      if (random().nextInt(31) == 0) {
        writer.commit();
      }
    }

    // delete some docs
    int numDeletions = random().nextInt(numDocs/10);
    for (int i = 0; i < numDeletions; i++) {
      int id = random().nextInt(numDocs);
      writer.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    writer.close();

    // compare
    final DirectoryReader ir = DirectoryReader.open(dir);
    int numThreads = TestUtil.nextInt(random(), 2, 7);
    Thread threads[] = new Thread[numThreads];
    final CountDownLatch startingGun = new CountDownLatch(1);
    for (int i = 0; i < threads.length; i++) {
      threads[i] = new Thread() {
        @Override
        public void run() {
          try {
            startingGun.await();
            for (LeafReaderContext context : ir.leaves()) {
              LeafReader r = context.reader();
              BinaryDocValues binaries = r.getBinaryDocValues("dvBin");
              SortedDocValues sorted = r.getSortedDocValues("dvSorted");
              NumericDocValues numerics = r.getNumericDocValues("dvNum");
              SortedSetDocValues sortedSet = r.getSortedSetDocValues("dvSortedSet");
              SortedNumericDocValues sortedNumeric = r.getSortedNumericDocValues("dvSortedNumeric");
              for (int j = 0; j < r.maxDoc(); j++) {
                // the stored field tells us whether this doc should have dv values
                BytesRef binaryValue = r.document(j).getBinaryValue("storedBin");
                if (binaryValue != null) {
                  if (binaries != null) {
                    assertEquals(j, binaries.nextDoc());
                    BytesRef scratch = binaries.binaryValue();
                    assertEquals(binaryValue, scratch);
                    assertEquals(j, sorted.nextDoc());
                    scratch = sorted.binaryValue();
                    assertEquals(binaryValue, scratch);
                  }
                }
                String number = r.document(j).get("storedNum");
                if (number != null) {
                  if (numerics != null) {
                    assertEquals(j, numerics.advance(j));
                    assertEquals(Long.parseLong(number), numerics.longValue());
                  }
                }
                String values[] = r.document(j).getValues("storedSortedSet");
                if (values.length > 0) {
                  assertNotNull(sortedSet);
                  assertEquals(j, sortedSet.nextDoc());
                  for (int k = 0; k < values.length; k++) {
                    long ord = sortedSet.nextOrd();
                    assertTrue(ord != SortedSetDocValues.NO_MORE_ORDS);
                    BytesRef value = sortedSet.lookupOrd(ord);
                    assertEquals(values[k], value.utf8ToString());
                  }
                  assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
                }
                String numValues[] = r.document(j).getValues("storedSortedNumeric");
                if (numValues.length > 0) {
                  assertNotNull(sortedNumeric);
                  assertEquals(j, sortedNumeric.nextDoc());
                  assertEquals(numValues.length, sortedNumeric.docValueCount());
                  for (int k = 0; k < numValues.length; k++) {
                    long v = sortedNumeric.nextValue();
                    assertEquals(numValues[k], Long.toString(v));
                  }
                }
              }
            }
            TestUtil.checkReader(ir);
          } catch (Exception e) {
            throw new RuntimeException(e);
          }
        }
      };
      threads[i].start();
    }
    startingGun.countDown();
    for (Thread t : threads) {
      t.join();
    }
    ir.close();
    dir.close();
  }

  /** Hammers CheckIndex's docvalues verification from many threads at once. */
  @Slow
  public void testThreads3() throws Exception {
    Directory dir = newFSDirectory(createTempDir());
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

    int numSortedSets = random().nextInt(21);
int numBinaries = random().nextInt(21); int numSortedNums = random().nextInt(21); int numDocs = TestUtil.nextInt(random(), 2025, 2047); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); for (int j = 0; j < numSortedSets; j++) { doc.add(new SortedSetDocValuesField("ss" + j, new BytesRef(TestUtil.randomSimpleString(random())))); doc.add(new SortedSetDocValuesField("ss" + j, new BytesRef(TestUtil.randomSimpleString(random())))); } for (int j = 0; j < numBinaries; j++) { doc.add(new BinaryDocValuesField("b" + j, new BytesRef(TestUtil.randomSimpleString(random())))); } for (int j = 0; j < numSortedNums; j++) { doc.add(new SortedNumericDocValuesField("sn" + j, TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE))); doc.add(new SortedNumericDocValuesField("sn" + j, TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE))); } writer.addDocument(doc); } writer.close(); // now check with threads for (int i = 0; i < 10; i++) { final DirectoryReader r = DirectoryReader.open(dir); final CountDownLatch startingGun = new CountDownLatch(1); Thread threads[] = new Thread[TestUtil.nextInt(random(), 4, 10)]; for (int tid = 0; tid < threads.length; tid++) { threads[tid] = new Thread() { @Override public void run() { try { ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); PrintStream infoStream = new PrintStream(bos, false, IOUtils.UTF_8); startingGun.await(); for (LeafReaderContext leaf : r.leaves()) { DocValuesStatus status = CheckIndex.testDocValues((SegmentReader)leaf.reader(), infoStream, true); if (status.error != null) { throw status.error; } } } catch (Throwable e) { throw new RuntimeException(e); } } }; } for (int tid = 0; tid < threads.length; tid++) { threads[tid].start(); } startingGun.countDown(); for (int tid = 0; tid < threads.length; tid++) { threads[tid].join(); } r.close(); } dir.close(); } // LUCENE-5218 public void testEmptyBinaryValueOnPageSizes() throws Exception { // Test larger and larger power-of-two sized values, // 
  // followed by empty string value:
  for(int i=0;i<20;i++) {
    // beyond 16KB values only codecs that accept huge binary values apply
    if (i > 14 && codecAcceptsHugeBinaryValues("field") == false) {
      break;
    }
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    BytesRef bytes = new BytesRef();
    bytes.bytes = new byte[1<<i];
    bytes.length = 1<<i;
    for(int j=0;j<4;j++) {
      Document doc = new Document();
      doc.add(new BinaryDocValuesField("field", bytes));
      w.addDocument(doc);
    }
    // fifth doc gets an empty value after the large ones (the LUCENE-5218 case)
    Document doc = new Document();
    doc.add(new StoredField("id", "5"));
    doc.add(new BinaryDocValuesField("field", new BytesRef()));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();
    BinaryDocValues values = MultiDocValues.getBinaryValues(r, "field");
    for(int j=0;j<5;j++) {
      assertEquals(j, values.nextDoc());
      BytesRef result = values.binaryValue();
      // each doc holds either the empty value or the full 2^i-byte value
      assertTrue(result.length == 0 || result.length == 1<<i);
    }
    r.close();
    dir.close();
  }
}

/** One doc with a single sorted-numeric value: iterator yields it back. */
public void testOneSortedNumber() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  Document doc = new Document();
  doc.add(new SortedNumericDocValuesField("dv", 5));
  writer.addDocument(doc);
  writer.close();

  // Now search the index:
  IndexReader reader = DirectoryReader.open(directory);
  assert reader.leaves().size() == 1;
  SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
  assertEquals(0, dv.nextDoc());
  assertEquals(1, dv.docValueCount());
  assertEquals(5, dv.nextValue());

  reader.close();
  directory.close();
}

/** One doc with a value plus one doc without: iterator skips the empty doc. */
public void testOneSortedNumberOneMissing() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));
  Document doc = new Document();
  doc.add(new SortedNumericDocValuesField("dv", 5));
  writer.addDocument(doc);
  writer.addDocument(new Document());
  writer.close();

  // Now search the index:
  IndexReader reader = DirectoryReader.open(directory);
  assert reader.leaves().size() == 1;
  SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
  assertEquals(0, dv.nextDoc());
  assertEquals(1, dv.docValueCount());
  assertEquals(5, dv.nextValue());
  assertEquals(NO_MORE_DOCS, dv.nextDoc());

  reader.close();
  directory.close();
}

/**
 * After deleting the only doc carrying a numeric field and force-merging,
 * the merged segment's iterator must be immediately exhausted.
 */
public void testNumberMergeAwayAllValues() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  doc.add(new StringField("id", "0", Field.Store.NO));
  iwriter.addDocument(doc);
  doc = new Document();
  doc.add(new StringField("id", "1", Field.Store.NO));
  doc.add(new NumericDocValuesField("field", 5));
  iwriter.addDocument(doc);
  iwriter.commit();
  iwriter.deleteDocuments(new Term("id", "1"));
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  NumericDocValues dv = getOnlyLeafReader(ireader).getNumericDocValues("field");
  assertEquals(NO_MORE_DOCS, dv.nextDoc());

  ireader.close();
  directory.close();
}

/** Two values on one doc come back sorted ascending (-5 before 11). */
public void testTwoSortedNumber() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  Document doc = new Document();
  doc.add(new SortedNumericDocValuesField("dv", 11));
  doc.add(new SortedNumericDocValuesField("dv", -5));
  writer.addDocument(doc);
  writer.close();

  // Now search the index:
  IndexReader reader = DirectoryReader.open(directory);
  assert reader.leaves().size() == 1;
  SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
  assertEquals(0, dv.nextDoc());
  assertEquals(2, dv.docValueCount());
  assertEquals(-5, dv.nextValue());
  assertEquals(11, dv.nextValue());

  reader.close();
  directory.close();
}

/** Duplicate values are preserved, not deduplicated (11 appears twice). */
public void testTwoSortedNumberSameValue() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  Document doc = new Document();
  doc.add(new SortedNumericDocValuesField("dv", 11));
  doc.add(new SortedNumericDocValuesField("dv", 11));
  writer.addDocument(doc);
  writer.close();

  // Now search the index:
  IndexReader reader = DirectoryReader.open(directory);
  assert reader.leaves().size() == 1;
  SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
  assertEquals(0, dv.nextDoc());
  assertEquals(2, dv.docValueCount());
  assertEquals(11, dv.nextValue());
  assertEquals(11, dv.nextValue());

  reader.close();
  directory.close();
}

/** Two values on doc 0, nothing on doc 1: iterator ends after doc 0. */
public void testTwoSortedNumberOneMissing() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));
  Document doc = new Document();
  doc.add(new SortedNumericDocValuesField("dv", 11));
  doc.add(new SortedNumericDocValuesField("dv", -5));
  writer.addDocument(doc);
  writer.addDocument(new Document());
  writer.close();

  // Now search the index:
  IndexReader reader = DirectoryReader.open(directory);
  assert reader.leaves().size() == 1;
  SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
  assertEquals(0, dv.nextDoc());
  assertEquals(2, dv.docValueCount());
  assertEquals(-5, dv.nextValue());
  assertEquals(11, dv.nextValue());
  assertEquals(NO_MORE_DOCS, dv.nextDoc());

  reader.close();
  directory.close();
}

/** Values written to separate segments survive a forced merge intact. */
public void testSortedNumberMerge() throws IOException {
  Directory directory = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(null);
  iwc.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(directory, iwc);
  Document doc = new Document();
  doc.add(new SortedNumericDocValuesField("dv", 11));
  writer.addDocument(doc);
  // commit between docs to force a second segment, then merge
  writer.commit();
  doc = new Document();
  doc.add(new SortedNumericDocValuesField("dv", -5));
  writer.addDocument(doc);
  writer.forceMerge(1);
  writer.close();

  // Now search the index:
  IndexReader reader = DirectoryReader.open(directory);
  assert reader.leaves().size() == 1;
  SortedNumericDocValues dv = reader.leaves().get(0).reader().getSortedNumericDocValues("dv");
  assertEquals(0, dv.nextDoc());
  assertEquals(1, dv.docValueCount());
  assertEquals(11, dv.nextValue());
  assertEquals(1, dv.nextDoc());
  assertEquals(1, dv.docValueCount());
  assertEquals(-5, dv.nextValue());

  reader.close();
  directory.close();
}

/** Sorted-numeric variant of testNumberMergeAwayAllValues. */
public void testSortedNumberMergeAwayAllValues() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  doc.add(new StringField("id", "0", Field.Store.NO));
  iwriter.addDocument(doc);
  doc = new Document();
  doc.add(new StringField("id", "1", Field.Store.NO));
  doc.add(new SortedNumericDocValuesField("field", 5));
  iwriter.addDocument(doc);
  iwriter.commit();
  iwriter.deleteDocuments(new Term("id", "1"));
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedNumericDocValues dv = getOnlyLeafReader(ireader).getSortedNumericDocValues("field");
  assertEquals(NO_MORE_DOCS, dv.nextDoc());

  ireader.close();
  directory.close();
}

/**
 * Wraps a SortedDocValues field as a singleton sorted-set and runs the shared
 * enum-independence checks on it.
 */
public void testSortedEnumAdvanceIndependently() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  // three docs with values "2", "1", "3" (field instance is reused/mutated)
  Document doc = new Document();
  SortedDocValuesField field = new SortedDocValuesField("field", new BytesRef("2"));
  doc.add(field);
  iwriter.addDocument(doc);
  field.setBytesValue(new BytesRef("1"));
  iwriter.addDocument(doc);
  field.setBytesValue(new BytesRef("3"));
  iwriter.addDocument(doc);
  iwriter.commit();
iwriter.forceMerge(1); DirectoryReader ireader = iwriter.getReader(); iwriter.close(); SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field"); doTestSortedSetEnumAdvanceIndependently(DocValues.singleton(dv)); ireader.close(); directory.close(); } public void testSortedSetEnumAdvanceIndependently() throws IOException { Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); iwconfig.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig); Document doc = new Document(); SortedSetDocValuesField field1 = new SortedSetDocValuesField("field", new BytesRef("2")); SortedSetDocValuesField field2 = new SortedSetDocValuesField("field", new BytesRef("3")); doc.add(field1); doc.add(field2); iwriter.addDocument(doc); field1.setBytesValue(new BytesRef("1")); iwriter.addDocument(doc); field2.setBytesValue(new BytesRef("2")); iwriter.addDocument(doc); iwriter.commit(); iwriter.forceMerge(1); DirectoryReader ireader = iwriter.getReader(); iwriter.close(); SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field"); doTestSortedSetEnumAdvanceIndependently(dv); ireader.close(); directory.close(); } protected void doTestSortedSetEnumAdvanceIndependently(SortedSetDocValues dv) throws IOException { if (dv.getValueCount() < 2) { return; } List<BytesRef> terms = new ArrayList<>(); TermsEnum te = dv.termsEnum(); terms.add(BytesRef.deepCopyOf(te.next())); terms.add(BytesRef.deepCopyOf(te.next())); // Make sure that calls to next() does not modify the term of the other enum TermsEnum enum1 = dv.termsEnum(); TermsEnum enum2 = dv.termsEnum(); BytesRefBuilder term1 = new BytesRefBuilder(); BytesRefBuilder term2 = new BytesRefBuilder(); term1.copyBytes(enum1.next()); term2.copyBytes(enum2.next()); term1.copyBytes(enum1.next()); assertEquals(term1.get(), enum1.term()); assertEquals(term2.get(), 
enum2.term()); // Same for seekCeil enum1 = dv.termsEnum(); enum2 = dv.termsEnum(); term1 = new BytesRefBuilder(); term2 = new BytesRefBuilder(); term2.copyBytes(enum2.next()); BytesRefBuilder seekTerm = new BytesRefBuilder(); seekTerm.append(terms.get(0)); seekTerm.append((byte) 0); enum1.seekCeil(seekTerm.get()); term1.copyBytes(enum1.term()); assertEquals(term1.get(), enum1.term()); assertEquals(term2.get(), enum2.term()); // Same for seekCeil on an exact value enum1 = dv.termsEnum(); enum2 = dv.termsEnum(); term1 = new BytesRefBuilder(); term2 = new BytesRefBuilder(); term2.copyBytes(enum2.next()); enum1.seekCeil(terms.get(1)); term1.copyBytes(enum1.term()); assertEquals(term1.get(), enum1.term()); assertEquals(term2.get(), enum2.term()); // Same for seekExact enum1 = dv.termsEnum(); enum2 = dv.termsEnum(); term1 = new BytesRefBuilder(); term2 = new BytesRefBuilder(); term2.copyBytes(enum2.next()); final boolean found = enum1.seekExact(terms.get(1)); assertTrue(found); term1.copyBytes(enum1.term()); // Same for seek by ord enum1 = dv.termsEnum(); enum2 = dv.termsEnum(); term1 = new BytesRefBuilder(); term2 = new BytesRefBuilder(); term2.copyBytes(enum2.next()); enum1.seekExact(1); term1.copyBytes(enum1.term()); assertEquals(term1.get(), enum1.term()); assertEquals(term2.get(), enum2.term()); } // same as testSortedMergeAwayAllValues but on more than 1024 docs to have sparse encoding on public void testSortedMergeAwayAllValuesLargeSegment() throws IOException { Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); iwconfig.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig); Document doc = new Document(); doc.add(new StringField("id", "1", Field.Store.NO)); doc.add(new SortedDocValuesField("field", new BytesRef("hello"))); iwriter.addDocument(doc); final int numEmptyDocs = atLeast(1024); for (int i = 0; i 
< numEmptyDocs; ++i) {
    iwriter.addDocument(new Document());
  }
  iwriter.commit();
  iwriter.deleteDocuments(new Term("id", "1"));
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
  assertEquals(NO_MORE_DOCS, dv.nextDoc());

  ireader.close();
  directory.close();
}

// same as testSortedSetMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
public void testSortedSetMergeAwayAllValuesLargeSegment() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
  Document doc = new Document();
  doc.add(new StringField("id", "1", Field.Store.NO));
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);
  // pad past 1024 docs so the sparse doc-values encoding kicks in
  final int numEmptyDocs = atLeast(1024);
  for (int i = 0; i < numEmptyDocs; ++i) {
    iwriter.addDocument(new Document());
  }
  iwriter.commit();
  iwriter.deleteDocuments(new Term("id", "1"));
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(NO_MORE_DOCS, dv.nextDoc());

  ireader.close();
  directory.close();
}

// same as testNumericMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
public void testNumericMergeAwayAllValuesLargeSegment() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
  Document doc = new Document();
  doc.add(new StringField("id", "1", Field.Store.NO));
  doc.add(new NumericDocValuesField("field", 42L));
  iwriter.addDocument(doc);
  final int numEmptyDocs = atLeast(1024);
  for (int i = 0; i < numEmptyDocs; ++i) {
    iwriter.addDocument(new Document());
  }
  iwriter.commit();
  iwriter.deleteDocuments(new Term("id", "1"));
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  NumericDocValues dv = getOnlyLeafReader(ireader).getNumericDocValues("field");
  assertEquals(NO_MORE_DOCS, dv.nextDoc());

  ireader.close();
  directory.close();
}

// same as testSortedNumericMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
public void testSortedNumericMergeAwayAllValuesLargeSegment() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
  Document doc = new Document();
  doc.add(new StringField("id", "1", Field.Store.NO));
  doc.add(new SortedNumericDocValuesField("field", 42L));
  iwriter.addDocument(doc);
  final int numEmptyDocs = atLeast(1024);
  for (int i = 0; i < numEmptyDocs; ++i) {
    iwriter.addDocument(new Document());
  }
  iwriter.commit();
  iwriter.deleteDocuments(new Term("id", "1"));
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedNumericDocValues dv = getOnlyLeafReader(ireader).getSortedNumericDocValues("field");
  assertEquals(NO_MORE_DOCS, dv.nextDoc());

  ireader.close();
  directory.close();
}

// same as testBinaryMergeAwayAllValues but on more than 1024 docs to have sparse encoding on
public void testBinaryMergeAwayAllValuesLargeSegment() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
  Document doc = new Document();
  doc.add(new StringField("id", "1", Field.Store.NO));
  doc.add(new BinaryDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);
  final int numEmptyDocs = atLeast(1024);
  for (int i = 0; i < numEmptyDocs; ++i) {
    iwriter.addDocument(new Document());
  }
  iwriter.commit();
  iwriter.deleteDocuments(new Term("id", "1"));
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  BinaryDocValues dv = getOnlyLeafReader(ireader).getBinaryDocValues("field");
  assertEquals(NO_MORE_DOCS, dv.nextDoc());

  ireader.close();
  directory.close();
}

/** Runs the random nextDoc()/advance() torture on a numeric field. */
public void testRandomAdvanceNumeric() throws IOException {
  // small or huge value range, to hit different numeric encodings
  final long longRange;
  if (random().nextBoolean()) {
    longRange = TestUtil.nextInt(random(), 1, 1024);
  } else {
    longRange = TestUtil.nextLong(random(), 1, Long.MAX_VALUE);
  }
  doTestRandomAdvance(new FieldCreator() {
    @Override
    public Field next() {
      return new NumericDocValuesField("field", TestUtil.nextLong(random(), 0, longRange));
    }

    @Override
    public DocIdSetIterator iterator(IndexReader r) throws IOException {
      return MultiDocValues.getNumericValues(r, "field");
    }
  });
}

/** Runs the random nextDoc()/advance() torture on a binary field. */
public void testRandomAdvanceBinary() throws IOException {
  doTestRandomAdvance(new FieldCreator() {
    @Override
    public Field next() {
      byte[] bytes = new byte[random().nextInt(10)];
      random().nextBytes(bytes);
      return new BinaryDocValuesField("field", new BytesRef(bytes));
    }

    @Override
    public DocIdSetIterator iterator(IndexReader r) throws IOException {
      return MultiDocValues.getBinaryValues(r, "field");
    }
  });
}

/** Factory abstraction so doTestRandomAdvance can drive any dv field type. */
private interface FieldCreator {
  // produces a fresh field instance carrying a random value
  public Field next();
  // opens the matching doc-values iterator over the whole reader
  public DocIdSetIterator iterator(IndexReader r) throws IOException;
}

/**
 * Indexes chunks of docs with varying sparseness, then randomly mixes
 * nextDoc() and advance() (small and large jumps) over the resulting
 * iterator, asserting it never lands on a doc that has no value.
 */
private void doTestRandomAdvance(FieldCreator fieldCreator) throws IOException {

  Analyzer analyzer = new MockAnalyzer(random());

  Directory directory = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(analyzer);
  conf.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter w = new RandomIndexWriter(random(), directory, conf);
  int numChunks = atLeast(10);
  int id = 0;
  Set<Integer> missingSet = new HashSet<>();
  for(int i=0;i<numChunks;i++) {
    // change sparseness for each chunk
    double sparseChance = random().nextDouble();
    int docCount = atLeast(1000);
    for(int j=0;j<docCount;j++) {
      Document doc = new Document();
      doc.add(new StoredField("id", id));
      if (random().nextDouble() > sparseChance) {
        doc.add(fieldCreator.next());
      } else {
        missingSet.add(id);
      }
      id++;
      w.addDocument(doc);
    }
  }

  if (random().nextBoolean()) {
    w.forceMerge(1);
  }

  // Now search the index:
  IndexReader r = w.getReader();
  // translate the id-based missing set into per-docID bits (doc order may
  // differ from insertion order after merging)
  BitSet missing = new FixedBitSet(r.maxDoc());
  for(int docID=0;docID<r.maxDoc();docID++) {
    Document doc = r.document(docID);
    if (missingSet.contains(doc.getField("id").numericValue())) {
      missing.set(docID);
    }
  }

  for(int iter=0;iter<100;iter++) {
    DocIdSetIterator values = fieldCreator.iterator(r);
    assertEquals(-1, values.docID());

    while (true) {
      int docID;
      if (random().nextBoolean()) {
        docID = values.nextDoc();
      } else {
        int range;
        if (random().nextInt(10) == 7) {
          // big jump
          range = r.maxDoc()-values.docID();
        } else {
          // small jump
          range = 25;
        }
        int inc = TestUtil.nextInt(random(), 1, range);
        docID = values.advance(values.docID() + inc);
      }
      if (docID == NO_MORE_DOCS) {
        break;
      }
      // iterator must only visit docs that actually have a value
      assertFalse(missing.get(docID));
    }
  }

  IOUtils.close(r, w, directory);
}

/** Hook for codecs that reject very large binary values; default accepts. */
protected boolean codecAcceptsHugeBinaryValues(String field) {
  return true;
}
}