/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; /** Tests MultiDocValues versus ordinary segment merging */ public class TestMultiDocValues extends LuceneTestCase { public void testNumerics() throws Exception { Directory dir = newDirectory(); Document doc = new Document(); Field field = new NumericDocValuesField("numbers", 0); doc.add(field); IndexWriterConfig iwc = newIndexWriterConfig(random(), null); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50); for (int i = 0; i < numDocs; i++) { field.setLongValue(random().nextLong()); iw.addDocument(doc); if (random().nextInt(17) == 0) { iw.commit(); } } DirectoryReader ir = iw.getReader(); iw.forceMerge(1); DirectoryReader ir2 = iw.getReader(); LeafReader merged = getOnlyLeafReader(ir2); iw.close(); NumericDocValues multi = MultiDocValues.getNumericValues(ir, "numbers"); NumericDocValues single = merged.getNumericDocValues("numbers"); for (int i = 0; i < numDocs; i++) { assertEquals(i, multi.nextDoc()); assertEquals(i, single.nextDoc()); assertEquals(single.longValue(), multi.longValue()); } testRandomAdvance(merged.getNumericDocValues("numbers"), MultiDocValues.getNumericValues(ir, "numbers")); testRandomAdvanceExact(merged.getNumericDocValues("numbers"), MultiDocValues.getNumericValues(ir, "numbers"), merged.maxDoc()); ir.close(); ir2.close(); dir.close(); } public void testBinary() throws Exception { Directory dir = newDirectory(); Document doc = new Document(); Field field = new BinaryDocValuesField("bytes", new BytesRef()); doc.add(field); IndexWriterConfig iwc = newIndexWriterConfig(random(), null); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50); for (int i = 0; i < numDocs; i++) { BytesRef ref = new BytesRef(TestUtil.randomUnicodeString(random())); field.setBytesValue(ref); iw.addDocument(doc); if (random().nextInt(17) == 0) { iw.commit(); } } DirectoryReader ir = iw.getReader(); iw.forceMerge(1); DirectoryReader ir2 = iw.getReader(); LeafReader merged = getOnlyLeafReader(ir2); iw.close(); BinaryDocValues multi = MultiDocValues.getBinaryValues(ir, "bytes"); BinaryDocValues single = merged.getBinaryDocValues("bytes"); for (int i = 0; i < numDocs; i++) { assertEquals(i, multi.nextDoc()); assertEquals(i, single.nextDoc()); final BytesRef expected = BytesRef.deepCopyOf(single.binaryValue()); final BytesRef actual = multi.binaryValue(); assertEquals(expected, actual); } testRandomAdvance(merged.getBinaryDocValues("bytes"), MultiDocValues.getBinaryValues(ir, "bytes")); testRandomAdvanceExact(merged.getBinaryDocValues("bytes"), MultiDocValues.getBinaryValues(ir, "bytes"), merged.maxDoc()); ir.close(); ir2.close(); dir.close(); } public void testSorted() throws Exception { Directory dir = newDirectory(); Document doc = new Document(); Field field = new SortedDocValuesField("bytes", new BytesRef()); doc.add(field); IndexWriterConfig iwc = newIndexWriterConfig(random(), null); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50); for (int i = 0; i < numDocs; i++) { BytesRef ref = new BytesRef(TestUtil.randomUnicodeString(random())); field.setBytesValue(ref); if (random().nextInt(7) == 0) { iw.addDocument(new Document()); } iw.addDocument(doc); if (random().nextInt(17) == 0) { iw.commit(); } } DirectoryReader ir = iw.getReader(); iw.forceMerge(1); DirectoryReader ir2 = iw.getReader(); LeafReader merged = getOnlyLeafReader(ir2); iw.close(); SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes"); SortedDocValues single = merged.getSortedDocValues("bytes"); assertEquals(single.getValueCount(), multi.getValueCount()); while (true) { assertEquals(single.nextDoc(), multi.nextDoc()); if (single.docID() == NO_MORE_DOCS) { break; } // check value final BytesRef expected = BytesRef.deepCopyOf(single.binaryValue()); final BytesRef actual = multi.binaryValue(); assertEquals(expected, actual); // check ord assertEquals(single.ordValue(), multi.ordValue()); } testRandomAdvance(merged.getSortedDocValues("bytes"), MultiDocValues.getSortedValues(ir, "bytes")); testRandomAdvanceExact(merged.getSortedDocValues("bytes"), MultiDocValues.getSortedValues(ir, "bytes"), merged.maxDoc()); ir.close(); ir2.close(); dir.close(); } // tries to make more dups than testSorted public void testSortedWithLotsOfDups() throws Exception { Directory dir = newDirectory(); Document doc = new Document(); Field field = new SortedDocValuesField("bytes", new BytesRef()); doc.add(field); IndexWriterConfig iwc = newIndexWriterConfig(random(), null); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50); for (int i = 0; i < numDocs; i++) { BytesRef ref = new BytesRef(TestUtil.randomSimpleString(random(), 2)); field.setBytesValue(ref); iw.addDocument(doc); if (random().nextInt(17) == 0) { iw.commit(); } } DirectoryReader ir = iw.getReader(); iw.forceMerge(1); DirectoryReader ir2 = iw.getReader(); LeafReader merged = getOnlyLeafReader(ir2); iw.close(); SortedDocValues multi = MultiDocValues.getSortedValues(ir, "bytes"); SortedDocValues single = merged.getSortedDocValues("bytes"); assertEquals(single.getValueCount(), multi.getValueCount()); for (int i = 0; i < numDocs; i++) { assertEquals(i, multi.nextDoc()); assertEquals(i, single.nextDoc()); // check ord assertEquals(single.ordValue(), multi.ordValue()); // check ord value final BytesRef expected = BytesRef.deepCopyOf(single.binaryValue()); final BytesRef actual = multi.binaryValue(); assertEquals(expected, actual); } testRandomAdvance(merged.getSortedDocValues("bytes"), MultiDocValues.getSortedValues(ir, "bytes")); testRandomAdvanceExact(merged.getSortedDocValues("bytes"), MultiDocValues.getSortedValues(ir, "bytes"), merged.maxDoc()); ir.close(); ir2.close(); dir.close(); } public void testSortedSet() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(random(), null); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); int numValues = random().nextInt(5); for (int j = 0; j < numValues; j++) { doc.add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.randomUnicodeString(random())))); } iw.addDocument(doc); if (random().nextInt(17) == 0) { iw.commit(); } } DirectoryReader ir = iw.getReader(); iw.forceMerge(1); DirectoryReader ir2 = iw.getReader(); LeafReader merged = getOnlyLeafReader(ir2); iw.close(); SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes"); SortedSetDocValues single = merged.getSortedSetDocValues("bytes"); if (multi == null) { assertNull(single); } else { assertEquals(single.getValueCount(), multi.getValueCount()); // check values for (long i = 0; i < single.getValueCount(); i++) { final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i)); final BytesRef actual = multi.lookupOrd(i); assertEquals(expected, actual); } // check ord list while (true) { int docID = single.nextDoc(); assertEquals(docID, multi.nextDoc()); if (docID == NO_MORE_DOCS) { break; } ArrayList<Long> expectedList = new ArrayList<>(); long ord; while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { expectedList.add(ord); } int upto = 0; while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { assertEquals(expectedList.get(upto).longValue(), ord); upto++; } assertEquals(expectedList.size(), upto); } } testRandomAdvance(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes")); testRandomAdvanceExact(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"), merged.maxDoc()); ir.close(); ir2.close(); dir.close(); } // tries to make more dups than testSortedSet public void testSortedSetWithDups() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(random(), null); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); int numValues = random().nextInt(5); for (int j = 0; j < numValues; j++) { doc.add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.randomSimpleString(random(), 2)))); } iw.addDocument(doc); if (random().nextInt(17) == 0) { iw.commit(); } } DirectoryReader ir = iw.getReader(); iw.forceMerge(1); DirectoryReader ir2 = iw.getReader(); LeafReader merged = getOnlyLeafReader(ir2); iw.close(); SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes"); SortedSetDocValues single = merged.getSortedSetDocValues("bytes"); if (multi == null) { assertNull(single); } else { assertEquals(single.getValueCount(), multi.getValueCount()); // check values for (long i = 0; i < single.getValueCount(); i++) { final BytesRef expected = BytesRef.deepCopyOf(single.lookupOrd(i)); final BytesRef actual = multi.lookupOrd(i); assertEquals(expected, actual); } // check ord list while (true) { int docID = single.nextDoc(); assertEquals(docID, multi.nextDoc()); if (docID == NO_MORE_DOCS) { break; } ArrayList<Long> expectedList = new ArrayList<>(); long ord; while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { expectedList.add(ord); } int upto = 0; while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { assertEquals(expectedList.get(upto).longValue(), ord); upto++; } assertEquals(expectedList.size(), upto); } } testRandomAdvance(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes")); testRandomAdvanceExact(merged.getSortedSetDocValues("bytes"), MultiDocValues.getSortedSetValues(ir, "bytes"), merged.maxDoc()); ir.close(); ir2.close(); dir.close(); } public void testSortedNumeric() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(random(), null); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); int numValues = random().nextInt(5); for (int j = 0; j < numValues; j++) { doc.add(new SortedNumericDocValuesField("nums", TestUtil.nextLong(random(), Long.MIN_VALUE, Long.MAX_VALUE))); } iw.addDocument(doc); if (random().nextInt(17) == 0) { iw.commit(); } } DirectoryReader ir = iw.getReader(); iw.forceMerge(1); DirectoryReader ir2 = iw.getReader(); LeafReader merged = getOnlyLeafReader(ir2); iw.close(); SortedNumericDocValues multi = MultiDocValues.getSortedNumericValues(ir, "nums"); SortedNumericDocValues single = merged.getSortedNumericDocValues("nums"); if (multi == null) { assertNull(single); } else { // check values for (int i = 0; i < numDocs; i++) { if (i > single.docID()) { assertEquals(single.nextDoc(), multi.nextDoc()); } if (i == single.docID()) { assertEquals(single.docValueCount(), multi.docValueCount()); for (int j = 0; j < single.docValueCount(); j++) { assertEquals(single.nextValue(), multi.nextValue()); } } } } testRandomAdvance(merged.getSortedNumericDocValues("nums"), MultiDocValues.getSortedNumericValues(ir, "nums")); testRandomAdvanceExact(merged.getSortedNumericDocValues("nums"), MultiDocValues.getSortedNumericValues(ir, "nums"), merged.maxDoc()); ir.close(); ir2.close(); dir.close(); } private void testRandomAdvance(DocIdSetIterator iter1, DocIdSetIterator iter2) throws IOException { assertEquals(-1, iter1.docID()); assertEquals(-1, iter2.docID()); while (iter1.docID() != NO_MORE_DOCS) { if (random().nextBoolean()) { assertEquals(iter1.nextDoc(), iter2.nextDoc()); } else { int target = iter1.docID() + TestUtil.nextInt(random(), 1, 100); assertEquals(iter1.advance(target), iter2.advance(target)); } } } private void testRandomAdvanceExact(DocValuesIterator iter1, DocValuesIterator iter2, int maxDoc) throws IOException { for (int target = random().nextInt(Math.min(maxDoc, 10)); target < maxDoc; target += random().nextInt(10)) { final boolean exists1 = iter1.advanceExact(target); final boolean exists2 = iter2.advanceExact(target); assertEquals(exists1, exists2); } } }