package org.apache.lucene.index; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Random; import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.ByteDocValuesField; import org.apache.lucene.document.DerefBytesDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.document.IntDocValuesField; import org.apache.lucene.document.LongDocValuesField; import org.apache.lucene.document.PackedLongDocValuesField; import org.apache.lucene.document.ShortDocValuesField; import org.apache.lucene.document.SortedBytesDocValuesField; import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; /** * * Tests DocValues integration into IndexWriter & Codecs * */ @SuppressCodecs("Lucene3x") public class TestDocValuesIndexing extends LuceneTestCase { /* * - add test for multi segment case with deletes * - add multithreaded tests / integrate into stress indexing? */ /* * Simple test case to show how to use the API */ public void testDocValuesSimple() throws IOException { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, writerConfig(false)); for (int i = 0; i < 5; i++) { Document doc = new Document(); doc.add(new PackedLongDocValuesField("docId", i)); doc.add(new TextField("docId", "" + i, Field.Store.NO)); writer.addDocument(doc); } writer.commit(); writer.forceMerge(1, true); writer.close(true); DirectoryReader reader = DirectoryReader.open(dir, 1); assertEquals(1, reader.leaves().size()); IndexSearcher searcher = new IndexSearcher(reader); BooleanQuery query = new BooleanQuery(); query.add(new TermQuery(new Term("docId", "0")), BooleanClause.Occur.SHOULD); query.add(new TermQuery(new Term("docId", "1")), BooleanClause.Occur.SHOULD); query.add(new TermQuery(new Term("docId", "2")), BooleanClause.Occur.SHOULD); query.add(new TermQuery(new Term("docId", "3")), BooleanClause.Occur.SHOULD); query.add(new TermQuery(new Term("docId", "4")), BooleanClause.Occur.SHOULD); TopDocs search = searcher.search(query, 10); assertEquals(5, search.totalHits); ScoreDoc[] scoreDocs = search.scoreDocs; DocValues docValues = MultiDocValues.getDocValues(reader, "docId"); Source source = docValues.getSource(); for (int i = 0; i < scoreDocs.length; i++) { assertEquals(i, scoreDocs[i].doc); assertEquals(i, source.getInt(scoreDocs[i].doc)); } reader.close(); dir.close(); } public void testIndexBytesNoDeletes() throws IOException { runTestIndexBytes(writerConfig(random().nextBoolean()), false); } public void testIndexBytesDeletes() throws IOException { runTestIndexBytes(writerConfig(random().nextBoolean()), true); } public void testIndexNumericsNoDeletes() throws IOException { runTestNumerics(writerConfig(random().nextBoolean()), false); } public void testIndexNumericsDeletes() throws IOException { runTestNumerics(writerConfig(random().nextBoolean()), true); } public void testAddIndexes() throws IOException { Directory d1 = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), d1); Document doc = new Document(); doc.add(newStringField("id", "1", Field.Store.YES)); doc.add(new PackedLongDocValuesField("dv", 1)); w.addDocument(doc); IndexReader r1 = w.getReader(); w.close(); Directory d2 = newDirectory(); w = new RandomIndexWriter(random(), d2); doc = new Document(); doc.add(newStringField("id", "2", Field.Store.YES)); doc.add(new PackedLongDocValuesField("dv", 2)); w.addDocument(doc); IndexReader r2 = w.getReader(); w.close(); Directory d3 = newDirectory(); w = new RandomIndexWriter(random(), d3); w.addIndexes(SlowCompositeReaderWrapper.wrap(r1), SlowCompositeReaderWrapper.wrap(r2)); r1.close(); d1.close(); r2.close(); d2.close(); w.forceMerge(1); DirectoryReader r3 = w.getReader(); w.close(); AtomicReader sr = getOnlySegmentReader(r3); assertEquals(2, sr.numDocs()); DocValues docValues = sr.docValues("dv"); assertNotNull(docValues); r3.close(); d3.close(); } public void testAddIndexesRandom() throws IOException { int valuesPerIndex = 10; List<Type> values = Arrays.asList(Type.values()); Collections.shuffle(values, random()); Type first = values.get(0); Type second = values.get(1); // index first index Directory d_1 = newDirectory(); IndexWriter w_1 = new IndexWriter(d_1, writerConfig(random().nextBoolean())); indexValues(w_1, valuesPerIndex, first, values, false, 7); w_1.commit(); assertEquals(valuesPerIndex, w_1.maxDoc()); _TestUtil.checkIndex(d_1); // index second index Directory d_2 = newDirectory(); IndexWriter w_2 = new IndexWriter(d_2, writerConfig(random().nextBoolean())); indexValues(w_2, valuesPerIndex, second, values, false, 7); w_2.commit(); assertEquals(valuesPerIndex, w_2.maxDoc()); _TestUtil.checkIndex(d_2); Directory target = newDirectory(); IndexWriter w = new IndexWriter(target, writerConfig(random().nextBoolean())); DirectoryReader r_1 = DirectoryReader.open(w_1, true); DirectoryReader r_2 = DirectoryReader.open(w_2, true); if (random().nextBoolean()) { w.addIndexes(d_1, d_2); } else { w.addIndexes(r_1, r_2); } w.forceMerge(1, true); w.commit(); _TestUtil.checkIndex(target); assertEquals(valuesPerIndex * 2, w.maxDoc()); // check values DirectoryReader merged = DirectoryReader.open(w, true); Source source_1 = getSource(getDocValues(r_1, first.name())); Source source_2 = getSource(getDocValues(r_2, second.name())); Source source_1_merged = getSource(getDocValues(merged, first.name())); Source source_2_merged = getSource(getDocValues(merged, second .name())); for (int i = 0; i < r_1.maxDoc(); i++) { switch (first) { case BYTES_FIXED_DEREF: case BYTES_FIXED_STRAIGHT: case BYTES_VAR_DEREF: case BYTES_VAR_STRAIGHT: case BYTES_FIXED_SORTED: case BYTES_VAR_SORTED: assertEquals(source_1.getBytes(i, new BytesRef()), source_1_merged.getBytes(i, new BytesRef())); break; case FIXED_INTS_16: case FIXED_INTS_32: case FIXED_INTS_64: case FIXED_INTS_8: case VAR_INTS: assertEquals(source_1.getInt(i), source_1_merged.getInt(i)); break; case FLOAT_32: case FLOAT_64: assertEquals(source_1.getFloat(i), source_1_merged.getFloat(i), 0.0d); break; default: fail("unkonwn " + first); } } for (int i = r_1.maxDoc(); i < merged.maxDoc(); i++) { switch (second) { case BYTES_FIXED_DEREF: case BYTES_FIXED_STRAIGHT: case BYTES_VAR_DEREF: case BYTES_VAR_STRAIGHT: case BYTES_FIXED_SORTED: case BYTES_VAR_SORTED: assertEquals(source_2.getBytes(i - r_1.maxDoc(), new BytesRef()), source_2_merged.getBytes(i, new BytesRef())); break; case FIXED_INTS_16: case FIXED_INTS_32: case FIXED_INTS_64: case FIXED_INTS_8: case VAR_INTS: assertEquals(source_2.getInt(i - r_1.maxDoc()), source_2_merged.getInt(i)); break; case FLOAT_32: case FLOAT_64: assertEquals(source_2.getFloat(i - r_1.maxDoc()), source_2_merged.getFloat(i), 0.0d); break; default: fail("unkonwn " + first); } } // close resources r_1.close(); r_2.close(); merged.close(); w_1.close(true); w_2.close(true); w.close(true); d_1.close(); d_2.close(); target.close(); } private IndexWriterConfig writerConfig(boolean useCompoundFile) { final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); cfg.setMergePolicy(newLogMergePolicy(random())); LogMergePolicy policy = new LogDocMergePolicy(); cfg.setMergePolicy(policy); policy.setUseCompoundFile(useCompoundFile); return cfg; } @SuppressWarnings("fallthrough") public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions) throws IOException { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final int numValues = 50 + atLeast(10); final List<Type> numVariantList = new ArrayList<Type>(NUMERICS); // run in random order to test if fill works correctly during merges Collections.shuffle(numVariantList, random()); for (Type val : numVariantList) { FixedBitSet deleted = indexValues(w, numValues, val, numVariantList, withDeletions, 7); List<Closeable> closeables = new ArrayList<Closeable>(); DirectoryReader r = DirectoryReader.open(w, true); final int numRemainingValues = numValues - deleted.cardinality(); final int base = r.numDocs() - numRemainingValues; // for FIXED_INTS_8 we use value mod 128 - to enable testing in // one go we simply use numValues as the mod for all other INT types int mod = numValues; switch (val) { case FIXED_INTS_8: mod = 128; case FIXED_INTS_16: case FIXED_INTS_32: case FIXED_INTS_64: case VAR_INTS: { DocValues intsReader = getDocValues(r, val.name()); assertNotNull(intsReader); Source ints = getSource(intsReader); for (int i = 0; i < base; i++) { long value = ints.getInt(i); assertEquals("index " + i, 0, value); } int expected = 0; for (int i = base; i < r.numDocs(); i++, expected++) { while (deleted.get(expected)) { expected++; } assertEquals(val + " mod: " + mod + " index: " + i, expected%mod, ints.getInt(i)); } } break; case FLOAT_32: case FLOAT_64: { DocValues floatReader = getDocValues(r, val.name()); assertNotNull(floatReader); Source floats = getSource(floatReader); for (int i = 0; i < base; i++) { double value = floats.getFloat(i); assertEquals(val + " failed for doc: " + i + " base: " + base, 0.0d, value, 0.0d); } int expected = 0; for (int i = base; i < r.numDocs(); i++, expected++) { while (deleted.get(expected)) { expected++; } assertEquals("index " + i, 2.0 * expected, floats.getFloat(i), 0.00001); } } break; default: fail("unexpected value " + val); } closeables.add(r); for (Closeable toClose : closeables) { toClose.close(); } } w.close(); d.close(); } public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions) throws IOException { final Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, cfg); final List<Type> byteVariantList = new ArrayList<Type>(BYTES); // run in random order to test if fill works correctly during merges Collections.shuffle(byteVariantList, random()); final int numValues = 50 + atLeast(10); for (Type byteIndexValue : byteVariantList) { List<Closeable> closeables = new ArrayList<Closeable>(); final int bytesSize = 1 + atLeast(50); FixedBitSet deleted = indexValues(w, numValues, byteIndexValue, byteVariantList, withDeletions, bytesSize); final DirectoryReader r = DirectoryReader.open(w, withDeletions); assertEquals(0, r.numDeletedDocs()); final int numRemainingValues = numValues - deleted.cardinality(); final int base = r.numDocs() - numRemainingValues; DocValues bytesReader = getDocValues(r, byteIndexValue.name()); assertNotNull("field " + byteIndexValue.name() + " returned null reader - maybe merged failed", bytesReader); Source bytes = getSource(bytesReader); byte upto = 0; // test the filled up slots for correctness for (int i = 0; i < base; i++) { BytesRef br = bytes.getBytes(i, new BytesRef()); String msg = " field: " + byteIndexValue.name() + " at index: " + i + " base: " + base + " numDocs:" + r.numDocs(); switch (byteIndexValue) { case BYTES_VAR_STRAIGHT: case BYTES_FIXED_STRAIGHT: case BYTES_FIXED_DEREF: case BYTES_FIXED_SORTED: // fixed straight returns bytesref with zero bytes all of fixed // length assertNotNull("expected none null - " + msg, br); if (br.length != 0) { assertEquals("expected zero bytes of length " + bytesSize + " - " + msg + br.utf8ToString(), bytesSize, br.length); for (int j = 0; j < br.length; j++) { assertEquals("Byte at index " + j + " doesn't match - " + msg, 0, br.bytes[br.offset + j]); } } break; default: assertNotNull("expected none null - " + msg, br); assertEquals(byteIndexValue + "", 0, br.length); // make sure we advance at least until base } } // test the actual doc values added in this iteration assertEquals(base + numRemainingValues, r.numDocs()); int v = 0; for (int i = base; i < r.numDocs(); i++) { String msg = " field: " + byteIndexValue.name() + " at index: " + i + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: " + bytesSize + " src: " + bytes; while (withDeletions && deleted.get(v++)) { upto += bytesSize; } BytesRef br = bytes.getBytes(i, new BytesRef()); assertTrue(msg, br.length > 0); for (int j = 0; j < br.length; j++, upto++) { if (!(br.bytes.length > br.offset + j)) br = bytes.getBytes(i, new BytesRef()); assertTrue("BytesRef index exceeded [" + msg + "] offset: " + br.offset + " length: " + br.length + " index: " + (br.offset + j), br.bytes.length > br.offset + j); assertEquals("SourceRef Byte at index " + j + " doesn't match - " + msg, upto, br.bytes[br.offset + j]); } } // clean up closeables.add(r); for (Closeable toClose : closeables) { toClose.close(); } } w.close(); d.close(); } public void testGetArrayNumerics() throws IOException { Directory d = newDirectory(); IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); IndexWriter w = new IndexWriter(d, cfg); final int numValues = 50 + atLeast(10); final List<Type> numVariantList = new ArrayList<Type>(NUMERICS); Collections.shuffle(numVariantList, random()); for (Type val : numVariantList) { indexValues(w, numValues, val, numVariantList, false, 7); DirectoryReader r = DirectoryReader.open(w, true); DocValues docValues = getDocValues(r, val.name()); assertNotNull(docValues); // make sure we don't get a direct source since they don't support getArray() Source source = docValues.getSource(); switch (source.getType()) { case FIXED_INTS_8: { assertTrue(source.hasArray()); byte[] values = (byte[]) source.getArray(); for (int i = 0; i < numValues; i++) { assertEquals((long)values[i], source.getInt(i)); } } break; case FIXED_INTS_16: { assertTrue(source.hasArray()); short[] values = (short[]) source.getArray(); for (int i = 0; i < numValues; i++) { assertEquals((long)values[i], source.getInt(i)); } } break; case FIXED_INTS_32: { assertTrue(source.hasArray()); int[] values = (int[]) source.getArray(); for (int i = 0; i < numValues; i++) { assertEquals((long)values[i], source.getInt(i)); } } break; case FIXED_INTS_64: { assertTrue(source.hasArray()); long[] values = (long[]) source.getArray(); for (int i = 0; i < numValues; i++) { assertEquals(values[i], source.getInt(i)); } } break; case VAR_INTS: assertFalse(source.hasArray()); break; case FLOAT_32: { assertTrue(source.hasArray()); float[] values = (float[]) source.getArray(); for (int i = 0; i < numValues; i++) { assertEquals((double)values[i], source.getFloat(i), 0.0d); } } break; case FLOAT_64: { assertTrue(source.hasArray()); double[] values = (double[]) source.getArray(); for (int i = 0; i < numValues; i++) { assertEquals(values[i], source.getFloat(i), 0.0d); } } break; default: fail("unexpected value " + source.getType()); } r.close(); } w.close(); d.close(); } public void testGetArrayBytes() throws IOException { Directory d = newDirectory(); IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); IndexWriter w = new IndexWriter(d, cfg); final int numValues = 50 + atLeast(10); // only single byte fixed straight supports getArray() indexValues(w, numValues, Type.BYTES_FIXED_STRAIGHT, null, false, 1); DirectoryReader r = DirectoryReader.open(w, true); DocValues docValues = getDocValues(r, Type.BYTES_FIXED_STRAIGHT.name()); assertNotNull(docValues); // make sure we don't get a direct source since they don't support // getArray() Source source = docValues.getSource(); switch (source.getType()) { case BYTES_FIXED_STRAIGHT: { BytesRef ref = new BytesRef(); if (source.hasArray()) { byte[] values = (byte[]) source.getArray(); for (int i = 0; i < numValues; i++) { source.getBytes(i, ref); assertEquals(1, ref.length); assertEquals(values[i], ref.bytes[ref.offset]); } } } break; default: fail("unexpected value " + source.getType()); } r.close(); w.close(); d.close(); } private DocValues getDocValues(IndexReader reader, String field) throws IOException { return MultiDocValues.getDocValues(reader, field); } @SuppressWarnings("fallthrough") private Source getSource(DocValues values) throws IOException { // getSource uses cache internally switch(random().nextInt(5)) { case 3: return values.load(); case 2: return values.getDirectSource(); case 1: if(values.getType() == Type.BYTES_VAR_SORTED || values.getType() == Type.BYTES_FIXED_SORTED) { return values.getSource().asSortedSource(); } default: return values.getSource(); } } private static EnumSet<Type> BYTES = EnumSet.of(Type.BYTES_FIXED_DEREF, Type.BYTES_FIXED_STRAIGHT, Type.BYTES_VAR_DEREF, Type.BYTES_VAR_STRAIGHT, Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED); private static EnumSet<Type> NUMERICS = EnumSet.of(Type.VAR_INTS, Type.FIXED_INTS_16, Type.FIXED_INTS_32, Type.FIXED_INTS_64, Type.FIXED_INTS_8, Type.FLOAT_32, Type.FLOAT_64); private FixedBitSet indexValues(IndexWriter w, int numValues, Type valueType, List<Type> valueVarList, boolean withDeletions, int bytesSize) throws IOException { final boolean isNumeric = NUMERICS.contains(valueType); FixedBitSet deleted = new FixedBitSet(numValues); Document doc = new Document(); final Field valField; if (isNumeric) { switch (valueType) { case VAR_INTS: valField = new PackedLongDocValuesField(valueType.name(), (long) 0); break; case FIXED_INTS_16: valField = new ShortDocValuesField(valueType.name(), (short) 0); break; case FIXED_INTS_32: valField = new IntDocValuesField(valueType.name(), 0); break; case FIXED_INTS_64: valField = new LongDocValuesField(valueType.name(), (long) 0); break; case FIXED_INTS_8: valField = new ByteDocValuesField(valueType.name(), (byte) 0); break; case FLOAT_32: valField = new FloatDocValuesField(valueType.name(), (float) 0); break; case FLOAT_64: valField = new DoubleDocValuesField(valueType.name(), (double) 0); break; default: valField = null; fail("unhandled case"); } } else { switch (valueType) { case BYTES_FIXED_STRAIGHT: valField = new StraightBytesDocValuesField(valueType.name(), new BytesRef(), true); break; case BYTES_VAR_STRAIGHT: valField = new StraightBytesDocValuesField(valueType.name(), new BytesRef(), false); break; case BYTES_FIXED_DEREF: valField = new DerefBytesDocValuesField(valueType.name(), new BytesRef(), true); break; case BYTES_VAR_DEREF: valField = new DerefBytesDocValuesField(valueType.name(), new BytesRef(), false); break; case BYTES_FIXED_SORTED: valField = new SortedBytesDocValuesField(valueType.name(), new BytesRef(), true); break; case BYTES_VAR_SORTED: valField = new SortedBytesDocValuesField(valueType.name(), new BytesRef(), false); break; default: valField = null; fail("unhandled case"); } } doc.add(valField); final BytesRef bytesRef = new BytesRef(); final String idBase = valueType.name() + "_"; final byte[] b = new byte[bytesSize]; if (bytesRef != null) { bytesRef.bytes = b; bytesRef.length = b.length; bytesRef.offset = 0; } byte upto = 0; for (int i = 0; i < numValues; i++) { if (isNumeric) { switch (valueType) { case VAR_INTS: valField.setLongValue((long)i); break; case FIXED_INTS_16: valField.setShortValue((short)i); break; case FIXED_INTS_32: valField.setIntValue(i); break; case FIXED_INTS_64: valField.setLongValue((long)i); break; case FIXED_INTS_8: valField.setByteValue((byte)(0xFF & (i % 128))); break; case FLOAT_32: valField.setFloatValue(2.0f * i); break; case FLOAT_64: valField.setDoubleValue(2.0d * i); break; default: fail("unexpected value " + valueType); } } else { for (int j = 0; j < b.length; j++) { b[j] = upto++; } if (bytesRef != null) { valField.setBytesValue(bytesRef); } } doc.removeFields("id"); doc.add(new StringField("id", idBase + i, Field.Store.YES)); w.addDocument(doc); if (i % 7 == 0) { if (withDeletions && random().nextBoolean()) { Type val = valueVarList.get(random().nextInt(1 + valueVarList .indexOf(valueType))); final int randInt = val == valueType ? random().nextInt(1 + i) : random() .nextInt(numValues); w.deleteDocuments(new Term("id", val.name() + "_" + randInt)); if (val == valueType) { deleted.set(randInt); } } if (random().nextInt(10) == 0) { w.commit(); } } } w.commit(); // TODO test multi seg with deletions if (withDeletions || random().nextBoolean()) { w.forceMerge(1, true); } return deleted; } public void testMultiValuedDocValuesField() throws Exception { Directory d = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), d); Document doc = new Document(); Field f = new PackedLongDocValuesField("field", 17); // Index doc values are single-valued so we should not // be able to add same field more than once: doc.add(f); doc.add(f); try { w.addDocument(doc); fail("didn't hit expected exception"); } catch (IllegalArgumentException iae) { // expected } doc = new Document(); doc.add(f); w.addDocument(doc); w.forceMerge(1); DirectoryReader r = w.getReader(); w.close(); assertEquals(17, getOnlySegmentReader(r).docValues("field").load().getInt(0)); r.close(); d.close(); } public void testDifferentTypedDocValuesField() throws Exception { Directory d = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), d); Document doc = new Document(); // Index doc values are single-valued so we should not // be able to add same field more than once: Field f; doc.add(f = new PackedLongDocValuesField("field", 17)); doc.add(new FloatDocValuesField("field", 22.0f)); try { w.addDocument(doc); fail("didn't hit expected exception"); } catch (IllegalArgumentException iae) { // expected } doc = new Document(); doc.add(f); w.addDocument(doc); w.forceMerge(1); DirectoryReader r = w.getReader(); w.close(); assertEquals(17, getOnlySegmentReader(r).docValues("field").load().getInt(0)); r.close(); d.close(); } public void testSortedBytes() throws IOException { Type[] types = new Type[] { Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED }; for (Type type : types) { boolean fixed = type == Type.BYTES_FIXED_SORTED; final Directory d = newDirectory(); IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); IndexWriter w = new IndexWriter(d, cfg); int numDocs = atLeast(100); BytesRefHash hash = new BytesRefHash(); Map<String, String> docToString = new HashMap<String, String>(); int len = 1 + random().nextInt(50); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.add(newTextField("id", "" + i, Field.Store.YES)); String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random(), len) : _TestUtil.randomRealisticUnicodeString(random(), 1, len); BytesRef br = new BytesRef(string); doc.add(new SortedBytesDocValuesField("field", br, type == Type.BYTES_FIXED_SORTED)); hash.add(br); docToString.put("" + i, string); w.addDocument(doc); } if (rarely()) { w.commit(); } int numDocsNoValue = atLeast(10); for (int i = 0; i < numDocsNoValue; i++) { Document doc = new Document(); doc.add(newTextField("id", "noValue", Field.Store.YES)); w.addDocument(doc); } BytesRef bytesRef = new BytesRef(fixed ? len : 0); bytesRef.offset = 0; bytesRef.length = fixed ? len : 0; hash.add(bytesRef); // add empty value for the gaps if (rarely()) { w.commit(); } for (int i = 0; i < numDocs; i++) { Document doc = new Document(); String id = "" + i + numDocs; doc.add(newTextField("id", id, Field.Store.YES)); String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random(), len) : _TestUtil.randomRealisticUnicodeString(random(), 1, len); BytesRef br = new BytesRef(string); hash.add(br); docToString.put(id, string); doc.add(new SortedBytesDocValuesField("field", br, type == Type.BYTES_FIXED_SORTED)); w.addDocument(doc); } w.commit(); IndexReader reader = w.getReader(); DocValues docValues = MultiDocValues.getDocValues(reader, "field"); Source source = getSource(docValues); SortedSource asSortedSource = source.asSortedSource(); int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator()); BytesRef expected = new BytesRef(); BytesRef actual = new BytesRef(); assertEquals(hash.size(), asSortedSource.getValueCount()); for (int i = 0; i < hash.size(); i++) { hash.get(sort[i], expected); asSortedSource.getByOrd(i, actual); assertEquals(expected.utf8ToString(), actual.utf8ToString()); int ord = asSortedSource.getOrdByValue(expected, actual); assertEquals(i, ord); } AtomicReader slowR = SlowCompositeReaderWrapper.wrap(reader); Set<Entry<String, String>> entrySet = docToString.entrySet(); for (Entry<String, String> entry : entrySet) { int docId = docId(slowR, new Term("id", entry.getKey())); expected = new BytesRef(entry.getValue()); assertEquals(expected, asSortedSource.getBytes(docId, actual)); } reader.close(); w.close(); d.close(); } } public int docId(AtomicReader reader, Term term) throws IOException { int docFreq = reader.docFreq(term); assertEquals(1, docFreq); DocsEnum termDocsEnum = reader.termDocsEnum(term); int nextDoc = termDocsEnum.nextDoc(); assertEquals(DocIdSetIterator.NO_MORE_DOCS, termDocsEnum.nextDoc()); return nextDoc; } public void testWithThreads() throws Exception { Random random = random(); final int NUM_DOCS = atLeast(100); final Directory dir = newDirectory(); final RandomIndexWriter writer = new RandomIndexWriter(random, dir); final boolean allowDups = random.nextBoolean(); final Set<String> seen = new HashSet<String>(); if (VERBOSE) { System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups); } int numDocs = 0; final List<BytesRef> docValues = new ArrayList<BytesRef>(); // TODO: deletions while (numDocs < NUM_DOCS) { final String s; if (random.nextBoolean()) { s = _TestUtil.randomSimpleString(random); } else { s = _TestUtil.randomUnicodeString(random); } final BytesRef br = new BytesRef(s); if (!allowDups) { if (seen.contains(s)) { continue; } seen.add(s); } if (VERBOSE) { System.out.println(" " + numDocs + ": s=" + s); } final Document doc = new Document(); doc.add(new SortedBytesDocValuesField("stringdv", br)); doc.add(new PackedLongDocValuesField("id", numDocs)); docValues.add(br); writer.addDocument(doc); numDocs++; if (random.nextInt(40) == 17) { // force flush writer.getReader().close(); } } writer.forceMerge(1); final DirectoryReader r = writer.getReader(); writer.close(); final AtomicReader sr = getOnlySegmentReader(r); final DocValues dv = sr.docValues("stringdv"); assertNotNull(dv); final long END_TIME = System.currentTimeMillis() + (TEST_NIGHTLY ? 30 : 1); final DocValues.Source docIDToID = sr.docValues("id").getSource(); final int NUM_THREADS = _TestUtil.nextInt(random(), 1, 10); Thread[] threads = new Thread[NUM_THREADS]; for(int thread=0;thread<NUM_THREADS;thread++) { threads[thread] = new Thread() { @Override public void run() { Random random = random(); final DocValues.Source stringDVSource; final DocValues.Source stringDVDirectSource; try { stringDVSource = dv.getSource(); assertNotNull(stringDVSource); stringDVDirectSource = dv.getDirectSource(); assertNotNull(stringDVDirectSource); } catch (IOException ioe) { throw new RuntimeException(ioe); } while(System.currentTimeMillis() < END_TIME) { final DocValues.Source source; if (random.nextBoolean()) { source = stringDVSource; } else { source = stringDVDirectSource; } final DocValues.SortedSource sortedSource = source.asSortedSource(); assertNotNull(sortedSource); final BytesRef scratch = new BytesRef(); for(int iter=0;iter<100;iter++) { final int docID = random.nextInt(sr.maxDoc()); final BytesRef br = sortedSource.getBytes(docID, scratch); assertEquals(docValues.get((int) docIDToID.getInt(docID)), br); } } } }; threads[thread].start(); } for(Thread thread : threads) { thread.join(); } r.close(); dir.close(); } // LUCENE-3870 public void testLengthPrefixAcrossTwoPages() throws Exception { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); Document doc = new Document(); byte[] bytes = new byte[32764]; BytesRef b = new BytesRef(); b.bytes = bytes; b.length = bytes.length; doc.add(new DerefBytesDocValuesField("field", b)); w.addDocument(doc); bytes[0] = 1; w.addDocument(doc); w.forceMerge(1); DirectoryReader r = w.getReader(); Source s = getOnlySegmentReader(r).docValues("field").getSource(); BytesRef bytes1 = s.getBytes(0, new BytesRef()); assertEquals(bytes.length, bytes1.length); bytes[0] = 0; assertEquals(b, bytes1); bytes1 = s.getBytes(1, new BytesRef()); assertEquals(bytes.length, bytes1.length); bytes[0] = 1; assertEquals(b, bytes1); r.close(); w.close(); d.close(); } public void testFixedLengthNotReallyFixed() throws IOException { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); Document doc = new Document(); doc.add(new DerefBytesDocValuesField("foo", new BytesRef("bar"), true)); w.addDocument(doc); doc = new Document(); doc.add(new DerefBytesDocValuesField("foo", new BytesRef("bazz"), true)); try { w.addDocument(doc); } catch (IllegalArgumentException expected) { // expected } w.close(); d.close(); } public void testDocValuesUnstored() throws IOException { Directory dir = newDirectory(); IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); iwconfig.setMergePolicy(newLogMergePolicy()); IndexWriter writer = new IndexWriter(dir, iwconfig); for (int i = 0; i < 50; i++) { Document doc = new Document(); doc.add(new PackedLongDocValuesField("dv", i)); doc.add(new TextField("docId", "" + i, Field.Store.YES)); writer.addDocument(doc); } DirectoryReader r = writer.getReader(); SlowCompositeReaderWrapper slow = new SlowCompositeReaderWrapper(r); FieldInfos fi = slow.getFieldInfos(); FieldInfo dvInfo = fi.fieldInfo("dv"); assertTrue(dvInfo.hasDocValues()); DocValues dv = slow.docValues("dv"); Source source = dv.getDirectSource(); for (int i = 0; i < 50; i++) { assertEquals(i, source.getInt(i)); Document d = slow.document(i); // cannot use d.get("dv") due to another bug! assertNull(d.getField("dv")); assertEquals(Integer.toString(i), d.get("docId")); } slow.close(); writer.close(); dir.close(); } }