/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;

/** Tests IndexWriter doc-values updates over a mix of numeric and binary doc-values fields. */
public class TestMixedDocValuesUpdates extends LuceneTestCase {

  public void testManyReopensAndFields() throws Exception {
    Directory dir = newDirectory();
    final Random random = random();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
    LogMergePolicy lmp = newLogMergePolicy();
    lmp.setMergeFactor(3); // merge often
    conf.setMergePolicy(lmp);
    IndexWriter writer = new IndexWriter(dir, conf);

    final boolean isNRT = random.nextBoolean();
    DirectoryReader reader;
    if (isNRT) {
      reader = DirectoryReader.open(writer);
    } else {
      writer.commit();
      reader = DirectoryReader.open(dir);
    }

    final int numFields = random.nextInt(4) + 3; // 3-6
    final int numNDVFields = random.nextInt(numFields/2) + 1; // 1-3
    final long[] fieldValues = new long[numFields];
    for (int i = 0; i < fieldValues.length; i++) {
      fieldValues[i] = 1;
    }

    int numRounds = atLeast(15);
    int docID = 0;
    for (int i = 0; i < numRounds; i++) {
      int numDocs = atLeast(5);
      // System.out.println("TEST: round=" + i + ", numDocs=" + numDocs);
      for (int j = 0; j < numDocs; j++) {
        Document doc = new Document();
        doc.add(new StringField("id", "doc-" + docID, Store.NO));
        doc.add(new StringField("key", "all", Store.NO)); // update key
        // add all fields with their current value
        for (int f = 0; f < fieldValues.length; f++) {
          if (f < numNDVFields) {
            doc.add(new NumericDocValuesField("f" + f, fieldValues[f]));
          } else {
            doc.add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.toBytes(fieldValues[f])));
          }
        }
        writer.addDocument(doc);
        ++docID;
      }

      int fieldIdx = random.nextInt(fieldValues.length);
      String updateField = "f" + fieldIdx;
      if (fieldIdx < numNDVFields) {
        writer.updateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
      } else {
        writer.updateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.toBytes(++fieldValues[fieldIdx]));
      }
      // System.out.println("TEST: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);

      if (random.nextDouble() < 0.2) {
        int deleteDoc = random.nextInt(docID); // might also delete an already deleted document, ok!
        writer.deleteDocuments(new Term("id", "doc-" + deleteDoc));
        // System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
      }

      // verify reader
      if (!isNRT) {
        writer.commit();
      }

      // System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
      DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
      assertNotNull(newReader);
      reader.close();
      reader = newReader;
      // System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
      assertTrue(reader.numDocs() > 0); // we delete at most one document per round
      for (LeafReaderContext context : reader.leaves()) {
        LeafReader r = context.reader();
        // System.out.println(((SegmentReader) r).getSegmentName());
        Bits liveDocs = r.getLiveDocs();
        for (int field = 0; field < fieldValues.length; field++) {
          String f = "f" + field;
          BinaryDocValues bdv = r.getBinaryDocValues(f);
          NumericDocValues ndv = r.getNumericDocValues(f);
          if (field < numNDVFields) {
            assertNotNull(ndv);
            assertNull(bdv);
          } else {
            assertNull(ndv);
            assertNotNull(bdv);
          }
          int maxDoc = r.maxDoc();
          for (int doc = 0; doc < maxDoc; doc++) {
            if (liveDocs == null || liveDocs.get(doc)) {
              // System.out.println("doc=" + (doc + context.docBase) + " f='" + f + "' value=" + getValue(bdv, doc, scratch));
              if (field < numNDVFields) {
                assertEquals(doc, ndv.advance(doc));
                assertEquals("invalid numeric value for doc=" + doc + ", field=" + f + ", reader=" + r, fieldValues[field], ndv.longValue());
              } else {
                assertEquals(doc, bdv.advance(doc));
                assertEquals("invalid binary value for doc=" + doc + ", field=" + f + ", reader=" + r, fieldValues[field], TestBinaryDocValuesUpdates.getValue(bdv));
              }
            }
          }
        }
      }
      // System.out.println();
    }

    writer.close();
    IOUtils.close(reader, dir);
  }

  public void testStressMultiThreading() throws Exception {
    final Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    final IndexWriter writer = new IndexWriter(dir, conf);

    // create index
    final int numFields = TestUtil.nextInt(random(), 2, 4);
    final int numThreads = TestUtil.nextInt(random(), 3, 6);
    final int numDocs = atLeast(2000);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(new StringField("id", "doc" + i, Store.NO));
      double group = random().nextDouble();
      String g;
      if (group < 0.1) g = "g0";
      else if (group < 0.5) g = "g1";
      else if (group < 0.8) g = "g2";
      else g = "g3";
      doc.add(new StringField("updKey", g, Store.NO));
      for (int j = 0; j < numFields; j++) {
        long value = random().nextInt();
        doc.add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.toBytes(value)));
        doc.add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
      }
      writer.addDocument(doc);
    }

    final CountDownLatch done = new CountDownLatch(numThreads);
    final AtomicInteger numUpdates = new AtomicInteger(atLeast(100));

    // same thread updates a field as well as reopens
    Thread[] threads = new Thread[numThreads];
    for (int i = 0; i < threads.length; i++) {
      threads[i] = new Thread("UpdateThread-" + i) {
        @Override
        public void run() {
          DirectoryReader reader = null;
          boolean success = false;
          try {
            Random random = random();
            while (numUpdates.getAndDecrement() > 0) {
              double group = random.nextDouble();
              Term t;
              if (group < 0.1) t = new Term("updKey", "g0");
              else if (group < 0.5) t = new Term("updKey", "g1");
              else if (group < 0.8) t = new Term("updKey", "g2");
              else t = new Term("updKey", "g3");
              // System.out.println("[" + Thread.currentThread().getName() + "] numUpdates=" + numUpdates + " updateTerm=" + t);
              int field = random().nextInt(numFields);
              final String f = "f" + field;
              final String cf = "cf" + field;
              long updValue = random.nextInt();
              // System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=" + updValue);
              writer.updateDocValues(t, new BinaryDocValuesField(f, TestBinaryDocValuesUpdates.toBytes(updValue)),
                  new NumericDocValuesField(cf, updValue*2));

              if (random.nextDouble() < 0.2) {
                // delete a random document
                int doc = random.nextInt(numDocs);
                // System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc);
                writer.deleteDocuments(new Term("id", "doc" + doc));
              }

              if (random.nextDouble() < 0.05) { // commit every 20 updates on average
                // System.out.println("[" + Thread.currentThread().getName() + "] commit");
                writer.commit();
              }

              if (random.nextDouble() < 0.1) { // reopen NRT reader (apply updates), on average once every 10 updates
                if (reader == null) {
                  // System.out.println("[" + Thread.currentThread().getName() + "] open NRT");
                  reader = DirectoryReader.open(writer);
                } else {
                  // System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT");
                  DirectoryReader r2 = DirectoryReader.openIfChanged(reader, writer);
                  if (r2 != null) {
                    reader.close();
                    reader = r2;
                  }
                }
              }
            }
            // System.out.println("[" + Thread.currentThread().getName() + "] DONE");
            success = true;
          } catch (IOException e) {
            throw new RuntimeException(e);
          } finally {
            if (reader != null) {
              try {
                reader.close();
              } catch (IOException e) {
                if (success) { // suppress this exception only if there was another exception
                  throw new RuntimeException(e);
                }
              }
            }
            done.countDown();
          }
        }
      };
    }

    for (Thread t : threads) t.start();
    done.await();
    writer.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    BytesRef scratch = new BytesRef();
    for (LeafReaderContext context : reader.leaves()) {
      LeafReader r = context.reader();
      for (int i = 0; i < numFields; i++) {
        BinaryDocValues bdv = r.getBinaryDocValues("f" + i);
        NumericDocValues control = r.getNumericDocValues("cf" + i);
        Bits liveDocs = r.getLiveDocs();
        for (int j = 0; j < r.maxDoc(); j++) {
          if (liveDocs == null || liveDocs.get(j)) {
            assertEquals(j, control.advance(j));
            long ctrlValue = control.longValue();
            assertEquals(j, bdv.advance(j));
            long bdvValue = TestBinaryDocValuesUpdates.getValue(bdv) * 2;
            // if (ctrlValue != bdvValue) {
            //   System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.document(j).get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
            // }
            assertEquals(ctrlValue, bdvValue);
          }
        }
      }
    }
    reader.close();

    dir.close();
  }

  public void testUpdateDifferentDocsInDifferentGens() throws Exception {
    // update same document multiple times across generations
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
    conf.setMaxBufferedDocs(4);
    IndexWriter writer = new IndexWriter(dir, conf);
    final int numDocs = atLeast(10);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(new StringField("id", "doc" + i, Store.NO));
      long value = random().nextInt();
      doc.add(new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.toBytes(value)));
      doc.add(new NumericDocValuesField("cf", value * 2));
      writer.addDocument(doc);
    }

    int numGens = atLeast(5);
    for (int i = 0; i < numGens; i++) {
      int doc = random().nextInt(numDocs);
      Term t = new Term("id", "doc" + doc);
      long value = random().nextLong();
      writer.updateDocValues(t, new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.toBytes(value)),
          new NumericDocValuesField("cf", value*2));
      DirectoryReader reader = DirectoryReader.open(writer);
      for (LeafReaderContext context : reader.leaves()) {
        LeafReader r = context.reader();
        BinaryDocValues fbdv = r.getBinaryDocValues("f");
        NumericDocValues cfndv = r.getNumericDocValues("cf");
        for (int j = 0; j < r.maxDoc(); j++) {
          assertEquals(j, cfndv.nextDoc());
          assertEquals(j, fbdv.nextDoc());
          assertEquals(cfndv.longValue(), TestBinaryDocValuesUpdates.getValue(fbdv) * 2);
        }
      }
      reader.close();
    }
    writer.close();
    dir.close();
  }

  @Nightly
  public void testTonsOfUpdates() throws Exception {
    // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
    Directory dir = newDirectory();
    final Random random = random();
    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
    conf.setRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
    IndexWriter writer = new IndexWriter(dir, conf);

    // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
    final int numDocs = atLeast(20000);
    final int numBinaryFields = atLeast(5);
    final int numTerms = TestUtil.nextInt(random, 10, 100); // terms should affect many docs
    Set<String> updateTerms = new HashSet<>();
    while (updateTerms.size() < numTerms) {
      updateTerms.add(TestUtil.randomSimpleString(random));
    }

    // System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

    // build a large index with many BDV fields and update terms
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      int numUpdateTerms = TestUtil.nextInt(random, 1, numTerms / 10);
      for (int j = 0; j < numUpdateTerms; j++) {
        doc.add(new StringField("upd", RandomPicks.randomFrom(random, updateTerms), Store.NO));
      }
      for (int j = 0; j < numBinaryFields; j++) {
        long val = random.nextInt();
        doc.add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.toBytes(val)));
        doc.add(new NumericDocValuesField("cf" + j, val * 2));
      }
      writer.addDocument(doc);
    }

    writer.commit(); // commit so there's something to apply to

    // set to flush every 2048 bytes (approximately every 12 updates), so we get
    // many flushes during binary updates
    writer.getConfig().setRAMBufferSizeMB(2048.0 / 1024 / 1024);
    final int numUpdates = atLeast(100);
    // System.out.println("numUpdates=" + numUpdates);
    for (int i = 0; i < numUpdates; i++) {
      int field = random.nextInt(numBinaryFields);
      Term updateTerm = new Term("upd", RandomPicks.randomFrom(random, updateTerms));
      long value = random.nextInt();
      writer.updateDocValues(updateTerm, new BinaryDocValuesField("f"+field, TestBinaryDocValuesUpdates.toBytes(value)),
          new NumericDocValuesField("cf"+field, value*2));
    }

    writer.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    for (LeafReaderContext context : reader.leaves()) {
      for (int i = 0; i < numBinaryFields; i++) {
        LeafReader r = context.reader();
        BinaryDocValues f = r.getBinaryDocValues("f" + i);
        NumericDocValues cf = r.getNumericDocValues("cf" + i);
        for (int j = 0; j < r.maxDoc(); j++) {
          assertEquals(j, cf.nextDoc());
          assertEquals(j, f.nextDoc());
          assertEquals("reader=" + r + ", field=f" + i + ", doc=" + j, cf.longValue(), TestBinaryDocValuesUpdates.getValue(f) * 2);
        }
      }
    }
    reader.close();

    dir.close();
  }
}