package org.apache.lucene.index; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.Closeable; import java.io.IOException; import java.util.Iterator; import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.ByteDocValuesField; import org.apache.lucene.document.DerefBytesDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.document.IntDocValuesField; import org.apache.lucene.document.LongDocValuesField; import org.apache.lucene.document.PackedLongDocValuesField; import org.apache.lucene.document.ShortDocValuesField; import org.apache.lucene.document.SortedBytesDocValuesField; import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.index.IndexWriter; // javadoc import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.Version; import org.apache.lucene.util._TestUtil; /** Silly class that randomizes the indexing experience. EG * it may swap in a different merge policy/scheduler; may * commit periodically; may or may not forceMerge in the end, * may flush by doc count instead of RAM, etc. */ public class RandomIndexWriter implements Closeable { public IndexWriter w; private final Random r; int docCount; int flushAt; private double flushAtFactor = 1.0; private boolean getReaderCalled; private final int fixedBytesLength; private final long docValuesFieldPrefix; private volatile boolean doDocValues; private final Codec codec; // sugar // Randomly calls Thread.yield so we mixup thread scheduling private static final class MockIndexWriter extends IndexWriter { private final Random r; public MockIndexWriter(Random r, Directory dir, IndexWriterConfig conf) throws IOException { super(dir, conf); // TODO: this should be solved in a different way; Random should not be shared (!). this.r = new Random(r.nextLong()); } @Override boolean testPoint(String name) { if (r.nextInt(4) == 2) Thread.yield(); return true; } } /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT and MockAnalyzer */ public RandomIndexWriter(Random r, Directory dir) throws IOException { this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, new MockAnalyzer(r))); } /** create a RandomIndexWriter with a random config: Uses TEST_VERSION_CURRENT */ public RandomIndexWriter(Random r, Directory dir, Analyzer a) throws IOException { this(r, dir, LuceneTestCase.newIndexWriterConfig(r, LuceneTestCase.TEST_VERSION_CURRENT, a)); } /** create a RandomIndexWriter with a random config */ public RandomIndexWriter(Random r, Directory dir, Version v, Analyzer a) throws IOException { this(r, dir, LuceneTestCase.newIndexWriterConfig(r, v, a)); } /** create a RandomIndexWriter with the provided config */ public RandomIndexWriter(Random r, Directory dir, IndexWriterConfig c) throws IOException { // TODO: this should be solved in a different way; Random should not be shared (!). this.r = new Random(r.nextLong()); w = new MockIndexWriter(r, dir, c); flushAt = _TestUtil.nextInt(r, 10, 1000); codec = w.getConfig().getCodec(); if (LuceneTestCase.VERBOSE) { System.out.println("RIW dir=" + dir + " config=" + w.getConfig()); System.out.println("codec default=" + codec.getName()); } /* TODO: find some way to make this random... * This must be fixed across all fixed bytes * fields in one index. so if you open another writer * this might change if I use r.nextInt(x) * maybe we can peek at the existing files here? */ fixedBytesLength = 17; // NOTE: this means up to 13 * 5 unique fields (we have // 13 different DV types): docValuesFieldPrefix = r.nextInt(5); switchDoDocValues(); // Make sure we sometimes test indices that don't get // any forced merges: doRandomForceMerge = r.nextBoolean(); } private void switchDoDocValues() { // randomly enable / disable docValues doDocValues = LuceneTestCase.rarely(r); if (LuceneTestCase.VERBOSE) { if (doDocValues) { System.out.println("NOTE: RIW: turning on random DocValues fields"); } } } /** * Adds a Document. * @see IndexWriter#addDocument(Iterable) */ public <T extends IndexableField> void addDocument(final Iterable<T> doc) throws IOException { addDocument(doc, w.getAnalyzer()); } public <T extends IndexableField> void addDocument(final Iterable<T> doc, Analyzer a) throws IOException { if (doDocValues && doc instanceof Document) { randomPerDocFieldValues((Document) doc); } if (r.nextInt(5) == 3) { // TODO: maybe, we should simply buffer up added docs // (but we need to clone them), and only when // getReader, commit, etc. are called, we do an // addDocuments? Would be better testing. w.addDocuments(new Iterable<Iterable<T>>() { @Override public Iterator<Iterable<T>> iterator() { return new Iterator<Iterable<T>>() { boolean done; @Override public boolean hasNext() { return !done; } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public Iterable<T> next() { if (done) { throw new IllegalStateException(); } done = true; return doc; } }; } }, a); } else { w.addDocument(doc, a); } maybeCommit(); } private BytesRef getFixedRandomBytes() { final String randomUnicodeString = _TestUtil.randomFixedByteLengthUnicodeString(r, fixedBytesLength); BytesRef fixedRef = new BytesRef(randomUnicodeString); if (fixedRef.length > fixedBytesLength) { fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength); } else { fixedRef.grow(fixedBytesLength); fixedRef.length = fixedBytesLength; } return fixedRef; } private void randomPerDocFieldValues(Document doc) { DocValues.Type[] values = DocValues.Type.values(); DocValues.Type type = values[r.nextInt(values.length)]; String name = "random_" + type.name() + "" + docValuesFieldPrefix; if ("Lucene3x".equals(codec.getName()) || doc.getField(name) != null) { return; } final Field f; switch (type) { case BYTES_FIXED_DEREF: f = new DerefBytesDocValuesField(name, getFixedRandomBytes(), true); break; case BYTES_VAR_DEREF: f = new DerefBytesDocValuesField(name, new BytesRef(_TestUtil.randomUnicodeString(r, 20)), false); break; case BYTES_FIXED_STRAIGHT: f = new StraightBytesDocValuesField(name, getFixedRandomBytes(), true); break; case BYTES_VAR_STRAIGHT: f = new StraightBytesDocValuesField(name, new BytesRef(_TestUtil.randomUnicodeString(r, 20)), false); break; case BYTES_FIXED_SORTED: f = new SortedBytesDocValuesField(name, getFixedRandomBytes(), true); break; case BYTES_VAR_SORTED: f = new SortedBytesDocValuesField(name, new BytesRef(_TestUtil.randomUnicodeString(r, 20)), false); break; case FLOAT_32: f = new FloatDocValuesField(name, r.nextFloat()); break; case FLOAT_64: f = new DoubleDocValuesField(name, r.nextDouble()); break; case VAR_INTS: f = new PackedLongDocValuesField(name, r.nextLong()); break; case FIXED_INTS_16: // TODO: we should test negatives too? f = new ShortDocValuesField(name, (short) r.nextInt(Short.MAX_VALUE)); break; case FIXED_INTS_32: f = new IntDocValuesField(name, r.nextInt()); break; case FIXED_INTS_64: f = new LongDocValuesField(name, r.nextLong()); break; case FIXED_INTS_8: // TODO: we should test negatives too? f = new ByteDocValuesField(name, (byte) r.nextInt(128)); break; default: throw new IllegalArgumentException("no such type: " + type); } doc.add(f); } private void maybeCommit() throws IOException { if (docCount++ == flushAt) { if (LuceneTestCase.VERBOSE) { System.out.println("RIW.add/updateDocument: now doing a commit at docCount=" + docCount); } w.commit(); flushAt += _TestUtil.nextInt(r, (int) (flushAtFactor * 10), (int) (flushAtFactor * 1000)); if (flushAtFactor < 2e6) { // gradually but exponentially increase time b/w flushes flushAtFactor *= 1.05; } switchDoDocValues(); } } public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException { w.addDocuments(docs); maybeCommit(); } public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException { w.updateDocuments(delTerm, docs); maybeCommit(); } /** * Updates a document. * @see IndexWriter#updateDocument(Term, Iterable) */ public <T extends IndexableField> void updateDocument(Term t, final Iterable<T> doc) throws IOException { if (doDocValues) { randomPerDocFieldValues((Document) doc); } if (r.nextInt(5) == 3) { w.updateDocuments(t, new Iterable<Iterable<T>>() { @Override public Iterator<Iterable<T>> iterator() { return new Iterator<Iterable<T>>() { boolean done; @Override public boolean hasNext() { return !done; } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public Iterable<T> next() { if (done) { throw new IllegalStateException(); } done = true; return doc; } }; } }); } else { w.updateDocument(t, doc); } maybeCommit(); } public void addIndexes(Directory... dirs) throws IOException { w.addIndexes(dirs); } public void addIndexes(IndexReader... readers) throws IOException { w.addIndexes(readers); } public void deleteDocuments(Term term) throws IOException { w.deleteDocuments(term); } public void deleteDocuments(Query q) throws IOException { w.deleteDocuments(q); } public void commit() throws IOException { w.commit(); switchDoDocValues(); } public int numDocs() { return w.numDocs(); } public int maxDoc() { return w.maxDoc(); } public void deleteAll() throws IOException { w.deleteAll(); } public DirectoryReader getReader() throws IOException { return getReader(true); } private boolean doRandomForceMerge = true; private boolean doRandomForceMergeAssert = true; public void forceMergeDeletes(boolean doWait) throws IOException { w.forceMergeDeletes(doWait); } public void forceMergeDeletes() throws IOException { w.forceMergeDeletes(); } public void setDoRandomForceMerge(boolean v) { doRandomForceMerge = v; } public void setDoRandomForceMergeAssert(boolean v) { doRandomForceMergeAssert = v; } private void doRandomForceMerge() throws IOException { if (doRandomForceMerge) { final int segCount = w.getSegmentCount(); if (r.nextBoolean() || segCount == 0) { // full forceMerge if (LuceneTestCase.VERBOSE) { System.out.println("RIW: doRandomForceMerge(1)"); } w.forceMerge(1); } else { // partial forceMerge final int limit = _TestUtil.nextInt(r, 1, segCount); if (LuceneTestCase.VERBOSE) { System.out.println("RIW: doRandomForceMerge(" + limit + ")"); } w.forceMerge(limit); assert !doRandomForceMergeAssert || w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount(); } } switchDoDocValues(); } public DirectoryReader getReader(boolean applyDeletions) throws IOException { getReaderCalled = true; if (r.nextInt(20) == 2) { doRandomForceMerge(); } // If we are writing with PreFlexRW, force a full // IndexReader.open so terms are sorted in codepoint // order during searching: if (!applyDeletions || !codec.getName().equals("Lucene3x") && r.nextBoolean()) { if (LuceneTestCase.VERBOSE) { System.out.println("RIW.getReader: use NRT reader"); } if (r.nextInt(5) == 1) { w.commit(); } return w.getReader(applyDeletions); } else { if (LuceneTestCase.VERBOSE) { System.out.println("RIW.getReader: open new reader"); } w.commit(); switchDoDocValues(); if (r.nextBoolean()) { return DirectoryReader.open(w.getDirectory(), _TestUtil.nextInt(r, 1, 10)); } else { return w.getReader(applyDeletions); } } } /** * Close this writer. * @see IndexWriter#close() */ public void close() throws IOException { // if someone isn't using getReader() API, we want to be sure to // forceMerge since presumably they might open a reader on the dir. if (getReaderCalled == false && r.nextInt(8) == 2) { doRandomForceMerge(); } w.close(); } /** * Forces a forceMerge. * <p> * NOTE: this should be avoided in tests unless absolutely necessary, * as it will result in less test coverage. * @see IndexWriter#forceMerge(int) */ public void forceMerge(int maxSegmentCount) throws IOException { w.forceMerge(maxSegmentCount); } }