BaseStoredFieldsFormatTestCase.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;

import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;

/**
 * Base class aiming at testing {@link StoredFieldsFormat stored fields formats}.
 * To test a new format, all you need is to register a new {@link Codec} which
 * uses it and extend this class and override {@link #getCodec()}.
 * @lucene.experimental
 */
public abstract class BaseStoredFieldsFormatTestCase extends BaseIndexFileFormatTestCase {

  @Override
  protected void addRandomFields(Document d) {
    final int numValues = random().nextInt(3);
    for (int i = 0; i < numValues; ++i) {
      d.add(new StoredField("f", TestUtil.randomSimpleString(random(), 100)));
    }
  }

  public void testRandomStoredFields() throws IOException {
    Directory dir = newDirectory();
    Random rand = random();
    RandomIndexWriter w = new RandomIndexWriter(rand, dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(TestUtil.nextInt(rand, 5, 20)));
    //w.w.setNoCFSRatio(0.0);
    final int docCount = atLeast(200);
    final int fieldCount = TestUtil.nextInt(rand, 1, 5);

    final List<Integer> fieldIDs = new ArrayList<>();

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setTokenized(false);
    Field idField = newField("id", "", customType);

    for(int i=0;i<fieldCount;i++) {
      fieldIDs.add(i);
    }

    final Map<String,Document> docs = new HashMap<>();

    if (VERBOSE) {
      System.out.println("TEST: build index docCount=" + docCount);
    }

    FieldType customType2 = new FieldType();
    customType2.setStored(true);
    for(int i=0;i<docCount;i++) {
      Document doc = new Document();
      doc.add(idField);
      final String id = ""+i;
      idField.setStringValue(id);
      docs.put(id, doc);
      if (VERBOSE) {
        System.out.println("TEST: add doc id=" + id);
      }

      for(int field: fieldIDs) {
        final String s;
        if (rand.nextInt(4) != 3) {
          s = TestUtil.randomUnicodeString(rand, 1000);
          doc.add(newField("f"+field, s, customType2));
        } else {
          s = null;
        }
      }
      w.addDocument(doc);
      if (rand.nextInt(50) == 17) {
        // mixup binding of field name -> Number every so often
        Collections.shuffle(fieldIDs, random());
      }
      if (rand.nextInt(5) == 3 && i > 0) {
        final String delID = ""+rand.nextInt(i);
        if (VERBOSE) {
          System.out.println("TEST: delete doc id=" + delID);
        }
        w.deleteDocuments(new Term("id", delID));
        docs.remove(delID);
      }
    }

    if (VERBOSE) {
      System.out.println("TEST: " + docs.size() + " docs in index; now load fields");
    }
    if (docs.size() > 0) {
      String[] idsList = docs.keySet().toArray(new String[docs.size()]);

      for(int x=0;x<2;x++) {
        IndexReader r = w.getReader();
        IndexSearcher s = newSearcher(r);

        if (VERBOSE) {
          System.out.println("TEST: cycle x=" + x + " r=" + r);
        }

        int num = atLeast(1000);
        for(int iter=0;iter<num;iter++) {
          String testID = idsList[rand.nextInt(idsList.length)];
          if (VERBOSE) {
            System.out.println("TEST: test id=" + testID);
          }
          TopDocs hits = s.search(new TermQuery(new Term("id", testID)), 1);
          assertEquals(1, hits.totalHits);
          Document doc = r.document(hits.scoreDocs[0].doc);
          Document docExp = docs.get(testID);
          for(int i=0;i<fieldCount;i++) {
            assertEquals("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.get("f"+i),  doc.get("f"+i));
          }
        }
        r.close();
        w.forceMerge(1);
      }
    }
    w.close();
    dir.close();
  }
  
  // LUCENE-1727: make sure doc fields are stored in order
  public void testStoredFieldsOrder() throws Throwable {
    Directory d = newDirectory();
    IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
    Document doc = new Document();

    FieldType customType = new FieldType();
    customType.setStored(true);
    doc.add(newField("zzz", "a b c", customType));
    doc.add(newField("aaa", "a b c", customType));
    doc.add(newField("zzz", "1 2 3", customType));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    Document doc2 = r.document(0);
    Iterator<IndexableField> it = doc2.getFields().iterator();
    assertTrue(it.hasNext());
    Field f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "aaa");
    assertEquals(f.stringValue(), "a b c");

    assertTrue(it.hasNext());
    f = (Field) it.next();
    assertEquals(f.name(), "zzz");
    assertEquals(f.stringValue(), "1 2 3");
    assertFalse(it.hasNext());
    r.close();
    w.close();
    d.close();
  }
  
  // LUCENE-1219
  public void testBinaryFieldOffsetLength() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    byte[] b = new byte[50];
    for(int i=0;i<50;i++)
      b[i] = (byte) (i+77);

    Document doc = new Document();
    Field f = new StoredField("binary", b, 10, 17);
    byte[] bx = f.binaryValue().bytes;
    assertTrue(bx != null);
    assertEquals(50, bx.length);
    assertEquals(10, f.binaryValue().offset);
    assertEquals(17, f.binaryValue().length);
    doc.add(f);
    w.addDocument(doc);
    w.close();

    IndexReader ir = DirectoryReader.open(dir);
    Document doc2 = ir.document(0);
    IndexableField f2 = doc2.getField("binary");
    b = f2.binaryValue().bytes;
    assertTrue(b != null);
    assertEquals(17, b.length, 17);
    assertEquals(87, b[0]);
    ir.close();
    dir.close();
  }
  
  public void testNumericField() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    final int numDocs = atLeast(500);
    final Number[] answers = new Number[numDocs];
    final Class<?>[] typeAnswers = new Class<?>[numDocs];
    for(int id=0;id<numDocs;id++) {
      Document doc = new Document();
      final Field nf;
      final Number answer;
      final Class<?> typeAnswer;
      if (random().nextBoolean()) {
        // float/double
        if (random().nextBoolean()) {
          final float f = random().nextFloat();
          answer = Float.valueOf(f);
          nf = new StoredField("nf", f);
          typeAnswer = Float.class;
        } else {
          final double d = random().nextDouble();
          answer = Double.valueOf(d);
          nf = new StoredField("nf", d);
          typeAnswer = Double.class;
        }
      } else {
        // int/long
        if (random().nextBoolean()) {
          final int i = random().nextInt();
          answer = Integer.valueOf(i);
          nf = new StoredField("nf", i);
          typeAnswer = Integer.class;
        } else {
          final long l = random().nextLong();
          answer = Long.valueOf(l);
          nf = new StoredField("nf", l);
          typeAnswer = Long.class;
        }
      }
      doc.add(nf);
      answers[id] = answer;
      typeAnswers[id] = typeAnswer;
      doc.add(new StoredField("id", id));
      doc.add(new IntPoint("id", id));
      doc.add(new NumericDocValuesField("id", id));
      w.addDocument(doc);
    }
    final DirectoryReader r = w.getReader();
    w.close();
    
    assertEquals(numDocs, r.numDocs());

    for(LeafReaderContext ctx : r.leaves()) {
      final LeafReader sub = ctx.reader();
      final NumericDocValues ids = DocValues.getNumeric(sub, "id");
      for(int docID=0;docID<sub.numDocs();docID++) {
        final Document doc = sub.document(docID);
        final Field f = (Field) doc.getField("nf");
        assertTrue("got f=" + f, f instanceof StoredField);
        assertEquals(docID, ids.nextDoc());
        assertEquals(answers[(int) ids.longValue()], f.numericValue());
      }
    }
    r.close();
    dir.close();
  }

  public void testIndexedBit() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    FieldType onlyStored = new FieldType();
    onlyStored.setStored(true);
    doc.add(new Field("field", "value", onlyStored));
    doc.add(new StringField("field2", "value", Field.Store.YES));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();
    assertEquals(IndexOptions.NONE, r.document(0).getField("field").fieldType().indexOptions());
    assertNotNull(r.document(0).getField("field2").fieldType().indexOptions());
    r.close();
    dir.close();
  }
  
  public void testReadSkip() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
    
    FieldType ft = new FieldType();
    ft.setStored(true);
    ft.freeze();

    final String string = TestUtil.randomSimpleString(random(), 50);
    final byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
    final long l = random().nextBoolean() ? random().nextInt(42) : random().nextLong();
    final int i = random().nextBoolean() ? random().nextInt(42) : random().nextInt();
    final float f = random().nextFloat();
    final double d = random().nextDouble();

    List<Field> fields = Arrays.asList(
        new Field("bytes", bytes, ft),
        new Field("string", string, ft),
        new StoredField("long", l),
        new StoredField("int", i),
        new StoredField("float", f),
        new StoredField("double", d)
    );

    for (int k = 0; k < 100; ++k) {
      Document doc = new Document();
      for (Field fld : fields) {
        doc.add(fld);
      }
      iw.w.addDocument(doc);
    }
    iw.commit();

    final DirectoryReader reader = DirectoryReader.open(dir);
    final int docID = random().nextInt(100);
    for (Field fld : fields) {
      String fldName = fld.name();
      final Document sDoc = reader.document(docID, Collections.singleton(fldName));
      final IndexableField sField = sDoc.getField(fldName);
      if (Field.class.equals(fld.getClass())) {
        assertEquals(fld.binaryValue(), sField.binaryValue());
        assertEquals(fld.stringValue(), sField.stringValue());
      } else {
        assertEquals(fld.numericValue(), sField.numericValue());
      }
    }
    reader.close();
    iw.close();
    dir.close();
  }
  
  public void testEmptyDocs() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
    
    // make sure that the fact that documents might be empty is not a problem
    final Document emptyDoc = new Document();
    final int numDocs = random().nextBoolean() ? 1 : atLeast(1000);
    for (int i = 0; i < numDocs; ++i) {
      iw.addDocument(emptyDoc);
    }
    iw.commit();
    final DirectoryReader rd = DirectoryReader.open(dir);
    for (int i = 0; i < numDocs; ++i) {
      final Document doc = rd.document(i);
      assertNotNull(doc);
      assertTrue(doc.getFields().isEmpty());
    }
    rd.close();
    
    iw.close();
    dir.close();
  }
  
  public void testConcurrentReads() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);
    
    // make sure the readers are properly cloned
    final Document doc = new Document();
    final Field field = new StringField("fld", "", Store.YES);
    doc.add(field);
    final int numDocs = atLeast(1000);
    for (int i = 0; i < numDocs; ++i) {
      field.setStringValue("" + i);
      iw.addDocument(doc);
    }
    iw.commit();

    final DirectoryReader rd = DirectoryReader.open(dir);
    final IndexSearcher searcher = new IndexSearcher(rd);
    final int concurrentReads = atLeast(5);
    final int readsPerThread = atLeast(50);
    final List<Thread> readThreads = new ArrayList<>();
    final AtomicReference<Exception> ex = new AtomicReference<>();
    for (int i = 0; i < concurrentReads; ++i) {
      readThreads.add(new Thread() {

        int[] queries;

        {
          queries = new int[readsPerThread];
          for (int i = 0; i < queries.length; ++i) {
            queries[i] = random().nextInt(numDocs);
          }
        }

        @Override
        public void run() {
          for (int q : queries) {
            final Query query = new TermQuery(new Term("fld", "" + q));
            try {
              final TopDocs topDocs = searcher.search(query, 1);
              if (topDocs.totalHits != 1) {
                throw new IllegalStateException("Expected 1 hit, got " + topDocs.totalHits);
              }
              final Document sdoc = rd.document(topDocs.scoreDocs[0].doc);
              if (sdoc == null || sdoc.get("fld") == null) {
                throw new IllegalStateException("Could not find document " + q);
              }
              if (!Integer.toString(q).equals(sdoc.get("fld"))) {
                throw new IllegalStateException("Expected " + q + ", but got " + sdoc.get("fld"));
              }
            } catch (Exception e) {
              ex.compareAndSet(null, e);
            }
          }
        }
      });
    }
    for (Thread thread : readThreads) {
      thread.start();
    }
    for (Thread thread : readThreads) {
      thread.join();
    }
    rd.close();
    if (ex.get() != null) {
      throw ex.get();
    }
    
    iw.close();
    dir.close();
  }
  
  private byte[] randomByteArray(int length, int max) {
    final byte[] result = new byte[length];
    for (int i = 0; i < length; ++i) {
      result[i] = (byte) random().nextInt(max);
    }
    return result;
  }
  
  public void testWriteReadMerge() throws IOException {
    // get another codec, other than the default: so we are merging segments across different codecs
    final Codec otherCodec;
    if ("SimpleText".equals(Codec.getDefault().getName())) {
      otherCodec = TestUtil.getDefaultCodec();
    } else {
      otherCodec = new SimpleTextCodec();
    }
    Directory dir = newDirectory();
    IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

    final int docCount = atLeast(200);
    final byte[][][] data = new byte [docCount][][];
    for (int i = 0; i < docCount; ++i) {
      final int fieldCount = rarely()
          ? RandomNumbers.randomIntBetween(random(), 1, 500)
          : RandomNumbers.randomIntBetween(random(), 1, 5);
      data[i] = new byte[fieldCount][];
      for (int j = 0; j < fieldCount; ++j) {
        final int length = rarely()
            ? random().nextInt(1000)
            : random().nextInt(10);
        final int max = rarely() ? 256 : 2;
        data[i][j] = randomByteArray(length, max);
      }
    }

    final FieldType type = new FieldType(StringField.TYPE_STORED);
    type.setIndexOptions(IndexOptions.NONE);
    type.freeze();
    IntPoint id = new IntPoint("id", 0);
    StoredField idStored = new StoredField("id", 0);
    for (int i = 0; i < data.length; ++i) {
      Document doc = new Document();
      doc.add(id);
      doc.add(idStored);
      id.setIntValue(i);
      idStored.setIntValue(i);
      for (int j = 0; j < data[i].length; ++j) {
        Field f = new Field("bytes" + j, data[i][j], type);
        doc.add(f);
      }
      iw.w.addDocument(doc);
      if (random().nextBoolean() && (i % (data.length / 10) == 0)) {
        iw.w.close();
        IndexWriterConfig iwConfNew = newIndexWriterConfig(new MockAnalyzer(random()));
        // test merging against a non-compressing codec
        if (iwConf.getCodec() == otherCodec) {
          iwConfNew.setCodec(Codec.getDefault());
        } else {
          iwConfNew.setCodec(otherCodec);
        }
        iwConf = iwConfNew;
        iw = new RandomIndexWriter(random(), dir, iwConf);
      }
    }

    for (int i = 0; i < 10; ++i) {
      final int min = random().nextInt(data.length);
      final int max = min + random().nextInt(20);
      iw.deleteDocuments(IntPoint.newRangeQuery("id", min, max-1));
    }

    iw.forceMerge(2); // force merges with deletions

    iw.commit();

    final DirectoryReader ir = DirectoryReader.open(dir);
    assertTrue(ir.numDocs() > 0);
    int numDocs = 0;
    for (int i = 0; i < ir.maxDoc(); ++i) {
      final Document doc = ir.document(i);
      if (doc == null) {
        continue;
      }
      ++ numDocs;
      final int docId = doc.getField("id").numericValue().intValue();
      assertEquals(data[docId].length + 1, doc.getFields().size());
      for (int j = 0; j < data[docId].length; ++j) {
        final byte[] arr = data[docId][j];
        final BytesRef arr2Ref = doc.getBinaryValue("bytes" + j);
        final byte[] arr2 = Arrays.copyOfRange(arr2Ref.bytes, arr2Ref.offset, arr2Ref.offset + arr2Ref.length);
        assertArrayEquals(arr, arr2);
      }
    }
    assertTrue(ir.numDocs() <= numDocs);
    ir.close();

    iw.deleteAll();
    iw.commit();
    iw.forceMerge(1);
    
    iw.close();
    dir.close();
  }

  /** A dummy filter reader that reverse the order of documents in stored fields. */
  private static class DummyFilterLeafReader extends FilterLeafReader {

    public DummyFilterLeafReader(LeafReader in) {
      super(in);
    }

    @Override
    public void document(int docID, StoredFieldVisitor visitor) throws IOException {
      super.document(maxDoc() - 1 - docID, visitor);
    }

    @Override
    public CacheHelper getCoreCacheHelper() {
      return null;
    }

    @Override
    public CacheHelper getReaderCacheHelper() {
      return null;
    }

  }

  private static class DummyFilterDirectoryReader extends FilterDirectoryReader {

    public DummyFilterDirectoryReader(DirectoryReader in) throws IOException {
      super(in, new SubReaderWrapper() {
        @Override
        public LeafReader wrap(LeafReader reader) {
          return new DummyFilterLeafReader(reader);
        }
      });
    }

    @Override
    protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
      return new DummyFilterDirectoryReader(in);
    }

    @Override
    public CacheHelper getReaderCacheHelper() {
      return null;
    }
    
  }

  public void testMergeFilterReader() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    final int numDocs = atLeast(200);
    final String[] stringValues = new String[10];
    for (int i = 0; i < stringValues.length; ++i) {
      stringValues[i] = RandomStrings.randomRealisticUnicodeOfLength(random(), 10);
    }
    Document[] docs = new Document[numDocs];
    for (int i = 0; i < numDocs; ++i) {
      Document doc = new Document();
      doc.add(new StringField("to_delete", random().nextBoolean() ? "yes" : "no", Store.NO));
      doc.add(new StoredField("id", i));
      doc.add(new StoredField("i", random().nextInt(50)));
      doc.add(new StoredField("l", random().nextLong()));
      doc.add(new StoredField("d", random().nextDouble()));
      doc.add(new StoredField("f", random().nextFloat()));
      doc.add(new StoredField("s", RandomPicks.randomFrom(random(), stringValues)));
      doc.add(new StoredField("b", new BytesRef(RandomPicks.randomFrom(random(), stringValues))));
      docs[i] = doc;
      w.addDocument(doc);
    }
    if (random().nextBoolean()) {
      w.deleteDocuments(new Term("to_delete", "yes"));
    }
    w.commit();
    w.close();
    
    DirectoryReader reader = new DummyFilterDirectoryReader(DirectoryReader.open(dir));
    
    Directory dir2 = newDirectory();
    w = new RandomIndexWriter(random(), dir2);
    TestUtil.addIndexesSlowly(w.w, reader);
    reader.close();
    dir.close();

    reader = w.getReader();
    for (int i = 0; i < reader.maxDoc(); ++i) {
      final Document doc = reader.document(i);
      final int id = doc.getField("id").numericValue().intValue();
      final Document expected = docs[id];
      assertEquals(expected.get("s"), doc.get("s"));
      assertEquals(expected.getField("i").numericValue(), doc.getField("i").numericValue());
      assertEquals(expected.getField("l").numericValue(), doc.getField("l").numericValue());
      assertEquals(expected.getField("d").numericValue(), doc.getField("d").numericValue());
      assertEquals(expected.getField("f").numericValue(), doc.getField("f").numericValue());
      assertEquals(expected.getField("b").binaryValue(), doc.getField("b").binaryValue());
    }

    reader.close();
    w.close();
    TestUtil.checkIndex(dir2);
    dir2.close();
  }

  @Nightly
  public void testBigDocuments() throws IOException {
    assumeWorkingMMapOnWindows();
    
    // "big" as "much bigger than the chunk size"
    // for this test we force a FS dir
    // we can't just use newFSDirectory, because this test doesn't really index anything.
    // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
    Directory dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("testBigDocuments")));
    IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

    if (dir instanceof MockDirectoryWrapper) {
      ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
    }

    final Document emptyDoc = new Document(); // emptyDoc
    final Document bigDoc1 = new Document(); // lot of small fields
    final Document bigDoc2 = new Document(); // 1 very big field

    final Field idField = new StringField("id", "", Store.NO);
    emptyDoc.add(idField);
    bigDoc1.add(idField);
    bigDoc2.add(idField);

    final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
    onlyStored.setIndexOptions(IndexOptions.NONE);

    final Field smallField = new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
    final int numFields = RandomNumbers.randomIntBetween(random(), 500000, 1000000);
    for (int i = 0; i < numFields; ++i) {
      bigDoc1.add(smallField);
    }

    final Field bigField = new Field("fld", randomByteArray(RandomNumbers.randomIntBetween(random(), 1000000, 5000000), 2), onlyStored);
    bigDoc2.add(bigField);

    final int numDocs = atLeast(5);
    final Document[] docs = new Document[numDocs];
    for (int i = 0; i < numDocs; ++i) {
      docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
    }
    for (int i = 0; i < numDocs; ++i) {
      idField.setStringValue("" + i);
      iw.addDocument(docs[i]);
      if (random().nextInt(numDocs) == 0) {
        iw.commit();
      }
    }
    iw.commit();
    iw.forceMerge(1); // look at what happens when big docs are merged
    final DirectoryReader rd = DirectoryReader.open(dir);
    final IndexSearcher searcher = new IndexSearcher(rd);
    for (int i = 0; i < numDocs; ++i) {
      final Query query = new TermQuery(new Term("id", "" + i));
      final TopDocs topDocs = searcher.search(query, 1);
      assertEquals("" + i, 1, topDocs.totalHits);
      final Document doc = rd.document(topDocs.scoreDocs[0].doc);
      assertNotNull(doc);
      final IndexableField[] fieldValues = doc.getFields("fld");
      assertEquals(docs[i].getFields("fld").length, fieldValues.length);
      if (fieldValues.length > 0) {
        assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
      }
    }
    rd.close();
    iw.close();
    dir.close();
  }

  public void testBulkMergeWithDeletes() throws IOException {
    final int numDocs = atLeast(200);
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));
    for (int i = 0; i < numDocs; ++i) {
      Document doc = new Document();
      doc.add(new StringField("id", Integer.toString(i), Store.YES));
      doc.add(new StoredField("f", TestUtil.randomSimpleString(random())));
      w.addDocument(doc);
    }
    final int deleteCount = TestUtil.nextInt(random(), 5, numDocs);
    for (int i = 0; i < deleteCount; ++i) {
      final int id = random().nextInt(numDocs);
      w.deleteDocuments(new Term("id", Integer.toString(id)));
    }
    w.commit();
    w.close();
    w = new RandomIndexWriter(random(), dir);
    w.forceMerge(TestUtil.nextInt(random(), 1, 3));
    w.commit();
    w.close();
    TestUtil.checkIndex(dir);
    dir.close();
  }

  /** mix up field numbers, merge, and check that data is correct */
  public void testMismatchedFields() throws Exception {
    Directory dirs[] = new Directory[10];
    for (int i = 0; i < dirs.length; i++) {
      Directory dir = newDirectory();
      IndexWriterConfig iwc = new IndexWriterConfig(null);
      IndexWriter iw = new IndexWriter(dir, iwc);
      Document doc = new Document();
      for (int j = 0; j < 10; j++) {
        // add fields where name=value (e.g. 3=3) so we can detect if stuff gets screwed up.
        doc.add(new StringField(Integer.toString(j), Integer.toString(j), Field.Store.YES));
      }
      for (int j = 0; j < 10; j++) {
        iw.addDocument(doc);
      }
      
      DirectoryReader reader = DirectoryReader.open(iw);
      // mix up fields explicitly
      if (random().nextBoolean()) {
        reader = new MismatchedDirectoryReader(reader, random());
      }
      dirs[i] = newDirectory();
      IndexWriter adder = new IndexWriter(dirs[i], new IndexWriterConfig(null));
      TestUtil.addIndexesSlowly(adder, reader);
      adder.commit();
      adder.close();
      
      IOUtils.close(reader, iw, dir);
    }
    
    Directory everything = newDirectory();
    IndexWriter iw = new IndexWriter(everything, new IndexWriterConfig(null));
    iw.addIndexes(dirs);
    iw.forceMerge(1);
    
    LeafReader ir = getOnlyLeafReader(DirectoryReader.open(iw));
    for (int i = 0; i < ir.maxDoc(); i++) {
      Document doc = ir.document(i);
      assertEquals(10, doc.getFields().size());
      for (int j = 0; j < 10; j++) {
        assertEquals(Integer.toString(j), doc.get(Integer.toString(j)));
      }
    }

    IOUtils.close(iw, ir, everything);
    IOUtils.close(dirs);
  }
}