TestDocValuesIndexing.java example

Explorer
solr-analytics-master
- lucene
- solr
package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.ByteDocValuesField;
import org.apache.lucene.document.DerefBytesDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.IntDocValuesField;
import org.apache.lucene.document.LongDocValuesField;
import org.apache.lucene.document.PackedLongDocValuesField;
import org.apache.lucene.document.ShortDocValuesField;
import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;

/**
 * 
 * Tests DocValues integration into IndexWriter & Codecs
 * 
 */
@SuppressCodecs("Lucene3x")
public class TestDocValuesIndexing extends LuceneTestCase {
  /*
   * - add test for multi segment case with deletes
   * - add multithreaded tests / integrate into stress indexing?
   */
  
  /*
   * Simple test case to show how to use the API
   */
  public void testDocValuesSimple() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, writerConfig(false));
    for (int i = 0; i < 5; i++) {
      Document doc = new Document();
      doc.add(new PackedLongDocValuesField("docId", i));
      doc.add(new TextField("docId", "" + i, Field.Store.NO));
      writer.addDocument(doc);
    }
    writer.commit();
    writer.forceMerge(1, true);

    writer.close(true);

    DirectoryReader reader = DirectoryReader.open(dir, 1);
    assertEquals(1, reader.leaves().size());

    IndexSearcher searcher = new IndexSearcher(reader);

    BooleanQuery query = new BooleanQuery();
    query.add(new TermQuery(new Term("docId", "0")), BooleanClause.Occur.SHOULD);
    query.add(new TermQuery(new Term("docId", "1")), BooleanClause.Occur.SHOULD);
    query.add(new TermQuery(new Term("docId", "2")), BooleanClause.Occur.SHOULD);
    query.add(new TermQuery(new Term("docId", "3")), BooleanClause.Occur.SHOULD);
    query.add(new TermQuery(new Term("docId", "4")), BooleanClause.Occur.SHOULD);

    TopDocs search = searcher.search(query, 10);
    assertEquals(5, search.totalHits);
    ScoreDoc[] scoreDocs = search.scoreDocs;
    DocValues docValues = MultiDocValues.getDocValues(reader, "docId");
    Source source = docValues.getSource();
    for (int i = 0; i < scoreDocs.length; i++) {
      assertEquals(i, scoreDocs[i].doc);
      assertEquals(i, source.getInt(scoreDocs[i].doc));
    }
    reader.close();
    dir.close();
  }

  public void testIndexBytesNoDeletes() throws IOException {
    runTestIndexBytes(writerConfig(random().nextBoolean()), false);
  }

  public void testIndexBytesDeletes() throws IOException {
    runTestIndexBytes(writerConfig(random().nextBoolean()), true);
  }

  public void testIndexNumericsNoDeletes() throws IOException {
    runTestNumerics(writerConfig(random().nextBoolean()), false);
  }

  public void testIndexNumericsDeletes() throws IOException {
    runTestNumerics(writerConfig(random().nextBoolean()), true);
  }

  public void testAddIndexes() throws IOException {
    Directory d1 = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d1);
    Document doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    doc.add(new PackedLongDocValuesField("dv", 1));
    w.addDocument(doc);
    IndexReader r1 = w.getReader();
    w.close();

    Directory d2 = newDirectory();
    w = new RandomIndexWriter(random(), d2);
    doc = new Document();
    doc.add(newStringField("id", "2", Field.Store.YES));
    doc.add(new PackedLongDocValuesField("dv", 2));
    w.addDocument(doc);
    IndexReader r2 = w.getReader();
    w.close();

    Directory d3 = newDirectory();
    w = new RandomIndexWriter(random(), d3);
    w.addIndexes(SlowCompositeReaderWrapper.wrap(r1), SlowCompositeReaderWrapper.wrap(r2));
    r1.close();
    d1.close();
    r2.close();
    d2.close();

    w.forceMerge(1);
    DirectoryReader r3 = w.getReader();
    w.close();
    AtomicReader sr = getOnlySegmentReader(r3);
    assertEquals(2, sr.numDocs());
    DocValues docValues = sr.docValues("dv");
    assertNotNull(docValues);
    r3.close();
    d3.close();
  }

  public void testAddIndexesRandom() throws IOException {
    int valuesPerIndex = 10;
    List<Type> values = Arrays.asList(Type.values());
    Collections.shuffle(values, random());
    Type first = values.get(0);
    Type second = values.get(1);
    // index first index
    Directory d_1 = newDirectory();
    IndexWriter w_1 = new IndexWriter(d_1, writerConfig(random().nextBoolean()));
    indexValues(w_1, valuesPerIndex, first, values, false, 7);
    w_1.commit();
    assertEquals(valuesPerIndex, w_1.maxDoc());
    _TestUtil.checkIndex(d_1);

    // index second index
    Directory d_2 = newDirectory();
    IndexWriter w_2 = new IndexWriter(d_2, writerConfig(random().nextBoolean()));
    indexValues(w_2, valuesPerIndex, second, values, false, 7);
    w_2.commit();
    assertEquals(valuesPerIndex, w_2.maxDoc());
    _TestUtil.checkIndex(d_2);

    Directory target = newDirectory();
    IndexWriter w = new IndexWriter(target, writerConfig(random().nextBoolean()));
    DirectoryReader r_1 = DirectoryReader.open(w_1, true);
    DirectoryReader r_2 = DirectoryReader.open(w_2, true);
    if (random().nextBoolean()) {
      w.addIndexes(d_1, d_2);
    } else {
      w.addIndexes(r_1, r_2);
    }
    w.forceMerge(1, true);
    w.commit();
    
    _TestUtil.checkIndex(target);
    assertEquals(valuesPerIndex * 2, w.maxDoc());

    // check values
    
    DirectoryReader merged = DirectoryReader.open(w, true);
    Source source_1 = getSource(getDocValues(r_1, first.name()));
    Source source_2 = getSource(getDocValues(r_2, second.name()));
    Source source_1_merged = getSource(getDocValues(merged, first.name()));
    Source source_2_merged = getSource(getDocValues(merged, second
        .name()));
    for (int i = 0; i < r_1.maxDoc(); i++) {
      switch (first) {
      case BYTES_FIXED_DEREF:
      case BYTES_FIXED_STRAIGHT:
      case BYTES_VAR_DEREF:
      case BYTES_VAR_STRAIGHT:
      case BYTES_FIXED_SORTED:
      case BYTES_VAR_SORTED:
        assertEquals(source_1.getBytes(i, new BytesRef()),
            source_1_merged.getBytes(i, new BytesRef()));
        break;
      case FIXED_INTS_16:
      case FIXED_INTS_32:
      case FIXED_INTS_64:
      case FIXED_INTS_8:
      case VAR_INTS:
        assertEquals(source_1.getInt(i), source_1_merged.getInt(i));
        break;
      case FLOAT_32:
      case FLOAT_64:
        assertEquals(source_1.getFloat(i), source_1_merged.getFloat(i), 0.0d);
        break;
      default:
        fail("unkonwn " + first);
      }
    }

    for (int i = r_1.maxDoc(); i < merged.maxDoc(); i++) {
      switch (second) {
      case BYTES_FIXED_DEREF:
      case BYTES_FIXED_STRAIGHT:
      case BYTES_VAR_DEREF:
      case BYTES_VAR_STRAIGHT:
      case BYTES_FIXED_SORTED:
      case BYTES_VAR_SORTED:
        assertEquals(source_2.getBytes(i - r_1.maxDoc(), new BytesRef()),
            source_2_merged.getBytes(i, new BytesRef()));
        break;
      case FIXED_INTS_16:
      case FIXED_INTS_32:
      case FIXED_INTS_64:
      case FIXED_INTS_8:
      case VAR_INTS:
        assertEquals(source_2.getInt(i - r_1.maxDoc()),
            source_2_merged.getInt(i));
        break;
      case FLOAT_32:
      case FLOAT_64:
        assertEquals(source_2.getFloat(i - r_1.maxDoc()),
            source_2_merged.getFloat(i), 0.0d);
        break;
      default:
        fail("unkonwn " + first);
      }
    }
    // close resources
    r_1.close();
    r_2.close();
    merged.close();
    w_1.close(true);
    w_2.close(true);
    w.close(true);
    d_1.close();
    d_2.close();
    target.close();
  }

  private IndexWriterConfig writerConfig(boolean useCompoundFile) {
    final IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy(random()));
    LogMergePolicy policy = new LogDocMergePolicy();
    cfg.setMergePolicy(policy);
    policy.setUseCompoundFile(useCompoundFile);
    return cfg;
  }

  @SuppressWarnings("fallthrough")
  public void runTestNumerics(IndexWriterConfig cfg, boolean withDeletions)
      throws IOException {
    Directory d = newDirectory();
    IndexWriter w = new IndexWriter(d, cfg);
    final int numValues = 50 + atLeast(10);
    final List<Type> numVariantList = new ArrayList<Type>(NUMERICS);

    // run in random order to test if fill works correctly during merges
    Collections.shuffle(numVariantList, random());
    for (Type val : numVariantList) {
      FixedBitSet deleted = indexValues(w, numValues, val, numVariantList,
          withDeletions, 7);
      List<Closeable> closeables = new ArrayList<Closeable>();
      DirectoryReader r = DirectoryReader.open(w, true);
      final int numRemainingValues = numValues - deleted.cardinality();
      final int base = r.numDocs() - numRemainingValues;
      // for FIXED_INTS_8 we use value mod 128 - to enable testing in 
      // one go we simply use numValues as the mod for all other INT types
      int mod = numValues;
      switch (val) {
      case FIXED_INTS_8:
        mod = 128;
      case FIXED_INTS_16:
      case FIXED_INTS_32:
      case FIXED_INTS_64:
      case VAR_INTS: {
        DocValues intsReader = getDocValues(r, val.name());
        assertNotNull(intsReader);

        Source ints = getSource(intsReader);

        for (int i = 0; i < base; i++) {
          long value = ints.getInt(i);
          assertEquals("index " + i, 0, value);
        }

        int expected = 0;
        for (int i = base; i < r.numDocs(); i++, expected++) {
          while (deleted.get(expected)) {
            expected++;
          }
          assertEquals(val + " mod: " + mod + " index: " +  i, expected%mod, ints.getInt(i));
        }
      }
        break;
      case FLOAT_32:
      case FLOAT_64: {
        DocValues floatReader = getDocValues(r, val.name());
        assertNotNull(floatReader);
        Source floats = getSource(floatReader);
        for (int i = 0; i < base; i++) {
          double value = floats.getFloat(i);
          assertEquals(val + " failed for doc: " + i + " base: " + base,
              0.0d, value, 0.0d);
        }
        int expected = 0;
        for (int i = base; i < r.numDocs(); i++, expected++) {
          while (deleted.get(expected)) {
            expected++;
          }
          assertEquals("index " + i, 2.0 * expected, floats.getFloat(i),
              0.00001);
        }
      }
        break;
      default:
        fail("unexpected value " + val);
      }

      closeables.add(r);
      for (Closeable toClose : closeables) {
        toClose.close();
      }
    }
    w.close();
    d.close();
  }
  
  public void runTestIndexBytes(IndexWriterConfig cfg, boolean withDeletions)
      throws IOException {
    final Directory d = newDirectory();
    IndexWriter w = new IndexWriter(d, cfg);
    final List<Type> byteVariantList = new ArrayList<Type>(BYTES);
    // run in random order to test if fill works correctly during merges
    Collections.shuffle(byteVariantList, random());
    final int numValues = 50 + atLeast(10);
    for (Type byteIndexValue : byteVariantList) {
      List<Closeable> closeables = new ArrayList<Closeable>();
      final int bytesSize = 1 + atLeast(50);
      FixedBitSet deleted = indexValues(w, numValues, byteIndexValue,
          byteVariantList, withDeletions, bytesSize);
      final DirectoryReader r = DirectoryReader.open(w, withDeletions);
      assertEquals(0, r.numDeletedDocs());
      final int numRemainingValues = numValues - deleted.cardinality();
      final int base = r.numDocs() - numRemainingValues;
      DocValues bytesReader = getDocValues(r, byteIndexValue.name());
      assertNotNull("field " + byteIndexValue.name()
          + " returned null reader - maybe merged failed", bytesReader);
      Source bytes = getSource(bytesReader);
      byte upto = 0;

      // test the filled up slots for correctness
      for (int i = 0; i < base; i++) {

        BytesRef br = bytes.getBytes(i, new BytesRef());
        String msg = " field: " + byteIndexValue.name() + " at index: " + i
            + " base: " + base + " numDocs:" + r.numDocs();
        switch (byteIndexValue) {
        case BYTES_VAR_STRAIGHT:
        case BYTES_FIXED_STRAIGHT:
        case BYTES_FIXED_DEREF:
        case BYTES_FIXED_SORTED:
          // fixed straight returns bytesref with zero bytes all of fixed
          // length
          assertNotNull("expected none null - " + msg, br);
          if (br.length != 0) {
            assertEquals("expected zero bytes of length " + bytesSize + " - "
                + msg + br.utf8ToString(), bytesSize, br.length);
            for (int j = 0; j < br.length; j++) {
              assertEquals("Byte at index " + j + " doesn't match - " + msg, 0,
                  br.bytes[br.offset + j]);
            }
          }
          break;
        default:
          assertNotNull("expected none null - " + msg, br);
          assertEquals(byteIndexValue + "", 0, br.length);
          // make sure we advance at least until base
        }
      }

      // test the actual doc values added in this iteration
      assertEquals(base + numRemainingValues, r.numDocs());
      int v = 0;
      for (int i = base; i < r.numDocs(); i++) {
        String msg = " field: " + byteIndexValue.name() + " at index: " + i
            + " base: " + base + " numDocs:" + r.numDocs() + " bytesSize: "
            + bytesSize + " src: " + bytes;
        while (withDeletions && deleted.get(v++)) {
          upto += bytesSize;
        }
        BytesRef br = bytes.getBytes(i, new BytesRef());
        assertTrue(msg, br.length > 0);
        for (int j = 0; j < br.length; j++, upto++) {
          if (!(br.bytes.length > br.offset + j))
            br = bytes.getBytes(i, new BytesRef());
          assertTrue("BytesRef index exceeded [" + msg + "] offset: "
              + br.offset + " length: " + br.length + " index: "
              + (br.offset + j), br.bytes.length > br.offset + j);
          assertEquals("SourceRef Byte at index " + j + " doesn't match - "
              + msg, upto, br.bytes[br.offset + j]);
        }
      }

      // clean up
      closeables.add(r);
      for (Closeable toClose : closeables) {
        toClose.close();
      }
    }

    w.close();
    d.close();
  }
  
  public void testGetArrayNumerics() throws IOException {
    Directory d = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    IndexWriter w = new IndexWriter(d, cfg);
    final int numValues = 50 + atLeast(10);
    final List<Type> numVariantList = new ArrayList<Type>(NUMERICS);
    Collections.shuffle(numVariantList, random());
    for (Type val : numVariantList) {
      indexValues(w, numValues, val, numVariantList,
          false, 7);
      DirectoryReader r = DirectoryReader.open(w, true);
      DocValues docValues = getDocValues(r, val.name());
      assertNotNull(docValues);
      // make sure we don't get a direct source since they don't support getArray()
      Source source = docValues.getSource();
      switch (source.getType()) {
      case FIXED_INTS_8:
      {
        assertTrue(source.hasArray());
        byte[] values = (byte[]) source.getArray();
        for (int i = 0; i < numValues; i++) {
          assertEquals((long)values[i], source.getInt(i));
        }
      }
      break;
      case FIXED_INTS_16:
      {
        assertTrue(source.hasArray());
        short[] values = (short[]) source.getArray();
        for (int i = 0; i < numValues; i++) {
          assertEquals((long)values[i], source.getInt(i));
        }
      }
      break;
      case FIXED_INTS_32:
      {
        assertTrue(source.hasArray());
        int[] values = (int[]) source.getArray();
        for (int i = 0; i < numValues; i++) {
          assertEquals((long)values[i], source.getInt(i));
        }
      }
      break;
      case FIXED_INTS_64:
      {
        assertTrue(source.hasArray());
        long[] values = (long[]) source.getArray();
        for (int i = 0; i < numValues; i++) {
          assertEquals(values[i], source.getInt(i));
        }
      }
      break;
      case VAR_INTS:
        assertFalse(source.hasArray());
        break;
      case FLOAT_32:
      {
        assertTrue(source.hasArray());
        float[] values = (float[]) source.getArray();
        for (int i = 0; i < numValues; i++) {
          assertEquals((double)values[i], source.getFloat(i), 0.0d);
        }
      }
      break;
      case FLOAT_64:
      {
        assertTrue(source.hasArray());
        double[] values = (double[]) source.getArray();
        for (int i = 0; i < numValues; i++) {
          assertEquals(values[i], source.getFloat(i), 0.0d);
        }
      }
        break;
      default:
        fail("unexpected value " + source.getType());
      }
      r.close();
    }
    w.close();
    d.close();
  }
  
  public void testGetArrayBytes() throws IOException {
    Directory d = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
        new MockAnalyzer(random()));
    IndexWriter w = new IndexWriter(d, cfg);
    final int numValues = 50 + atLeast(10);
    // only single byte fixed straight supports getArray()
    indexValues(w, numValues, Type.BYTES_FIXED_STRAIGHT, null, false, 1);
    DirectoryReader r = DirectoryReader.open(w, true);
    DocValues docValues = getDocValues(r, Type.BYTES_FIXED_STRAIGHT.name());
    assertNotNull(docValues);
    // make sure we don't get a direct source since they don't support
    // getArray()
    Source source = docValues.getSource();

    switch (source.getType()) {
    case BYTES_FIXED_STRAIGHT: {
      BytesRef ref = new BytesRef();
      if (source.hasArray()) {
        byte[] values = (byte[]) source.getArray();
        for (int i = 0; i < numValues; i++) {
          source.getBytes(i, ref);
          assertEquals(1, ref.length);
          assertEquals(values[i], ref.bytes[ref.offset]);
        }
      }
    }
      break;
    default:
      fail("unexpected value " + source.getType());
    }
    r.close();
    w.close();
    d.close();
  }

  private DocValues getDocValues(IndexReader reader, String field) throws IOException {
    return MultiDocValues.getDocValues(reader, field);
  }

  @SuppressWarnings("fallthrough")
  private Source getSource(DocValues values) throws IOException {
    // getSource uses cache internally
    switch(random().nextInt(5)) {
    case 3:
      return values.load();
    case 2:
      return values.getDirectSource();
    case 1:
      if(values.getType() == Type.BYTES_VAR_SORTED || values.getType() == Type.BYTES_FIXED_SORTED) {
        return values.getSource().asSortedSource();
      }
    default:
      return values.getSource();
    }
  }


  private static EnumSet<Type> BYTES = EnumSet.of(Type.BYTES_FIXED_DEREF,
      Type.BYTES_FIXED_STRAIGHT, Type.BYTES_VAR_DEREF,
      Type.BYTES_VAR_STRAIGHT, Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED);

  private static EnumSet<Type> NUMERICS = EnumSet.of(Type.VAR_INTS,
      Type.FIXED_INTS_16, Type.FIXED_INTS_32,
      Type.FIXED_INTS_64, 
      Type.FIXED_INTS_8,
      Type.FLOAT_32,
      Type.FLOAT_64);

  private FixedBitSet indexValues(IndexWriter w, int numValues, Type valueType,
      List<Type> valueVarList, boolean withDeletions, int bytesSize)
      throws IOException {
    final boolean isNumeric = NUMERICS.contains(valueType);
    FixedBitSet deleted = new FixedBitSet(numValues);
    Document doc = new Document();
    final Field valField;
    if (isNumeric) {
      switch (valueType) {
      case VAR_INTS:
        valField = new PackedLongDocValuesField(valueType.name(), (long) 0);
        break;
      case FIXED_INTS_16:
        valField = new ShortDocValuesField(valueType.name(), (short) 0);
        break;
      case FIXED_INTS_32:
        valField = new IntDocValuesField(valueType.name(), 0);
        break;
      case FIXED_INTS_64:
        valField = new LongDocValuesField(valueType.name(), (long) 0);
        break;
      case FIXED_INTS_8:
        valField = new ByteDocValuesField(valueType.name(), (byte) 0);
        break;
      case FLOAT_32:
        valField = new FloatDocValuesField(valueType.name(), (float) 0);
        break;
      case FLOAT_64:
        valField = new DoubleDocValuesField(valueType.name(), (double) 0);
        break;
      default:
        valField = null;
        fail("unhandled case");
      }
    } else {
      switch (valueType) {
      case BYTES_FIXED_STRAIGHT:
        valField = new StraightBytesDocValuesField(valueType.name(), new BytesRef(), true);
        break;
      case BYTES_VAR_STRAIGHT:
        valField = new StraightBytesDocValuesField(valueType.name(), new BytesRef(), false);
        break;
      case BYTES_FIXED_DEREF:
        valField = new DerefBytesDocValuesField(valueType.name(), new BytesRef(), true);
        break;
      case BYTES_VAR_DEREF:
        valField = new DerefBytesDocValuesField(valueType.name(), new BytesRef(), false);
        break;
      case BYTES_FIXED_SORTED:
        valField = new SortedBytesDocValuesField(valueType.name(), new BytesRef(), true);
        break;
      case BYTES_VAR_SORTED:
        valField = new SortedBytesDocValuesField(valueType.name(), new BytesRef(), false);
        break;
      default:
        valField = null;
        fail("unhandled case");
      }
    }
    doc.add(valField);
    final BytesRef bytesRef = new BytesRef();

    final String idBase = valueType.name() + "_";
    final byte[] b = new byte[bytesSize];
    if (bytesRef != null) {
      bytesRef.bytes = b;
      bytesRef.length = b.length;
      bytesRef.offset = 0;
    }
    byte upto = 0;
    for (int i = 0; i < numValues; i++) {
      if (isNumeric) {
        switch (valueType) {
        case VAR_INTS:
          valField.setLongValue((long)i);
          break;
        case FIXED_INTS_16:
          valField.setShortValue((short)i);
          break;
        case FIXED_INTS_32:
          valField.setIntValue(i);
          break;
        case FIXED_INTS_64:
          valField.setLongValue((long)i);
          break;
        case FIXED_INTS_8:
          valField.setByteValue((byte)(0xFF & (i % 128)));
          break;
        case FLOAT_32:
          valField.setFloatValue(2.0f * i);
          break;
        case FLOAT_64:
          valField.setDoubleValue(2.0d * i);
          break;
        default:
          fail("unexpected value " + valueType);
        }
      } else {
        for (int j = 0; j < b.length; j++) {
          b[j] = upto++;
        }
        if (bytesRef != null) {
          valField.setBytesValue(bytesRef);
        }
      }
      doc.removeFields("id");
      doc.add(new StringField("id", idBase + i, Field.Store.YES));
      w.addDocument(doc);

      if (i % 7 == 0) {
        if (withDeletions && random().nextBoolean()) {
          Type val = valueVarList.get(random().nextInt(1 + valueVarList
              .indexOf(valueType)));
          final int randInt = val == valueType ? random().nextInt(1 + i) : random()
              .nextInt(numValues);
          w.deleteDocuments(new Term("id", val.name() + "_" + randInt));
          if (val == valueType) {
            deleted.set(randInt);
          }
        }
        if (random().nextInt(10) == 0) {
          w.commit();
        }
      }
    }
    w.commit();

    // TODO test multi seg with deletions
    if (withDeletions || random().nextBoolean()) {
      w.forceMerge(1, true);
    }
    return deleted;
  }

  public void testMultiValuedDocValuesField() throws Exception {
    Directory d = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d);
    Document doc = new Document();
    Field f = new PackedLongDocValuesField("field", 17);
    // Index doc values are single-valued so we should not
    // be able to add same field more than once:
    doc.add(f);
    doc.add(f);
    try {
      w.addDocument(doc);
      fail("didn't hit expected exception");
    } catch (IllegalArgumentException iae) {
      // expected
    }

    doc = new Document();
    doc.add(f);
    w.addDocument(doc);
    w.forceMerge(1);
    DirectoryReader r = w.getReader();
    w.close();
    assertEquals(17, getOnlySegmentReader(r).docValues("field").load().getInt(0));
    r.close();
    d.close();
  }

  public void testDifferentTypedDocValuesField() throws Exception {
    Directory d = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), d);
    Document doc = new Document();
    // Index doc values are single-valued so we should not
    // be able to add same field more than once:
    Field f;
    doc.add(f = new PackedLongDocValuesField("field", 17));
    doc.add(new FloatDocValuesField("field", 22.0f));
    try {
      w.addDocument(doc);
      fail("didn't hit expected exception");
    } catch (IllegalArgumentException iae) {
      // expected
    }

    doc = new Document();
    doc.add(f);
    w.addDocument(doc);
    w.forceMerge(1);
    DirectoryReader r = w.getReader();
    w.close();
    assertEquals(17, getOnlySegmentReader(r).docValues("field").load().getInt(0));
    r.close();
    d.close();
  }
  
  public void testSortedBytes() throws IOException {
    Type[] types = new Type[] { Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED };
    for (Type type : types) {
      boolean fixed = type == Type.BYTES_FIXED_SORTED;
      final Directory d = newDirectory();
      IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT,
          new MockAnalyzer(random()));
      IndexWriter w = new IndexWriter(d, cfg);
      int numDocs = atLeast(100);
      BytesRefHash hash = new BytesRefHash();
      Map<String, String> docToString = new HashMap<String, String>();
      int len = 1 + random().nextInt(50);
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        doc.add(newTextField("id", "" + i, Field.Store.YES));
        String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random(),
            len) : _TestUtil.randomRealisticUnicodeString(random(), 1, len);
        BytesRef br = new BytesRef(string);
        doc.add(new SortedBytesDocValuesField("field", br, type == Type.BYTES_FIXED_SORTED));
        hash.add(br);
        docToString.put("" + i, string);
        w.addDocument(doc);
      }
      if (rarely()) {
        w.commit();
      }
      int numDocsNoValue = atLeast(10);
      for (int i = 0; i < numDocsNoValue; i++) {
        Document doc = new Document();
        doc.add(newTextField("id", "noValue", Field.Store.YES));
        w.addDocument(doc);
      }
      BytesRef bytesRef = new BytesRef(fixed ? len : 0);
      bytesRef.offset = 0;
      bytesRef.length = fixed ? len : 0;
      hash.add(bytesRef); // add empty value for the gaps
      if (rarely()) {
        w.commit();
      }
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        String id = "" + i + numDocs;
        doc.add(newTextField("id", id, Field.Store.YES));
        String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random(),
            len) : _TestUtil.randomRealisticUnicodeString(random(), 1, len);
        BytesRef br = new BytesRef(string);
        hash.add(br);
        docToString.put(id, string);
        doc.add(new SortedBytesDocValuesField("field", br, type == Type.BYTES_FIXED_SORTED));
        w.addDocument(doc);
      }
      w.commit();
      IndexReader reader = w.getReader();
      DocValues docValues = MultiDocValues.getDocValues(reader, "field");
      Source source = getSource(docValues);
      SortedSource asSortedSource = source.asSortedSource();
      int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
      BytesRef expected = new BytesRef();
      BytesRef actual = new BytesRef();
      assertEquals(hash.size(), asSortedSource.getValueCount());
      for (int i = 0; i < hash.size(); i++) {
        hash.get(sort[i], expected);
        asSortedSource.getByOrd(i, actual);
        assertEquals(expected.utf8ToString(), actual.utf8ToString());
        int ord = asSortedSource.getOrdByValue(expected, actual);
        assertEquals(i, ord);
      }
      AtomicReader slowR = SlowCompositeReaderWrapper.wrap(reader);
      Set<Entry<String, String>> entrySet = docToString.entrySet();

      for (Entry<String, String> entry : entrySet) {
        int docId = docId(slowR, new Term("id", entry.getKey()));
        expected = new BytesRef(entry.getValue());
        assertEquals(expected, asSortedSource.getBytes(docId, actual));
      }

      reader.close();
      w.close();
      d.close();
    }
  }
  
  public int docId(AtomicReader reader, Term term) throws IOException {
    int docFreq = reader.docFreq(term);
    assertEquals(1, docFreq);
    DocsEnum termDocsEnum = reader.termDocsEnum(term);
    int nextDoc = termDocsEnum.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, termDocsEnum.nextDoc());
    return nextDoc;
  }

  public void testWithThreads() throws Exception {
    Random random = random();
    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<String>();
    if (VERBOSE) {
      System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
    }
    int numDocs = 0;
    final List<BytesRef> docValues = new ArrayList<BytesRef>();

    // TODO: deletions
    while (numDocs < NUM_DOCS) {
      final String s;
      if (random.nextBoolean()) {
        s = _TestUtil.randomSimpleString(random);
      } else {
        s = _TestUtil.randomUnicodeString(random);
      }
      final BytesRef br = new BytesRef(s);

      if (!allowDups) {
        if (seen.contains(s)) {
          continue;
        }
        seen.add(s);
      }

      if (VERBOSE) {
        System.out.println("  " + numDocs + ": s=" + s);
      }
      
      final Document doc = new Document();
      doc.add(new SortedBytesDocValuesField("stringdv", br));
      doc.add(new PackedLongDocValuesField("id", numDocs));
      docValues.add(br);
      writer.addDocument(doc);
      numDocs++;

      if (random.nextInt(40) == 17) {
        // force flush
        writer.getReader().close();
      }
    }

    writer.forceMerge(1);
    final DirectoryReader r = writer.getReader();
    writer.close();
    
    final AtomicReader sr = getOnlySegmentReader(r);
    final DocValues dv = sr.docValues("stringdv");
    assertNotNull(dv);

    final long END_TIME = System.currentTimeMillis() + (TEST_NIGHTLY ? 30 : 1);

    final DocValues.Source docIDToID = sr.docValues("id").getSource();

    final int NUM_THREADS = _TestUtil.nextInt(random(), 1, 10);
    Thread[] threads = new Thread[NUM_THREADS];
    for(int thread=0;thread<NUM_THREADS;thread++) {
      threads[thread] = new Thread() {
          @Override
          public void run() {
            Random random = random();            
            final DocValues.Source stringDVSource;
            final DocValues.Source stringDVDirectSource;
            try {
              stringDVSource = dv.getSource();
              assertNotNull(stringDVSource);
              stringDVDirectSource = dv.getDirectSource();
              assertNotNull(stringDVDirectSource);
            } catch (IOException ioe) {
              throw new RuntimeException(ioe);
            }
            while(System.currentTimeMillis() < END_TIME) {
              final DocValues.Source source;
              if (random.nextBoolean()) {
                source = stringDVSource;
              } else {
                source = stringDVDirectSource;
              }

              final DocValues.SortedSource sortedSource = source.asSortedSource();
              assertNotNull(sortedSource);

              final BytesRef scratch = new BytesRef();

              for(int iter=0;iter<100;iter++) {
                final int docID = random.nextInt(sr.maxDoc());
                final BytesRef br = sortedSource.getBytes(docID, scratch);
                assertEquals(docValues.get((int) docIDToID.getInt(docID)), br);
              }
            }
          }
        };
      threads[thread].start();
    }

    for(Thread thread : threads) {
      thread.join();
    }

    r.close();
    dir.close();
  }

  // LUCENE-3870
  public void testLengthPrefixAcrossTwoPages() throws Exception {
    Directory d = newDirectory();
    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();
    byte[] bytes = new byte[32764];
    BytesRef b = new BytesRef();
    b.bytes = bytes;
    b.length = bytes.length;
    doc.add(new DerefBytesDocValuesField("field", b));
    w.addDocument(doc);
    bytes[0] = 1;
    w.addDocument(doc);
    w.forceMerge(1);
    DirectoryReader r = w.getReader();
    Source s = getOnlySegmentReader(r).docValues("field").getSource();

    BytesRef bytes1 = s.getBytes(0, new BytesRef());
    assertEquals(bytes.length, bytes1.length);
    bytes[0] = 0;
    assertEquals(b, bytes1);
    
    bytes1 = s.getBytes(1, new BytesRef());
    assertEquals(bytes.length, bytes1.length);
    bytes[0] = 1;
    assertEquals(b, bytes1);
    r.close();
    w.close();
    d.close();
  }
  
  public void testFixedLengthNotReallyFixed() throws IOException {
    Directory d = newDirectory();
    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    Document doc = new Document();
    doc.add(new DerefBytesDocValuesField("foo", new BytesRef("bar"), true));
    w.addDocument(doc);
    doc = new Document();
    doc.add(new DerefBytesDocValuesField("foo", new BytesRef("bazz"), true));
    try {
      w.addDocument(doc);
    } catch (IllegalArgumentException expected) {
      // expected
    }
    w.close();
    d.close();
  }
  
  public void testDocValuesUnstored() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    iwconfig.setMergePolicy(newLogMergePolicy());
    IndexWriter writer = new IndexWriter(dir, iwconfig);
    for (int i = 0; i < 50; i++) {
      Document doc = new Document();
      doc.add(new PackedLongDocValuesField("dv", i));
      doc.add(new TextField("docId", "" + i, Field.Store.YES));
      writer.addDocument(doc);
    }
    DirectoryReader r = writer.getReader();
    SlowCompositeReaderWrapper slow = new SlowCompositeReaderWrapper(r);
    FieldInfos fi = slow.getFieldInfos();
    FieldInfo dvInfo = fi.fieldInfo("dv");
    assertTrue(dvInfo.hasDocValues());
    DocValues dv = slow.docValues("dv");
    Source source = dv.getDirectSource();
    for (int i = 0; i < 50; i++) {
      assertEquals(i, source.getInt(i));
      Document d = slow.document(i);
      // cannot use d.get("dv") due to another bug!
      assertNull(d.getField("dv"));
      assertEquals(Integer.toString(i), d.get("docId"));
    }
    slow.close();
    writer.close();
    dir.close();
  }
}