package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.io.IOException;
import java.util.EnumSet;
import java.util.List;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.ByteDocValuesField;
import org.apache.lucene.document.DerefBytesDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.IntDocValuesField;
import org.apache.lucene.document.LongDocValuesField;
import org.apache.lucene.document.PackedLongDocValuesField;
import org.apache.lucene.document.ShortDocValuesField;
import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@SuppressCodecs("Lucene3x")
public class TestTypePromotion extends LuceneTestCase {

  /** Integer doc-values types that should be promotable into a common integer type at merge time. */
  private static EnumSet<Type> INTEGERS = EnumSet.of(Type.VAR_INTS,
      Type.FIXED_INTS_16, Type.FIXED_INTS_32,
      Type.FIXED_INTS_64, Type.FIXED_INTS_8);

  /**
   * Floating-point doc-values types. FIXED_INTS_8 is included since small
   * integers can be promoted to a floating-point representation without loss.
   */
  private static EnumSet<Type> FLOATS = EnumSet.of(Type.FLOAT_32,
      Type.FLOAT_64, Type.FIXED_INTS_8);

  /** Unsorted byte[] doc-values variants (fixed/variable length, deref/straight storage). */
  private static EnumSet<Type> UNSORTED_BYTES = EnumSet.of(
      Type.BYTES_FIXED_DEREF, Type.BYTES_FIXED_STRAIGHT,
      Type.BYTES_VAR_STRAIGHT, Type.BYTES_VAR_DEREF);

  /** Sorted byte[] doc-values variants. */
  private static EnumSet<Type> SORTED_BYTES = EnumSet.of(
      Type.BYTES_FIXED_SORTED, Type.BYTES_VAR_SORTED);

  /**
   * Picks one doc-values type uniformly at random from the given set.
   *
   * @param typeEnum candidate types
   * @param random   source of randomness
   * @return a randomly chosen member of {@code typeEnum}
   */
  public Type randomValueType(EnumSet<Type> typeEnum, Random random) {
    Type[] array = typeEnum.toArray(new Type[0]);
    return array[random.nextInt(array.length)];
  }

  /** Broad value category under test; selects the assertion strategy in {@link #assertValues}. */
  private static enum TestType {
    Int, Float, Byte
  }

  /**
   * Indexes three batches of documents, each batch using a (possibly different)
   * random doc-values type from {@code types}, forces a merge to a single
   * segment so type promotion must happen, and verifies all values survived.
   * One batch is sometimes added via {@code addIndexes} to also exercise the
   * cross-directory merge path.
   *
   * @param types the set of compatible doc-values types to draw from
   * @param type  the value category used when asserting the merged values
   */
  private void runTest(EnumSet<Type> types, TestType type) throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    int num_1 = atLeast(200);
    int num_2 = atLeast(200);
    int num_3 = atLeast(200);
    long[] values = new long[num_1 + num_2 + num_3];
    Type[] sourceType = new Type[num_1 + num_2 + num_3];
    // Batch 1 and batch 2 go into separate commits so they land in separate
    // segments, forcing promotion when they are later merged together.
    index(writer,
          randomValueType(types, random()), values, sourceType, 0, num_1);
    writer.commit();

    index(writer,
          randomValueType(types, random()), values, sourceType, num_1, num_2);
    writer.commit();

    if (random().nextInt(4) == 0) {
      // once in a while use addIndexes
      writer.forceMerge(1);

      Directory dir_2 = newDirectory() ;
      IndexWriter writer_2 = new IndexWriter(dir_2,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
      index(writer_2,
            randomValueType(types, random()), values, sourceType, num_1 + num_2, num_3);
      writer_2.commit();
      writer_2.close();
      if (rarely()) {
        // directory-based addIndexes: segments are copied and merged later
        writer.addIndexes(dir_2);
      } else {
        // do a real merge here
        IndexReader open = maybeWrapReader(DirectoryReader.open(dir_2));
        writer.addIndexes(open);
        open.close();
      }
      dir_2.close();
    } else {
      // plain third batch in the same writer/directory
      index(writer,
            randomValueType(types, random()), values, sourceType, num_1 + num_2, num_3);
    }

    writer.forceMerge(1);
    writer.close();
    assertValues(type, dir, values, sourceType);
    dir.close();
  }

  /**
   * Opens the (single-segment) index in {@code dir} and checks every document's
   * "promote" doc value against the expected {@code values}, decoding according
   * to {@code type}. Documents are matched by their stored "id" field since
   * merging may have reordered them.
   *
   * @param type       how to read/compare the merged values
   * @param dir        directory holding exactly one segment
   * @param values     expected values, indexed by document id
   * @param sourceType the doc-values type each id was originally written with
   */
  private void assertValues(TestType type, Directory dir, long[] values, Type[] sourceType)
      throws IOException {
    DirectoryReader reader = DirectoryReader.open(dir);
    assertEquals(1, reader.leaves().size());
    IndexReaderContext topReaderContext = reader.getContext();
    List<AtomicReaderContext> leaves = topReaderContext.leaves();
    assertEquals(1, leaves.size());
    DocValues docValues = leaves.get(0).reader().docValues("promote");
    Source directSource = docValues.getDirectSource();
    for (int i = 0; i < values.length; i++) {
      int id = Integer.parseInt(reader.document(i).get("id"));
      String msg = "id: " + id + " doc: " + i;
      switch (type) {
      case Byte:
        BytesRef bytes = directSource.getBytes(i, new BytesRef());
        long value = 0;
        // Decode the big-endian payload; its width depends on what the values
        // were promoted to (1, 2, 4 or 8 bytes). The 1/2/4-byte cases rely on
        // int sign-extension to long matching how the expected values were
        // stored in the long[] array.
        switch(bytes.length) {
        case 1:
          value = bytes.bytes[bytes.offset];
          break;
        case 2:
          value = ((bytes.bytes[bytes.offset] & 0xFF) << 8) | (bytes.bytes[bytes.offset+1] & 0xFF);
          break;
        case 4:
          value = ((bytes.bytes[bytes.offset] & 0xFF) << 24) | ((bytes.bytes[bytes.offset+1] & 0xFF) << 16)
              | ((bytes.bytes[bytes.offset+2] & 0xFF) << 8) | (bytes.bytes[bytes.offset+3] & 0xFF);
          break;
        case 8:
          value = (((long)(bytes.bytes[bytes.offset] & 0xff) << 56) | ((long)(bytes.bytes[bytes.offset+1] & 0xff) << 48) |
              ((long)(bytes.bytes[bytes.offset+2] & 0xff) << 40) | ((long)(bytes.bytes[bytes.offset+3] & 0xff) << 32) |
              ((long)(bytes.bytes[bytes.offset+4] & 0xff) << 24) | ((long)(bytes.bytes[bytes.offset+5] & 0xff) << 16) |
              ((long)(bytes.bytes[bytes.offset+6] & 0xff) << 8) | ((long)(bytes.bytes[bytes.offset+7] & 0xff)));
          break;
        default:
          fail(msg + " bytessize: " + bytes.length);
        }
        assertEquals(msg + " byteSize: " + bytes.length, values[id], value);
        break;
      case Float:
        if (sourceType[id] == Type.FLOAT_32
            || sourceType[id] == Type.FLOAT_64) {
          // True float sources stored the raw double bits in values[], so
          // compare bit patterns exactly.
          assertEquals(msg, values[id],
              Double.doubleToRawLongBits(directSource.getFloat(i)));
        } else {
          // Integer sources promoted to float: compare numerically.
          assertEquals(msg, values[id], directSource.getFloat(i), 0.0d);
        }
        break;
      case Int:
        assertEquals(msg, values[id], directSource.getInt(i));
        break;
      default:
        break;
      }

    }
    docValues.close();
    reader.close();
  }

  /**
   * Adds {@code num} documents (ids {@code offset} .. {@code offset+num-1}) to
   * {@code writer}, all carrying a "promote" doc-values field of the given
   * {@code valueType}. The randomly generated value for each document is
   * recorded in {@code values[id]} (as a long, raw double bits for float
   * types), and the type used is recorded in {@code sourceTypes[id]} so the
   * caller can verify promotion afterwards. Commits randomly mid-stream to
   * create extra segments.
   *
   * @param valueType   the doc-values type to write for this batch
   * @param values      out-param: expected value per document id
   * @param sourceTypes out-param: type written per document id
   * @param offset      first document id of this batch
   * @param num         number of documents to add
   */
  public void index(IndexWriter writer,
                    Type valueType, long[] values, Type[] sourceTypes, int offset, int num)
      throws IOException {
    final Field valField;

    if (VERBOSE) {
      System.out.println("TEST: add docs " + offset + "-" + (offset+num) + " valType=" + valueType);
    }

    // One reusable field instance per batch; its value is overwritten per doc.
    switch(valueType) {
    case VAR_INTS:
      valField = new PackedLongDocValuesField("promote", (long) 0);
      break;
    case FIXED_INTS_8:
      valField = new ByteDocValuesField("promote", (byte) 0);
      break;
    case FIXED_INTS_16:
      valField = new ShortDocValuesField("promote", (short) 0);
      break;
    case FIXED_INTS_32:
      valField = new IntDocValuesField("promote", 0);
      break;
    case FIXED_INTS_64:
      // was "(byte) 0" — misleading cast for a long-valued field; the initial
      // value is overwritten per document either way
      valField = new LongDocValuesField("promote", 0L);
      break;
    case FLOAT_32:
      valField = new FloatDocValuesField("promote", 0f);
      break;
    case FLOAT_64:
      valField = new DoubleDocValuesField("promote", 0d);
      break;
    case BYTES_FIXED_STRAIGHT:
      valField = new StraightBytesDocValuesField("promote", new BytesRef(), true);
      break;
    case BYTES_VAR_STRAIGHT:
      valField = new StraightBytesDocValuesField("promote", new BytesRef(), false);
      break;
    case BYTES_FIXED_DEREF:
      valField = new DerefBytesDocValuesField("promote", new BytesRef(), true);
      break;
    case BYTES_VAR_DEREF:
      valField = new DerefBytesDocValuesField("promote", new BytesRef(), false);
      break;
    case BYTES_FIXED_SORTED:
      valField = new SortedBytesDocValuesField("promote", new BytesRef(), true);
      break;
    case BYTES_VAR_SORTED:
      valField = new SortedBytesDocValuesField("promote", new BytesRef(), false);
      break;
    default:
      throw new IllegalStateException("unknown Type: " + valueType);
    }

    for (int i = offset; i < offset + num; i++) {
      Document doc = new Document();
      doc.add(new TextField("id", i + "", Field.Store.YES));
      sourceTypes[i] = valueType;
      switch (valueType) {
      case VAR_INTS:
        // TODO: can we do nextLong()?
        values[i] = random().nextInt();
        valField.setLongValue(values[i]);
        break;
      case FIXED_INTS_16:
        // TODO: negatives too?
        values[i] = random().nextInt(Short.MAX_VALUE);
        valField.setShortValue((short) values[i]);
        break;
      case FIXED_INTS_32:
        values[i] = random().nextInt();
        valField.setIntValue((int) values[i]);
        break;
      case FIXED_INTS_64:
        values[i] = random().nextLong();
        valField.setLongValue(values[i]);
        break;
      case FLOAT_64:
        // store the raw bit pattern so assertValues can compare exactly
        final double nextDouble = random().nextDouble();
        values[i] = Double.doubleToRawLongBits(nextDouble);
        valField.setDoubleValue(nextDouble);
        break;
      case FLOAT_32:
        // float widened to double before taking bits; widening is exact, so
        // the bits match what a reader returning double will produce
        final float nextFloat = random().nextFloat();
        values[i] = Double.doubleToRawLongBits(nextFloat);
        valField.setFloatValue(nextFloat);
        break;
      case FIXED_INTS_8:
        values[i] = (byte) i;
        valField.setByteValue((byte)values[i]);
        break;
      case BYTES_FIXED_DEREF:
      case BYTES_FIXED_SORTED:
      case BYTES_FIXED_STRAIGHT:
        // fixed-width variants always write an 8-byte big-endian long
        values[i] = random().nextLong();
        byte bytes[] = new byte[8];
        ByteArrayDataOutput out = new ByteArrayDataOutput(bytes, 0, 8);
        out.writeLong(values[i]);
        valField.setBytesValue(new BytesRef(bytes));
        break;
      case BYTES_VAR_DEREF:
      case BYTES_VAR_SORTED:
      case BYTES_VAR_STRAIGHT:
        // variable-width variants randomly write 4 or 8 bytes so the merged
        // segment has to cope with mixed lengths
        byte lbytes[] = new byte[8];
        ByteArrayDataOutput lout = new ByteArrayDataOutput(lbytes, 0, 8);
        final int len;
        if (random().nextBoolean()) {
          values[i] = random().nextInt();
          lout.writeInt((int)values[i]);
          len = 4;
        } else {
          values[i] = random().nextLong();
          lout.writeLong(values[i]);
          len = 8;
        }
        valField.setBytesValue(new BytesRef(lbytes, 0, len));
        break;
      default:
        fail("unexpected value " + valueType);
      }
      if (VERBOSE) {
        System.out.println("  doc " + i + " has val=" + valField);
      }
      doc.add(valField);
      writer.addDocument(doc);
      if (random().nextInt(10) == 0) {
        // random mid-batch commits create additional segments to merge
        writer.commit();
      }
    }
  }

  public void testPromoteBytes() throws IOException {
    runTest(UNSORTED_BYTES, TestType.Byte);
  }

  public void testSortedPromoteBytes() throws IOException {
    runTest(SORTED_BYTES, TestType.Byte);
  }

  public void testPromoteInteger() throws IOException {
    runTest(INTEGERS, TestType.Int);
  }

  // NOTE(review): method name has a typo ("Promot") — kept as-is since test
  // method names are discovered reflectively and may be referenced by name in
  // build/test configurations.
  public void testPromotFloatingPoint() throws IOException {
    runTest(FLOATS, TestType.Float);
  }

  /**
   * Indexes one batch of integer doc values and one batch of byte doc values
   * under the same field, then merges. The incompatible types must be promoted
   * to the most general representation, BYTES_VAR_STRAIGHT.
   */
  public void testMergeIncompatibleTypes() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    writerConfig.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES); // no merges until we are done with adding values
    IndexWriter writer = new IndexWriter(dir, writerConfig);
    int num_1 = atLeast(200);
    int num_2 = atLeast(200);
    long[] values = new long[num_1 + num_2];
    Type[] sourceType = new Type[num_1 + num_2];
    index(writer,
          randomValueType(INTEGERS, random()), values, sourceType, 0, num_1);
    writer.commit();

    if (random().nextInt(4) == 0) {
      // once in a while use addIndexes
      Directory dir_2 = newDirectory() ;
      IndexWriter writer_2 = new IndexWriter(dir_2,
          newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
      index(writer_2,
            randomValueType(random().nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random()), values, sourceType, num_1, num_2);
      writer_2.commit();
      writer_2.close();
      if (random().nextBoolean()) {
        writer.addIndexes(dir_2);
      } else {
        // do a real merge here
        IndexReader open = DirectoryReader.open(dir_2);
        writer.addIndexes(open);
        open.close();
      }
      dir_2.close();
    } else {
      index(writer,
            randomValueType(random().nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random()), values, sourceType, num_1, num_2);
      writer.commit();
    }
    writer.close();
    // Re-open with a real merge policy so forceMerge collapses everything into
    // one segment (the first writer deliberately used NoMergePolicy).
    writerConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    if (writerConfig.getMergePolicy() instanceof NoMergePolicy) {
      writerConfig.setMergePolicy(newLogMergePolicy()); // make sure we merge to one segment (merge everything together)
    }
    writer = new IndexWriter(dir, writerConfig);
    // now merge
    writer.forceMerge(1);
    writer.close();
    DirectoryReader reader = DirectoryReader.open(dir);
    assertEquals(1, reader.leaves().size());
    IndexReaderContext topReaderContext = reader.getContext();
    List<AtomicReaderContext> leaves = topReaderContext.leaves();
    DocValues docValues = leaves.get(0).reader().docValues("promote");
    assertNotNull(docValues);
    // Incompatible int/bytes sources must promote to variable-length bytes.
    assertValues(TestType.Byte, dir, values, sourceType);
    assertEquals(Type.BYTES_VAR_STRAIGHT, docValues.getType());
    reader.close();
    dir.close();
  }
}