package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts.Reader;
/**
* A wrapper around a {@link CompositeReader} providing access to per-segment
* {@link DocValues}.
*
* <p><b>NOTE</b>: for composite readers, you'll get better
* performance by gathering the sub readers using
* {@link IndexReader#getContext()} to get the
* atomic leaves and then operating per-{@link AtomicReader},
* instead of using this class.
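*
* <p>For example, operating per leaf (a minimal sketch; assumes
* {@code reader} is the composite {@link IndexReader} and {@code "field"}
* is an illustrative field name):
* <pre>
* for (AtomicReaderContext ctx : reader.leaves()) {
*   DocValues values = ctx.reader().docValues("field");
*   if (values != null) {
*     DocValues.Source source = values.getSource();
*     // doc ids here are leaf-relative; add ctx.docBase for global ids
*   }
* }
* </pre>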
*
* @lucene.experimental
* @lucene.internal
*/
class MultiDocValues extends DocValues {
private static final DocValuesPuller DEFAULT_PULLER = new DocValuesPuller();
private static final DocValuesPuller NORMS_PULLER = new DocValuesPuller() {
@Override
public DocValues pull(AtomicReader reader, String field) throws IOException {
return reader.normValues(field);
}
@Override
public boolean stopLoadingOnNull(AtomicReader reader, String field) {
// for norms: if any leaf has the field but omits norms, we drop norms
// for the whole field and return null from the merge
FieldInfos fieldInfos = reader.getFieldInfos();
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
return fieldInfo != null && fieldInfo.omitsNorms();
}
};
public static class DocValuesSlice {
public static final DocValuesSlice[] EMPTY_ARRAY = new DocValuesSlice[0];
final int start;
final int length;
DocValues docValues;
public DocValuesSlice(DocValues docValues, int start, int length) {
this.docValues = docValues;
this.start = start;
this.length = length;
}
}
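// strategy for obtaining a field's DocValues from an AtomicReader: the
// default puller reads docvalues, while NORMS_PULLER reads norms instead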
private static class DocValuesPuller {
public DocValuesPuller() {}
public DocValues pull(AtomicReader reader, String field) throws IOException {
return reader.docValues(field);
}
public boolean stopLoadingOnNull(AtomicReader reader, String field) {
return false;
}
}
private DocValuesSlice[] slices;
private int[] starts;
private Type type;
private int valueSize;
private MultiDocValues(DocValuesSlice[] slices, int[] starts, TypePromoter promotedType) {
this.starts = starts;
this.slices = slices;
this.type = promotedType.type();
this.valueSize = promotedType.getValueSize();
}
/**
* Returns a single {@link DocValues} instance for this field, merging
* the per-segment values on the fly.
*
* <p>
* <b>NOTE</b>: this is a slow way to access DocValues.
* It's better to get the sub-readers and iterate through them
* yourself.
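*
* <p>A minimal usage sketch ({@code "field"} is an illustrative name):
* <pre>
* DocValues values = MultiDocValues.getDocValues(reader, "field");
* if (values != null) { // null when no segment has docvalues for the field
*   DocValues.Source source = values.getSource();
*   long v = source.getInt(docID); // for integer-typed fields
* }
* </pre>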
*/
public static DocValues getDocValues(IndexReader r, final String field) throws IOException {
return getDocValues(r, field, DEFAULT_PULLER);
}
/**
* Returns a single {@link DocValues} instance for this norms field, merging
* the per-segment values on the fly.
*
* <p>
* <b>NOTE</b>: this is a slow way to access DocValues.
* It's better to get the sub-readers and iterate through them
* yourself.
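*
* <p>For example (a sketch; note the stricter null contract for norms):
* <pre>
* DocValues norms = MultiDocValues.getNormDocValues(reader, "body");
* // null if any leaf that has the field omits norms
* </pre>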
*/
public static DocValues getNormDocValues(IndexReader r, final String field) throws IOException {
return getDocValues(r, field, NORMS_PULLER);
}
private static DocValues getDocValues(IndexReader reader, final String field, final DocValuesPuller puller) throws IOException {
if (reader instanceof AtomicReader) {
// already an atomic reader
return puller.pull((AtomicReader) reader, field);
}
assert reader instanceof CompositeReader;
final List<AtomicReaderContext> leaves = reader.leaves();
switch (leaves.size()) {
case 0:
// no fields
return null;
case 1:
// a composite with a single leaf: pull from that leaf directly
return getDocValues(leaves.get(0).reader(), field, puller);
default:
final List<DocValuesSlice> slices = new ArrayList<DocValuesSlice>();
TypePromoter promotedType = TypePromoter.getIdentityPromoter();
// gather all docvalues fields, accumulating a promoted type across
// potentially incompatible types
for (final AtomicReaderContext ctx : leaves) {
final AtomicReader r = ctx.reader();
final DocValues d = puller.pull(r, field);
if (d != null) {
TypePromoter incoming = TypePromoter.create(d.getType(), d.getValueSize());
promotedType = promotedType.promote(incoming);
} else if (puller.stopLoadingOnNull(r, field)) {
return null;
}
slices.add(new DocValuesSlice(d, ctx.docBase, r.maxDoc()));
}
// return null if no docvalues encountered anywhere
if (promotedType == TypePromoter.getIdentityPromoter()) {
return null;
}
// populate starts and fill gaps with empty docvalues
int[] starts = new int[slices.size()];
for (int i = 0; i < slices.size(); i++) {
DocValuesSlice slice = slices.get(i);
starts[i] = slice.start;
if (slice.docValues == null) {
Type promoted = promotedType.type();
switch(promoted) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_FIXED_SORTED:
assert promotedType.getValueSize() >= 0;
slice.docValues = new EmptyFixedDocValues(slice.length, promoted, promotedType.getValueSize());
break;
default:
slice.docValues = new EmptyDocValues(slice.length, promoted);
}
}
}
return new MultiDocValues(slices.toArray(new DocValuesSlice[slices.size()]), starts, promotedType);
}
}
@Override
public Source load() throws IOException {
return new MultiSource(slices, starts, false, type);
}
public static class EmptyDocValues extends DocValues {
final int maxDoc;
final Source emptySource;
public EmptyDocValues(int maxDoc, Type type) {
this.maxDoc = maxDoc;
this.emptySource = new EmptySource(type);
}
@Override
public Source load() throws IOException {
return emptySource;
}
@Override
public Type getType() {
return emptySource.getType();
}
@Override
public Source getDirectSource() throws IOException {
return emptySource;
}
}
public static class EmptyFixedDocValues extends DocValues {
final int maxDoc;
final Source emptyFixedSource;
final int valueSize;
public EmptyFixedDocValues(int maxDoc, Type type, int valueSize) {
this.maxDoc = maxDoc;
this.emptyFixedSource = new EmptyFixedSource(type, valueSize);
this.valueSize = valueSize;
}
@Override
public Source load() throws IOException {
return emptyFixedSource;
}
@Override
public Type getType() {
return emptyFixedSource.getType();
}
@Override
public int getValueSize() {
return valueSize;
}
@Override
public Source getDirectSource() throws IOException {
return emptyFixedSource;
}
}
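// a Source over the composite reader's global doc id space; each call is
// forwarded to the per-segment Source that owns the requested doc id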
private static class MultiSource extends Source {
private int numDocs = 0;
private int start = 0;
private Source current;
private final int[] starts;
private final DocValuesSlice[] slices;
private boolean direct;
private Object cachedArray; // cached array if supported
public MultiSource(DocValuesSlice[] slices, int[] starts, boolean direct, Type type) {
super(type);
this.slices = slices;
this.starts = starts;
assert slices.length != 0;
this.direct = direct;
}
@Override
public long getInt(int docID) {
final int doc = ensureSource(docID);
return current.getInt(doc);
}
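// resolves a global docID to the slice that owns it, loading and caching
// that slice's Source, and returns the docID relative to the slice start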
private int ensureSource(int docID) {
if (docID >= start && docID < start+numDocs) {
return docID - start;
} else {
final int idx = ReaderUtil.subIndex(docID, starts);
assert idx >= 0 && idx < slices.length : "idx was " + idx
+ " for doc id: " + docID + " starts: " + Arrays.toString(starts);
assert slices[idx] != null;
try {
if (direct) {
current = slices[idx].docValues.getDirectSource();
} else {
current = slices[idx].docValues.getSource();
}
} catch (IOException e) {
throw new RuntimeException("load failed", e); // TODO: how should we handle this?
}
start = slices[idx].start;
numDocs = slices[idx].length;
return docID - start;
}
}
@Override
public double getFloat(int docID) {
final int doc = ensureSource(docID);
return current.getFloat(doc);
}
@Override
public BytesRef getBytes(int docID, BytesRef bytesRef) {
final int doc = ensureSource(docID);
return current.getBytes(doc, bytesRef);
}
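// for sorted bytes types, merge the per-segment sorted sources into one
// global sorted view, remapping each segment's ords to global ords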
@Override
public SortedSource asSortedSource() {
try {
if (type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED) {
DocValues[] values = new DocValues[slices.length];
Comparator<BytesRef> comp = null;
for (int i = 0; i < values.length; i++) {
values[i] = slices[i].docValues;
if (!(values[i] instanceof EmptyDocValues)) {
Comparator<BytesRef> comparator = values[i].getDirectSource()
.asSortedSource().getComparator();
assert comp == null || comp == comparator;
comp = comparator;
}
}
assert comp != null;
final int globalNumDocs = globalNumDocs();
final MergeContext ctx = SortedBytesMergeUtils.init(type, values,
comp, globalNumDocs);
List<SortedSourceSlice> sortedSlices = SortedBytesMergeUtils.buildSlices(
docBases(), new MergeState.DocMap[values.length], values, ctx);
RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer(type);
final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer, sortedSlices);
final int[] docToOrd = new int[globalNumDocs];
for (SortedSourceSlice slice : sortedSlices) {
slice.toAbsolutOrds(docToOrd);
}
return new MultiSortedSource(type, comp, consumer.pagedBytes,
ctx.sizePerValues, maxOrd, docToOrd, consumer.ordToOffset);
}
} catch (IOException e) {
throw new RuntimeException("load failed", e);
}
return super.asSortedSource();
}
private int globalNumDocs() {
int docs = 0;
for (int i = 0; i < slices.length; i++) {
docs += slices[i].length;
}
return docs;
}
private int[] docBases() {
int[] docBases = new int[slices.length];
for (int i = 0; i < slices.length; i++) {
docBases[i] = slices[i].start;
}
return docBases;
}
@Override
public boolean hasArray() {
boolean oneRealSource = false;
for (DocValuesSlice slice : slices) {
try {
Source source = slice.docValues.getSource();
if (source instanceof EmptySource) {
// an empty source marks a gap in the array; skip it
continue;
}
oneRealSource = true;
if (!source.hasArray()) {
return false;
}
} catch (IOException e) {
throw new RuntimeException("load failed", e);
}
}
return oneRealSource;
}
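// stitches the per-slice arrays into a single array over the global doc id
// space; gaps from empty sources keep the component type's default value,
// and the result is cached since building it copies every slice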
@Override
public Object getArray() {
if (!hasArray()) {
return null;
}
try {
Class<?> componentType = null;
Object[] arrays = new Object[slices.length];
int numDocs = 0;
for (int i = 0; i < slices.length; i++) {
DocValuesSlice slice = slices[i];
Source source = slice.docValues.getSource();
Object array = null;
if (!(source instanceof EmptySource)) {
// EmptySource is skipped - marks a gap in the array
array = source.getArray();
}
numDocs += slice.length;
if (array != null) {
if (componentType == null) {
componentType = array.getClass().getComponentType();
}
assert componentType == array.getClass().getComponentType();
}
arrays[i] = array;
}
assert componentType != null;
synchronized (this) {
if (cachedArray != null) {
return cachedArray;
}
final Object globalArray = Array.newInstance(componentType, numDocs);
for (int i = 0; i < slices.length; i++) {
DocValuesSlice slice = slices[i];
if (arrays[i] != null) {
assert slice.length == Array.getLength(arrays[i]);
System.arraycopy(arrays[i], 0, globalArray, slice.start,
slice.length);
}
}
return cachedArray = globalArray;
}
} catch (IOException e) {
throw new RuntimeException("load failed", e);
}
}
}
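// records each merged value into PagedBytes; for BYTES_VAR_SORTED it also
// tracks per-ord end offsets so value boundaries can be recovered later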
private static final class RecordingBytesRefConsumer implements SortedBytesMergeUtils.BytesRefConsumer {
private static final int PAGED_BYTES_BITS = 15;
final PagedBytes pagedBytes = new PagedBytes(PAGED_BYTES_BITS);
long[] ordToOffset;
public RecordingBytesRefConsumer(Type type) {
ordToOffset = type == Type.BYTES_VAR_SORTED ? new long[2] : null;
}
@Override
public void consume(BytesRef ref, int ord, long offset) {
pagedBytes.copy(ref);
if (ordToOffset != null) {
if (ord+1 >= ordToOffset.length) {
ordToOffset = ArrayUtil.grow(ordToOffset, ord + 2);
}
ordToOffset[ord+1] = offset;
}
}
}
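// the merged sorted view: docToOrd maps global doc ids to global ords, and
// values are read back from the frozen PagedBytes written during the merge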
private static final class MultiSortedSource extends SortedSource {
private final PagedBytes.Reader data;
private final int[] docToOrd;
private final long[] ordToOffset;
private int size;
private int valueCount;
public MultiSortedSource(Type type, Comparator<BytesRef> comparator, PagedBytes pagedBytes, int size, int numValues, int[] docToOrd, long[] ordToOffset) {
super(type, comparator);
data = pagedBytes.freeze(true);
this.size = size;
this.valueCount = numValues;
this.docToOrd = docToOrd;
this.ordToOffset = ordToOffset;
}
@Override
public int ord(int docID) {
return docToOrd[docID];
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
int size = this.size;
long offset = (long) ord * size; // use long math to avoid int overflow
if (ordToOffset != null) {
offset = ordToOffset[ord];
size = (int) (ordToOffset[1 + ord] - offset);
}
assert size >= 0;
return data.fillSlice(bytesRef, offset, size);
}
@Override
public Reader getDocToOrd() {
return null;
}
@Override
public int getValueCount() {
return valueCount;
}
}
// TODO: this is dup of DocValues.getDefaultSource()?
private static class EmptySource extends SortedSource {
public EmptySource(Type type) {
super(type, BytesRef.getUTF8SortedAsUnicodeComparator());
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
ref.length = 0;
return ref;
}
@Override
public double getFloat(int docID) {
return 0d;
}
@Override
public long getInt(int docID) {
return 0;
}
@Override
public SortedSource asSortedSource() {
if (getType() == Type.BYTES_FIXED_SORTED || getType() == Type.BYTES_VAR_SORTED) {
// this empty source is itself sorted (a single empty value): expose it directly
return this;
}
return super.asSortedSource();
}
@Override
public int ord(int docID) {
return 0;
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
bytesRef.length = 0;
bytesRef.offset = 0;
return bytesRef;
}
@Override
public Reader getDocToOrd() {
return null;
}
@Override
public int getValueCount() {
return 1;
}
}
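// like EmptySource, but yields zero-filled values of the fixed value size
// required by the fixed-width bytes types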
private static class EmptyFixedSource extends EmptySource {
private final int valueSize;
private final byte[] valueArray;
public EmptyFixedSource(Type type, int valueSize) {
super(type);
this.valueSize = valueSize;
valueArray = new byte[valueSize];
}
@Override
public BytesRef getBytes(int docID, BytesRef ref) {
ref.offset = 0; // reset offset so the zero-fill below stays in bounds
ref.grow(valueSize);
ref.length = valueSize;
Arrays.fill(ref.bytes, 0, valueSize, (byte) 0);
return ref;
}
@Override
public double getFloat(int docID) {
return 0d;
}
@Override
public long getInt(int docID) {
return 0;
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
bytesRef.bytes = valueArray;
bytesRef.length = valueSize;
bytesRef.offset = 0;
return bytesRef;
}
}
@Override
public Type getType() {
return type;
}
@Override
public int getValueSize() {
return valueSize;
}
@Override
public Source getDirectSource() throws IOException {
return new MultiSource(slices, starts, true, type);
}
}