package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts.Reader;

/**
 * A wrapper for a {@link CompositeReader} providing access to per-segment
 * {@link DocValues}.
 *
 * <p><b>NOTE</b>: for multi readers, you'll get better
 * performance by gathering the sub readers using
 * {@link IndexReader#getContext()} to get the
 * atomic leaves and then operate per-AtomicReader,
 * instead of using this class.
 *
 * @lucene.experimental
 * @lucene.internal
 */
class MultiDocValues extends DocValues {

  private static DocValuesPuller DEFAULT_PULLER = new DocValuesPuller();

  private static final DocValuesPuller NORMS_PULLER = new DocValuesPuller() {
    public DocValues pull(AtomicReader reader, String field) throws IOException {
      return reader.normValues(field);
    }

    public boolean stopLoadingOnNull(AtomicReader reader, String field) {
      // for norms we drop all norms if one leaf reader has no norms and the field is present
      FieldInfos fieldInfos = reader.getFieldInfos();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
      return fieldInfo != null && fieldInfo.omitsNorms();
    }
  };

  public static class DocValuesSlice {
    public final static DocValuesSlice[] EMPTY_ARRAY = new DocValuesSlice[0];
    final int start;
    final int length;
    DocValues docValues;

    public DocValuesSlice(DocValues docValues, int start, int length) {
      this.docValues = docValues;
      this.start = start;
      this.length = length;
    }
  }

  private static class DocValuesPuller {
    public DocValuesPuller() {}

    public DocValues pull(AtomicReader reader, String field) throws IOException {
      return reader.docValues(field);
    }

    public boolean stopLoadingOnNull(AtomicReader reader, String field) {
      return false;
    }
  }

  private DocValuesSlice[] slices;
  private int[] starts;
  private Type type;
  private int valueSize;

  private MultiDocValues(DocValuesSlice[] slices, int[] starts, TypePromoter promotedType) {
    this.starts = starts;
    this.slices = slices;
    this.type = promotedType.type();
    this.valueSize = promotedType.getValueSize();
  }

  /**
   * Returns a single {@link DocValues} instance for this field, merging
   * the per-segment values on the fly.
   *
   * <p>
   * <b>NOTE</b>: this is a slow way to access DocValues.
   * It's better to get the sub-readers and iterate through them
   * yourself.
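   *
   * <p>
   * A minimal sketch of the preferred per-leaf pattern (the field name
   * {@code "myField"} is a placeholder):
   * <pre>
   * for (AtomicReaderContext ctx : reader.leaves()) {
   *   DocValues dv = ctx.reader().docValues("myField");
   *   if (dv != null) {
   *     Source source = dv.getSource();
   *     // doc ids within this leaf are relative to ctx.docBase
   *   }
   * }
   * </pre>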
   */
  public static DocValues getDocValues(IndexReader r, final String field) throws IOException {
    return getDocValues(r, field, DEFAULT_PULLER);
  }

  /**
   * Returns a single {@link DocValues} instance for this norms field, merging
   * the per-segment values on the fly.
   *
   * <p>
   * <b>NOTE</b>: this is a slow way to access DocValues.
   * It's better to get the sub-readers and iterate through them
   * yourself.
   */
  public static DocValues getNormDocValues(IndexReader r, final String field) throws IOException {
    return getDocValues(r, field, NORMS_PULLER);
  }

  private static DocValues getDocValues(IndexReader reader, final String field,
      final DocValuesPuller puller) throws IOException {
    if (reader instanceof AtomicReader) {
      // already an atomic reader
      return puller.pull((AtomicReader) reader, field);
    }
    assert reader instanceof CompositeReader;
    final List<AtomicReaderContext> leaves = reader.leaves();
    switch (leaves.size()) {
      case 0:
        // no fields
        return null;
      case 1:
        // already an atomic reader / reader with one leaf
        return getDocValues(leaves.get(0).reader(), field, puller);
      default:
        final List<DocValuesSlice> slices = new ArrayList<DocValuesSlice>();

        TypePromoter promotedType = TypePromoter.getIdentityPromoter();

        // gather all docvalues fields, accumulating a promoted type across
        // potentially incompatible types
        for (final AtomicReaderContext ctx : leaves) {
          final AtomicReader r = ctx.reader();
          final DocValues d = puller.pull(r, field);
          if (d != null) {
            TypePromoter incoming = TypePromoter.create(d.getType(), d.getValueSize());
            promotedType = promotedType.promote(incoming);
          } else if (puller.stopLoadingOnNull(r, field)) {
            return null;
          }
          slices.add(new DocValuesSlice(d, ctx.docBase, r.maxDoc()));
        }

        // return null if no docvalues encountered anywhere
        if (promotedType == TypePromoter.getIdentityPromoter()) {
          return null;
        }

        // populate starts and fill gaps with empty docvalues
        int[] starts = new int[slices.size()];
        for (int i = 0; i < slices.size(); i++) {
          DocValuesSlice slice = slices.get(i);
          starts[i] = slice.start;
          if (slice.docValues == null) {
            Type promoted = promotedType.type();
            switch (promoted) {
              case BYTES_FIXED_DEREF:
              case BYTES_FIXED_STRAIGHT:
              case BYTES_FIXED_SORTED:
                assert promotedType.getValueSize() >= 0;
                slice.docValues = new EmptyFixedDocValues(slice.length, promoted,
                    promotedType.getValueSize());
                break;
              default:
                slice.docValues = new EmptyDocValues(slice.length, promoted);
            }
          }
        }

        return new MultiDocValues(slices.toArray(new DocValuesSlice[slices.size()]),
            starts, promotedType);
    }
  }

  @Override
  public Source load() throws IOException {
    return new MultiSource(slices, starts, false, type);
  }

  public static class EmptyDocValues extends DocValues {
    final int maxDoc;
    final Source emptySource;

    public EmptyDocValues(int maxDoc, Type type) {
      this.maxDoc = maxDoc;
      this.emptySource = new EmptySource(type);
    }

    @Override
    public Source load() throws IOException {
      return emptySource;
    }

    @Override
    public Type getType() {
      return emptySource.getType();
    }

    @Override
    public Source getDirectSource() throws IOException {
      return emptySource;
    }
  }

  public static class EmptyFixedDocValues extends DocValues {
    final int maxDoc;
    final Source emptyFixedSource;
    final int valueSize;

    public EmptyFixedDocValues(int maxDoc, Type type, int valueSize) {
      this.maxDoc = maxDoc;
      this.emptyFixedSource = new EmptyFixedSource(type, valueSize);
      this.valueSize = valueSize;
    }

    @Override
    public Source load() throws IOException {
      return emptyFixedSource;
    }

    @Override
    public Type getType() {
      return emptyFixedSource.getType();
    }

    @Override
    public int getValueSize() {
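      // gap-filling slices all report the value size promoted across the real slices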
      return valueSize;
    }

    @Override
    public Source getDirectSource() throws IOException {
      return emptyFixedSource;
    }
  }

  private static class MultiSource extends Source {
    private int numDocs = 0;
    private int start = 0;
    private Source current;
    private final int[] starts;
    private final DocValuesSlice[] slices;
    private boolean direct;
    private Object cachedArray; // cached array if supported

    public MultiSource(DocValuesSlice[] slices, int[] starts, boolean direct, Type type) {
      super(type);
      this.slices = slices;
      this.starts = starts;
      assert slices.length != 0;
      this.direct = direct;
    }

    public long getInt(int docID) {
      final int doc = ensureSource(docID);
      return current.getInt(doc);
    }

    private final int ensureSource(int docID) {
      if (docID >= start && docID < start + numDocs) {
        return docID - start;
      } else {
        final int idx = ReaderUtil.subIndex(docID, starts);
        assert idx >= 0 && idx < slices.length : "idx was " + idx
            + " for doc id: " + docID + " slices : " + Arrays.toString(starts);
        assert slices[idx] != null;
        try {
          if (direct) {
            current = slices[idx].docValues.getDirectSource();
          } else {
            current = slices[idx].docValues.getSource();
          }
        } catch (IOException e) {
          throw new RuntimeException("load failed", e); // TODO how should we handle this
        }

        start = slices[idx].start;
        numDocs = slices[idx].length;
        return docID - start;
      }
    }

    public double getFloat(int docID) {
      final int doc = ensureSource(docID);
      return current.getFloat(doc);
    }

    public BytesRef getBytes(int docID, BytesRef bytesRef) {
      final int doc = ensureSource(docID);
      return current.getBytes(doc, bytesRef);
    }

    @Override
    public SortedSource asSortedSource() {
      try {
        if (type == Type.BYTES_FIXED_SORTED || type == Type.BYTES_VAR_SORTED) {
          DocValues[] values = new DocValues[slices.length];
          Comparator<BytesRef> comp = null;
          for (int i = 0; i < values.length; i++) {
            values[i] = slices[i].docValues;
            if (!(values[i] instanceof EmptyDocValues)) {
              Comparator<BytesRef> comparator = values[i].getDirectSource()
                  .asSortedSource().getComparator();
              assert comp == null || comp == comparator;
              comp = comparator;
            }
          }
          assert comp != null;
          final int globalNumDocs = globalNumDocs();
          final MergeContext ctx = SortedBytesMergeUtils.init(type, values, comp, globalNumDocs);
          List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(
              docBases(), new MergeState.DocMap[values.length], values, ctx);
          RecordingBytesRefConsumer consumer = new RecordingBytesRefConsumer(type);
          final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, consumer, slices);
          final int[] docToOrd = new int[globalNumDocs];
          for (SortedSourceSlice slice : slices) {
            slice.toAbsolutOrds(docToOrd);
          }
          return new MultiSortedSource(type, comp, consumer.pagedBytes,
              ctx.sizePerValues, maxOrd, docToOrd, consumer.ordToOffset);
        }
      } catch (IOException e) {
        throw new RuntimeException("load failed", e);
      }
      return super.asSortedSource();
    }

    private int globalNumDocs() {
      int docs = 0;
      for (int i = 0; i < slices.length; i++) {
        docs += slices[i].length;
      }
      return docs;
    }

    private int[] docBases() {
      int[] docBases = new int[slices.length];
      for (int i = 0; i < slices.length; i++) {
        docBases[i] = slices[i].start;
      }
      return docBases;
    }

    public boolean hasArray() {
      boolean oneRealSource = false;
      for (DocValuesSlice slice : slices) {
        try {
          Source source = slice.docValues.getSource();
          if (source instanceof EmptySource) {
            // empty source marks a gap in the array; skip if we encounter one
            continue;
          }
          oneRealSource = true;
          if (!source.hasArray()) {
            return false;
          }
        } catch (IOException e) {
          throw new RuntimeException("load failed", e);
        }
      }
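      // true only if at least one real (non-empty) source was seen and every
      // real source exposes a backing array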
      return oneRealSource;
    }

    @Override
    public Object getArray() {
      if (!hasArray()) {
        return null;
      }
      try {
        Class<?> componentType = null;
        Object[] arrays = new Object[slices.length];
        int numDocs = 0;
        for (int i = 0; i < slices.length; i++) {
          DocValuesSlice slice = slices[i];
          Source source = slice.docValues.getSource();
          Object array = null;
          if (!(source instanceof EmptySource)) {
            // EmptySource is skipped - marks a gap in the array
            array = source.getArray();
          }
          numDocs += slice.length;
          if (array != null) {
            if (componentType == null) {
              componentType = array.getClass().getComponentType();
            }
            assert componentType == array.getClass().getComponentType();
          }
          arrays[i] = array;
        }
        assert componentType != null;

        synchronized (this) {
          if (cachedArray != null) {
            return cachedArray;
          }
          final Object globalArray = Array.newInstance(componentType, numDocs);

          for (int i = 0; i < slices.length; i++) {
            DocValuesSlice slice = slices[i];
            if (arrays[i] != null) {
              assert slice.length == Array.getLength(arrays[i]);
              System.arraycopy(arrays[i], 0, globalArray, slice.start, slice.length);
            }
          }
          return cachedArray = globalArray;
        }
      } catch (IOException e) {
        throw new RuntimeException("load failed", e);
      }
    }
  }

  private static final class RecordingBytesRefConsumer implements
      SortedBytesMergeUtils.BytesRefConsumer {
    private final static int PAGED_BYTES_BITS = 15;
    final PagedBytes pagedBytes = new PagedBytes(PAGED_BYTES_BITS);
    long[] ordToOffset;

    public RecordingBytesRefConsumer(Type type) {
      ordToOffset = type == Type.BYTES_VAR_SORTED ? new long[2] : null;
    }

    @Override
    public void consume(BytesRef ref, int ord, long offset) {
      pagedBytes.copy(ref);
      if (ordToOffset != null) {
        if (ord + 1 >= ordToOffset.length) {
          ordToOffset = ArrayUtil.grow(ordToOffset, ord + 2);
        }
        ordToOffset[ord + 1] = offset;
      }
    }
  }

  private static final class MultiSortedSource extends SortedSource {
    private final PagedBytes.Reader data;
    private final int[] docToOrd;
    private final long[] ordToOffset;
    private int size;
    private int valueCount;

    public MultiSortedSource(Type type, Comparator<BytesRef> comparator,
        PagedBytes pagedBytes, int size, int numValues, int[] docToOrd, long[] ordToOffset) {
      super(type, comparator);
      data = pagedBytes.freeze(true);
      this.size = size;
      this.valueCount = numValues;
      this.docToOrd = docToOrd;
      this.ordToOffset = ordToOffset;
    }

    @Override
    public int ord(int docID) {
      return docToOrd[docID];
    }

    @Override
    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
      int size = this.size;
      long offset = (ord * size);
      if (ordToOffset != null) {
        offset = ordToOffset[ord];
        size = (int) (ordToOffset[1 + ord] - offset);
      }
      assert size >= 0;
      return data.fillSlice(bytesRef, offset, size);
    }

    @Override
    public Reader getDocToOrd() {
      return null;
    }

    @Override
    public int getValueCount() {
      return valueCount;
    }
  }

  // TODO: this is dup of DocValues.getDefaultSource()?
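  /**
   * An all-default {@link Source} used to fill slices where a segment has no
   * values for the field: empty bytes, 0 for numerics, ord 0 for sorted access.
   */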
  private static class EmptySource extends SortedSource {

    public EmptySource(Type type) {
      super(type, BytesRef.getUTF8SortedAsUnicodeComparator());
    }

    @Override
    public BytesRef getBytes(int docID, BytesRef ref) {
      ref.length = 0;
      return ref;
    }

    @Override
    public double getFloat(int docID) {
      return 0d;
    }

    @Override
    public long getInt(int docID) {
      return 0;
    }

    @Override
    public SortedSource asSortedSource() {
      // nothing special to do for an empty source
      return super.asSortedSource();
    }

    @Override
    public int ord(int docID) {
      return 0;
    }

    @Override
    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
      bytesRef.length = 0;
      bytesRef.offset = 0;
      return bytesRef;
    }

    @Override
    public Reader getDocToOrd() {
      return null;
    }

    @Override
    public int getValueCount() {
      return 1;
    }
  }

  private static class EmptyFixedSource extends EmptySource {
    private final int valueSize;
    private final byte[] valueArray;

    public EmptyFixedSource(Type type, int valueSize) {
      super(type);
      this.valueSize = valueSize;
      valueArray = new byte[valueSize];
    }

    @Override
    public BytesRef getBytes(int docID, BytesRef ref) {
      ref.grow(valueSize);
      ref.length = valueSize;
      Arrays.fill(ref.bytes, ref.offset, ref.offset + valueSize, (byte) 0);
      return ref;
    }

    @Override
    public double getFloat(int docID) {
      return 0d;
    }

    @Override
    public long getInt(int docID) {
      return 0;
    }

    @Override
    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
      bytesRef.bytes = valueArray;
      bytesRef.length = valueSize;
      bytesRef.offset = 0;
      return bytesRef;
    }
  }

  @Override
  public Type getType() {
    return type;
  }

  @Override
  public int getValueSize() {
    return valueSize;
  }

  @Override
  public Source getDirectSource() throws IOException {
    return new MultiSource(slices, starts, true, type);
  }
}