package org.apache.blur.lucene.codec; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import org.apache.blur.trace.Trace; import org.apache.blur.trace.Tracer; import org.apache.blur.utils.ThreadValue; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.packed.BlockPackedReader; import org.apache.lucene.util.packed.MonotonicBlockPackedReader; class DiskDocValuesProducer extends DocValuesProducer { private final Map<Integer, NumericEntry> numerics; private final Map<Integer, BinaryEntry> binaries; private final Map<Integer, NumericEntry> ords; private final Map<Integer, NumericEntry> ordIndexes; private final Map<Integer, BinaryDocValues> _binaryDocValuesCache; private final Map<Integer, NumericDocValues> _numericDocValuesCache; private final Map<Integer, SortedDocValues> _sortedDocValuesCache; private final Map<Integer, SortedSetDocValues> _sortedSetDocValuesCache; private final IndexInput data; private final boolean _cache = true; DiskDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); // read in the entries from the metadata file. IndexInput in = state.directory.openInput(metaName, state.context); boolean success = false; try { CodecUtil.checkHeader(in, metaCodec, DiskDocValuesFormat.VERSION_START, DiskDocValuesFormat.VERSION_START); numerics = new ConcurrentHashMap<Integer, NumericEntry>(); ords = new ConcurrentHashMap<Integer, NumericEntry>(); ordIndexes = new ConcurrentHashMap<Integer, NumericEntry>(); binaries = new ConcurrentHashMap<Integer, BinaryEntry>(); _binaryDocValuesCache = new ConcurrentHashMap<Integer, BinaryDocValues>(); _numericDocValuesCache = new ConcurrentHashMap<Integer, NumericDocValues>(); _sortedDocValuesCache = new ConcurrentHashMap<Integer, SortedDocValues>(); _sortedSetDocValuesCache = new ConcurrentHashMap<Integer, SortedSetDocValues>(); readFields(in, state.fieldInfos); success = true; } finally { if (success) { IOUtils.close(in); } else { IOUtils.closeWhileHandlingException(in); } } String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); data = state.directory.openInput(dataName, state.context); CodecUtil.checkHeader(data, dataCodec, DiskDocValuesFormat.VERSION_START, DiskDocValuesFormat.VERSION_START); } private void readFields(IndexInput meta, FieldInfos infos) throws IOException { int fieldNumber = meta.readVInt(); while (fieldNumber != -1) { byte type = meta.readByte(); if (type == DiskDocValuesFormat.NUMERIC) { numerics.put(fieldNumber, readNumericEntry(meta)); } else if (type == DiskDocValuesFormat.BINARY) { BinaryEntry b = readBinaryEntry(meta); binaries.put(fieldNumber, b); } else if (type == DiskDocValuesFormat.SORTED) { // sorted = binary + numeric if (meta.readVInt() != fieldNumber) { throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); } if (meta.readByte() != DiskDocValuesFormat.BINARY) { throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); } BinaryEntry b = readBinaryEntry(meta); binaries.put(fieldNumber, b); if (meta.readVInt() != fieldNumber) { throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); } if (meta.readByte() != DiskDocValuesFormat.NUMERIC) { throw new CorruptIndexException("sorted entry for field: " + fieldNumber + " is corrupt"); } NumericEntry n = readNumericEntry(meta); ords.put(fieldNumber, n); } else if (type == DiskDocValuesFormat.SORTED_SET) { // sortedset = binary + numeric + ordIndex if (meta.readVInt() != fieldNumber) { throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } if (meta.readByte() != DiskDocValuesFormat.BINARY) { throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } BinaryEntry b = readBinaryEntry(meta); binaries.put(fieldNumber, b); if (meta.readVInt() != fieldNumber) { throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } if (meta.readByte() != DiskDocValuesFormat.NUMERIC) { throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } NumericEntry n1 = readNumericEntry(meta); ords.put(fieldNumber, n1); if (meta.readVInt() != fieldNumber) { throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } if (meta.readByte() != DiskDocValuesFormat.NUMERIC) { throw new CorruptIndexException("sortedset entry for field: " + fieldNumber + " is corrupt"); } NumericEntry n2 = readNumericEntry(meta); ordIndexes.put(fieldNumber, n2); } else { throw new CorruptIndexException("invalid type: " + type + ", resource=" + meta); } fieldNumber = meta.readVInt(); } } static NumericEntry readNumericEntry(IndexInput meta) throws IOException { NumericEntry entry = new NumericEntry(); entry.packedIntsVersion = meta.readVInt(); entry.offset = meta.readLong(); entry.count = meta.readVLong(); entry.blockSize = meta.readVInt(); return entry; } static BinaryEntry readBinaryEntry(IndexInput meta) throws IOException { BinaryEntry entry = new BinaryEntry(); entry.minLength = meta.readVInt(); entry.maxLength = meta.readVInt(); entry.count = meta.readVLong(); entry.offset = meta.readLong(); if (entry.minLength != entry.maxLength) { entry.addressesOffset = meta.readLong(); entry.packedIntsVersion = meta.readVInt(); entry.blockSize = meta.readVInt(); } return entry; } @Override public NumericDocValues getNumeric(FieldInfo field) throws IOException { NumericDocValues numericDocValues = _numericDocValuesCache.get(field.number); if (numericDocValues != null) { return numericDocValues; } synchronized (_numericDocValuesCache) { numericDocValues = _numericDocValuesCache.get(field.number); if (numericDocValues != null) { return numericDocValues; } NumericEntry entry = numerics.get(field.number); numericDocValues = newNumeric(entry); if (_cache && numericDocValues != null) { _numericDocValuesCache.put(field.number, numericDocValues); } return numericDocValues; } } LongNumericDocValues newNumeric(NumericEntry entry) throws IOException { final IndexInput data = this.data.clone(); data.seek(entry.offset); final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); return new LongNumericDocValues() { @Override public long get(long id) { return reader.get(id); } }; } @Override public BinaryDocValues getBinary(FieldInfo field) throws IOException { BinaryDocValues binaryDocValues = _binaryDocValuesCache.get(field.number); if (binaryDocValues != null) { return binaryDocValues; } synchronized (_binaryDocValuesCache) { binaryDocValues = _binaryDocValuesCache.get(field.number); if (binaryDocValues != null) { return binaryDocValues; } binaryDocValues = newBinary(field); if (_cache && binaryDocValues != null) { _binaryDocValuesCache.put(field.number, binaryDocValues); } return binaryDocValues; } } private BinaryDocValues newBinary(FieldInfo field) throws IOException { BinaryEntry bytes = binaries.get(field.number); if (bytes.minLength == bytes.maxLength) { return getFixedBinary(field, bytes); } else { return getVariableBinary(field, bytes); } } private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) { final IndexInput data = this.data.clone(); return new LongBinaryDocValues() { private final ThreadValue<IndexInput> in = new ThreadValue<IndexInput>() { @Override protected IndexInput initialValue() { return data.clone(); } }; @Override public void get(long id, BytesRef result) { long address = bytes.offset + id * bytes.maxLength; try { IndexInput indexInput = in.get(); indexInput.seek(address); // NOTE: we could have one buffer, but various consumers (e.g. // FieldComparatorSource) // assume "they" own the bytes after calling this! final byte[] buffer = new byte[bytes.maxLength]; indexInput.readBytes(buffer, 0, buffer.length); result.bytes = buffer; result.offset = 0; result.length = buffer.length; } catch (IOException e) { throw new RuntimeException(e); } } }; } private BinaryDocValues getVariableBinary(FieldInfo field, final BinaryEntry bytes) throws IOException { final IndexInput data = this.data.clone(); Tracer trace = Trace.trace("getSorted - BlockPackedReader - create"); final MonotonicBlockPackedReader addresses; try { data.seek(bytes.addressesOffset); addresses = new MonotonicBlockPackedReader(data, bytes.packedIntsVersion, bytes.blockSize, bytes.count, true); } finally { trace.done(); } return new LongBinaryDocValues() { private final ThreadValue<IndexInput> _input = new ThreadValue<IndexInput>() { @Override protected IndexInput initialValue() { return data.clone(); } }; @Override public void get(long id, BytesRef result) { long startAddress = bytes.offset + (id == 0 ? 0 : addresses.get(id - 1)); long endAddress = bytes.offset + addresses.get(id); int length = (int) (endAddress - startAddress); try { IndexInput indexInput = _input.get(); indexInput.seek(startAddress); // NOTE: we could have one buffer, but various consumers (e.g. // FieldComparatorSource) // assume "they" own the bytes after calling this! final byte[] buffer = new byte[length]; indexInput.readBytes(buffer, 0, buffer.length); result.bytes = buffer; result.offset = 0; result.length = length; } catch (IOException e) { throw new RuntimeException(e); } } }; } @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { SortedDocValues sortedDocValues = _sortedDocValuesCache.get(field.number); if (sortedDocValues != null) { return sortedDocValues; } synchronized (_sortedDocValuesCache) { sortedDocValues = _sortedDocValuesCache.get(field.number); if (sortedDocValues != null) { return sortedDocValues; } sortedDocValues = newSortedDocValues(field); if (_cache && sortedDocValues != null) { _sortedDocValuesCache.put(field.number, sortedDocValues); } return sortedDocValues; } } private SortedDocValues newSortedDocValues(FieldInfo field) throws IOException { final int valueCount = (int) binaries.get(field.number).count; final BinaryDocValues binary = getBinary(field); Tracer trace = Trace.trace("getSorted - BlockPackedReader - create"); final BlockPackedReader ordinals; try { NumericEntry entry = ords.get(field.number); IndexInput data = this.data.clone(); data.seek(entry.offset); ordinals = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); } finally { trace.done(); } return new SortedDocValues() { @Override public int getOrd(int docID) { return (int) ordinals.get(docID); } @Override public void lookupOrd(int ord, BytesRef result) { binary.get(ord, result); } @Override public int getValueCount() { return valueCount; } }; } @Override public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { SortedSetDocValues sortedSetDocValues = _sortedSetDocValuesCache.get(field.number); if (sortedSetDocValues != null) { return sortedSetDocValues; } synchronized (_sortedSetDocValuesCache) { sortedSetDocValues = _sortedSetDocValuesCache.get(field.number); if (sortedSetDocValues != null) { return sortedSetDocValues; } sortedSetDocValues = newSortedSetDocValues(field); if (_cache && sortedSetDocValues != null) { _sortedSetDocValuesCache.put(field.number, sortedSetDocValues); } return sortedSetDocValues; } } private SortedSetDocValues newSortedSetDocValues(FieldInfo field) throws IOException { final long valueCount = binaries.get(field.number).count; // we keep the byte[]s and list of ords on disk, these could be large final LongBinaryDocValues binary = (LongBinaryDocValues) getBinary(field); final LongNumericDocValues ordinals = newNumeric(ords.get(field.number)); Tracer trace = Trace.trace("getSortedSet - MonotonicBlockPackedReader - create"); final MonotonicBlockPackedReader ordIndex; try { NumericEntry entry = ordIndexes.get(field.number); IndexInput data = this.data.clone(); data.seek(entry.offset); ordIndex = new MonotonicBlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true); } finally { trace.done(); } return new SortedSetDocValues() { long offset; long endOffset; @Override public long nextOrd() { if (offset == endOffset) { return NO_MORE_ORDS; } else { long ord = ordinals.get(offset); offset++; return ord; } } @Override public void setDocument(int docID) { offset = (docID == 0 ? 0 : ordIndex.get(docID - 1)); endOffset = ordIndex.get(docID); } @Override public void lookupOrd(long ord, BytesRef result) { binary.get(ord, result); } @Override public long getValueCount() { return valueCount; } }; } @Override public void close() throws IOException { data.close(); } static class NumericEntry { long offset; int packedIntsVersion; long count; int blockSize; } static class BinaryEntry { long offset; long count; int minLength; int maxLength; long addressesOffset; int packedIntsVersion; int blockSize; } // internally we compose complex dv (sorted/sortedset) from other ones static abstract class LongNumericDocValues extends NumericDocValues { @Override public final long get(int docID) { return get((long) docID); } abstract long get(long id); } static abstract class LongBinaryDocValues extends BinaryDocValues { @Override public final void get(int docID, BytesRef result) { get((long) docID, result); } abstract void get(long id, BytesRef Result); } }