package org.apache.lucene.codecs.lucene40.values;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesReaderBase;
import org.apache.lucene.codecs.lucene40.values.Bytes.BytesSortedSourceBase;
import org.apache.lucene.codecs.lucene40.values.Bytes.DerefBytesWriterBase;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedBytesMergeUtils;
import org.apache.lucene.index.DocValues.SortedSource;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.SortedBytesMergeUtils.IndexOutputBytesRefConsumer;
import org.apache.lucene.index.SortedBytesMergeUtils.MergeContext;
import org.apache.lucene.index.SortedBytesMergeUtils.SortedSourceSlice;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
// Stores variable-length byte[] by deref, ie when two docs
// have the same value, they store only 1 byte[] and both
// docs reference that single source
/**
* @lucene.experimental
*/
final class VarSortedBytesImpl {
static final String CODEC_NAME_IDX = "VarDerefBytesIdx";
static final String CODEC_NAME_DAT = "VarDerefBytesDat";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
final static class Writer extends DerefBytesWriterBase {
private final Comparator<BytesRef> comp;
public Writer(Directory dir, String id, Comparator<BytesRef> comp,
Counter bytesUsed, IOContext context, float acceptableOverheadRatio) {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_CURRENT, bytesUsed, context, acceptableOverheadRatio, Type.BYTES_VAR_SORTED);
this.comp = comp;
size = 0;
}
@Override
public void merge(MergeState mergeState, DocValues[] docValues)
throws IOException {
boolean success = false;
try {
MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState.segmentInfo.getDocCount());
final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
IndexOutput datOut = getOrCreateDataOut();
ctx.offsets = new long[1];
final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
final long[] offsets = ctx.offsets;
maxBytes = offsets[maxOrd-1];
final IndexOutput idxOut = getOrCreateIndexOut();
idxOut.writeLong(maxBytes);
final PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, maxOrd+1,
PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
offsetWriter.add(0);
for (int i = 0; i < maxOrd; i++) {
offsetWriter.add(offsets[i]);
}
offsetWriter.finish();
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
PackedInts.bitsRequired(maxOrd-1), PackedInts.DEFAULT);
for (SortedSourceSlice slice : slices) {
slice.writeOrds(ordsWriter);
}
ordsWriter.finish();
success = true;
} finally {
releaseResources();
if (success) {
IOUtils.close(getIndexOut(), getDataOut());
} else {
IOUtils.closeWhileHandlingException(getIndexOut(), getDataOut());
}
}
}
@Override
protected void checkSize(BytesRef bytes) {
// allow var bytes sizes
}
// Important that we get docCount, in case there were
// some last docs that we didn't see
@Override
public void finishInternal(int docCount) throws IOException {
fillDefault(docCount);
final int count = hash.size();
final IndexOutput datOut = getOrCreateDataOut();
final IndexOutput idxOut = getOrCreateIndexOut();
long offset = 0;
final int[] index = new int[count];
final int[] sortedEntries = hash.sort(comp);
// total bytes of data
idxOut.writeLong(maxBytes);
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
// first dump bytes data, recording index & write offset as
// we go
final BytesRef spare = new BytesRef();
for (int i = 0; i < count; i++) {
final int e = sortedEntries[i];
offsetWriter.add(offset);
index[e] = i;
final BytesRef bytes = hash.get(e, spare);
// TODO: we could prefix code...
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
offset += bytes.length;
}
// write sentinel
offsetWriter.add(offset);
offsetWriter.finish();
// write index
writeIndex(idxOut, docCount, count, index, docToEntry);
}
}
public static class Reader extends BytesReaderBase {
private final Comparator<BytesRef> comparator;
Reader(Directory dir, String id, int maxDoc,
IOContext context, Type type, Comparator<BytesRef> comparator)
throws IOException {
super(dir, id, CODEC_NAME_IDX, CODEC_NAME_DAT, VERSION_START, true, context, type);
this.comparator = comparator;
}
@Override
public org.apache.lucene.index.DocValues.Source load()
throws IOException {
return new VarSortedSource(cloneData(), cloneIndex(), comparator);
}
@Override
public Source getDirectSource() throws IOException {
return new DirectSortedSource(cloneData(), cloneIndex(), comparator, getType());
}
}
private static final class VarSortedSource extends BytesSortedSourceBase {
private final int valueCount;
VarSortedSource(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comp) throws IOException {
super(datIn, idxIn, comp, idxIn.readLong(), Type.BYTES_VAR_SORTED, true);
valueCount = ordToOffsetIndex.size()-1; // the last value here is just a dummy value to get the length of the last value
closeIndexInput();
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
final long offset = ordToOffsetIndex.get(ord);
final long nextOffset = ordToOffsetIndex.get(1 + ord);
data.fillSlice(bytesRef, offset, (int) (nextOffset - offset));
return bytesRef;
}
@Override
public int getValueCount() {
return valueCount;
}
}
private static final class DirectSortedSource extends SortedSource {
private final PackedInts.Reader docToOrdIndex;
private final PackedInts.Reader ordToOffsetIndex;
private final IndexInput datIn;
private final long basePointer;
private final int valueCount;
DirectSortedSource(IndexInput datIn, IndexInput idxIn,
Comparator<BytesRef> comparator, Type type) throws IOException {
super(type, comparator);
idxIn.readLong();
ordToOffsetIndex = PackedInts.getDirectReader(idxIn);
valueCount = ordToOffsetIndex.size()-1; // the last value here is just a dummy value to get the length of the last value
// advance this iterator to the end and clone the stream once it points to the docToOrdIndex header
ordToOffsetIndex.get(valueCount);
docToOrdIndex = PackedInts.getDirectReader(idxIn.clone()); // read the ords in to prevent too many random disk seeks
basePointer = datIn.getFilePointer();
this.datIn = datIn;
}
@Override
public int ord(int docID) {
return (int) docToOrdIndex.get(docID);
}
@Override
public boolean hasPackedDocToOrd() {
return true;
}
@Override
public PackedInts.Reader getDocToOrd() {
return docToOrdIndex;
}
@Override
public BytesRef getByOrd(int ord, BytesRef bytesRef) {
try {
final long offset = ordToOffsetIndex.get(ord);
// 1+ord is safe because we write a sentinel at the end
final long nextOffset = ordToOffsetIndex.get(1+ord);
datIn.seek(basePointer + offset);
final int length = (int) (nextOffset - offset);
bytesRef.offset = 0;
bytesRef.grow(length);
datIn.readBytes(bytesRef.bytes, 0, length);
bytesRef.length = length;
return bytesRef;
} catch (IOException ex) {
throw new IllegalStateException("failed", ex);
}
}
@Override
public int getValueCount() {
return valueCount;
}
}
}