/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import java.io.EOFException;
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.LongBitSet;
/** Reads points from disk in a fixed-with format, previously written with {@link OfflinePointWriter}.
*
* @lucene.internal */
public final class OfflinePointReader extends PointReader {
long countLeft;
final IndexInput in;
private final byte[] packedValue;
final boolean singleValuePerDoc;
final int bytesPerDoc;
private long ord;
private int docID;
// true if ords are written as long (8 bytes), else 4 bytes
private boolean longOrds;
private boolean checked;
// File name we are reading
final String name;
public OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length,
boolean longOrds, boolean singleValuePerDoc) throws IOException {
this.singleValuePerDoc = singleValuePerDoc;
int bytesPerDoc = packedBytesLength + Integer.BYTES;
if (singleValuePerDoc == false) {
if (longOrds) {
bytesPerDoc += Long.BYTES;
} else {
bytesPerDoc += Integer.BYTES;
}
}
this.bytesPerDoc = bytesPerDoc;
if ((start + length) * bytesPerDoc + CodecUtil.footerLength() > tempDir.fileLength(tempFileName)) {
throw new IllegalArgumentException("requested slice is beyond the length of this file: start=" + start + " length=" + length + " bytesPerDoc=" + bytesPerDoc + " fileLength=" + tempDir.fileLength(tempFileName) + " tempFileName=" + tempFileName);
}
// Best-effort checksumming:
if (start == 0 && length*bytesPerDoc == tempDir.fileLength(tempFileName) - CodecUtil.footerLength()) {
// If we are going to read the entire file, e.g. because BKDWriter is now
// partitioning it, we open with checksums:
in = tempDir.openChecksumInput(tempFileName, IOContext.READONCE);
} else {
// Since we are going to seek somewhere in the middle of a possibly huge
// file, and not read all bytes from there, don't use ChecksumIndexInput here.
// This is typically fine, because this same file will later be read fully,
// at another level of the BKDWriter recursion
in = tempDir.openInput(tempFileName, IOContext.READONCE);
}
name = tempFileName;
long seekFP = start * bytesPerDoc;
in.seek(seekFP);
countLeft = length;
packedValue = new byte[packedBytesLength];
this.longOrds = longOrds;
}
@Override
public boolean next() throws IOException {
if (countLeft >= 0) {
if (countLeft == 0) {
return false;
}
countLeft--;
}
try {
in.readBytes(packedValue, 0, packedValue.length);
} catch (EOFException eofe) {
assert countLeft == -1;
return false;
}
docID = in.readInt();
if (singleValuePerDoc == false) {
if (longOrds) {
ord = in.readLong();
} else {
ord = in.readInt();
}
} else {
ord = docID;
}
return true;
}
@Override
public byte[] packedValue() {
return packedValue;
}
@Override
public long ord() {
return ord;
}
@Override
public int docID() {
return docID;
}
@Override
public void close() throws IOException {
try {
if (countLeft == 0 && in instanceof ChecksumIndexInput && checked == false) {
//System.out.println("NOW CHECK: " + name);
checked = true;
CodecUtil.checkFooter((ChecksumIndexInput) in);
}
} finally {
in.close();
}
}
@Override
public void markOrds(long count, LongBitSet ordBitSet) throws IOException {
if (countLeft < count) {
throw new IllegalStateException("only " + countLeft + " points remain, but " + count + " were requested");
}
long fp = in.getFilePointer() + packedValue.length;
if (singleValuePerDoc == false) {
fp += Integer.BYTES;
}
for(long i=0;i<count;i++) {
in.seek(fp);
long ord;
if (longOrds) {
ord = in.readLong();
} else {
ord = in.readInt();
}
assert ordBitSet.get(ord) == false: "ord=" + ord + " i=" + i + " was seen twice from " + this;
ordBitSet.set(ord);
fp += bytesPerDoc;
}
}
@Override
public long split(long count, LongBitSet rightTree, PointWriter left, PointWriter right, boolean doClearBits) throws IOException {
if (left instanceof OfflinePointWriter == false ||
right instanceof OfflinePointWriter == false) {
return super.split(count, rightTree, left, right, doClearBits);
}
// We specialize the offline -> offline split since the default impl
// is somewhat wasteful otherwise (e.g. decoding docID when we don't
// need to)
int packedBytesLength = packedValue.length;
int bytesPerDoc = packedBytesLength + Integer.BYTES;
if (singleValuePerDoc == false) {
if (longOrds) {
bytesPerDoc += Long.BYTES;
} else {
bytesPerDoc += Integer.BYTES;
}
}
long rightCount = 0;
IndexOutput rightOut = ((OfflinePointWriter) right).out;
IndexOutput leftOut = ((OfflinePointWriter) left).out;
assert count <= countLeft: "count=" + count + " countLeft=" + countLeft;
countLeft -= count;
long countStart = count;
byte[] buffer = new byte[bytesPerDoc];
while (count > 0) {
in.readBytes(buffer, 0, buffer.length);
long ord;
if (longOrds) {
// A long ord, after the docID:
ord = readLong(buffer, packedBytesLength+Integer.BYTES);
} else if (singleValuePerDoc) {
// docID is the ord:
ord = readInt(buffer, packedBytesLength);
} else {
// An int ord, after the docID:
ord = readInt(buffer, packedBytesLength+Integer.BYTES);
}
if (rightTree.get(ord)) {
rightOut.writeBytes(buffer, 0, bytesPerDoc);
if (doClearBits) {
rightTree.clear(ord);
}
rightCount++;
} else {
leftOut.writeBytes(buffer, 0, bytesPerDoc);
}
count--;
}
((OfflinePointWriter) right).count = rightCount;
((OfflinePointWriter) left).count = countStart-rightCount;
return rightCount;
}
// Poached from ByteArrayDataInput:
private static long readLong(byte[] bytes, int pos) {
final int i1 = ((bytes[pos++] & 0xff) << 24) | ((bytes[pos++] & 0xff) << 16) |
((bytes[pos++] & 0xff) << 8) | (bytes[pos++] & 0xff);
final int i2 = ((bytes[pos++] & 0xff) << 24) | ((bytes[pos++] & 0xff) << 16) |
((bytes[pos++] & 0xff) << 8) | (bytes[pos++] & 0xff);
return (((long)i1) << 32) | (i2 & 0xFFFFFFFFL);
}
// Poached from ByteArrayDataInput:
private static int readInt(byte[] bytes, int pos) {
return ((bytes[pos++] & 0xFF) << 24) | ((bytes[pos++] & 0xFF) << 16)
| ((bytes[pos++] & 0xFF) << 8) | (bytes[pos++] & 0xFF);
}
}