/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.index.codecs.siren10;
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.sindice.siren.index.codecs.block.BlockDecompressor;
import org.sindice.siren.index.codecs.block.BlockIndexInput;
import org.sindice.siren.util.ArrayUtils;
/**
 * Implementation of the {@link BlockIndexInput} for the .doc file of the SIREn
 * postings format.
 *
 * <p>
 *
 * A block of the .doc file is laid out as a header followed by two
 * independently compressed sections: the delta-encoded document identifiers
 * and the node frequencies. The header carries the block size, the byte
 * length of each compressed section, the first document identifier and the
 * delta to the last one, and the skip pointers into the associated .nod and
 * .pos files (see {@code DocsFreqBlockReader#readHeader()}).
 */
public class DocsFreqBlockIndexInput extends BlockIndexInput {

  /** Decompressor for the document-identifier section of a block. */
  protected BlockDecompressor docDecompressor;

  /** Decompressor for the node-frequency section of a block. */
  protected BlockDecompressor freqDecompressor;

  /**
   * Create a new input over the .doc file.
   *
   * @param in the underlying index input for the .doc file
   * @param docDecompressor the decompressor used for document identifiers
   * @param freqDecompressor the decompressor used for node frequencies
   * @throws IOException if the superclass fails to initialise the input
   */
  public DocsFreqBlockIndexInput(final IndexInput in,
  final BlockDecompressor docDecompressor,
  final BlockDecompressor freqDecompressor)
  throws IOException {
    super(in);
    this.docDecompressor = docDecompressor;
    this.freqDecompressor = freqDecompressor;
  }

  /**
   * Return a new {@link DocsFreqBlockReader} over a clone of the underlying
   * input, so that several readers can traverse the file independently.
   */
  @Override
  public DocsFreqBlockReader getBlockReader() {
    // Clone index input. A cloned index input does not need to be closed
    // by the block reader, as the underlying stream will be closed by the
    // input it was cloned from
    return new DocsFreqBlockReader(in.clone());
  }

  /**
   * Implementation of the {@link BlockReader} for the .doc file.
   *
   * <p>
   *
   * Read and decode blocks containing the document identifiers and the node
   * frequencies. It also decodes the pointers of the associated
   * blocks from the .nod and .pos files and updates the specified
   * {@link Index}s.
   *
   * <p>
   *
   * Each compressed section of a block is decoded lazily: it is decompressed
   * only on the first call to {@link #nextDocument()} or
   * {@link #nextNodeFreq()} after the block header has been read.
   */
  public class DocsFreqBlockReader extends BlockReader {

    /** Number of document entries in the current block. */
    protected int blockSize;

    /** Decompressed document-id deltas; the first value of a block is 0. */
    IntsRef docBuffer = new IntsRef();

    /** Decompressed node frequencies; stored values are (freq - 1). */
    IntsRef nodFreqBuffer = new IntsRef();

    // Lazy-decoding flags: true while the corresponding compressed section
    // of the current block has not yet been decompressed.
    boolean docsReadPending = true;
    boolean nodFreqsReadPending = true;

    /** Byte length of the compressed doc-id section of the current block. */
    int docCompressedBufferLength;

    /** Byte length of the compressed node-frequency section. */
    int nodFreqCompressedBufferLength;

    // Scratch buffers holding the raw compressed bytes before decompression.
    BytesRef docCompressedBuffer = new BytesRef();
    BytesRef nodFreqCompressedBuffer = new BytesRef();

    /** First and last document identifiers contained in the current block. */
    int firstDocId, lastDocId;

    /** File pointer of the start of the compressed data; -1 when no block is loaded. */
    long dataBlockOffset = -1;

    // Skip indexes into the .nod and .pos files, advanced in readHeader().
    NodBlockIndexInput.Index nodeBlockIndex;
    PosBlockIndexInput.Index posBlockIndex;

    private DocsFreqBlockReader(final IndexInput in) {
      super(in);
      // ensure that the output buffers have the minimum size required
      docBuffer = ArrayUtils.grow(docBuffer, docDecompressor.getWindowSize());
      nodFreqBuffer = ArrayUtils.grow(nodFreqBuffer, freqDecompressor.getWindowSize());
    }

    /**
     * Set the {@link Index} of the {@link NodBlockIndexInput}. The
     * {@link Index} is used to update the current file pointer of the
     * {@link NodBlockIndexInput} when decoding a block.
     */
    public void setNodeBlockIndex(final NodBlockIndexInput.Index index) throws IOException {
      this.nodeBlockIndex = index;
    }

    /**
     * Set the {@link Index} of the {@link PosBlockIndexInput}. The
     * {@link Index} is used to update the current file pointer of the
     * {@link PosBlockIndexInput} when decoding a block.
     */
    public void setPosBlockIndex(final PosBlockIndexInput.Index index) throws IOException {
      this.posBlockIndex = index;
    }

    /**
     * Decode the block header. The read order below must mirror the writer's
     * encoding exactly: block size, compressed doc-id length, compressed
     * freq length, first doc id, delta to last doc id, then the .nod and
     * .pos skip pointers.
     *
     * <p>NOTE(review): presumably invoked by the {@link BlockIndexInput}
     * framework before any data access — confirm ordering in the superclass.
     */
    @Override
    protected void readHeader() throws IOException {
      // logger.debug("Read DocFreq header: {}", this.hashCode());
      // logger.debug("DocFreq header start at {}", in.getFilePointer());
      // read blockSize and check buffer size
      blockSize = in.readVInt();
      // ensure that the output buffers has the minimum size required
      final int docBufferLength = this.getMinimumBufferSize(blockSize, docDecompressor.getWindowSize());
      docBuffer = ArrayUtils.grow(docBuffer, docBufferLength);
      final int nodFreqBufferLength = this.getMinimumBufferSize(blockSize, freqDecompressor.getWindowSize());
      nodFreqBuffer = ArrayUtils.grow(nodFreqBuffer, nodFreqBufferLength);
      // read size of each compressed data block and check buffer size
      docCompressedBufferLength = in.readVInt();
      docCompressedBuffer = ArrayUtils.grow(docCompressedBuffer, docCompressedBufferLength);
      docsReadPending = true;
      nodFreqCompressedBufferLength = in.readVInt();
      nodFreqCompressedBuffer = ArrayUtils.grow(nodFreqCompressedBuffer, nodFreqCompressedBufferLength);
      nodFreqsReadPending = true;
      // read first and last doc id (last is stored as a delta from first)
      firstDocId = in.readVInt();
      lastDocId = firstDocId + in.readVInt();
      // read node and pos skip data
      nodeBlockIndex.read(in, true);
      posBlockIndex.read(in, true);
      // record file pointer as data block offset for skipping
      dataBlockOffset = in.getFilePointer();
    }

    /**
     * Advance the file pointer past both compressed sections of the current
     * block without decoding them.
     */
    @Override
    protected void skipData() {
      int size = docCompressedBufferLength;
      size += nodFreqCompressedBufferLength;
      this.seek(dataBlockOffset + size);
      // logger.debug("Skip DocFreq data: {}", dataBlockOffset + size);
    }

    /**
     * Read and decompress the doc-id section into {@link #docBuffer}.
     * Called lazily on the first {@link #nextDocument()} of a block.
     */
    private void decodeDocs() throws IOException {
      // logger.debug("Decode Doc block: {}", this.hashCode());
      in.seek(dataBlockOffset); // skip to doc data block
      in.readBytes(docCompressedBuffer.bytes, 0, docCompressedBufferLength);
      docCompressedBuffer.offset = 0;
      docCompressedBuffer.length = docCompressedBufferLength;
      docDecompressor.decompress(docCompressedBuffer, docBuffer);
      // set length limit based on block size, as certain decompressor with
      // large window size can set it larger than the blockSize, e.g., AFor
      docBuffer.length = blockSize;
      docsReadPending = false;
    }

    /**
     * Read and decompress the node-frequency section into
     * {@link #nodFreqBuffer}. The section starts immediately after the
     * compressed doc-id section. Called lazily on the first
     * {@link #nextNodeFreq()} of a block.
     */
    private void decodeNodeFreqs() throws IOException {
      // logger.debug("Decode Node Freqs block: {}", this.hashCode());
      in.seek(dataBlockOffset + docCompressedBufferLength); // skip to node freq data block
      in.readBytes(nodFreqCompressedBuffer.bytes, 0, nodFreqCompressedBufferLength);
      nodFreqCompressedBuffer.offset = 0;
      nodFreqCompressedBuffer.length = nodFreqCompressedBufferLength;
      freqDecompressor.decompress(nodFreqCompressedBuffer, nodFreqBuffer);
      // set length limit based on block size, as certain decompressor with
      // large window size can set it larger than the blockSize, e.g., AFor
      nodFreqBuffer.length = blockSize;
      nodFreqsReadPending = false;
    }

    /**
     * Return the first document identifier of the current block.
     */
    public int getFirstDocId() {
      return firstDocId;
    }

    /**
     * Return the last document identifier of the current block.
     */
    public int getLastDocId() {
      return lastDocId;
    }

    // Last document identifier returned by nextDocument(). Not reset in
    // initBlock(): it is re-seeded from firstDocId on the first
    // nextDocument() call of each new block.
    private int currentDocId;

    /**
     * Decode and return the next document identifier of the current block.
     * Identifiers are stored as (delta - 1); the first value of a block is
     * always 0 and is resolved against {@link #firstDocId}.
     */
    public int nextDocument() throws IOException {
      if (!docsReadPending) {
        // decode delta and increment by one
        currentDocId += docBuffer.ints[docBuffer.offset++] + 1;
        return currentDocId;
      }
      // if new block, first value is always equal to 0, no delta decoding
      this.decodeDocs();
      // set current doc with first doc
      currentDocId = firstDocId;
      // increment doc buffer offset to skip first value (== 0)
      docBuffer.offset++;
      return currentDocId;
    }

    /**
     * Decode and return the next node frequency of the current block.
     * Frequencies are stored as (freq - 1), hence the increment.
     */
    public int nextNodeFreq() throws IOException {
      if (nodFreqsReadPending) {
        this.decodeNodeFreqs();
      }
      // Increment freq
      return nodFreqBuffer.ints[nodFreqBuffer.offset++] + 1;
    }

    /**
     * Return true when every document identifier of the current block has
     * been consumed.
     */
    @Override
    public boolean isExhausted() {
      return docBuffer.offset >= docBuffer.length;
    }

    /**
     * Reset the per-block state before a new block header is read.
     */
    @Override
    protected void initBlock() {
      docBuffer.offset = docBuffer.length = 0;
      nodFreqBuffer.offset = nodFreqBuffer.length = 0;
      docsReadPending = true;
      nodFreqsReadPending = true;
      docCompressedBufferLength = 0;
      nodFreqCompressedBufferLength = 0;
      dataBlockOffset = -1;
    }
  }
}