/**
 * Copyright 2014 National University of Ireland, Galway.
 *
 * This file is part of the SIREn project. Project and contact information:
 *
 *  https://github.com/rdelbru/SIREn
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sindice.siren.index.codecs.siren10;

import java.io.IOException;

import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.sindice.siren.index.codecs.block.BlockCompressor;
import org.sindice.siren.index.codecs.block.BlockIndexOutput;
import org.sindice.siren.index.codecs.siren10.NodBlockIndexOutput.NodBlockWriter;
import org.sindice.siren.index.codecs.siren10.PosBlockIndexOutput.PosBlockWriter;

/**
 * {@link BlockIndexOutput} implementation backing the .doc file of the SIREn
 * postings format.
 *
 * <p>
 *
 * The instance holds the maximum number of entries per block and the two
 * {@link BlockCompressor}s (one for document identifiers, one for node
 * frequencies) shared by every {@link DocsFreqBlockWriter} it creates.
 */
public class DocsFreqBlockIndexOutput extends BlockIndexOutput {

  private final int maxBlockSize;
  private final BlockCompressor docCompressor;
  private final BlockCompressor freqCompressor;

  /**
   * Create an output for the .doc file.
   *
   * @param out the underlying output, handed over to the super class
   * @param maxBlockSize the number of entries after which a block is
   *        reported as full
   * @param docCompressor the compressor applied to the document identifier
   *        deltas
   * @param freqCompressor the compressor applied to the node frequencies
   */
  public DocsFreqBlockIndexOutput(final IndexOutput out,
                                  final int maxBlockSize,
                                  final BlockCompressor docCompressor,
                                  final BlockCompressor freqCompressor)
  throws IOException {
    super(out);
    this.maxBlockSize = maxBlockSize;
    this.docCompressor = docCompressor;
    this.freqCompressor = freqCompressor;
  }

  /**
   * Create a fresh {@link DocsFreqBlockWriter} bound to this output.
   */
  @Override
  public DocsFreqBlockWriter getBlockWriter() {
    return new DocsFreqBlockWriter();
  }

  /**
   * {@link BlockWriter} implementation for the .doc file.
   *
   * <p>
   *
   * Buffers document identifiers (delta-encoded) and node frequencies, then
   * compresses and writes them as one block. The block header also receives
   * the pointers of the associated blocks from the .nod and .pos files.
   *
   * <p>
   *
   * Before a block is flushed, this writer must have been given the
   * {@link Index}s of the {@link NodBlockWriter} and {@link PosBlockWriter}
   * through {@link #setNodeBlockIndex(Index)} and
   * {@link #setPosBlockIndex(Index)}.
   */
  public class DocsFreqBlockWriter extends BlockWriter {

    /** Uncompressed document identifier deltas of the current block. */
    IntsRef docBuffer;
    /** Uncompressed node frequencies (minus one) of the current block. */
    IntsRef nodFreqBuffer;

    /** First and most recently written document identifiers. */
    int firstDocId, lastDocId = 0;

    NodBlockIndexOutput.Index nodeBlockIndex;
    PosBlockIndexOutput.Index posBlockIndex;

    /** Destination of the compressed document identifier deltas. */
    BytesRef docCompressedBuffer;
    /** Destination of the compressed node frequencies. */
    BytesRef nodFreqCompressedBuffer;

    /**
     * Allocate the input and output buffers of the writer.
     */
    public DocsFreqBlockWriter() {
      // input buffers: sized to the minimum required for the block size and
      // the window size of the respective compressor
      final int docInputSize =
        this.getMinimumBufferSize(maxBlockSize, docCompressor.getWindowSize());
      docBuffer = new IntsRef(docInputSize);

      final int freqInputSize =
        this.getMinimumBufferSize(maxBlockSize, freqCompressor.getWindowSize());
      nodFreqBuffer = new IntsRef(freqInputSize);

      // output buffers: sized to the maximum compressed size reported by the
      // respective compressor, to avoid overflow
      final int docOutputSize = docCompressor.maxCompressedSize(maxBlockSize);
      docCompressedBuffer = new BytesRef(docOutputSize);

      final int freqOutputSize = freqCompressor.maxCompressedSize(maxBlockSize);
      nodFreqCompressedBuffer = new BytesRef(freqOutputSize);
    }

    /**
     * Return the maximum number of entries per block.
     */
    public int getMaxBlockSize() {
      return maxBlockSize;
    }

    /**
     * Return the first document identifier recorded for the current block.
     */
    public int getFirstDocId() {
      return firstDocId;
    }

    /**
     * Set the {@link Index} of the {@link NodBlockIndexOutput}. The
     * {@link Index} is marked and written into the block header each time a
     * header is written.
     */
    public void setNodeBlockIndex(final NodBlockIndexOutput.Index index)
    throws IOException {
      this.nodeBlockIndex = index;
    }

    /**
     * Set the {@link Index} of the {@link PosBlockIndexOutput}. The
     * {@link Index} is marked and written into the block header each time a
     * header is written.
     */
    public void setPosBlockIndex(final PosBlockIndexOutput.Index index)
    throws IOException {
      this.posBlockIndex = index;
    }

    /**
     * Add a document identifier to the buffer.
     *
     * <p>
     *
     * The first identifier of a block is stored as 0 and remembered as the
     * first document identifier; every following identifier is stored as its
     * distance to the previous one, decremented by one.
     */
    public void write(final int docId) throws IOException {
      final int delta;
      if (docBuffer.offset == 0) {
        // first entry of the block: its value is carried by the header
        firstDocId = docId;
        delta = 0;
      }
      else {
        assert docId > lastDocId;
        // identifiers are strictly increasing, so the gap is at least one
        delta = docId - lastDocId - 1;
      }
      docBuffer.ints[docBuffer.offset++] = delta;
      lastDocId = docId;
    }

    /**
     * Add a node frequency to the buffer. The value is stored decremented by
     * one.
     */
    public void writeNodeFreq(final int nodeFreqInDoc) {
      final int encoded = nodeFreqInDoc - 1;
      nodFreqBuffer.ints[nodFreqBuffer.offset++] = encoded;
    }

    /**
     * Return true if no document identifier has been buffered.
     */
    @Override
    public boolean isEmpty() {
      return 0 == docBuffer.offset;
    }

    /**
     * Return true once the number of buffered document identifiers has
     * reached the maximum block size.
     */
    @Override
    public boolean isFull() {
      return !(docBuffer.offset < maxBlockSize);
    }

    /**
     * Compress the two input buffers into their respective output buffers.
     */
    @Override
    protected void compress() {
      // flip the buffers: the number of document entries becomes the length
      // of both buffers, and both offsets go back to the start
      final int entryCount = docBuffer.offset;
      nodFreqBuffer.length = entryCount;
      docBuffer.length = entryCount;
      nodFreqBuffer.offset = 0;
      docBuffer.offset = 0;

      docCompressor.compress(docBuffer, docCompressedBuffer);
      freqCompressor.compress(nodFreqBuffer, nodFreqCompressedBuffer);
    }

    /**
     * Write the block header. In order: the number of entries, the length of
     * the compressed document data, the length of the compressed frequency
     * data, the first document identifier, the distance between the last and
     * first document identifiers, and finally the node and position indexes.
     */
    @Override
    protected void writeHeader() throws IOException {
      // number of entries (same for the doc and freq data)
      out.writeVInt(docBuffer.length);

      // size of each compressed data block
      out.writeVInt(docCompressedBuffer.length);
      out.writeVInt(nodFreqCompressedBuffer.length);

      // first doc id, then the last one relative to the first
      out.writeVInt(firstDocId);
      out.writeVInt(lastDocId - firstDocId);

      // node and pos skip data
      nodeBlockIndex.mark();
      nodeBlockIndex.write(out, true);
      posBlockIndex.mark();
      posBlockIndex.write(out, true);
    }

    /**
     * Write the compressed document data followed by the compressed
     * frequency data.
     */
    @Override
    protected void writeData() throws IOException {
      final BytesRef docs = docCompressedBuffer;
      out.writeBytes(docs.bytes, docs.length);

      final BytesRef freqs = nodFreqCompressedBuffer;
      out.writeBytes(freqs.bytes, freqs.length);
    }

    /**
     * Reset the write position of both input buffers for a new block.
     */
    @Override
    protected void initBlock() {
      nodFreqBuffer.offset = 0;
      docBuffer.offset = 0;
    }

  }

}