/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.compress;

import gnu.trove.iterator.TLongIterator;
import gnu.trove.list.array.TLongArrayList;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexOutput;

import java.io.IOException;

/**
 * An {@link IndexOutput} that compresses data in fixed-size blocks before writing it to the
 * underlying {@link IndexOutput}. The file starts with a compressor specific header (see
 * {@link #writeHeader(IndexOutput)}), a version int, and a long that is patched on {@link #close()}
 * to point at the metadata section. The metadata section, written after the last compressed block,
 * holds the total uncompressed length and the file offset of each compressed block, so that
 * {@code CompressedIndexInput} can map an uncompressed position to a block by simple division.
 */
public abstract class CompressedIndexOutput<T extends CompressorContext> extends IndexOutput {

    final IndexOutput out;

    protected final T context;

    // the block buffer (allocated by the concrete implementation) and its length, i.e. the chunk size
    protected byte[] uncompressed;
    protected int uncompressedLength;
    // write position within the current block buffer
    private int position = 0;

    // total number of uncompressed bytes written so far
    private long uncompressedPosition;

    private boolean closed;

    // file pointer of the long that is patched on close to point at the metadata section
    private final long metaDataPointer;
    // need to have a growing segment long array list here...
    private TLongArrayList offsets = new TLongArrayList();

    public CompressedIndexOutput(IndexOutput out, T context) throws IOException {
        this.out = out;
        this.context = context;
        writeHeader(out);
        out.writeInt(0); // version
        metaDataPointer = out.getFilePointer();
        out.writeLong(-1); // placeholder for the pointer to the metadata section, patched on close
    }

    public IndexOutput underlying() {
        return this.out;
    }

    @Override
    public void writeByte(byte b) throws IOException {
        if (position >= uncompressedLength) {
            flushBuffer();
        }
        uncompressedPosition++;
        uncompressed[position++] = b;
    }

    @Override
    public void writeBytes(byte[] input, int offset, int length) throws IOException {
        // ES, check if length is 0, and don't write in this case
        if (length == 0) {
            return;
        }
        final int BUFFER_LEN = uncompressedLength;

        // simple case first: buffering only (for trivially short writes)
        int free = BUFFER_LEN - position;
        if (free >= length) {
            System.arraycopy(input, offset, uncompressed, position, length);
            position += length;
            uncompressedPosition += length;
            return;
        }

        // fill the partially filled buffer as much as possible and flush it
        if (position > 0) {
            System.arraycopy(input, offset, uncompressed, position, free);
            position += free;
            uncompressedPosition += free;
            flushBuffer();
            offset += free;
            length -= free;
        }

        // then write intermediate full blocks, if any, without copying:
        while (length >= BUFFER_LEN) {
            offsets.add(out.getFilePointer());
            compress(input, offset, BUFFER_LEN, out);
            offset += BUFFER_LEN;
            length -= BUFFER_LEN;
            uncompressedPosition += BUFFER_LEN;
        }

        // and finally, copy leftovers in input, if any
        if (length > 0) {
            System.arraycopy(input, offset, uncompressed, 0, length);
        }
        position = length;
        uncompressedPosition += length;
    }

    @Override
    public void copyBytes(DataInput input, long length) throws IOException {
        final int BUFFER_LEN = uncompressedLength;

        // simple case first: buffering only (for trivially short writes)
        int free = BUFFER_LEN - position;
        if (free >= length) {
            input.readBytes(uncompressed, position, (int) length, false);
            position += length;
            uncompressedPosition += length;
            return;
        }

        // fill the partially filled buffer as much as possible and flush it
        if (position > 0) {
            input.readBytes(uncompressed, position, free, false);
            position += free;
            uncompressedPosition += free;
            flushBuffer();
            length -= free;
        }

        // then write intermediate full blocks, if any, without copying:

        // Note, if we supported flushing buffers not on "chunkSize", then we could have flushed
        // up to the rest of the non compressed data in the input and then copied compressed
        // segments over. That would require storing the compressed size of each segment on top
        // of the offsets, though, and CompressedIndexInput#seek would be more costly, since it
        // could no longer do (pos / chunk) to get the block index...
        while (length >= BUFFER_LEN) {
            offsets.add(out.getFilePointer());
            input.readBytes(uncompressed, 0, BUFFER_LEN);
            compress(uncompressed, 0, BUFFER_LEN, out);
            length -= BUFFER_LEN;
            uncompressedPosition += BUFFER_LEN;
        }

        // and finally, copy leftovers in input, if any
        if (length > 0) {
            input.readBytes(uncompressed, 0, (int) length, false);
        }
        position = (int) length;
        uncompressedPosition += length;
    }

    @Override
    public void flush() throws IOException {
        // ignore explicit flushes, we always flush on the actual block size
        //flushBuffer();
        out.flush();
    }

    @Override
    public void close() throws IOException {
        if (!closed) {
            flushBuffer();

            // write the metadata section, then patch the header pointer to reference it
            long metaDataPointerValue = out.getFilePointer();
            // length uncompressed
            out.writeVLong(uncompressedPosition);
            // compressed pointers (file offset of each compressed block)
            out.writeVInt(offsets.size());
            for (TLongIterator it = offsets.iterator(); it.hasNext(); ) {
                out.writeVLong(it.next());
            }
            out.seek(metaDataPointer);
            out.writeLong(metaDataPointerValue);

            closed = true;
            doClose();
            out.close();
        }
    }

    protected abstract void doClose() throws IOException;

    @Override
    public long getFilePointer() {
        // the file pointer, like the length, is reported in uncompressed bytes
        return uncompressedPosition;
    }

    @Override
    public void seek(long pos) throws IOException {
        throw new IOException("seek not supported on compressed output");
    }

    @Override
    public long length() throws IOException {
        return uncompressedPosition;
    }

    private void flushBuffer() throws IOException {
        if (position > 0) {
            // record where this block starts in the underlying file, then compress and write it
            offsets.add(out.getFilePointer());
            compress(uncompressed, 0, position, out);
            position = 0;
        }
    }

    protected abstract void writeHeader(IndexOutput out) throws IOException;

    /**
     * Compresses the data into the output
     */
    protected abstract void compress(byte[] data, int offset, int len, IndexOutput out) throws IOException;
}