/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.io.encoding;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.io.RawComparator;
/**
 * Encoding of KeyValue. The encoder aims to be fast and efficient by relying
 * on the following assumptions:
 * <ul>
 * <li>the KeyValues are stored sorted by key</li>
 * <li>the structure of a KeyValue is known</li>
 * <li>the values are always iterated forward from the beginning of the
 * block</li>
 * <li>the Key Value format is known</li>
 * </ul>
 * It is designed to work fast enough to be feasible as in-memory compression.
 *
 * <p>After encoding, the encoded data is also optionally compressed if a
 * compression algorithm is specified in the HFileBlockEncodingContext argument
 * of {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}.
 */
@InterfaceAudience.Private
public interface DataBlockEncoder {

  /**
   * Encodes KeyValues. The key value pairs are encoded first, and the encoded
   * data is then optionally compressed.
   *
   * @param in
   *          source of KeyValues for compression
   * @param includesMemstoreTS
   *          true if a memstore timestamp is included after every key-value
   *          pair
   * @param encodingContext
   *          the encoding context, which will contain the encoded uncompressed
   *          bytes as well as the compressed encoded bytes if compression is
   *          enabled; it also reuses resources across multiple calls
   * @throws IOException
   *           if there is an error writing to the output stream
   */
  void encodeKeyValues(ByteBuffer in, boolean includesMemstoreTS,
      HFileBlockEncodingContext encodingContext) throws IOException;

  /**
   * Decodes a stream of encoded KeyValues.
   *
   * @param source
   *          encoded stream of KeyValues
   * @param includesMemstoreTS
   *          true if a memstore timestamp is included after every key-value
   *          pair
   * @return decoded block of KeyValues
   * @throws IOException
   *           if there is an error in the source
   */
  ByteBuffer decodeKeyValues(DataInputStream source, boolean includesMemstoreTS)
      throws IOException;

  /**
   * Decodes a stream of encoded KeyValues, reserving room for a header and
   * leaving out a number of trailing bytes.
   *
   * @param source
   *          encoded stream of KeyValues
   * @param allocateHeaderLength
   *          number of bytes to allocate for the header
   * @param skipLastBytes
   *          number of last bytes that are not copied
   * @param includesMemstoreTS
   *          true if a memstore timestamp is included after every key-value
   *          pair
   * @return decoded block of KeyValues
   * @throws IOException
   *           if there is an error in the source
   */
  ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength,
      int skipLastBytes, boolean includesMemstoreTS) throws IOException;

  /**
   * Returns the first key in the block, which is useful for indexing.
   * Typically no deep copy is made; instead the returned buffer wraps a
   * segment of the actual block's byte array. This is because the first key
   * in a block is usually stored unencoded.
   *
   * @param block
   *          encoded block to be indexed; its position will not change
   * @return first key in the block
   */
  ByteBuffer getFirstKeyInBlock(ByteBuffer block);

  /**
   * Creates an HFileBlock seeker which finds KeyValues within a block.
   *
   * @param comparator
   *          the kind of comparison that should be used
   * @param includesMemstoreTS
   *          true if a memstore timestamp is included after every key-value
   *          pair
   * @return a newly created seeker
   */
  EncodedSeeker createSeeker(RawComparator<byte[]> comparator,
      boolean includesMemstoreTS);

  /**
   * Creates an encoder-specific encoding context.
   *
   * @param compressionAlgorithm
   *          compression algorithm used if the final data needs to be
   *          compressed
   * @param encoding
   *          encoding strategy used
   * @param headerBytes
   *          header bytes to be written; pass a dummy header here if the
   *          header is unknown
   * @return a newly created encoding context
   */
  HFileBlockEncodingContext newDataBlockEncodingContext(
      Algorithm compressionAlgorithm, DataBlockEncoding encoding,
      byte[] headerBytes);

  /**
   * Creates an encoder-specific decoding context, which will prepare the data
   * before the actual decoding.
   *
   * @param compressionAlgorithm
   *          compression algorithm used if the data needs to be decompressed
   * @return a newly created decoding context
   */
  HFileBlockDecodingContext newDataBlockDecodingContext(
      Algorithm compressionAlgorithm);

  /**
   * An interface which enables seeking while the underlying data is encoded.
   *
   * <p>It works on one HFileBlock, but it is reusable. See
   * {@link #setCurrentBuffer(ByteBuffer)}.
   */
  interface EncodedSeeker {

    /**
     * Sets the buffer on which seeking will be done.
     *
     * @param buffer
     *          the buffer used for seeking
     */
    void setCurrentBuffer(ByteBuffer buffer);

    /**
     * Makes a deep copy of the key at the current position. A deep copy is
     * necessary because buffers are reused in the decoder.
     *
     * @return key at the current position
     */
    ByteBuffer getKeyDeepCopy();

    /**
     * Makes a shallow copy of the value at the current position. A shallow
     * copy is possible because the returned buffer refers to the backing
     * array of the original encoded buffer.
     *
     * @return value at the current position
     */
    ByteBuffer getValueShallowCopy();

    /** @return key value at the current position, with position set to limit */
    ByteBuffer getKeyValueBuffer();

    /**
     * @return the KeyValue object at the current position; includes the
     *         memstore timestamp
     */
    KeyValue getKeyValue();

    /** Sets the position to the beginning of the given block. */
    void rewind();

    /**
     * Moves to the next position.
     *
     * @return true on success, false if there are no more positions
     */
    boolean next();

    /**
     * Moves the seeker position within the current block to:
     * <ul>
     * <li>the last key that is less than or equal to the given key if
     * <code>seekBefore</code> is false</li>
     * <li>the last key that is strictly less than the given key if
     * <code>seekBefore</code> is true. The caller is responsible for loading
     * the previous block if the requested key turns out to be the first key
     * of the current block.</li>
     * </ul>
     *
     * @param key
     *          byte array containing the key
     * @param offset
     *          position of the key in the array
     * @param length
     *          key length in bytes
     * @param seekBefore
     *          find the key strictly less than the given key in case of an
     *          exact match; does not matter in case of an inexact match
     * @return 0 on exact match, 1 on inexact match
     */
    int seekToKeyInBlock(byte[] key, int offset, int length,
        boolean seekBefore);
  }
}