DataBlockEncoder.java example

Explorer
hbase-trunk-mttr-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.io.encoding;

import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.io.RawComparator;

/**
 * Encoder and decoder for blocks of KeyValues. The design aims to be fast and
 * efficient by relying on the following assumptions:
 * <ul>
 * <li>KeyValues are stored sorted by key</li>
 * <li>the structure (format) of a KeyValue is known</li>
 * <li>values are always iterated forward from the beginning of the block</li>
 * </ul>
 * The goal is to work fast enough to be feasible as in-memory compression.
 *
 * <p>Once the data has been encoded, it is optionally compressed as well, if a
 * compression algorithm is specified in the HFileBlockEncodingContext argument
 * of {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}.
 */
@InterfaceAudience.Private
public interface DataBlockEncoder {

  /**
   * Encodes KeyValues. The key-value pairs are encoded first; afterwards the
   * encoded data is optionally compressed.
   *
   * @param in
   *          source of the KeyValues to encode
   * @param includesMemstoreTS
   *          true if a memstore timestamp follows every key-value pair
   * @param encodingContext
   *          the encoding context; it will contain the encoded uncompressed
   *          bytes as well as the compressed encoded bytes if compression is
   *          enabled, and it reuses resources across multiple calls
   * @throws IOException
   *           if there is an error writing to the output stream
   */
  void encodeKeyValues(
      ByteBuffer in, boolean includesMemstoreTS,
      HFileBlockEncodingContext encodingContext) throws IOException;

  /**
   * Decodes a stream of KeyValues.
   *
   * @param source
   *          encoded (compressed) stream of KeyValues
   * @param includesMemstoreTS
   *          true if a memstore timestamp follows every key-value pair
   * @return the decoded (uncompressed) block of KeyValues
   * @throws IOException
   *           if there is an error in the source
   */
  ByteBuffer decodeKeyValues(DataInputStream source,
      boolean includesMemstoreTS) throws IOException;

  /**
   * Decodes a stream of KeyValues, allocating room for a header and leaving
   * out a number of trailing bytes.
   *
   * @param source
   *          encoded stream of KeyValues
   * @param allocateHeaderLength
   *          number of bytes to allocate for the header
   * @param skipLastBytes
   *          number of bytes at the end that are not copied
   * @param includesMemstoreTS
   *          true if a memstore timestamp follows every key-value pair
   * @return the decoded (uncompressed) block of KeyValues
   * @throws IOException
   *           if there is an error in the source
   */
  ByteBuffer decodeKeyValues(DataInputStream source,
      int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
      throws IOException;

  /**
   * Returns the first key in the block, which is useful for indexing.
   * Typically no deep copy is made; instead the returned buffer wraps a
   * segment of the actual block's byte array. This works because the first
   * key in a block is usually stored unencoded.
   *
   * @param block
   *          encoded block to be indexed; its position will not change
   * @return the first key in the block
   */
  ByteBuffer getFirstKeyInBlock(ByteBuffer block);

  /**
   * Creates an HFileBlock seeker which finds KeyValues within a block.
   *
   * @param comparator
   *          the kind of comparison that should be used
   * @param includesMemstoreTS
   *          true if a memstore timestamp follows every key-value pair
   * @return a newly created seeker
   */
  EncodedSeeker createSeeker(RawComparator<byte[]> comparator,
      boolean includesMemstoreTS);

  /**
   * Creates an encoder-specific encoding context.
   *
   * @param compressionAlgorithm
   *          compression algorithm to use if the final data needs to be
   *          compressed
   * @param encoding
   *          the encoding strategy used
   * @param headerBytes
   *          header bytes to be written; pass a dummy header if the header is
   *          not known yet
   * @return a newly created encoding context
   */
  HFileBlockEncodingContext newDataBlockEncodingContext(
      Algorithm compressionAlgorithm, DataBlockEncoding encoding,
      byte[] headerBytes);

  /**
   * Creates an encoder-specific decoding context, which prepares the data
   * before the actual decoding takes place.
   *
   * @param compressionAlgorithm
   *          compression algorithm to use if the data needs to be
   *          decompressed
   * @return a newly created decoding context
   */
  HFileBlockDecodingContext newDataBlockDecodingContext(
      Algorithm compressionAlgorithm);

  /**
   * An interface which enables seeking while the underlying data stays
   * encoded.
   *
   * <p>A seeker works on one HFileBlock at a time, but it is reusable. See
   * {@link #setCurrentBuffer(ByteBuffer)}.
   */
  interface EncodedSeeker {
    /**
     * Sets the buffer on which seeking will be done.
     *
     * @param buffer
     *          the buffer used for seeking
     */
    void setCurrentBuffer(ByteBuffer buffer);

    /**
     * Makes a deep copy of the key at the current position. The deep copy is
     * necessary because buffers are reused in the decoder.
     *
     * @return the key at the current position
     */
    ByteBuffer getKeyDeepCopy();

    /**
     * Makes a shallow copy of the value at the current position. A shallow
     * copy is possible because the returned buffer refers to the backing
     * array of the original encoded buffer.
     *
     * @return the value at the current position
     */
    ByteBuffer getValueShallowCopy();

    /**
     * @return the key value at the current position, with the buffer's
     *         position set to its limit
     */
    ByteBuffer getKeyValueBuffer();

    /**
     * @return the KeyValue object at the current position, including the
     *         memstore timestamp
     */
    KeyValue getKeyValue();

    /** Sets the position to the beginning of the given block. */
    void rewind();

    /**
     * Moves to the next position.
     *
     * @return true on success, false if there are no more positions
     */
    boolean next();

    /**
     * Moves the seeker position within the current block to:
     * <ul>
     * <li>the last key that is less than or equal to the given key if
     * <code>seekBefore</code> is false</li>
     * <li>the last key that is strictly less than the given key if
     * <code>seekBefore</code> is true. The caller is responsible for loading
     * the previous block if the requested key turns out to be the first key
     * of the current block.</li>
     * </ul>
     *
     * @param key
     *          byte array containing the key
     * @param offset
     *          position of the key in the array
     * @param length
     *          key length in bytes
     * @param seekBefore
     *          find the key strictly less than the given key in case of an
     *          exact match; does not matter in case of an inexact match
     * @return 0 on exact match, 1 on inexact match
     */
    int seekToKeyInBlock(byte[] key, int offset, int length,
        boolean seekBefore);
  }
}