package org.apache.hadoop.io.simpleseekableformat;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.zip.CRC32;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * This class holds the data related to a single data segment.
 */
class DataSegmentReader {

  static class EmptyDataSegmentException extends EOFException {
  }

  // uncompressed data stream
  private final InputStream uncompressedData;

  /**
   * May throw EOFException if the InputStream does not have a
   * complete data segment.
   *
   * NOTE: This class holds a reference to the Decompressor in
   * the decompressorCache until the return value of
   * getInputStream() is closed.
   *
   * @param decompressorCache
   * @throws EmptyDataSegmentException if there is nothing to read.
   * @throws EOFException if the data segment is not complete.
   */
  DataSegmentReader(DataInputStream in, Configuration conf,
      HashMap<Text, Decompressor> decompressorCache)
      throws EmptyDataSegmentException, EOFException,
      ClassNotFoundException, IOException {

    // Read from DataInputStream
    // 1. Read length
    int length = 0;
    try {
      length = in.readInt();
    } catch (EOFException e) {
      throw new EmptyDataSegmentException();
    }

    // 2. Read codec
    int codecNameUTF8Length = in.readShort();
    byte[] codecNameUTF8 = new byte[codecNameUTF8Length];
    in.readFully(codecNameUTF8);
    Text codecNameText = new Text(codecNameUTF8);

    // 3. Read CRC32 (only present when uncompressed)
    boolean hasCrc32 = (codecNameUTF8Length == 0);
    long crc32Value = 0;
    if (hasCrc32) {
      crc32Value = in.readLong();
    }

    // 4. Read data
    byte[] storedData = new byte[length - (hasCrc32 ? 8 : 0)/*crc32*/
        - 2/*codec length*/ - codecNameUTF8Length];
    in.readFully(storedData);

    // Verify the checksum
    if (hasCrc32) {
      CRC32 crc32 = new CRC32();
      crc32.update(storedData);
      if (crc32.getValue() != crc32Value) {
        throw new CorruptedDataException("Corrupted data segment with length "
            + length + " crc32 expected " + crc32Value + " but got "
            + crc32.getValue());
      }
    }

    // Uncompress the data if needed
    if (codecNameUTF8Length == 0) {
      // no compression
      uncompressedData = new ByteArrayInputStream(storedData);
    } else {
      CompressionCodec codec = getCodecFromName(codecNameText, conf);
      Decompressor decompressor = null;
      if (decompressorCache != null) {
        // Create decompressor and add to cache if needed.
        decompressor = decompressorCache.get(codecNameText);
        if (decompressor == null) {
          decompressor = codec.createDecompressor();
          decompressorCache.put(codecNameText, decompressor);
        } else {
          decompressor.reset();
        }
      }
      if (decompressor == null) {
        uncompressedData = codec.createInputStream(
            new ByteArrayInputStream(storedData));
      } else {
        uncompressedData = codec.createInputStream(
            new ByteArrayInputStream(storedData), decompressor);
      }
    }
  }

  InputStream getInputStream() {
    return uncompressedData;
  }

  /**
   * A simple cache to save the cost of reflection and object creation.
   */
  private static final ConcurrentHashMap<Text, CompressionCodec> CODEC_CACHE =
      new ConcurrentHashMap<Text, CompressionCodec>();

  @SuppressWarnings("unchecked")
  private static CompressionCodec getCodecFromName(Text codecName,
      Configuration conf) throws ClassNotFoundException {
    CompressionCodec result = CODEC_CACHE.get(codecName);
    if (result == null) {
      Class<? extends CompressionCodec> codecClass =
          (Class<? extends CompressionCodec>) Class.forName(codecName.toString());
      result = ReflectionUtils.newInstance(codecClass, conf);
      CODEC_CACHE.put(codecName, result);
    }
    return result;
  }
}
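
/*
 * Usage sketch (an illustrative addition, not part of the original file):
 * drains every data segment from a stream by constructing DataSegmentReader
 * instances until EmptyDataSegmentException signals a clean end of input.
 * The shared decompressorCache lets consecutive segments that use the same
 * codec reuse one Decompressor instead of allocating a new one per segment.
 */
class DataSegmentReaderUsageSketch {

  /** Copies the uncompressed bytes of every segment in {@code in} to {@code out}. */
  static void copyAllSegments(DataInputStream in, Configuration conf,
      java.io.OutputStream out) throws ClassNotFoundException, IOException {
    HashMap<Text, Decompressor> decompressorCache =
        new HashMap<Text, Decompressor>();
    byte[] buffer = new byte[4096];
    while (true) {
      DataSegmentReader reader;
      try {
        reader = new DataSegmentReader(in, conf, decompressorCache);
      } catch (DataSegmentReader.EmptyDataSegmentException e) {
        break;  // no more segments in the stream
      }
      InputStream segment = reader.getInputStream();
      try {
        int n;
        while ((n = segment.read(buffer)) != -1) {
          out.write(buffer, 0, n);
        }
      } finally {
        // Closing the stream releases the cached Decompressor reference
        // (see the NOTE on the DataSegmentReader constructor).
        segment.close();
      }
    }
  }
}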