/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.io.compress; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import org.apache.hadoop.io.compress.bzip2.BZip2DummyCompressor; import org.apache.hadoop.io.compress.bzip2.BZip2DummyDecompressor; import org.apache.hadoop.io.compress.bzip2.CBZip2InputStream; import org.apache.hadoop.io.compress.bzip2.CBZip2OutputStream; /** * This class provides CompressionOutputStream and CompressionInputStream for * compression and decompression. Currently we dont have an implementation of * the Compressor and Decompressor interfaces, so those methods of * CompressionCodec which have a Compressor or Decompressor type argument, throw * UnsupportedOperationException. */ public class BZip2Codec implements org.apache.hadoop.io.compress.CompressionCodec { private static final String HEADER = "BZ"; private static final int HEADER_LEN = HEADER.length(); /** * Creates a new instance of BZip2Codec */ public BZip2Codec() { } /** * Creates CompressionOutputStream for BZip2 * * @param out * The output Stream * @return The BZip2 CompressionOutputStream * @throws java.io.IOException * Throws IO exception */ public CompressionOutputStream createOutputStream(OutputStream out) throws IOException { return new BZip2CompressionOutputStream(out); } /** * This functionality is currently not supported. * * @throws java.lang.UnsupportedOperationException * Throws UnsupportedOperationException */ public CompressionOutputStream createOutputStream(OutputStream out, Compressor compressor) throws IOException { return createOutputStream(out); } /** * This functionality is currently not supported. * * @throws java.lang.UnsupportedOperationException * Throws UnsupportedOperationException */ public Class<? extends org.apache.hadoop.io.compress.Compressor> getCompressorType() { return BZip2DummyCompressor.class; } /** * This functionality is currently not supported. * * @throws java.lang.UnsupportedOperationException * Throws UnsupportedOperationException */ public Compressor createCompressor() { return new BZip2DummyCompressor(); } /** * Creates CompressionInputStream to be used to read off uncompressed data. * * @param in * The InputStream * @return Returns CompressionInputStream for BZip2 * @throws java.io.IOException * Throws IOException */ public CompressionInputStream createInputStream(InputStream in) throws IOException { return new BZip2CompressionInputStream(in); } /** * This functionality is currently not supported. * * @throws java.lang.UnsupportedOperationException * Throws UnsupportedOperationException */ public CompressionInputStream createInputStream(InputStream in, Decompressor decompressor) throws IOException { return createInputStream(in); } /** * This functionality is currently not supported. * * @throws java.lang.UnsupportedOperationException * Throws UnsupportedOperationException */ public Class<? extends org.apache.hadoop.io.compress.Decompressor> getDecompressorType() { return BZip2DummyDecompressor.class; } /** * This functionality is currently not supported. * * @throws java.lang.UnsupportedOperationException * Throws UnsupportedOperationException */ public Decompressor createDecompressor() { return new BZip2DummyDecompressor(); } /** * .bz2 is recognized as the default extension for compressed BZip2 files * * @return A String telling the default bzip2 file extension */ public String getDefaultExtension() { return ".bz2"; } private static class BZip2CompressionOutputStream extends CompressionOutputStream { // class data starts here// private CBZip2OutputStream output; private boolean needsReset; // class data ends here// public BZip2CompressionOutputStream(OutputStream out) throws IOException { super(out); needsReset = true; } private void writeStreamHeader() throws IOException { if (super.out != null) { // The compressed bzip2 stream should start with the // identifying characters BZ. Caller of CBZip2OutputStream // i.e. this class must write these characters. out.write(HEADER.getBytes()); } } public void finish() throws IOException { if (needsReset) { // In the case that nothing is written to this stream, we still need to // write out the header before closing, otherwise the stream won't be // recognized by BZip2CompressionInputStream. internalReset(); } this.output.finish(); needsReset = true; } private void internalReset() throws IOException { if (needsReset) { needsReset = false; writeStreamHeader(); this.output = new CBZip2OutputStream(out); } } public void resetState() throws IOException { // Cannot write to out at this point because out might not be ready // yet, as in SequenceFile.Writer implementation. needsReset = true; } public void write(int b) throws IOException { if (needsReset) { internalReset(); } this.output.write(b); } public void write(byte[] b, int off, int len) throws IOException { if (needsReset) { internalReset(); } this.output.write(b, off, len); } public void close() throws IOException { if (needsReset) { // In the case that nothing is written to this stream, we still need to // write out the header before closing, otherwise the stream won't be // recognized by BZip2CompressionInputStream. internalReset(); } this.output.flush(); this.output.close(); needsReset = true; } }// end of class BZip2CompressionOutputStream private static class BZip2CompressionInputStream extends CompressionInputStream { // class data starts here// private CBZip2InputStream input; boolean needsReset; // class data ends here// public BZip2CompressionInputStream(InputStream in) throws IOException { super(in); needsReset = true; } private BufferedInputStream readStreamHeader() throws IOException { // We are flexible enough to allow the compressed stream not to // start with the header of BZ. So it works fine either we have // the header or not. BufferedInputStream bufferedIn = null; if (super.in != null) { bufferedIn = new BufferedInputStream(super.in); bufferedIn.mark(HEADER_LEN); byte[] headerBytes = new byte[HEADER_LEN]; int actualRead = bufferedIn.read(headerBytes, 0, HEADER_LEN); if (actualRead != -1) { String header = new String(headerBytes); if (header.compareTo(HEADER) != 0) { bufferedIn.reset(); } } } if (bufferedIn == null) { throw new IOException("Failed to read bzip2 stream."); } return bufferedIn; }// end of method public void close() throws IOException { if (!needsReset) { input.close(); needsReset = true; } } public int read(byte[] b, int off, int len) throws IOException { if (needsReset) { internalReset(); } return this.input.read(b, off, len); } private void internalReset() throws IOException { if (needsReset) { needsReset = false; BufferedInputStream bufferedIn = readStreamHeader(); input = new CBZip2InputStream(bufferedIn); } } public void resetState() throws IOException { // Cannot read from bufferedIn at this point because bufferedIn might not be ready // yet, as in SequenceFile.Reader implementation. needsReset = true; } public int read() throws IOException { if (needsReset) { internalReset(); } return this.input.read(); } }// end of BZip2CompressionInputStream }