/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.io.compress;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Random;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.RandomDatum;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.lzma.LzmaCompressor;
import org.apache.hadoop.io.compress.zlib.ZlibFactory;
import org.apache.hadoop.util.ReflectionUtils;

public class TestCodec extends TestCase {

  private static final Log LOG = LogFactory.getLog(TestCodec.class);

  private Configuration conf = new Configuration();
  private int count = 10000;
  private int seed = new Random().nextInt();

  public void testDefaultCodec() throws IOException {
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.DefaultCodec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.DefaultCodec");
  }

  public void testGzipCodec() throws IOException {
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.GzipCodec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.GzipCodec");
  }

  public void testBZip2Codec() throws IOException {
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.BZip2Codec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.BZip2Codec");
  }
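
  // Note: the Lzma and Lz4 tests below depend on native libraries. When the
  // corresponding library is not loaded, the test logs a warning and returns
  // without failing, so a pure-Java build still passes the suite.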

  public void testLzmaCodec() throws IOException {
    if (!LzmaCompressor.isNativeLzmaLoaded()) {
      LOG.warn("Lzma is not loaded. Skip Lzma test.");
      return;
    }
    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.LzmaCodec");
    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.LzmaCodec");
  }

  public void testLz4Codec() throws IOException {
    if (Lz4Codec.isNativeCodeLoaded()) {
      // Exercise both the regular and the high-compression (lz4hc) paths.
      conf.setBoolean("io.compression.codec.lz4.use.lz4hc", false);
      codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec");
      codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec");
      conf.setBoolean("io.compression.codec.lz4.use.lz4hc", true);
      codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.Lz4Codec");
      codecTest(conf, seed, count, "org.apache.hadoop.io.compress.Lz4Codec");
    } else {
      LOG.warn("Native lz4 library is not loaded. Skip Lz4 test.");
    }
  }

  private static void codecTest(Configuration conf, int seed, int count,
                                String codecClass) throws IOException {
    // Create the codec
    CompressionCodec codec = null;
    try {
      codec = (CompressionCodec)
        ReflectionUtils.newInstance(conf.getClassByName(codecClass), conf);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException("Illegal codec: " + codecClass);
    }
    LOG.info("Created a Codec object of type: " + codecClass);

    // Generate data
    DataOutputBuffer data = new DataOutputBuffer();
    RandomDatum.Generator generator = new RandomDatum.Generator(seed);
    for (int i = 0; i < count; ++i) {
      generator.next();
      RandomDatum key = generator.getKey();
      RandomDatum value = generator.getValue();
      key.write(data);
      value.write(data);
    }
    DataInputBuffer originalData = new DataInputBuffer();
    DataInputStream originalIn =
      new DataInputStream(new BufferedInputStream(originalData));
    originalData.reset(data.getData(), 0, data.getLength());
    LOG.info("Generated " + count + " records");

    // Compress data
    DataOutputBuffer compressedDataBuffer = new DataOutputBuffer();
    CompressionOutputStream deflateFilter =
      codec.createOutputStream(compressedDataBuffer);
    DataOutputStream deflateOut =
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    deflateOut.write(data.getData(), 0, data.getLength());
    deflateOut.flush();
    deflateFilter.finish();
    LOG.info("Finished compressing data");

    // De-compress data
    DataInputBuffer deCompressedDataBuffer = new DataInputBuffer();
    deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0,
                                 compressedDataBuffer.getLength());
    CompressionInputStream inflateFilter =
      codec.createInputStream(deCompressedDataBuffer);
    DataInputStream inflateIn =
      new DataInputStream(new BufferedInputStream(inflateFilter));

    // Check that each decompressed record matches the original
    for (int i = 0; i < count; ++i) {
      RandomDatum k1 = new RandomDatum();
      RandomDatum v1 = new RandomDatum();
      k1.readFields(originalIn);
      v1.readFields(originalIn);
      RandomDatum k2 = new RandomDatum();
      RandomDatum v2 = new RandomDatum();
      k2.readFields(inflateIn);
      v2.readFields(inflateIn);
      assertTrue("original and compressed-then-decompressed output not equal",
                 k1.equals(k2) && v1.equals(v2));
    }
    LOG.info("SUCCESS! Completed checking " + count + " records");
  }
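
  // A minimal sketch (not part of the original suite) of the stream API that
  // codecTest() exercises above: compress a byte[] through the codec and
  // decompress it back. The helper name roundTripSketch is hypothetical.
  private static byte[] roundTripSketch(CompressionCodec codec, byte[] input)
      throws IOException {
    // Compress: finish() flushes any trailing codec state into the buffer.
    DataOutputBuffer compressed = new DataOutputBuffer();
    CompressionOutputStream out = codec.createOutputStream(compressed);
    out.write(input);
    out.finish();
    out.close();

    // Decompress: read until the expected number of bytes is recovered.
    DataInputBuffer in = new DataInputBuffer();
    in.reset(compressed.getData(), 0, compressed.getLength());
    CompressionInputStream inflate = codec.createInputStream(in);
    byte[] result = new byte[input.length];
    int off = 0;
    while (off < result.length) {
      int n = inflate.read(result, off, result.length - off);
      if (n < 0) {
        break; // premature end of stream
      }
      off += n;
    }
    inflate.close();
    return result;
  }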

  public void testCodecPoolGzipReuse() throws Exception {
    Configuration conf = new Configuration();
    conf.setBoolean("hadoop.native.lib", true);
    if (!ZlibFactory.isNativeZlibLoaded(conf)) {
      LOG.warn("testCodecPoolGzipReuse skipped: native libs not loaded");
      return;
    }
    GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
    DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
    Compressor c1 = CodecPool.getCompressor(gzc);
    Compressor c2 = CodecPool.getCompressor(dfc);
    CodecPool.returnCompressor(c1);
    CodecPool.returnCompressor(c2);
    assertTrue("Got mismatched ZlibCompressor",
               c2 != CodecPool.getCompressor(gzc));
  }

  public void testSequenceFileDefaultCodec()
      throws IOException, ClassNotFoundException,
             InstantiationException, IllegalAccessException {
    sequenceFileCodecTest(conf, 100,
        "org.apache.hadoop.io.compress.DefaultCodec", 100);
    sequenceFileCodecTest(conf, 200000,
        "org.apache.hadoop.io.compress.DefaultCodec", 1000000);
  }

  public void testSequenceFileBZip2Codec()
      throws IOException, ClassNotFoundException,
             InstantiationException, IllegalAccessException {
    sequenceFileCodecTest(conf, 0,
        "org.apache.hadoop.io.compress.BZip2Codec", 100);
    sequenceFileCodecTest(conf, 100,
        "org.apache.hadoop.io.compress.BZip2Codec", 100);
    sequenceFileCodecTest(conf, 200000,
        "org.apache.hadoop.io.compress.BZip2Codec", 1000000);
  }

  private static void sequenceFileCodecTest(Configuration conf, int lines,
                                            String codecClass, int blockSize)
      throws IOException, ClassNotFoundException,
             InstantiationException, IllegalAccessException {

    Path filePath = new Path("SequenceFileCodecTest." + codecClass);

    // Configuration
    conf.setInt("io.seqfile.compress.blocksize", blockSize);

    // Create the SequenceFile
    FileSystem fs = FileSystem.get(conf);
    LOG.info("Creating SequenceFile with codec \"" + codecClass + "\"");
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, filePath,
        Text.class, Text.class, CompressionType.BLOCK,
        (CompressionCodec)Class.forName(codecClass).newInstance());

    // Write some data
    LOG.info("Writing to SequenceFile...");
    for (int i = 0; i < lines; i++) {
      Text key = new Text("key" + i);
      Text value = new Text("value" + i);
      writer.append(key, value);
    }
    writer.close();

    // Read the data back and check
    LOG.info("Reading from the SequenceFile...");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
    Writable key = (Writable)reader.getKeyClass().newInstance();
    Writable value = (Writable)reader.getValueClass().newInstance();
    int lc = 0;
    try {
      while (reader.next(key, value)) {
        assertEquals("key" + lc, key.toString());
        assertEquals("value" + lc, value.toString());
        lc++;
      }
    } finally {
      reader.close();
    }
    assertEquals(lines, lc);

    // Delete temporary files
    fs.delete(filePath, false);

    LOG.info("SUCCESS! Completed SequenceFileCodecTest with codec \"" +
             codecClass + "\"");
  }
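
  // Sketch (not part of the original suite) of the CodecPool usage pattern
  // that testCodecPoolGzipReuse() above relies on: borrow a Compressor, wire
  // it into an output stream, and return it so later callers can reuse it.
  // The helper name codecPoolUsageSketch is hypothetical.
  private static void codecPoolUsageSketch(CompressionCodec codec)
      throws IOException {
    Compressor compressor = CodecPool.getCompressor(codec);
    try {
      DataOutputBuffer buf = new DataOutputBuffer();
      CompressionOutputStream out = codec.createOutputStream(buf, compressor);
      out.write("example".getBytes("UTF-8"));
      out.finish();
      out.close();
    } finally {
      // Always return the compressor, even on failure, or the pool leaks.
      CodecPool.returnCompressor(compressor);
    }
  }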

  public static void main(String[] args) {
    int count = 10000;
    String codecClass = "org.apache.hadoop.io.compress.DefaultCodec";

    String usage = "TestCodec [-count N] [-codec <codec class>]";
    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }

    try {
      // Parse the command line
      for (int i = 0; i < args.length; ++i) {
        if (args[i] == null) {
          continue;
        } else if (args[i].equals("-count")) {
          count = Integer.parseInt(args[++i]);
        } else if (args[i].equals("-codec")) {
          codecClass = args[++i];
        }
      }

      Configuration conf = new Configuration();
      int seed = 0;
      codecTest(conf, seed, count, codecClass);
    } catch (Exception e) {
      System.err.println("Caught: " + e);
      e.printStackTrace();
    }
  }

  public TestCodec(String name) {
    super(name);
  }

}