package com.manning.hip.ch5; import org.apache.avro.file.DataFileStream; import org.apache.avro.generic.*; import org.apache.commons.codec.digest.DigestUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.*; import org.apache.hadoop.io.IOUtils; import java.io.*; import java.nio.ByteBuffer; public class SmallFilesRead { private static final String FIELD_FILENAME = "filename"; private static final String FIELD_CONTENTS = "contents"; public static void readFromAvro(InputStream is) throws IOException { DataFileStream<Object> reader = //<co id="ch02_smallfileread_comment1"/> new DataFileStream<Object>( is, new GenericDatumReader<Object>()); for (Object o : reader) { //<co id="ch02_smallfileread_comment2"/> GenericRecord r = (GenericRecord) o; //<co id="ch02_smallfileread_comment3"/> System.out.println( //<co id="ch02_smallfileread_comment4"/> r.get(FIELD_FILENAME) + ": " + DigestUtils.md5Hex( ((ByteBuffer) r.get(FIELD_CONTENTS)).array())); } IOUtils.cleanup(null, is); IOUtils.cleanup(null, reader); } public static void main(String... args) throws Exception { Configuration config = new Configuration(); FileSystem hdfs = FileSystem.get(config); Path destFile = new Path(args[0]); InputStream is = hdfs.open(destFile); readFromAvro(is); } }