package com.cloudera.sa.hcu.io.out;

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Reads an Avro container file from HDFS and prints its schema and its
 * records to the console.
 */
public class ConsoleOutAvroFile {

    /**
     * @param args args[0] is the HDFS path of the Avro file to print
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException
    {
        if (args.length < 1)
        {
            System.out.println("ConsoleOutAvroFile:");
            System.out.println("");
            System.out.println("Parameter: <inputFile>");
            return;
        }
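
        // Read the target path from the command line and open it on the
        // default HDFS filesystem from the Hadoop configuration.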
        String inputFile = args[0];

        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);

        Path inputFilePath = new Path(inputFile);
        FSDataInputStream dataInputStream = hdfs.open(inputFilePath);
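
        // A GenericDatumReader needs no schema up front; DataFileStream reads
        // the writer's schema from the Avro container file's header.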
        DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
        DataFileStream<GenericRecord> dataFileReader = new DataFileStream<GenericRecord>(dataInputStream, reader);
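
        // Print the schema name followed by the full schema JSON.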
        Schema s = dataFileReader.getSchema();
        System.out.println(s.getName() + " " + s);
        System.out.println("-");
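
        // Iterate over every record in the file and print its string representation.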
        while (dataFileReader.hasNext())
        {
            GenericRecord record = dataFileReader.next();
            System.out.println(" " + record);
        }
        System.out.println("-");

        dataFileReader.close();
        dataInputStream.close();
    }
}