package com.cloudera.sa.hcu.env2.arvo.io.examples;

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Writes a small Avro container file to HDFS using the generic (schema-driven)
 * API, appending two hard-coded FacebookUser records.
 */
public class GenerateBasicAvroFile {

  /**
   * @param args args[0] is the HDFS path of the Avro file to write
   * @throws IOException if the file cannot be created or written
   */
  public static void main(String[] args) throws IOException {
    if (args.length != 1) {
      System.err.println("Usage: GenerateBasicAvroFile <outputFile>");
      System.exit(1);
    }
    String outputFile = args[0];

    String schemaDescription = "{\n"
        + "  \"name\": \"FacebookUser\",\n"
        + "  \"type\": \"record\",\n"
        + "  \"fields\": [\n"
        + "    {\"name\": \"name\", \"type\": \"string\"},\n"
        + "    {\"name\": \"num_likes\", \"type\": \"int\"},\n"
        + "    {\"name\": \"num_photos\", \"type\": \"int\"},\n"
        + "    {\"name\": \"num_groups\", \"type\": \"int\"}\n"
        + "  ]\n"
        + "}";
    // Schema.parse(String) is deprecated; Schema.Parser is the supported entry point.
    Schema s = new Schema.Parser().parse(schemaDescription);

    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);
    Path outputFilePath = new Path(outputFile);
    FSDataOutputStream dataOutputStream = hdfs.create(outputFilePath);

    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(s);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer);
    // create() writes the container-file header (schema plus sync marker) to the
    // stream, so no separate setSchema() call on the datum writer is needed.
    dataFileWriter.create(s, dataOutputStream);

    // append() serializes the record immediately, so the same GenericRecord
    // instance can be reused for each row.
    GenericRecord datum = new GenericData.Record(s);
    datum.put("name", new Utf8("ted"));
    datum.put("num_likes", 1);
    datum.put("num_groups", 423);
    datum.put("num_photos", 0);
    dataFileWriter.append(datum);

    datum.put("name", new Utf8("karen"));
    datum.put("num_likes", 2);
    datum.put("num_groups", 123);
    datum.put("num_photos", 1);
    dataFileWriter.append(datum);

    // Closing the DataFileWriter flushes and closes the underlying stream as
    // well, so a separate dataOutputStream.close() is redundant.
    dataFileWriter.close();
  }
}
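
// ---------------------------------------------------------------------------
// A minimal companion sketch, not part of the original example: reading the
// generated file back with Avro's generic API. The class name
// ReadBasicAvroFileExample is hypothetical; DataFileStream and
// GenericDatumReader are standard Avro classes, referenced fully qualified
// here to keep the import list above unchanged.
// ---------------------------------------------------------------------------
class ReadBasicAvroFileExample {

  public static void main(String[] args) throws IOException {
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);
    org.apache.hadoop.fs.FSDataInputStream in = hdfs.open(new Path(args[0]));

    // No schema is passed here: the reader picks up the writer's schema from
    // the container-file header that DataFileWriter.create() wrote.
    org.apache.avro.io.DatumReader<GenericRecord> reader =
        new org.apache.avro.generic.GenericDatumReader<GenericRecord>();
    org.apache.avro.file.DataFileStream<GenericRecord> stream =
        new org.apache.avro.file.DataFileStream<GenericRecord>(in, reader);

    for (GenericRecord record : stream) {
      System.out.println(record); // GenericData.Record.toString() renders JSON
    }
    stream.close();
  }
}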