package com.manning.hip.ch3.avro;

import com.manning.hip.ch3.csv.CSVParser;
import com.manning.hip.ch3.avro.gen.Stock;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;

/**
 * Reads a local CSV file of stock quotes and writes the records to HDFS
 * as a Snappy-compressed Avro container file, using the code-generated
 * {@link Stock} specific record.
 *
 * <p>Usage: {@code AvroStockFileWrite <local-csv-path> <hdfs-dest-path>}
 */
public class AvroStockFileWrite {

  /** Shared CSV line parser; assumed stateless across calls. */
  static CSVParser parser = new CSVParser();

  /**
   * Converts a single CSV line into a {@link Stock} record.
   *
   * <p>Expected column order: symbol, date, open, high, low, close,
   * volume, adjClose (8 fields).
   *
   * @param line one raw CSV line
   * @return the populated Stock record
   * @throws IOException if the CSV parser fails on the line
   * @throws NumberFormatException if a numeric column is malformed
   */
  public static Stock createStock(String line) throws IOException {
    String[] parts = parser.parseLine(line);
    Stock stock = new Stock();
    stock.setSymbol(parts[0]);
    stock.setDate(parts[1]);
    stock.setOpen(Double.valueOf(parts[2]));
    stock.setHigh(Double.valueOf(parts[3]));
    stock.setLow(Double.valueOf(parts[4]));
    stock.setClose(Double.valueOf(parts[5]));
    stock.setVolume(Integer.valueOf(parts[6]));
    stock.setAdjClose(Double.valueOf(parts[7]));
    return stock;
  }

  /**
   * Streams every line of {@code inputFile} into {@code outputStream} as
   * Avro records.
   *
   * <p>Closes both the Avro writer and the supplied stream when done —
   * including on failure, so neither resource leaks if a line fails to
   * parse mid-way (the original version only closed them on the happy
   * path).
   *
   * @param inputFile    local CSV file to read
   * @param outputStream destination stream (e.g. an HDFS file); closed
   *                     by this method
   * @throws IOException on read, parse, or write failure
   */
  public static void writeToAvro(File inputFile,
                                 OutputStream outputStream)
      throws IOException {
    DataFileWriter<Stock> writer =       //<co id="ch03_avrospecific_comment1"/>
        new DataFileWriter<Stock>(
            new SpecificDatumWriter<Stock>())
            .setSyncInterval(100);       //<co id="ch03_avrospecific_comment2"/>
    try {
      writer.setCodec(CodecFactory.snappyCodec()); //<co id="ch03_avrospecific_comment3"/>
      writer.create(Stock.SCHEMA$, outputStream);  //<co id="ch03_avrospecific_comment4"/>
      for (String line : FileUtils.readLines(inputFile)) {
        writer.append(createStock(line));          //<co id="ch03_avrospecific_comment5"/>
      }
    } finally {
      // Quiet-close both resources even if appending threw, so the
      // writer flushes what it can and the HDFS stream is released.
      IOUtils.closeStream(writer);
      IOUtils.closeStream(outputStream);
    }
  }

  /**
   * Entry point: args[0] is the local CSV input, args[1] the HDFS
   * destination path.
   */
  public static void main(String... args) throws Exception {
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);

    File inputFile = new File(args[0]);
    Path destFile = new Path(args[1]);

    // writeToAvro takes ownership of the stream and closes it.
    OutputStream os = hdfs.create(destFile);
    writeToAvro(inputFile, os);
  }
}