package com.cloudera.sa.hcu.io.put.hdfs.writer;
import java.io.IOException;
import java.util.Properties;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Type;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.cloudera.sa.hcu.utils.PropertyUtils;
/**
 * Writer that converts rows of string columns into Avro {@link GenericRecord}s and appends
 * them to an Avro data file created through the Hadoop {@link FileSystem} API.
 *
 * <p>The Avro schema is supplied as JSON under {@link #CONF_SCHEMA_JSON}. Only top-level fields
 * of type STRING, DOUBLE, INT, LONG and FLOAT are supported; any other field type makes
 * {@link #writeRow(String, String[])} throw.
 *
 * <p>NOTE(review): {@link #CONF_COMPRESSION_CODEC} is stored in the properties but is not read
 * anywhere in this class; whether the superclass applies it is not visible from this file.
 */
public class AvroWriter extends AbstractWriter
{
    /** Avro container-file writer; assigned in {@link #init(String, Properties)}. */
    DataFileWriter<GenericRecord> dataFileWriter;

    /** Schema parsed from the {@link #CONF_SCHEMA_JSON} property; assigned in init. */
    Schema schema;

    /** Property key holding the Avro schema as a JSON string. */
    public static final String CONF_SCHEMA_JSON = "avro.writer.schema.json";

    /** Property key for the compression codec (alias of the COMPRESSION_CODEC constant). */
    public static final String CONF_COMPRESSION_CODEC = COMPRESSION_CODEC;

    /**
     * Creates a writer configured from the given properties.
     *
     * @param p configuration handed unchanged to the superclass constructor
     * @throws Exception whatever the superclass constructor throws
     */
    public AvroWriter(Properties p) throws Exception
    {
        super(p);
    }

    /**
     * Convenience constructor that packs its arguments into a {@link Properties} object and
     * hands it to the superclass constructor.
     *
     * @param outputPath       value stored under CONF_OUTPUT_PATH
     * @param schemaJson       Avro schema JSON, stored under {@link #CONF_SCHEMA_JSON}
     * @param compressionCodec value stored under {@link #CONF_COMPRESSION_CODEC}; when
     *                         {@code null} the property is left unset
     * @throws IOException if the superclass constructor fails
     */
    public AvroWriter(String outputPath, String schemaJson, String compressionCodec) throws IOException
    {
        super(makeProperties(outputPath, schemaJson, compressionCodec));
    }

    /**
     * Builds the property set used by the three-argument constructor.
     *
     * @param outputPath       value for CONF_OUTPUT_PATH
     * @param schemaJson       value for {@link #CONF_SCHEMA_JSON}
     * @param compressionCodec value for {@link #CONF_COMPRESSION_CODEC}, or {@code null} to
     *                         leave that key unset
     * @return a new {@link Properties} instance holding the supplied values
     */
    private static Properties makeProperties(String outputPath, String schemaJson, String compressionCodec)
    {
        Properties p = new Properties();
        p.setProperty(CONF_OUTPUT_PATH, outputPath);
        p.setProperty(CONF_SCHEMA_JSON, schemaJson);
        // Properties rejects null values with a NullPointerException, so a missing codec is
        // expressed by not setting the key at all.
        if (compressionCodec != null)
        {
            p.setProperty(CONF_COMPRESSION_CODEC, compressionCodec);
        }
        return p;
    }

    /**
     * Parses the schema from the properties, creates the output file on the file system
     * obtained from a default {@link Configuration}, and opens the Avro data file writer on it.
     *
     * @param outputPath path of the file to create
     * @param p          properties containing {@link #CONF_SCHEMA_JSON}
     * @throws IOException if the file cannot be created or the Avro header cannot be written
     */
    @Override
    protected void init(String outputPath, Properties p) throws IOException
    {
        schema = (new Schema.Parser()).parse(PropertyUtils.getStringProperty(p, CONF_SCHEMA_JSON));

        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);
        Path outputFilePath = new Path(outputPath);
        FSDataOutputStream dataOutputStream = hdfs.create(outputFilePath);

        boolean opened = false;
        try
        {
            DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
            dataFileWriter = new DataFileWriter<GenericRecord>(writer);
            dataFileWriter.create(schema, dataOutputStream);
            opened = true;
        } finally
        {
            // If the Avro writer could not be opened, nothing else holds the stream, so close
            // it here instead of leaking the open file handle.
            if (!opened)
            {
                try
                {
                    dataOutputStream.close();
                } catch (IOException ignored)
                {
                    // Best effort: the failure that got us here is the one worth reporting.
                }
            }
        }
    }

    /**
     * Converts one row of string columns into an Avro record and appends it to the file.
     * Column {@code i} is parsed according to the type of schema field {@code i}. When fewer
     * columns than fields are supplied, the remaining fields are simply not set on the record.
     *
     * @param rowType not used by this writer
     * @param columns column values, in schema field order
     * @throws IOException              if appending to the data file fails
     * @throws IllegalArgumentException if there are more columns than schema fields
     * @throws NumberFormatException    if a numeric column cannot be parsed
     * @throws RuntimeException         if a field has an unsupported type
     */
    public void writeRow(String rowType, String[] columns) throws IOException
    {
        // Fetch the field list once instead of on every loop iteration.
        java.util.List<Schema.Field> fields = schema.getFields();
        if (columns.length > fields.size())
        {
            throw new IllegalArgumentException("Row has " + columns.length
                    + " columns but the schema only defines " + fields.size() + " fields");
        }

        GenericRecord datum = new GenericData.Record(schema);
        for (int i = 0; i < columns.length; i++)
        {
            String fieldValue = columns[i];
            Schema.Field field = fields.get(i);
            Type type = field.schema().getType();

            // Hand the value to Avro in the Java type matching the field's schema type.
            // TODO I've got to believe there is a better way to do this
            switch (type)
            {
                case STRING:
                    datum.put(field.name(), new Utf8(fieldValue));
                    break;
                case DOUBLE:
                    datum.put(field.name(), Double.valueOf(fieldValue));
                    break;
                case INT:
                    datum.put(field.name(), Integer.valueOf(fieldValue));
                    break;
                case LONG:
                    datum.put(field.name(), Long.valueOf(fieldValue));
                    break;
                case FLOAT:
                    datum.put(field.name(), Float.valueOf(fieldValue));
                    break;
                default:
                    throw new RuntimeException("AvroWriter doesn't support type: " + type
                            + " yet. Put in a bug request");
            }
        }
        dataFileWriter.append(datum);
    }

    /**
     * Closes the Avro data file writer, if one was opened.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException
    {
        // Guard against close() being reached when init never produced a writer.
        if (dataFileWriter != null)
        {
            dataFileWriter.close();
        }
    }
}