/** * Copyright 2014 IPONWEB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package net.iponweb.hadoop.streaming.avro; import org.apache.avro.Schema; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericRecord; import org.apache.avro.mapred.AvroWrapper; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.RecordWriter; import org.apache.hadoop.mapred.Reporter; import java.io.IOException; public class AvroAsTextOutputFormat extends AvroAsJsonOutputFormat { protected static Log LOG = LogFactory.getLog(AvroAsTextOutputFormat.class); protected GenericDataTSV tsv = new GenericDataTSV(); @Override protected RecordWriter<Text, NullWritable> createRecordWriter(final DataFileWriter<GenericRecord> w, final Schema schema){ return new AvroAsTextRecordWriter<Text, NullWritable>(w, schema); } protected class AvroAsTextRecordWriter<K2, V2> implements RecordWriter<K2, V2> { private final DataFileWriter<GenericRecord> writer; private final Schema schema; public AvroAsTextRecordWriter(DataFileWriter<GenericRecord> writer, Schema schema) { this.writer = writer; this.schema = schema; } @Override public void write(K2 k, V2 v) throws IOException { GenericRecord record = fromText(k.toString() + "\t" + v.toString(), schema); AvroWrapper<GenericRecord> wrapper = new AvroWrapper<GenericRecord>(record); writer.append(wrapper.datum()); } @Override public void close(Reporter reporter) throws IOException { writer.close(); } protected GenericRecord fromText(String v, Schema schema) throws IOException { return tsv.getDatum(v, schema); } } }