package net.iponweb.hadoop.streaming.avro;

import junit.framework.Assert;
import net.iponweb.hadoop.streaming.dummyReporter;
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

public class AvroInOutFormatsTest {

    // Avro record schema with int, string, null and array fields, covering
    // the type conversions exercised by the round-trip tests below.
    private final String schema = "{\"fields\" : [ { \"name\" : \"x\", \"type\" : \"int\" }, { \"name\" : \"y\", \"type\" : \"string\" }, "
            + "{ \"name\" : \"z\", \"type\" : \"null\" }, { \"name\" : \"a\", \"type\" : { \"type\" : \"array\", \"items\" : \"int\"} }"
            + "], \"type\" : \"record\", \"name\" : \"log_record\"}";

    private Schema.Parser p = new Schema.Parser();
    private GenericDataTSV gd = new GenericDataTSV();

    private static Path workDir = new Path("file:///tmp/iow-hadoop-streaming-" + Thread.currentThread().getId());
    private static JobConf defaultConf = new JobConf();
    private static String fname = "avroastexttest";
    private static String fname2 = "avroasjsontest";
    private static Path file = new Path(workDir, fname);
    private static Path file2 = new Path(workDir, fname2);

    // The same log_record in the two textual representations under test
    private static String tsv = "25\twtf\t\t[1, 3, 4]";
    private static String json = "{\"x\": 25, \"y\": \"wtf\", \"z\": null, \"a\": [1, 3, 4]}";

    @Before
    public void setup() {
        defaultConf.set("iow.streaming.output.schema", schema);
        defaultConf.set("mapreduce.task.attempt.id", "attempt_200707121733_0003_m_000005_0");
    }

    @After
    public void cleanup() throws IOException {
        FileUtils.deleteDirectory(new File(workDir.toUri()));
    }

    @Test
    public void testAvroAsTextFmt() throws IOException {

        // Write one TSV line as an Avro record via AvroAsTextOutputFormat ...
        AvroAsTextOutputFormat outfmt = new AvroAsTextOutputFormat();
        FileOutputFormat.setOutputPath(defaultConf, file);
        RecordWriter<Text, NullWritable> writer = outfmt.getRecordWriter(file.getFileSystem(defaultConf),
                defaultConf, fname, new dummyReporter());
        writer.write(new Text(tsv), NullWritable.get());
        writer.close(null);

        // ... then read the resulting Avro container file back via AvroAsTextInputFormat
        FileInputFormat.setInputPaths(defaultConf,
                FileOutputFormat.getTaskOutputPath(defaultConf, fname + AvroOutputFormat.EXT));
        AvroAsTextInputFormat informat = new AvroAsTextInputFormat();
        RecordReader<Text, Text> reader = informat.getRecordReader(informat.getSplits(defaultConf, 1)[0],
                defaultConf, new dummyReporter());

        Text k = new Text();
        Text v = new Text();
        reader.next(k, v);

        // The key holds the first column and the value the remaining ones,
        // so rejoining them with a tab must reproduce the original line
        Assert.assertEquals("read back tsv", tsv, k.toString() + "\t" + v.toString());
    }

    @Test
    public void testAvroAsJsonFmt() throws IOException {

        // Write one JSON record as Avro via AvroAsJsonOutputFormat ...
        AvroAsJsonOutputFormat outfmt = new AvroAsJsonOutputFormat();
        FileOutputFormat.setOutputPath(defaultConf, file2);
        RecordWriter<Text, NullWritable> writer = outfmt.getRecordWriter(file2.getFileSystem(defaultConf),
                defaultConf, fname2, new dummyReporter());
        writer.write(new Text(json), NullWritable.get());
        writer.close(null);

        // ... then read it back via AvroAsJsonInputFormat
        FileInputFormat.setInputPaths(defaultConf,
                FileOutputFormat.getTaskOutputPath(defaultConf, fname2 + AvroOutputFormat.EXT));
        AvroAsJsonInputFormat informat = new AvroAsJsonInputFormat();
        RecordReader<Text, Text> reader = informat.getRecordReader(informat.getSplits(defaultConf, 1)[0],
                defaultConf, new dummyReporter());

        Text k = new Text();
        Text v = new Text();
        reader.next(k, v);

        // Compare parsed JSON trees rather than raw strings, so that
        // key ordering and whitespace differences do not fail the test
        ObjectMapper mapper = new ObjectMapper();
        JsonNode n0 = mapper.readTree(k.toString());
        JsonNode n1 = mapper.readTree(json);
        Assert.assertEquals("read back json", n0, n1);
    }
}