package com.scaleunlimited.cascading.local; import static org.junit.Assert.*; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.junit.Test; import com.scaleunlimited.cascading.TupleLogger; import com.scaleunlimited.cascading.local.KryoScheme; import cascading.flow.Flow; import cascading.flow.local.LocalFlowConnector; import cascading.flow.local.LocalFlowProcess; import cascading.operation.Identity; import cascading.pipe.Each; import cascading.pipe.Pipe; import cascading.pipe.assembly.Rename; import cascading.pipe.assembly.SumBy; import cascading.scheme.local.TextDelimited; import cascading.scheme.local.TextLine; import cascading.tap.SinkMode; import cascading.tap.Tap; import cascading.tap.local.FileTap; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; import cascading.tuple.TupleEntryCollector; import cascading.tuple.TupleEntryIterator; public class KryoSchemeTest { private static class MyIntClass { private int _value; public MyIntClass(int value) { _value = value; } public int getValue() { return _value; } } private static class MyWritable implements Writable { private Object _value; public MyWritable() { // Empty constructor for Writable (not that it gets called w/Kryo) } public MyWritable(int value) { _value = new Integer(value); } public int getValue() { return (Integer)_value; } @Override public void readFields(DataInput in) throws IOException { _value = new Integer(in.readInt()); } @Override public void write(DataOutput out) throws IOException { out.writeInt((Integer)_value); } } @Test public void testSimple() throws Exception { final String targetDir = "build/test/KryoSchemeTest/testSimple"; // Create a local tap that uses the KryoScheme Fields fields = new Fields("key", "value"); Tap out = new FileTap(new KryoScheme(fields), targetDir); TupleEntryCollector writer = out.openForWrite(new LocalFlowProcess()); writer.add(new Tuple("key1", "value11")); writer.add(new Tuple("key1", "value12")); writer.add(new Tuple("key2", "value21")); writer.close(); Tap in = new FileTap(new KryoScheme(fields), targetDir); TupleEntryIterator iter = in.openForRead(new LocalFlowProcess()); assertTrue(iter.hasNext()); TupleEntry te = iter.next(); assertEquals("key1", te.getString("key")); assertEquals("value11", te.getString("value")); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key1", te.getString("key")); assertEquals("value12", te.getString("value")); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key2", te.getString("key")); assertEquals("value21", te.getString("value")); assertFalse(iter.hasNext()); iter.close(); } @Test public void testEmptyFile() throws Exception { final String targetDir = "build/test/KryoSchemeTest/testEmptyFile"; // Create a local tap that uses the KryoScheme, but don't write anything. // This will create an empty file. Fields fields = new Fields("key", "value"); Tap out = new FileTap(new KryoScheme(fields), targetDir); TupleEntryCollector writer = out.openForWrite(new LocalFlowProcess()); writer.close(); Tap in = new FileTap(new KryoScheme(fields), targetDir); TupleEntryIterator iter = in.openForRead(new LocalFlowProcess()); assertFalse(iter.hasNext()); iter.close(); } @Test public void testWritable() throws Exception { final String targetDir = "build/test/KryoSchemeTest/testWritable"; // Create a local tap that uses the KryoScheme Fields fields = new Fields("key", "bytes", "value"); Tap out = new FileTap(new KryoScheme(fields), targetDir); TupleEntryCollector writer = out.openForWrite(new LocalFlowProcess()); writer.add(new Tuple("key1", new BytesWritable(new byte[] {1, 1}), new MyWritable(1))); writer.add(new Tuple("key1", new BytesWritable(new byte[] {1, 2}), new MyWritable(1))); writer.add(new Tuple("key2", new BytesWritable(new byte[] {2, 1}), new MyWritable(2))); writer.close(); Tap in = new FileTap(new KryoScheme(fields), targetDir); TupleEntryIterator iter = in.openForRead(new LocalFlowProcess()); assertTrue(iter.hasNext()); TupleEntry te = iter.next(); assertEquals("key1", te.getString("key")); BytesWritable bw = (BytesWritable)te.getObject("bytes"); assertNotNull(bw); assertEquals(1, bw.getBytes()[0]); assertEquals(1, bw.getBytes()[1]); MyWritable mw = (MyWritable)te.getObject("value"); assertNotNull(mw); assertEquals(1, mw.getValue()); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key1", te.getString("key")); bw = (BytesWritable)te.getObject("bytes"); assertNotNull(bw); assertEquals(1, bw.getBytes()[0]); assertEquals(2, bw.getBytes()[1]); mw = (MyWritable)te.getObject("value"); assertNotNull(mw); assertEquals(1, mw.getValue()); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key2", te.getString("key")); bw = (BytesWritable)te.getObject("bytes"); assertNotNull(bw); assertEquals(2, bw.getBytes()[0]); assertEquals(1, bw.getBytes()[1]); mw = (MyWritable)te.getObject("value"); assertNotNull(mw); assertEquals(2, mw.getValue()); assertFalse(iter.hasNext()); iter.close(); } @Test public void testCustomClassNoEmptyConstructor() throws Exception { final String targetDir = "build/test/KryoSchemeTest/testCustomClassNoEmptyConstructor"; // Create a local tap that uses the KryoScheme Fields fields = new Fields("key", "value"); Tap out = new FileTap(new KryoScheme(fields), targetDir); TupleEntryCollector writer = out.openForWrite(new LocalFlowProcess()); writer.add(new Tuple("key1", new MyIntClass(1))); writer.add(new Tuple("key1", new MyIntClass(1))); writer.add(new Tuple("key2", new MyIntClass(2))); writer.close(); Tap in = new FileTap(new KryoScheme(fields), targetDir); TupleEntryIterator iter = in.openForRead(new LocalFlowProcess()); assertTrue(iter.hasNext()); TupleEntry te = iter.next(); assertEquals("key1", te.getString("key")); MyIntClass mic = (MyIntClass)te.getObject("value"); assertNotNull(mic); assertEquals(1, mic.getValue()); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key1", te.getString("key")); mic = (MyIntClass)te.getObject("value"); assertNotNull(mic); assertEquals(1, mic.getValue()); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key2", te.getString("key")); mic = (MyIntClass)te.getObject("value"); assertNotNull(mic); assertEquals(2, mic.getValue()); assertFalse(iter.hasNext()); iter.close(); } @Test public void testTupleInTuple() throws Exception { final String targetDir = "build/test/KryoSchemeTest/testTupleInTuple"; // Create a local tap that uses the KryoScheme Fields fields = new Fields("key", "value"); Tap out = new FileTap(new KryoScheme(fields), targetDir); TupleEntryCollector writer = out.openForWrite(new LocalFlowProcess()); writer.add(new Tuple("key1", null)); writer.add(new Tuple("key1", new Tuple(1))); writer.add(new Tuple("key2", new Tuple(1, 2))); writer.close(); Tap in = new FileTap(new KryoScheme(fields), targetDir); TupleEntryIterator iter = in.openForRead(new LocalFlowProcess()); assertTrue(iter.hasNext()); TupleEntry te = iter.next(); assertEquals("key1", te.getString("key")); assertNull(te.getObject("value")); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key1", te.getString("key")); Tuple t = (Tuple)te.getObject("value"); assertNotNull(t); assertEquals(1, t.size()); assertEquals(1, t.getInteger(0)); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key2", te.getString("key")); t = (Tuple)te.getObject("value"); assertNotNull(t); assertEquals(2, t.size()); assertEquals(1, t.getInteger(0)); assertEquals(2, t.getInteger(1)); assertFalse(iter.hasNext()); iter.close(); } @Test public void testDuplicateString() throws Exception { final String targetDir = "build/test/KryoSchemeTest/testDuplicateString"; // Create a local tap that uses the KryoScheme Fields fields = new Fields("str1", "str2"); Tap out = new FileTap(new KryoScheme(fields), targetDir); TupleEntryCollector writer = out.openForWrite(new LocalFlowProcess()); String test = "this is a test"; writer.add(new Tuple(test, test)); writer.close(); Tap in = new FileTap(new KryoScheme(fields), targetDir); TupleEntryIterator iter = in.openForRead(new LocalFlowProcess()); assertTrue(iter.hasNext()); TupleEntry te = iter.next(); assertEquals(test, te.getString("str1")); assertEquals(test, te.getString("str2")); assertFalse(iter.hasNext()); iter.close(); } @Test public void testInWorkflow() throws Exception { final String srcDir = "build/test/KryoSchemeTest/testInWorkflow/src"; final String dstFile = "build/test/KryoSchemeTest/testInWorkflow/dstFile"; // Create a local tap that uses the KryoScheme Fields fields = new Fields("key", "value"); Tap tap = new DirectoryTap(new KryoScheme(fields), srcDir, SinkMode.REPLACE); TupleEntryCollector writer = tap.openForWrite(new LocalFlowProcess()); writer.add(new Tuple("key1", 11)); writer.add(new Tuple("key1", 12)); writer.add(new Tuple("key2", 21)); writer.close(); Tap sourceTap = new DirectoryTap(new KryoScheme(fields), srcDir); Pipe p = new Pipe("pipe"); p = new SumBy(p, new Fields("key"), new Fields("value"), new Fields("sum"), Integer.class); Tap sinkTap = new FileTap(new TextLine(), dstFile, SinkMode.REPLACE); Flow f = new LocalFlowConnector().connect(sourceTap, sinkTap, p); f.complete(); // TODO verify we have expected output Tap validationTap = new FileTap(new TextDelimited(new Fields("key", "sum"), "\t", new Class[] {String.class, Integer.class}), dstFile); TupleEntryIterator iter = validationTap.openForRead(new LocalFlowProcess()); assertTrue(iter.hasNext()); TupleEntry te = iter.next(); assertEquals("key1", te.getString("key")); assertEquals(23, te.getInteger("sum")); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key2", te.getString("key")); assertEquals(21, te.getInteger("sum")); assertFalse(iter.hasNext()); iter.close(); } @Test public void testFieldSelection() throws Exception { final String srcDir = "build/test/KryoSchemeTest/testFieldSelection/src"; final String dstFile = "build/test/KryoSchemeTest/testFieldSelection/dst"; // Create a local tap that uses the KryoScheme Fields fields = new Fields("key", "value", "index"); Tap tap = new DirectoryTap(new KryoScheme(Fields.UNKNOWN, fields), srcDir, SinkMode.REPLACE); TupleEntryCollector writer = tap.openForWrite(new LocalFlowProcess()); writer.add(new Tuple("key1", 11, 1)); writer.add(new Tuple("key1", 12, 2)); writer.add(new Tuple("key2", 21, 3)); writer.close(); Tap sourceTap = new DirectoryTap(new KryoScheme(fields), srcDir, SinkMode.KEEP); Pipe p = new Pipe("pipe"); // Create a sink where we're writing out a subset of the fields, in a different order. Tap sinkTap = new DirectoryTap(new KryoScheme(Fields.UNKNOWN, new Fields("index", "value")), dstFile, SinkMode.REPLACE); Flow f = new LocalFlowConnector().connect(sourceTap, sinkTap, p); f.complete(); // Verify we have expected output Tap validationTap = new DirectoryTap(new KryoScheme(new Fields("index", "value")), dstFile); TupleEntryIterator iter = validationTap.openForRead(new LocalFlowProcess()); assertTrue(iter.hasNext()); TupleEntry te = iter.next(); assertEquals(1, te.getInteger("index")); assertEquals(11, te.getInteger("value")); assertTrue(iter.hasNext()); te = iter.next(); assertEquals(2, te.getInteger("index")); assertEquals(12, te.getInteger("value")); assertTrue(iter.hasNext()); te = iter.next(); assertEquals(3, te.getInteger("index")); assertEquals(21, te.getInteger("value")); assertFalse(iter.hasNext()); iter.close(); // And that we don't have fields we don't expect to have. try { validationTap = new DirectoryTap(new KryoScheme(fields), dstFile); validationTap.openForRead(new LocalFlowProcess()); fail("Should have thrown exception"); } catch (Exception e) { // expected } } @Test public void testFieldReordering() throws Exception { final String srcDir = "build/test/KryoSchemeTest/testFieldReordering/src"; final String dstFile = "build/test/KryoSchemeTest/testFieldReordering/dst"; // Create a local tap that uses the KryoScheme Fields fields = new Fields("key", "value"); Tap tap = new DirectoryTap(new KryoScheme(Fields.UNKNOWN, fields), srcDir, SinkMode.REPLACE); TupleEntryCollector writer = tap.openForWrite(new LocalFlowProcess()); writer.add(new Tuple("key1", 11)); writer.add(new Tuple("key1", 12)); writer.add(new Tuple("key2", 21)); writer.close(); Tap sourceTap = new DirectoryTap(new KryoScheme(fields), srcDir, SinkMode.KEEP); Pipe p = new Pipe("pipe"); p = new Rename(p, new Fields("key"), new Fields("kee")); // Create a sink where we're writing out the fields in a different order. We also // test that we handle getting passed a single field when we're being used as a // sink (implicitly used for both source & sink). Tap sinkTap = new DirectoryTap(new KryoScheme(new Fields("kee", "value")), dstFile, SinkMode.REPLACE); Flow f = new LocalFlowConnector().connect(sourceTap, sinkTap, p); f.complete(); // Verify we have expected output Tap validationTap = new DirectoryTap(new KryoScheme(new Fields("kee", "value")), dstFile); TupleEntryIterator iter = validationTap.openForRead(new LocalFlowProcess()); assertTrue(iter.hasNext()); TupleEntry te = iter.next(); assertEquals("key1", te.getString("kee")); assertEquals(11, te.getInteger("value")); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key1", te.getString("kee")); assertEquals(12, te.getInteger("value")); assertTrue(iter.hasNext()); te = iter.next(); assertEquals("key2", te.getString("kee")); assertEquals(21, te.getInteger("value")); assertFalse(iter.hasNext()); iter.close(); } }