package com.scaleunlimited.cascading.local;

import static org.junit.Assert.*;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.junit.Test;

import com.scaleunlimited.cascading.local.DirectoryTap;
import com.scaleunlimited.cascading.local.KryoScheme;

import cascading.flow.Flow;
import cascading.flow.local.LocalFlowConnector;
import cascading.flow.local.LocalFlowProcess;
import cascading.pipe.Pipe;
import cascading.scheme.local.TextLine;
import cascading.tap.SinkMode;
import cascading.tap.local.FileTap;
import cascading.tap.local.TemplateTap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;

/**
 * Tests for {@link DirectoryTap} used as a source, as a sink, and as the
 * parent of a {@link TemplateTap}, all in Cascading local mode.
 *
 * <p>Every test writes its fixtures under {@code build/test/DirectoryTapTest/}.
 */
public class DirectoryTapTest {

    /**
     * Writes two Kryo-encoded files into a directory with {@link FileTap} and
     * checks that a {@link DirectoryTap} over that directory yields the tuples
     * of {@code file1} followed by the tuples of {@code file2}.
     */
    @Test
    public void testAsSource() throws IOException {
        File dir = new File("build/test/DirectoryTapTest/testAsSource/");
        FileUtils.deleteDirectory(dir);
        assertTrue(dir.mkdirs());

        final Fields fields = new Fields("key", "value");

        FileTap ft = new FileTap(new KryoScheme(fields), new File(dir, "file1").getAbsolutePath(), SinkMode.REPLACE);
        writeAll(ft.openForWrite(new LocalFlowProcess()),
                 new Tuple("key1", 11),
                 new Tuple("key1", 12),
                 new Tuple("key2", 21));

        ft = new FileTap(new KryoScheme(fields), new File(dir, "file2").getAbsolutePath(), SinkMode.REPLACE);
        writeAll(ft.openForWrite(new LocalFlowProcess()),
                 new Tuple("key2", 22),
                 new Tuple("key3", 31));

        DirectoryTap dt = new DirectoryTap(new KryoScheme(fields), dir.getAbsolutePath());
        assertTuples(dt.openForRead(new LocalFlowProcess()),
                     new Tuple("key1", 11),
                     new Tuple("key1", 12),
                     new Tuple("key2", 21),
                     new Tuple("key2", 22),
                     new Tuple("key3", 31));
    }

    /**
     * Writes three tuples through a {@link DirectoryTap} with a
     * {@link TextLine} scheme and checks the resulting {@code part-00000} file.
     */
    @Test
    public void testAsSink() throws Exception {
        final String dirPath = "build/test/DirectoryTapTest/testAsSink/";
        DirectoryTap dt = new DirectoryTap(new TextLine(), dirPath, SinkMode.REPLACE);
        writeAll(dt.openForWrite(new LocalFlowProcess()),
                 new Tuple("key1", 11),
                 new Tuple("key1", 12),
                 new Tuple("key2", 21));

        // We should have a single file, called part-00000, in the output directory
        File dirFile = new File(dirPath);
        assertTrue(dirFile.exists());
        assertTrue(dirFile.isDirectory());

        assertFileLines(new File(dirFile, "part-00000"),
                        "key1\t11",
                        "key1\t12",
                        "key2\t21");
    }

    /**
     * Runs a flow from a Kryo-backed {@link DirectoryTap} into a
     * {@link TemplateTap} whose parent is another {@link DirectoryTap}, and
     * checks that one output file is produced per distinct key.
     *
     * <p>NOTE: "Tamplate" in the method name and paths is a typo for
     * "Template"; it is left unchanged here so the test name and its output
     * location stay the same.
     */
    @Test
    public void testAsTamplateTapSink() throws Exception {
        final String dirPath = "build/test/DirectoryTapTest/testAsTamplateTapSink/in";
        DirectoryTap dt = new DirectoryTap(new KryoScheme(new Fields("key", "value")), dirPath, SinkMode.REPLACE);
        writeAll(dt.openForWrite(new LocalFlowProcess()),
                 new Tuple("key1", 11),
                 new Tuple("key1", 12),
                 new Tuple("key2", 21));

        // We should have a single file, called part-00000, in the directory.
        // We'll use that as input, and use TemplateTap for the output.
        Pipe p = new Pipe("pipe");

        final String out = "build/test/DirectoryTapTest/testAsTamplateTapSink/out";
        DirectoryTap parentTap = new DirectoryTap(new TextLine(), out, SinkMode.REPLACE);
        TemplateTap sinkTap = new TemplateTap(parentTap, "key-%s", new Fields("key"));

        Flow<?> f = new LocalFlowConnector().connect(dt, sinkTap, p);
        f.complete();

        // We should have two files, called "key-key1" and "key-key2", in the output directory.
        File outDir = new File(out);
        assertTrue(outDir.exists());
        assertTrue(outDir.isDirectory());

        assertFileLines(new File(outDir, "key-key1"),
                        "key1\t11",
                        "key1\t12");
        assertFileLines(new File(outDir, "key-key2"),
                        "key2\t21");
    }

    /**
     * Points a {@link DirectoryTap} at a single file (rather than a directory)
     * and checks that the file's lines can be read back.
     */
    @Test
    public void testWithOneInputFile() throws Exception {
        final String dirPath = "build/test/DirectoryTapTest/testWithOneInputFile/";
        DirectoryTap outTap = new DirectoryTap(new TextLine(), dirPath, SinkMode.REPLACE);
        writeAll(outTap.openForWrite(new LocalFlowProcess()),
                 new Tuple("key1", 11),
                 new Tuple("key2", 21));

        // We should have a single file, called part-00000, in the output directory
        File dirFile = new File(dirPath);
        File resultFile = new File(dirFile, "part-00000");

        DirectoryTap inTap = new DirectoryTap(new TextLine(), resultFile.getAbsolutePath(), SinkMode.KEEP);
        assertLineValues(inTap.openForRead(new LocalFlowProcess()),
                         "key1\t11",
                         "key2\t21");
    }

    /**
     * Adds a {@code .part-00000.crc} file next to the real output and checks
     * that {@link DirectoryTap} reports a single child tap and reads only the
     * data file.
     */
    @Test
    public void testIgnoreCRCFiles() throws Exception {
        final String dirPath = "build/test/DirectoryTapTest/testIgnoreCRCFiles/";
        DirectoryTap outTap = new DirectoryTap(new TextLine(), dirPath, SinkMode.REPLACE);
        writeAll(outTap.openForWrite(new LocalFlowProcess()),
                 new Tuple("key1", 11),
                 new Tuple("key2", 21));

        // We should have a single file, called part-00000, in the output directory.
        // Let's add a file called .part-00000.crc
        File dirFile = new File(dirPath);
        File crcFile = new File(dirFile, ".part-00000.crc");
        crcFile.createNewFile();
        // createNewFile() returns false if the file is already there, so check
        // for presence instead of the return value; without the file this test
        // would not exercise the CRC filtering at all.
        assertTrue(crcFile.isFile());

        DirectoryTap inTap = new DirectoryTap(new TextLine(), dirPath, SinkMode.KEEP);
        assertEquals(1, inTap.getNumChildTaps());

        assertLineValues(inTap.openForRead(new LocalFlowProcess()),
                         "key1\t11",
                         "key2\t21");
    }

    /** Adds every tuple to the collector, closing it even if an add fails. */
    private static void writeAll(TupleEntryCollector writer, Tuple... tuples) {
        try {
            for (Tuple tuple : tuples) {
                writer.add(tuple);
            }
        } finally {
            writer.close();
        }
    }

    /** Asserts the iterator yields exactly the expected tuples, in order, then closes it. */
    private static void assertTuples(TupleEntryIterator iter, Tuple... expected) throws IOException {
        try {
            for (Tuple tuple : expected) {
                assertTrue(iter.hasNext());
                assertEquals(tuple, iter.next().getTuple());
            }
            assertFalse(iter.hasNext());
        } finally {
            iter.close();
        }
    }

    /** Asserts the iterator's "line" field yields exactly the expected strings, in order, then closes it. */
    private static void assertLineValues(TupleEntryIterator iter, String... expected) throws IOException {
        try {
            for (String line : expected) {
                assertTrue(iter.hasNext());
                TupleEntry te = iter.next();
                assertEquals(line, te.getString("line"));
            }
            assertFalse(iter.hasNext());
        } finally {
            iter.close();
        }
    }

    /** Asserts the file exists and contains exactly the expected lines, closing the stream afterwards. */
    private static void assertFileLines(File file, String... expected) throws IOException {
        assertTrue(file.exists());

        List<String> lines;
        FileInputStream in = new FileInputStream(file);
        try {
            // Explicit charset so the result does not depend on the platform default.
            lines = IOUtils.readLines(in, "UTF-8");
        } finally {
            in.close();
        }

        assertEquals(expected.length, lines.size());
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], lines.get(i));
        }
    }
}