package com.scaleunlimited.cascading; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import org.junit.Test; import com.scaleunlimited.cascading.GroupLimit; import cascading.flow.Flow; import cascading.flow.hadoop.HadoopFlowConnector; import cascading.flow.hadoop.HadoopFlowProcess; import cascading.pipe.Every; import cascading.pipe.GroupBy; import cascading.pipe.Pipe; import cascading.scheme.hadoop.SequenceFile; import cascading.tap.SinkMode; import cascading.tap.hadoop.Lfs; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; import cascading.tuple.TupleEntryCollector; import cascading.tuple.TupleEntryIterator; public class GroupLimitTest { @Test public void test() throws Exception { final Fields groupField = new Fields("user"); final Fields sortField = new Fields("value"); final Fields testFields = new Fields("user", "value"); String in = "build/test/GroupLimitTest/test/in"; String out = "build/test/GroupLimitTest/test/out"; Lfs sourceTap = new Lfs(new SequenceFile(testFields), in, SinkMode.REPLACE); TupleEntryCollector write = sourceTap.openForWrite(new HadoopFlowProcess()); write.add(new Tuple("user1", 1)); write.add(new Tuple("user1", 2)); write.add(new Tuple("user2", 1)); write.add(new Tuple("user2", 2)); write.add(new Tuple("user2", 3)); write.close(); Pipe pipe = new Pipe("test"); pipe = new GroupBy(pipe, groupField, sortField, true); pipe = new Every(pipe, new GroupLimit(1), Fields.RESULTS); Lfs sinkTap = new Lfs(new SequenceFile(testFields), out, SinkMode.REPLACE); Flow flow = new HadoopFlowConnector().connect(sourceTap, sinkTap, pipe); flow.complete(); TupleEntryIterator iter = sinkTap.openForRead(new HadoopFlowProcess()); TupleEntry te = iter.next(); assertEquals("user2", te.getString("user")); assertEquals(3, te.getInteger("value")); te = iter.next(); assertEquals("user1", te.getString("user")); assertEquals(2, te.getInteger("value")); assertFalse(iter.hasNext()); } }