package com.scaleunlimited.cascading;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertNull;
import static junit.framework.Assert.assertTrue;
import static junit.framework.Assert.assertFalse;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.junit.Test;
import cascading.flow.Flow;
import cascading.flow.FlowProcess;
import cascading.flow.StepCounters;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.flow.local.LocalFlowConnector;
import cascading.flow.local.LocalFlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Filter;
import cascading.operation.FilterCall;
import cascading.operation.expression.ExpressionFilter;
import cascading.operation.state.Counter;
import cascading.pipe.Each;
import cascading.pipe.Pipe;
import cascading.pipe.assembly.SumBy;
import cascading.scheme.hadoop.SequenceFile;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.hadoop.Lfs;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntryCollector;
import com.scaleunlimited.cascading.local.DirectoryTap;
import com.scaleunlimited.cascading.local.KryoScheme;
public class FlowCountersTest {
/**
 * Counters used by the tests in this class.
 */
private enum FlowCountersTestEnum {
// Incremented once per tuple by CountTuplesFunction.
TUPLE_COUNT,
// Requested by testCounters but never incremented; that test expects it to be reported as 0.
UNUSED_COUNT,
// Neither requested nor incremented; testCounters expects it to be absent from the result map.
BOGUS_COUNT,
// Both are incremented on the shared "head" pipe built by makeCountersFlow.
PRE_BREAK_COUNT,
POST_BREAK_COUNT,
// Incremented at the end of the "left" and "right" branches (respectively) built by makeCountersFlow.
LEFT_COUNT,
RIGHT_COUNT
}
/**
 * Pass-through operation that bumps {@link FlowCountersTestEnum#TUPLE_COUNT} by one for
 * every tuple it sees. Despite its name, this is implemented as a Cascading {@link Filter}
 * that never removes anything.
 */
@SuppressWarnings("serial")
private static class CountTuplesFunction extends BaseOperation<NullContext> implements Filter<NullContext> {

    /**
     * Counts the current tuple and keeps it.
     *
     * @param flowProcess process whose counter is incremented
     * @param filterCall  the current filter invocation (unused)
     * @return always {@code false}, i.e. the tuple is never removed
     */
    @SuppressWarnings("rawtypes")
    @Override
    public boolean isRemove(FlowProcess flowProcess, FilterCall<NullContext> filterCall) {
        flowProcess.increment(FlowCountersTestEnum.TUPLE_COUNT, 1);

        final boolean removeTuple = false;
        return removeTuple;
    }
}
/**
 * Checks that a counter key is "&lt;group&gt;.&lt;counter&gt;", where the group of an Enum
 * counter is the Enum's class name.
 */
@Test
public void testGetCounterKey() throws Throwable {
    // Enum flavor: group is the Enum class name, counter is the constant's name.
    final String expectedEnumKey =
        FlowCountersTestEnum.class.getName() + "." + FlowCountersTestEnum.TUPLE_COUNT.name();
    assertEquals(expectedEnumKey, FlowCounters.getCounterKey(FlowCountersTestEnum.TUPLE_COUNT));

    // String flavor: explicit group and counter names.
    final String expectedStringKey = "group.counter";
    assertEquals(expectedStringKey, FlowCounters.getCounterKey("group", "counter"));
}
/**
 * Checks group membership of a counter key: it belongs to its own group, but the full key
 * is not accepted as a group name.
 */
@Test
public void testIsCounterKeyInGroup() throws Throwable {
    final boolean inOwnGroup = FlowCounters.isCounterKeyInGroup("group.counter", "group");
    assertTrue(inOwnGroup);

    // Passing the whole key as the group must not match.
    final boolean inFullKeyGroup = FlowCounters.isCounterKeyInGroup("group.counter", "group.counter");
    assertFalse(inFullKeyGroup);
}
/**
 * Checks that the counter name can be recovered from a key given its group, and that
 * {@code null} comes back when the supplied group does not match.
 */
@Test
public void testGetCounterNameFromCounterKey() throws Throwable {
    // Matching group: the counter name is returned.
    assertEquals("counter", FlowCounters.getCounterNameFromCounterKey("group.counter", "group"));

    // Full key passed as the group: no counter name can be extracted.
    assertNull(FlowCounters.getCounterNameFromCounterKey("group.counter", "group.counter"));
}
/**
 * Runs a single-pipe flow in Hadoop mode through {@code FlowCounters.run} and checks the
 * returned counters: a counter that was incremented has the expected value, a requested
 * counter that was never incremented is reported as 0, and a counter that was not
 * requested is absent from the result.
 */
@Test
@SuppressWarnings("rawtypes")
public void testCounters() throws Throwable {
    final Fields testFields = new Fields("user", "value");
    final int numDatums = 1;
    final String testDir = "build/test/FlowCountersTest/testCounters/";
    String in = testDir + "in";

    // Write the input data. Close the collector even if a write fails, so a failing
    // test doesn't leave the output open.
    Lfs sourceTap = new Lfs(new SequenceFile(testFields), in, SinkMode.REPLACE);
    TupleEntryCollector write = sourceTap.openForWrite(new HadoopFlowProcess());
    try {
        for (int i = 0; i < numDatums; i++) {
            String username = "user-" + (i % 3);
            write.add(new Tuple(username, i));
        }
    } finally {
        write.close();
    }

    // Each tuple passing through the pipe bumps TUPLE_COUNT by one.
    Pipe pipe = new Pipe("test");
    pipe = new Each(pipe, new CountTuplesFunction());
    Tap sinkTap = new NullSinkTap(testFields);

    Flow flow = new HadoopFlowConnector().connect(sourceTap, sinkTap, pipe);
    Map<Enum, Long> counters = FlowCounters.run(flow, FlowCountersTestEnum.TUPLE_COUNT,
                                                FlowCountersTestEnum.UNUSED_COUNT);

    assertEquals(numDatums, (long)counters.get(FlowCountersTestEnum.TUPLE_COUNT));
    // Requested but never incremented.
    assertEquals(0, (long)counters.get(FlowCountersTestEnum.UNUSED_COUNT));
    // Never requested, so it must not appear in the result.
    assertNull(counters.get(FlowCountersTestEnum.BOGUS_COUNT));
}
/**
 * Runs the two-tailed local-mode flow from {@code makeCountersFlow} through each
 * {@code FlowCounters} entry point used here and checks the counters that come back.
 */
@Test
@SuppressWarnings("rawtypes")
public void testCountersWithLocalMode() throws Exception {
    final int numDatums = 8;

    // 1. Explicitly list the counters we want.
    Map<Enum, Long> counters = FlowCounters.run(makeCountersFlow(numDatums),
                                                FlowCountersTestEnum.PRE_BREAK_COUNT,
                                                FlowCountersTestEnum.POST_BREAK_COUNT,
                                                FlowCountersTestEnum.LEFT_COUNT,
                                                FlowCountersTestEnum.RIGHT_COUNT);
    assertFlowCounters(numDatums, counters);

    // 2. Don't specify any counters. We should get all of the same counters back,
    // plus a few more (e.g. Cascading's own step counters).
    counters = FlowCounters.run(makeCountersFlow(numDatums));
    assertFlowCounters(numDatums, counters);
    assertEquals(numDatums, (long)counters.get(StepCounters.Tuples_Read));

    // 3. Pass in a single Enum class instead of individual Enum values.
    counters = FlowCounters.run(makeCountersFlow(numDatums), FlowCountersTestEnum.class);
    assertFlowCounters(numDatums, counters);

    // 4. Finally, get back all counters keyed by their string counter key.
    Map<String, Long> countersAsStrings = FlowCounters.runAndReturnAllCounters(makeCountersFlow(numDatums));
    final String preBreakKey = FlowCounters.getCounterKey(FlowCountersTestEnum.PRE_BREAK_COUNT);
    assertEquals(numDatums, (long)countersAsStrings.get(preBreakKey));
    final String postBreakKey = FlowCounters.getCounterKey(FlowCountersTestEnum.POST_BREAK_COUNT);
    assertEquals(numDatums, (long)countersAsStrings.get(postBreakKey));
    final String leftKey = FlowCounters.getCounterKey(FlowCountersTestEnum.LEFT_COUNT);
    assertEquals(1, (long)countersAsStrings.get(leftKey));
    final String rightKey = FlowCounters.getCounterKey(FlowCountersTestEnum.RIGHT_COUNT);
    assertEquals(2, (long)countersAsStrings.get(rightKey));
}

/**
 * Asserts the four test counters produced by one run of the {@code makeCountersFlow} flow.
 *
 * @param numDatums number of input tuples the flow was built with
 * @param counters  Enum-keyed counter values returned by {@code FlowCounters.run}
 */
@SuppressWarnings("rawtypes")
private static void assertFlowCounters(int numDatums, Map<Enum, Long> counters) {
    assertEquals(numDatums, (long)counters.get(FlowCountersTestEnum.PRE_BREAK_COUNT));
    assertEquals(numDatums, (long)counters.get(FlowCountersTestEnum.POST_BREAK_COUNT));
    assertEquals(1, (long)counters.get(FlowCountersTestEnum.LEFT_COUNT));
    assertEquals(2, (long)counters.get(FlowCountersTestEnum.RIGHT_COUNT));
}
/**
 * Builds a local-mode Flow with one head pipe that splits into two tail pipes ("left" and
 * "right"), each of which sets counters that the tests check.
 *
 * <p>The input is written to a fixed directory under {@code build/test} (replacing any
 * previous content) and consists of {@code numDatums} tuples of the form
 * {@code ("user-" + (i % 2), i)} for {@code i} in {@code [0, numDatums)}.</p>
 *
 * @param numDatums number of input tuples to generate
 * @return the connected (not yet run) Flow
 * @throws IOException if the input data can't be written
 */
private Flow makeCountersFlow(int numDatums) throws IOException {
    final Fields testFields = new Fields("user", "value");
    final String testDir = "build/test/FlowCountersTest/makeCountersFlow/";
    String in = testDir + "in";

    // Write the input data. Close the collector even if a write fails, so a failing
    // test doesn't leave the output open.
    DirectoryTap sourceTap = new DirectoryTap(new KryoScheme(testFields), in, SinkMode.REPLACE);
    TupleEntryCollector write = sourceTap.openForWrite(new LocalFlowProcess());
    try {
        for (int i = 0; i < numDatums; i++) {
            String username = "user-" + (i % 2);
            write.add(new Tuple(username, i));
        }
    } finally {
        write.close();
    }

    // Every input tuple passes through both head counters.
    // NOTE(review): the two counters are applied back-to-back with nothing between them,
    // although their names suggest a pipeline break in between - confirm this is intended.
    Pipe headPipe = new Pipe("head");
    headPipe = new Each(headPipe, new Counter(FlowCountersTestEnum.PRE_BREAK_COUNT));
    headPipe = new Each(headPipe, new Counter(FlowCountersTestEnum.POST_BREAK_COUNT));

    // ExpressionFilter discards tuples for which the expression is true, so the left
    // branch keeps only the tuple(s) with value == 0.
    Pipe leftPipe = new Pipe("left", headPipe);
    leftPipe = new Each(leftPipe, new Fields("value"), new ExpressionFilter("value != 0", Integer.class));
    leftPipe = new Each(leftPipe, new Counter(FlowCountersTestEnum.LEFT_COUNT));

    // The right branch drops the value == 0 tuple(s) and sums the remaining values per user.
    Pipe rightPipe = new Pipe("right", headPipe);
    rightPipe = new Each(rightPipe, new Fields("value"), new ExpressionFilter("value == 0", Integer.class));
    rightPipe = new SumBy(rightPipe, new Fields("user"), new Fields("value"), new Fields("sum"), Integer.class);

    // We have two different users, so that's how many unique user name+sum values we should be getting.
    rightPipe = new Each(rightPipe, new Counter(FlowCountersTestEnum.RIGHT_COUNT));

    // One sink per tail pipe, keyed by the tail's name.
    Map<String, Tap> sinks = new HashMap<String, Tap>();
    sinks.put(leftPipe.getName(), new NullSinkTap());
    sinks.put(rightPipe.getName(), new NullSinkTap());

    return new LocalFlowConnector().connect(sourceTap, sinks, leftPipe, rightPipe);
}
}