package com.scaleunlimited.cascading;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.Random;
import org.junit.Test;
import com.scaleunlimited.cascading.StdDeviation;
import cascading.flow.Flow;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.pipe.Every;
import cascading.pipe.GroupBy;
import cascading.pipe.Pipe;
import cascading.scheme.hadoop.SequenceFile;
import cascading.tap.SinkMode;
import cascading.tap.hadoop.Lfs;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;
public class StdDeviationTest {
@Test
public void test() throws Exception {
// Set up input tuples that have two groups, each with 10K values.
final Fields groupField = new Fields("user");
final Fields testFields = new Fields("user", "value");
String in = "build/test/StatisticsTest/test/in";
String out = "build/test/GroupLimitTest/test/out";
Lfs sourceTap = new Lfs(new SequenceFile(testFields), in, SinkMode.REPLACE);
TupleEntryCollector write = sourceTap.openForWrite(new HadoopFlowProcess());
Random gen = new Random(1L);
for (int i = 0; i < 10000; i++) {
write.add(new Tuple("user1", gen.nextGaussian()));
}
gen = new Random(1L);
for (int i = 0; i < 10000; i++) {
write.add(new Tuple("user2", gen.nextGaussian()));
}
write.close();
Pipe pipe = new Pipe("test");
pipe = new GroupBy(pipe, groupField);
pipe = new Every(pipe, new Fields("value"), new StdDeviation(), Fields.ALL);
Lfs sinkTap = new Lfs(new SequenceFile(new Fields("user", StdDeviation.FIELD_NAME)), out, SinkMode.REPLACE);
Flow flow = new HadoopFlowConnector().connect(sourceTap, sinkTap, pipe);
flow.complete();
TupleEntryIterator iter = sinkTap.openForRead(new HadoopFlowProcess());
TupleEntry te = iter.next();
assertEquals("user1", te.getString("user"));
double stdDeviation = te.getDouble(StdDeviation.FIELD_NAME);
assertTrue(stdDeviation >= 0.988395);
assertTrue(stdDeviation <= 1.011883);
te = iter.next();
assertEquals("user2", te.getString("user"));
stdDeviation = te.getDouble(StdDeviation.FIELD_NAME);
assertTrue(stdDeviation >= 0.988395);
assertTrue(stdDeviation <= 1.011883);
assertFalse(iter.hasNext());
}
}