package edu.washington.escience.myria.operator; import static org.junit.Assert.assertEquals; import java.util.Arrays; import org.junit.After; import org.junit.Before; import org.junit.Test; import edu.washington.escience.myria.DbException; import edu.washington.escience.myria.Schema; import edu.washington.escience.myria.Type; import edu.washington.escience.myria.storage.TupleBatch; import edu.washington.escience.myria.storage.TupleBatchBuffer; import edu.washington.escience.myria.util.TestEnvVars; /** * Tests SampleWR by verifying the results of various scenarios. */ public class SampleWRTest { final long RANDOM_SEED = 42; final int[] INPUT_VALS = {0, 1, 2, 3, 4, 5}; // values generated by rand.nextInt(INPUT_VALS.length) w/ seed=42 final int[] SEED_OUTPUT = {2, 3, 0, 2, 0, 1, 5, 2, 1, 5, 2, 2}; final Schema LEFT_SCHEMA = Schema.ofFields( "WorkerID", Type.INT_TYPE, "PartitionSize", Type.INT_TYPE, "SampleSize", Type.INT_TYPE, "SampleType", Type.STRING_TYPE); final Schema RIGHT_SCHEMA = Schema.ofFields(Type.INT_TYPE, "SomeValue"); final Schema OUTPUT_SCHEMA = RIGHT_SCHEMA; TupleBatchBuffer leftInput; TupleBatchBuffer rightInput; Sample sampOp; @Before public void setup() { leftInput = new TupleBatchBuffer(LEFT_SCHEMA); leftInput.putInt(0, -1); // WorkerID for testing rightInput = new TupleBatchBuffer(RIGHT_SCHEMA); for (int val : INPUT_VALS) { rightInput.putInt(0, val); } } /** Sample size 0. */ @Test public void testSampleSizeZero() throws DbException { int partitionSize = INPUT_VALS.length; int sampleSize = 0; int[] expected = Arrays.copyOf(SEED_OUTPUT, sampleSize); Arrays.sort(expected); verifyExpectedResults(partitionSize, sampleSize, expected); } /** Sample size 1. */ @Test public void testSampleSizeOne() throws DbException { int partitionSize = INPUT_VALS.length; int sampleSize = 1; int[] expected = Arrays.copyOf(SEED_OUTPUT, sampleSize); Arrays.sort(expected); verifyExpectedResults(partitionSize, sampleSize, expected); } /** Sample size 50%. */ @Test public void testSampleSizeHalf() throws DbException { int partitionSize = INPUT_VALS.length; int sampleSize = INPUT_VALS.length / 2; int[] expected = Arrays.copyOf(SEED_OUTPUT, sampleSize); Arrays.sort(expected); verifyExpectedResults(partitionSize, sampleSize, expected); } /** Sample size all. */ @Test public void testSampleSizeAll() throws DbException { int partitionSize = INPUT_VALS.length; int sampleSize = INPUT_VALS.length; int[] expected = Arrays.copyOf(SEED_OUTPUT, sampleSize); Arrays.sort(expected); verifyExpectedResults(partitionSize, sampleSize, expected); } /** Sample size 200%. */ @Test public void testSampleSizeDouble() throws DbException { int partitionSize = INPUT_VALS.length; int sampleSize = INPUT_VALS.length * 2; int[] expected = Arrays.copyOf(SEED_OUTPUT, sampleSize); Arrays.sort(expected); verifyExpectedResults(partitionSize, sampleSize, expected); } /** Cannot have a negative sample size. */ @Test(expected = IllegalStateException.class) public void testSampleSizeNegative() throws DbException { int partitionSize = INPUT_VALS.length; int sampleSize = -1; drainOperator(partitionSize, sampleSize); } /** Cannot have a negative partition size. */ @Test(expected = IllegalStateException.class) public void testSamplePartitionNegative() throws DbException { int partitionSize = -1; int sampleSize = 3; drainOperator(partitionSize, sampleSize); } @After public void cleanup() throws DbException { if (sampOp != null && sampOp.isOpen()) { sampOp.close(); } } /** Tests the correctness of a sampling operation using a seeded value. */ private void verifyExpectedResults(int partitionSize, int sampleSize, int[] expected) throws DbException { leftInput.putInt(1, partitionSize); leftInput.putInt(2, sampleSize); leftInput.putString(3, "WithReplacement"); sampOp = new Sample(new BatchTupleSource(leftInput), new BatchTupleSource(rightInput), RANDOM_SEED); sampOp.open(TestEnvVars.get()); int rowIdx = 0; while (!sampOp.eos()) { TupleBatch result = sampOp.nextReady(); if (result != null) { assertEquals(OUTPUT_SCHEMA, result.getSchema()); for (int i = 0; i < result.numTuples(); ++i, ++rowIdx) { assertEquals(result.getInt(0, i), expected[rowIdx]); } } } assertEquals(sampleSize, rowIdx); } /** Run through all results without doing anything. */ private void drainOperator(int partitionSize, int sampleSize) throws DbException { leftInput.putInt(1, partitionSize); leftInput.putInt(2, sampleSize); leftInput.putString(3, "WithReplacement"); sampOp = new Sample(new BatchTupleSource(leftInput), new BatchTupleSource(rightInput), RANDOM_SEED); sampOp.open(TestEnvVars.get()); while (!sampOp.eos()) { sampOp.nextReady(); } } }