package edu.washington.escience.myria.operator;

import static org.junit.Assert.assertEquals;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import edu.washington.escience.myria.DbException;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.Type;
import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.storage.TupleBatchBuffer;
import edu.washington.escience.myria.util.TestEnvVars;

/**
 * Tests SampleWoR by verifying the results of various scenarios.
 */
public class SampleWoRTest {

  final long RANDOM_SEED = 42;
  final int[] INPUT_VALS = {0, 1, 2, 3, 4, 5};

  final Schema LEFT_SCHEMA =
      Schema.ofFields(
          "WorkerID", Type.INT_TYPE,
          "PartitionSize", Type.INT_TYPE,
          "SampleSize", Type.INT_TYPE,
          "SampleType", Type.STRING_TYPE);
  final Schema RIGHT_SCHEMA = Schema.ofFields(Type.INT_TYPE, "SomeValue");
  final Schema OUTPUT_SCHEMA = RIGHT_SCHEMA;

  TupleBatchBuffer leftInput;
  TupleBatchBuffer rightInput;
  Sample sampOp;

  @Before
  public void setup() {
    leftInput = new TupleBatchBuffer(LEFT_SCHEMA);
    leftInput.putInt(0, -1); // WorkerID for testing
    rightInput = new TupleBatchBuffer(RIGHT_SCHEMA);
    for (int val : INPUT_VALS) {
      rightInput.putInt(0, val);
    }
  }

  /** Sample size 0. */
  @Test
  public void testSampleSizeZero() throws DbException {
    int partitionSize = INPUT_VALS.length;
    int sampleSize = 0;
    verifyExpectedResults(partitionSize, sampleSize);
  }

  /** Sample size 1. */
  @Test
  public void testSampleSizeOne() throws DbException {
    int partitionSize = INPUT_VALS.length;
    int sampleSize = 1;
    verifyExpectedResults(partitionSize, sampleSize);
  }

  /** Sample size 50%. */
  @Test
  public void testSampleSizeHalf() throws DbException {
    int partitionSize = INPUT_VALS.length;
    int sampleSize = INPUT_VALS.length / 2;
    verifyExpectedResults(partitionSize, sampleSize);
  }

  /** Sample size all. */
  @Test
  public void testSampleSizeAll() throws DbException {
    int partitionSize = INPUT_VALS.length;
    int sampleSize = INPUT_VALS.length;
    verifyExpectedResults(partitionSize, sampleSize);
  }

  /** Sample size greater than partition size. */
  @Test(expected = IllegalStateException.class)
  public void testSampleSizeTooMany() throws DbException {
    int partitionSize = INPUT_VALS.length;
    int sampleSize = INPUT_VALS.length + 1;
    drainOperator(partitionSize, sampleSize);
  }

  /** Cannot have a negative sample size. */
  @Test(expected = IllegalStateException.class)
  public void testSampleSizeNegative() throws DbException {
    int partitionSize = INPUT_VALS.length;
    int sampleSize = -1;
    drainOperator(partitionSize, sampleSize);
  }

  /** Cannot have a negative partition size. */
  @Test(expected = IllegalStateException.class)
  public void testSamplePartitionNegative() throws DbException {
    int partitionSize = -1;
    int sampleSize = 3;
    drainOperator(partitionSize, sampleSize);
  }

  @After
  public void cleanup() throws DbException {
    if (sampOp != null && sampOp.isOpen()) {
      sampOp.close();
    }
  }
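  /**
   * Sketch of a distinctness check for the without-replacement case: every
   * sampled value should appear at most once. This assumes TupleBatch exposes
   * a getInt(column, row) accessor for reading output values; adjust the
   * accessor if the storage API differs. assertFalse is fully qualified to
   * avoid widening the static import above.
   */
  @Test
  public void testSampleValuesDistinct() throws DbException {
    int partitionSize = INPUT_VALS.length;
    int sampleSize = INPUT_VALS.length / 2;
    leftInput.putInt(1, partitionSize);
    leftInput.putInt(2, sampleSize);
    leftInput.putString(3, "WithoutReplacement");
    sampOp =
        new Sample(
            new BatchTupleSource(leftInput), new BatchTupleSource(rightInput), RANDOM_SEED);
    sampOp.open(TestEnvVars.get());

    // INPUT_VALS are 0..5, so each sampled value can index this array directly.
    boolean[] seen = new boolean[INPUT_VALS.length];
    int sampled = 0;
    while (!sampOp.eos()) {
      TupleBatch result = sampOp.nextReady();
      if (result != null) {
        for (int i = 0; i < result.numTuples(); ++i) {
          int val = result.getInt(0, i);
          // Without replacement: a value may be sampled at most once.
          org.junit.Assert.assertFalse(seen[val]);
          seen[val] = true;
          ++sampled;
        }
      }
    }
    assertEquals(sampleSize, sampled);
  }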
  /**
   * Tests whether the output could be a valid distribution. Note: doesn't
   * currently test for statistical randomness.
   */
  private void verifyExpectedResults(int partitionSize, int sampleSize) throws DbException {
    leftInput.putInt(1, partitionSize);
    leftInput.putInt(2, sampleSize);
    leftInput.putString(3, "WithoutReplacement");
    sampOp =
        new Sample(
            new BatchTupleSource(leftInput), new BatchTupleSource(rightInput), RANDOM_SEED);
    sampOp.open(TestEnvVars.get());

    int rowIdx = 0;
    while (!sampOp.eos()) {
      TupleBatch result = sampOp.nextReady();
      if (result != null) {
        assertEquals(OUTPUT_SCHEMA, result.getSchema());
        rowIdx += result.numTuples();
      }
    }
    assertEquals(sampleSize, rowIdx);
  }

  /** Run through all results without doing anything. */
  private void drainOperator(int partitionSize, int sampleSize) throws DbException {
    leftInput.putInt(1, partitionSize);
    leftInput.putInt(2, sampleSize);
    leftInput.putString(3, "WithoutReplacement");
    sampOp =
        new Sample(
            new BatchTupleSource(leftInput), new BatchTupleSource(rightInput), RANDOM_SEED);
    sampOp.open(TestEnvVars.get());
    while (!sampOp.eos()) {
      sampOp.nextReady();
    }
  }
}