package edu.washington.escience.myria.operator;
import static org.junit.Assert.assertEquals;
import java.util.Arrays;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import edu.washington.escience.myria.DbException;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.Type;
import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.storage.TupleBatchBuffer;
import edu.washington.escience.myria.util.TestEnvVars;
/**
 * Tests the {@link Sample} operator in "WithReplacement" mode by comparing its output against
 * values precomputed for a fixed random seed.
 */
public class SampleWRTest {
  /** Seed handed to every {@link Sample} under test so that the draws are reproducible. */
  static final long RANDOM_SEED = 42;

  /** Values loaded into the right (data) input; one tuple per entry. */
  static final int[] INPUT_VALS = {0, 1, 2, 3, 4, 5};

  /** Values generated by rand.nextInt(INPUT_VALS.length) w/ seed=42. */
  static final int[] SEED_OUTPUT = {2, 3, 0, 2, 0, 1, 5, 2, 1, 5, 2, 2};

  /** Schema of the left input: a single tuple describing the requested sample. */
  static final Schema LEFT_SCHEMA =
      Schema.ofFields(
          "WorkerID",
          Type.INT_TYPE,
          "PartitionSize",
          Type.INT_TYPE,
          "SampleSize",
          Type.INT_TYPE,
          "SampleType",
          Type.STRING_TYPE);

  /** Schema of the right input: the tuples being sampled. */
  static final Schema RIGHT_SCHEMA = Schema.ofFields(Type.INT_TYPE, "SomeValue");

  /** Every result batch is asserted to carry the schema of the right input. */
  static final Schema OUTPUT_SCHEMA = RIGHT_SCHEMA;

  TupleBatchBuffer leftInput;
  TupleBatchBuffer rightInput;
  Sample sampOp;

  /**
   * Creates fresh input buffers for each test. Only column 0 (WorkerID) of the left tuple is
   * written here; the remaining columns are filled in by {@link #openSampleOperator(int, int)}.
   */
  @Before
  public void setup() {
    leftInput = new TupleBatchBuffer(LEFT_SCHEMA);
    leftInput.putInt(0, -1); // WorkerID for testing
    rightInput = new TupleBatchBuffer(RIGHT_SCHEMA);
    for (int val : INPUT_VALS) {
      rightInput.putInt(0, val);
    }
  }

  /** Sample size 0. */
  @Test
  public void testSampleSizeZero() throws DbException {
    verifySeededSample(0);
  }

  /** Sample size 1. */
  @Test
  public void testSampleSizeOne() throws DbException {
    verifySeededSample(1);
  }

  /** Sample size 50%. */
  @Test
  public void testSampleSizeHalf() throws DbException {
    verifySeededSample(INPUT_VALS.length / 2);
  }

  /** Sample size all. */
  @Test
  public void testSampleSizeAll() throws DbException {
    verifySeededSample(INPUT_VALS.length);
  }

  /** Sample size 200%. */
  @Test
  public void testSampleSizeDouble() throws DbException {
    verifySeededSample(INPUT_VALS.length * 2);
  }

  /** Cannot have a negative sample size. */
  @Test(expected = IllegalStateException.class)
  public void testSampleSizeNegative() throws DbException {
    int partitionSize = INPUT_VALS.length;
    int sampleSize = -1;
    drainOperator(partitionSize, sampleSize);
  }

  /** Cannot have a negative partition size. */
  @Test(expected = IllegalStateException.class)
  public void testSamplePartitionNegative() throws DbException {
    int partitionSize = -1;
    int sampleSize = 3;
    drainOperator(partitionSize, sampleSize);
  }

  /** Closes the operator if a test left it open. */
  @After
  public void cleanup() throws DbException {
    if (sampOp != null && sampOp.isOpen()) {
      sampOp.close();
    }
  }

  /**
   * Samples {@code sampleSize} tuples from a partition holding all of {@link #INPUT_VALS} and
   * checks them against the first {@code sampleSize} entries of {@link #SEED_OUTPUT}.
   *
   * @param sampleSize number of tuples to request; must not exceed {@code SEED_OUTPUT.length}
   *     worth of precomputed values to be meaningful
   */
  private void verifySeededSample(int sampleSize) throws DbException {
    int[] expected = Arrays.copyOf(SEED_OUTPUT, sampleSize);
    // The comparison is done against the sorted draws.
    // NOTE(review): presumably the operator emits sampled tuples in input order - confirm.
    Arrays.sort(expected);
    verifyExpectedResults(INPUT_VALS.length, sampleSize, expected);
  }

  /**
   * Tests the correctness of a sampling operation using a seeded value.
   *
   * @param partitionSize value written to the PartitionSize column of the left input
   * @param sampleSize value written to the SampleSize column; also the expected tuple count
   * @param expected the values the operator must emit, in emission order
   */
  private void verifyExpectedResults(int partitionSize, int sampleSize, int[] expected)
      throws DbException {
    openSampleOperator(partitionSize, sampleSize);
    int rowIdx = 0;
    while (!sampOp.eos()) {
      TupleBatch result = sampOp.nextReady();
      if (result == null) {
        continue;
      }
      assertEquals(OUTPUT_SCHEMA, result.getSchema());
      for (int i = 0; i < result.numTuples(); ++i, ++rowIdx) {
        // Fail with a readable message rather than an ArrayIndexOutOfBoundsException.
        if (rowIdx >= expected.length) {
          throw new AssertionError(
              "Sample emitted more tuples than the " + expected.length + " expected");
        }
        assertEquals("value at output row " + rowIdx, expected[rowIdx], result.getInt(0, i));
      }
    }
    assertEquals(sampleSize, rowIdx);
  }

  /** Run through all results without doing anything. */
  private void drainOperator(int partitionSize, int sampleSize) throws DbException {
    openSampleOperator(partitionSize, sampleSize);
    while (!sampOp.eos()) {
      sampOp.nextReady();
    }
  }

  /**
   * Completes the left input tuple, builds the seeded {@link Sample} operator over both inputs,
   * stores it in {@link #sampOp} and opens it.
   *
   * @param partitionSize value written to the PartitionSize column of the left input
   * @param sampleSize value written to the SampleSize column of the left input
   */
  private void openSampleOperator(int partitionSize, int sampleSize) throws DbException {
    leftInput.putInt(1, partitionSize);
    leftInput.putInt(2, sampleSize);
    leftInput.putString(3, "WithReplacement");
    sampOp =
        new Sample(new BatchTupleSource(leftInput), new BatchTupleSource(rightInput), RANDOM_SEED);
    sampOp.open(TestEnvVars.get());
  }
}