package edu.washington.escience.myria.operator;

import static org.junit.Assert.assertEquals;

import java.util.LinkedList;
import java.util.List;

import org.junit.Test;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Multiset;

import edu.washington.escience.myria.DbException;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.Type;
import edu.washington.escience.myria.expression.CastExpression;
import edu.washington.escience.myria.expression.Expression;
import edu.washington.escience.myria.expression.TypeExpression;
import edu.washington.escience.myria.expression.VariableExpression;
import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.storage.TupleBatchBuffer;
import edu.washington.escience.myria.util.TestEnvVars;
import edu.washington.escience.myria.util.TestUtils;

/**
 * Tests for the {@link UnionAll} operator: construction, tuple counts, tuple contents, and schema
 * compatibility between children.
 */
public class UnionAllTest {

  /**
   * Builds a {@link UnionAll} with a {@code null} child array and then supplies the children through
   * {@code setChildren}. The test passes if neither call throws.
   */
  @Test
  public void testUnionAllConstructorWithNull() throws DbException {
    BatchTupleSource[] children = new BatchTupleSource[1];
    children[0] = new BatchTupleSource(TestUtils.generateRandomTuples(10, 1000, false));
    UnionAll union = new UnionAll(null);
    union.setChildren(children);
  }

  /**
   * Checks that the multiset of values in column 0 emitted by the union is exactly the multiset of
   * values in column 0 of the two inputs.
   */
  @Test
  public void testUnionAllCorrectTuples() throws DbException {
    TupleBatchBuffer[] randomTuples = new TupleBatchBuffer[2];
    randomTuples[0] = TestUtils.generateRandomTuples(12300, 5000, false);
    randomTuples[1] = TestUtils.generateRandomTuples(4200, 2000, false);

    BatchTupleSource[] children = new BatchTupleSource[2];
    children[0] = new BatchTupleSource(randomTuples[0]);
    children[1] = new BatchTupleSource(randomTuples[1]);

    UnionAll union = new UnionAll(children);
    union.open(TestEnvVars.get());
    Multiset<Long> actualCounts = HashMultiset.create();
    while (!union.eos()) {
      TupleBatch tb = union.nextReady();
      if (tb != null) {
        assertEquals(union.getSchema(), tb.getSchema());
        for (int i = 0; i < tb.numTuples(); i++) {
          actualCounts.add(tb.getLong(0, i));
        }
      }
    }
    union.close();

    Multiset<Long> expectedCounts = HashMultiset.create();
    for (TupleBatchBuffer randomTuple : randomTuples) {
      for (TupleBatch tuples : randomTuple.getAll()) {
        for (int j = 0; j < tuples.numTuples(); j++) {
          expectedCounts.add(tuples.getLong(0, j));
        }
      }
    }

    /*
     * Compare the multisets as a whole. Checking only that each expected entry is present with the
     * right count would let extra, unexpected tuples in the output go unnoticed.
     */
    assertEquals(expectedCounts, actualCounts);
  }

  /** Checks that the union of three inputs emits as many tuples as the inputs hold in total. */
  @Test
  public void testUnionAllCount() throws DbException {
    BatchTupleSource[] children = new BatchTupleSource[3];
    children[0] = new BatchTupleSource(TestUtils.generateRandomTuples(12300, 5000, false));
    children[1] = new BatchTupleSource(TestUtils.generateRandomTuples(4200, 2000, false));
    children[2] = new BatchTupleSource(TestUtils.generateRandomTuples(19900, 5000, false));

    UnionAll union = new UnionAll(children);
    assertEquals(12300 + 4200 + 19900, openDrainAndCount(union));
  }

  /**
   * Checks that children whose schemas have the same column types but different column names can
   * be unioned, and that the tuple count is preserved.
   */
  @Test
  public void testUnionAllVaryingSchemas() throws DbException {
    BatchTupleSource[] children = new BatchTupleSource[3];
    children[0] = new BatchTupleSource(TestUtils.generateRandomTuples(12300, 5000, false));
    children[1] = new BatchTupleSource(TestUtils.generateRandomTuples(4200, 2000, false));

    /* Child 2 will have tuples with different names */
    TupleBatchBuffer tuples2 = TestUtils.generateRandomTuples(19900, 5000, false);
    Schema normalSchema = tuples2.getSchema();
    List<String> renames = new LinkedList<>();
    for (String s : normalSchema.getColumnNames()) {
      renames.add(s + "_2only");
    }
    Schema renamedSchema = new Schema(normalSchema.getColumnTypes(), renames);
    TupleBatchBuffer tuples2renamed = new TupleBatchBuffer(renamedSchema);
    for (TupleBatch tb : tuples2.getAll()) {
      tuples2renamed.appendTB(tb.rename(renames));
    }
    children[2] = new BatchTupleSource(tuples2renamed);

    UnionAll union = new UnionAll(children);
    assertEquals(12300 + 4200 + 19900, openDrainAndCount(union));
  }

  /**
   * Checks that a union over children with different column types is rejected with an
   * {@link IllegalArgumentException}, raised either by the constructor or by {@code open}.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testUnionIncompatibleSchemas() throws DbException {
    Operator[] children = new Operator[3];
    // range always returns INT_TYPE
    children[0] = new BatchTupleSource(TestUtils.range(5));
    children[1] = new BatchTupleSource(TestUtils.range(50));
    /* Child 2 will have tuples with different type -- cast int to long */
    children[2] =
        new Apply(
            new BatchTupleSource(TestUtils.range(50)),
            ImmutableList.of(
                new Expression(
                    "long",
                    new CastExpression(
                        new VariableExpression(0), new TypeExpression(Type.LONG_TYPE)))));
    UnionAll union = new UnionAll(children);
    union.open(TestEnvVars.get(2));
  }

  /**
   * Opens the given union, pulls batches until end of stream, closes it, and returns the number of
   * tuples seen. Every non-null batch must carry the union's own schema.
   *
   * @param union the operator to drain; must not have been opened yet
   * @return the total number of tuples emitted by {@code union}
   * @throws DbException if the operator fails while opening, producing batches, or closing
   */
  private static int openDrainAndCount(final UnionAll union) throws DbException {
    union.open(TestEnvVars.get());
    int count = 0;
    while (!union.eos()) {
      TupleBatch tb = union.nextReady();
      // nextReady may return null when no batch is available yet; keep polling until eos.
      if (tb != null) {
        assertEquals(union.getSchema(), tb.getSchema());
        count += tb.numTuples();
      }
    }
    union.close();
    return count;
  }
}