package edu.washington.escience.myria.operator.apply;
import static org.junit.Assert.assertEquals;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Stream;
import org.junit.Test;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import edu.washington.escience.myria.DbException;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.Type;
import edu.washington.escience.myria.expression.ConstantExpression;
import edu.washington.escience.myria.expression.Expression;
import edu.washington.escience.myria.expression.ExpressionOperator;
import edu.washington.escience.myria.expression.SplitExpression;
import edu.washington.escience.myria.expression.VariableExpression;
import edu.washington.escience.myria.operator.Apply;
import edu.washington.escience.myria.operator.BatchTupleSource;
import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.storage.TupleBatchBuffer;
import edu.washington.escience.myria.util.TestEnvVars;
public class CartesianProductApplyTest {
private final String SEPARATOR = ",";
@Test
public void testApply() throws DbException {
final Schema schema = Schema.ofFields("col_1", Type.STRING_TYPE, "col_2", Type.STRING_TYPE);
final Schema expectedResultSchema =
Schema.ofFields("col_1_split", Type.STRING_TYPE, "col_2_split", Type.STRING_TYPE);
final TupleBatchBuffer input = new TupleBatchBuffer(schema);
final ImmutableList<String> input1 = ImmutableList.of("a", "b", "c");
final ImmutableList<String> input2 = ImmutableList.of("d", "e", "f");
Stream<List<String>> inputPairsStream =
input1.stream().flatMap(i1 -> input2.stream().map(i2 -> ImmutableList.of(i1, i2)));
Iterator<List<String>> inputPairs = inputPairsStream.iterator();
input.putString(0, Joiner.on(SEPARATOR).join(input1));
input.putString(1, Joiner.on(SEPARATOR).join(input2));
ImmutableList.Builder<Expression> Expressions = ImmutableList.builder();
ExpressionOperator regex = new ConstantExpression(SEPARATOR);
ExpressionOperator col1splits = new SplitExpression(new VariableExpression(0), regex);
ExpressionOperator col2splits = new SplitExpression(new VariableExpression(1), regex);
Expressions.add(new Expression("col_1_split", col1splits));
Expressions.add(new Expression("col_2_split", col2splits));
Apply apply = new Apply(new BatchTupleSource(input), Expressions.build());
apply.open(TestEnvVars.get());
int rowIdx = 0;
while (!apply.eos()) {
TupleBatch result = apply.nextReady();
if (result != null) {
assertEquals(expectedResultSchema, result.getSchema());
for (int batchIdx = 0; batchIdx < result.numTuples(); ++batchIdx, ++rowIdx) {
List<String> pair = inputPairs.next();
assertEquals(pair.get(0), result.getString(0, batchIdx));
assertEquals(pair.get(1), result.getString(1, batchIdx));
}
}
}
assertEquals(input1.size() * input2.size(), rowIdx);
apply.close();
}
}