package edu.washington.escience.myria.operator;
import static org.junit.Assert.assertEquals;
import java.nio.file.Paths;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
import edu.washington.escience.myria.CsvTupleReader;
import edu.washington.escience.myria.DbException;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.Type;
import edu.washington.escience.myria.io.FileSource;
import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.storage.TupleBatchBuffer;
import edu.washington.escience.myria.storage.TupleUtils;
import edu.washington.escience.myria.util.TestEnvVars;
public class LeapFrogJoinTest {
@Test
public void testLeapFrogJoinOnMultipleTBInBuffer() throws DbException {
final Schema schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.STRING_TYPE), ImmutableList.of("id2", "name2"));
TupleBatchBuffer leftTbb = new TupleBatchBuffer(schema);
TupleBatchBuffer rightTbb = new TupleBatchBuffer(schema);
for (int i = 0; i < 2; ++i) {
leftTbb.putLong(0, 0);
leftTbb.putString(1, "hello world");
}
for (int i = 0; i < TupleUtils.getBatchSize(Type.DOUBLE_TYPE) + 1; ++i) {
rightTbb.putLong(0, 0);
rightTbb.putString(1, "hello world");
}
BatchTupleSource[] children = new BatchTupleSource[2];
children[0] = new BatchTupleSource(leftTbb);
children[1] = new BatchTupleSource(rightTbb);
final ImmutableList<String> outputColumnNames =
ImmutableList.of("id1", "name1", "id2", "name2");
final Schema outputSchema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.STRING_TYPE, Type.LONG_TYPE, Type.STRING_TYPE),
outputColumnNames);
int[][][] fieldMap = new int[][][] {{{0, 0}, {1, 0}}};
int[][] outputMap = new int[][] {{0, 0}, {0, 1}, {1, 0}, {1, 1}};
NAryOperator join = new LeapFrogJoin(children, fieldMap, outputMap, outputColumnNames, null);
join.open(TestEnvVars.get());
TupleBatch tb;
TupleBatchBuffer batches = new TupleBatchBuffer(outputSchema);
while (!join.eos()) {
tb = join.nextReady();
if (tb != null) {
batches.appendTB(tb);
}
}
join.close();
assertEquals(2 * (TupleUtils.getBatchSize(Type.DOUBLE_TYPE) + 1), batches.numTuples());
}
@Test
public void testLeapFrogUsingTwitterTriangularJoin() throws DbException {
final Schema r_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("r_x", "r_y"));
final Schema s_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("s_y", "s_z"));
final Schema t_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("t_z", "t_x"));
/* read data from files. */
final String realFilename = Paths.get("testdata", "twitter", "TwitterK.csv").toString();
TupleSource dataInputR =
new TupleSource(new CsvTupleReader(r_schema), new FileSource(realFilename));
TupleSource dataInputS =
new TupleSource(new CsvTupleReader(s_schema), new FileSource(realFilename));
TupleSource dataInputT =
new TupleSource(new CsvTupleReader(t_schema), new FileSource(realFilename));
/* order the tables. */
InMemoryOrderBy orderR =
new InMemoryOrderBy(dataInputR, new int[] {0, 1}, new boolean[] {true, true});
InMemoryOrderBy orderS =
new InMemoryOrderBy(dataInputS, new int[] {0, 1}, new boolean[] {true, true});
InMemoryOrderBy orderT =
new InMemoryOrderBy(dataInputT, new int[] {1, 0}, new boolean[] {true, true});
/* leapfrog join. */
int[][][] fieldMap = new int[][][] {{{0, 0}, {2, 1}}, {{0, 1}, {1, 0}}, {{1, 1}, {2, 0}}};
int[][] outputMap = new int[][] {{0, 0}, {0, 1}, {1, 1}};
final ImmutableList<String> outputColumnNames = ImmutableList.of("x", "y", "z");
final Schema outputSchema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE), outputColumnNames);
LeapFrogJoin join =
new LeapFrogJoin(
new Operator[] {orderR, orderS, orderT},
fieldMap,
outputMap,
outputColumnNames,
new boolean[] {true, true, true});
join.open(TestEnvVars.get());
TupleBatch tb;
TupleBatchBuffer batches = new TupleBatchBuffer(outputSchema);
while (!join.eos()) {
tb = join.nextReady();
if (tb != null) {
batches.appendTB(tb);
}
}
join.close();
assertEquals(16826, batches.numTuples());
}
@Test
public void binaryJoinWithMoreColumns() throws DbException {
/* Query: Result(x,y,z) :- R(x,y),T(x,y,z). */
final Schema r_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("r_x", "r_y"));
final Schema t_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE),
ImmutableList.of("t_x", "t_z", "t_y"));
/* read data from files. */
final String r_path = Paths.get("testdata", "multiwayjoin", "R.csv").toString();
final String t_path = Paths.get("testdata", "multiwayjoin", "T.csv").toString();
TupleSource dataInputR = new TupleSource(new CsvTupleReader(r_schema), new FileSource(r_path));
TupleSource dataInputT = new TupleSource(new CsvTupleReader(t_schema), new FileSource(t_path));
/* order the tables. */
InMemoryOrderBy orderR =
new InMemoryOrderBy(dataInputR, new int[] {0, 1}, new boolean[] {true, true});
InMemoryOrderBy orderT =
new InMemoryOrderBy(dataInputT, new int[] {0, 1}, new boolean[] {true, true});
/* leapfrog join. */
int[][][] fieldMap = new int[][][] {{{0, 0}, {1, 0}}, {{0, 1}, {1, 2}}};
int[][] outputMap = new int[][] {{0, 0}, {0, 1}};
final ImmutableList<String> outputColumnNames = ImmutableList.of("x", "y");
final Schema outputSchema =
new Schema(ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), outputColumnNames);
LeapFrogJoin join =
new LeapFrogJoin(
new Operator[] {orderR, orderT},
fieldMap,
outputMap,
outputColumnNames,
new boolean[] {false, false});
join.open(TestEnvVars.get());
TupleBatch tb;
TupleBatchBuffer batches = new TupleBatchBuffer(outputSchema);
while (!join.eos()) {
tb = join.nextReady();
if (tb != null) {
batches.appendTB(tb);
}
}
join.close();
assertEquals(0, batches.numTuples());
}
@Test
public void strangeTriangle() throws DbException {
/* Query: Result(x,y,z) :- R(x,y),S(y,z),T(x,y,z). */
final Schema r_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("r_x", "r_y"));
final Schema s_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("s_y", "s_z"));
final Schema t_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE),
ImmutableList.of("t_x", "t_y", "t_z"));
/* read data from files. */
final String r_path = Paths.get("testdata", "multiwayjoin", "R.csv").toString();
final String s_path = Paths.get("testdata", "multiwayjoin", "S.csv").toString();
final String t_path = Paths.get("testdata", "multiwayjoin", "T.csv").toString();
TupleSource dataInputR = new TupleSource(new CsvTupleReader(r_schema), new FileSource(r_path));
TupleSource dataInputS = new TupleSource(new CsvTupleReader(s_schema), new FileSource(s_path));
TupleSource dataInputT = new TupleSource(new CsvTupleReader(t_schema), new FileSource(t_path));
/* order the tables. */
InMemoryOrderBy orderR =
new InMemoryOrderBy(dataInputR, new int[] {0, 1}, new boolean[] {true, true});
InMemoryOrderBy orderS =
new InMemoryOrderBy(dataInputS, new int[] {0, 1}, new boolean[] {true, true});
InMemoryOrderBy orderT =
new InMemoryOrderBy(dataInputT, new int[] {0, 1, 2}, new boolean[] {true, true, true});
/* leapfrog join. */
int[][][] fieldMap =
new int[][][] {{{0, 0}, {2, 0}}, {{0, 1}, {1, 0}, {2, 1}}, {{1, 1}, {2, 2}}};
int[][] outputMap = new int[][] {{0, 0}, {0, 1}, {1, 1}};
final ImmutableList<String> outputColumnNames = ImmutableList.of("x", "y", "z");
final Schema outputSchema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE), outputColumnNames);
LeapFrogJoin join =
new LeapFrogJoin(
new Operator[] {orderR, orderS, orderT},
fieldMap,
outputMap,
outputColumnNames,
new boolean[] {true, true, false});
join.open(TestEnvVars.get());
TupleBatch tb;
TupleBatchBuffer batches = new TupleBatchBuffer(outputSchema);
while (!join.eos()) {
tb = join.nextReady();
if (tb != null) {
batches.appendTB(tb);
}
}
join.close();
assertEquals(8, batches.numTuples());
}
@Test
public void strangeRectangle() throws DbException {
/* Rectangle(x,y,z,p) :- R(x,y),S(y,z),T(z,p),K(p,x),M(x,y,z). */
final Schema r_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("r_x", "r_y"));
final Schema s_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("s_y", "s_z"));
final Schema t_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("t_z", "t_p"));
final Schema k_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("k_p", "k_x"));
final Schema m_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE),
ImmutableList.of("m_x", "m_y", "m_z"));
/* read data from files. */
final String r_path = Paths.get("testdata", "multiwayjoin", "rectangles.csv").toString();
final String m_path = Paths.get("testdata", "multiwayjoin", "rec_2_hop.csv").toString();
TupleSource dataInputR = new TupleSource(new CsvTupleReader(r_schema), new FileSource(r_path));
TupleSource dataInputS = new TupleSource(new CsvTupleReader(s_schema), new FileSource(r_path));
TupleSource dataInputT = new TupleSource(new CsvTupleReader(t_schema), new FileSource(r_path));
TupleSource dataInputK = new TupleSource(new CsvTupleReader(k_schema), new FileSource(r_path));
TupleSource dataInputM = new TupleSource(new CsvTupleReader(m_schema), new FileSource(m_path));
/* order the tables. */
InMemoryOrderBy orderR =
new InMemoryOrderBy(dataInputR, new int[] {0, 1}, new boolean[] {true, true});
InMemoryOrderBy orderS =
new InMemoryOrderBy(dataInputS, new int[] {0, 1}, new boolean[] {true, true});
InMemoryOrderBy orderT =
new InMemoryOrderBy(dataInputT, new int[] {0, 1}, new boolean[] {true, true});
InMemoryOrderBy orderK =
new InMemoryOrderBy(dataInputK, new int[] {1, 0}, new boolean[] {true, true});
InMemoryOrderBy orderM =
new InMemoryOrderBy(dataInputM, new int[] {0, 1, 2}, new boolean[] {true, true, true});
/* leapfrog join, Rectangle(x,y,z,p) :- R(x,y),S(y,z),T(z,p),K(p,x),M(x,y,z). */
int[][][] fieldMap =
new int[][][] {
{{0, 0}, {3, 1}, {4, 0}},
{{0, 1}, {1, 0}, {4, 1}},
{{1, 1}, {2, 0}, {4, 2}},
{{2, 1}, {3, 0}}
};
int[][] outputMap = new int[][] {{0, 0}, {0, 1}, {1, 1}, {2, 1}};
final ImmutableList<String> outputColumnNames = ImmutableList.of("x", "y", "z", "p");
final Schema outputSchema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE),
outputColumnNames);
LeapFrogJoin join =
new LeapFrogJoin(
new Operator[] {orderR, orderS, orderT, orderK, orderM},
fieldMap,
outputMap,
outputColumnNames,
new boolean[] {false, false, false, false, true});
join.open(TestEnvVars.get());
TupleBatch tb;
TupleBatchBuffer batches = new TupleBatchBuffer(outputSchema);
while (!join.eos()) {
tb = join.nextReady();
if (tb != null) {
batches.appendTB(tb);
}
}
join.close();
assertEquals(4, batches.numTuples());
}
@Test
public void tridentQuery() throws DbException {
/* Query: Result(x,y,z) :- R(x),S(x),T(y,x). */
final Schema r_schema =
new Schema(ImmutableList.of(Type.INT_TYPE), ImmutableList.of("film_id_a"));
final Schema s_schema =
new Schema(ImmutableList.of(Type.INT_TYPE), ImmutableList.of("film_id_b"));
final Schema t_schema =
new Schema(
ImmutableList.of(Type.INT_TYPE, Type.INT_TYPE),
ImmutableList.of("film_id", "perform_id"));
/* read data from files, note */
final String r_path = Paths.get("testdata", "multiwayjoin", "fa.csv").toString();
final String s_path = Paths.get("testdata", "multiwayjoin", "fb.csv").toString();
final String t_path = Paths.get("testdata", "multiwayjoin", "pf.csv").toString();
TupleSource dataInputR = new TupleSource(new CsvTupleReader(r_schema), new FileSource(r_path));
TupleSource dataInputS = new TupleSource(new CsvTupleReader(s_schema), new FileSource(s_path));
TupleSource dataInputT = new TupleSource(new CsvTupleReader(t_schema), new FileSource(t_path));
/* order the tables. */
InMemoryOrderBy orderR = new InMemoryOrderBy(dataInputR, new int[] {0}, new boolean[] {true});
InMemoryOrderBy orderS = new InMemoryOrderBy(dataInputS, new int[] {0}, new boolean[] {true});
InMemoryOrderBy orderT = new InMemoryOrderBy(dataInputT, new int[] {1}, new boolean[] {true});
/* leapfrog join. */
int[][][] fieldMap = new int[][][] {{{0, 0}, {1, 0}, {2, 1}}};
int[][] outputMap = new int[][] {{0, 0}, {2, 0}};
final ImmutableList<String> outputColumnNames = ImmutableList.of("x", "y");
final Schema outputSchema =
new Schema(ImmutableList.of(Type.INT_TYPE, Type.INT_TYPE), outputColumnNames);
LeapFrogJoin join =
new LeapFrogJoin(
new Operator[] {orderR, orderS, orderT},
fieldMap,
outputMap,
outputColumnNames,
new boolean[] {false, false, false});
join.open(TestEnvVars.get());
TupleBatch tb;
TupleBatchBuffer batches = new TupleBatchBuffer(outputSchema);
while (!join.eos()) {
tb = join.nextReady();
if (tb != null) {
batches.appendTB(tb);
}
}
join.close();
assertEquals(2, batches.numTuples());
}
@Test
public void intersect3tables() throws DbException {
/* I(x) :- A(x),B(x),C(x). */
final Schema a_schema = new Schema(ImmutableList.of(Type.INT_TYPE), ImmutableList.of("a_x"));
final Schema b_schema = new Schema(ImmutableList.of(Type.INT_TYPE), ImmutableList.of("b_x"));
final Schema c_schema = new Schema(ImmutableList.of(Type.INT_TYPE), ImmutableList.of("c_x"));
/* read data from files, note */
final String a_path = Paths.get("testdata", "multiwayjoin", "a.csv").toString();
final String b_path = Paths.get("testdata", "multiwayjoin", "b.csv").toString();
final String c_path = Paths.get("testdata", "multiwayjoin", "c.csv").toString();
TupleSource dataInputA = new TupleSource(new CsvTupleReader(a_schema), new FileSource(a_path));
TupleSource dataInputB = new TupleSource(new CsvTupleReader(b_schema), new FileSource(b_path));
TupleSource dataInputC = new TupleSource(new CsvTupleReader(c_schema), new FileSource(c_path));
/* order the tables. */
InMemoryOrderBy orderA = new InMemoryOrderBy(dataInputA, new int[] {0}, new boolean[] {true});
InMemoryOrderBy orderB = new InMemoryOrderBy(dataInputB, new int[] {0}, new boolean[] {true});
InMemoryOrderBy orderC = new InMemoryOrderBy(dataInputC, new int[] {0}, new boolean[] {true});
/* leapfrog join. */
int[][][] fieldMap = new int[][][] {{{0, 0}, {1, 0}, {2, 0}}};
int[][] outputMap = new int[][] {{0, 0}};
final ImmutableList<String> outputColumnNames = ImmutableList.of("x");
final Schema outputSchema = new Schema(ImmutableList.of(Type.INT_TYPE), outputColumnNames);
LeapFrogJoin join =
new LeapFrogJoin(
new Operator[] {orderA, orderB, orderC},
fieldMap,
outputMap,
outputColumnNames,
new boolean[] {false, false, true});
join.open(TestEnvVars.get());
TupleBatch tb;
TupleBatchBuffer batches = new TupleBatchBuffer(outputSchema);
while (!join.eos()) {
tb = join.nextReady();
if (tb != null) {
batches.appendTB(tb);
}
}
join.close();
assertEquals(2, batches.numTuples());
}
@Test
public void outputFreeVariable() throws DbException {
/* Result(x):- o(k,x), p(x,y), q(y,z) */
final Schema o_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("o_k", "o_x"));
final Schema p_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("p_x", "p_y"));
final Schema q_schema =
new Schema(
ImmutableList.of(Type.LONG_TYPE, Type.LONG_TYPE), ImmutableList.of("q_y", "q_z"));
/* order the tables. */
final String o_path = Paths.get("testdata", "multiwayjoin", "o.csv").toString();
final String p_path = Paths.get("testdata", "multiwayjoin", "p.csv").toString();
final String q_path = Paths.get("testdata", "multiwayjoin", "q.csv").toString();
TupleSource dataInputO = new TupleSource(new CsvTupleReader(o_schema), new FileSource(o_path));
TupleSource dataInputP = new TupleSource(new CsvTupleReader(p_schema), new FileSource(p_path));
TupleSource dataInputQ = new TupleSource(new CsvTupleReader(q_schema), new FileSource(q_path));
InMemoryOrderBy orderO = new InMemoryOrderBy(dataInputO, new int[] {1}, new boolean[] {true});
InMemoryOrderBy orderP =
new InMemoryOrderBy(dataInputP, new int[] {0, 1}, new boolean[] {true, true});
InMemoryOrderBy orderQ = new InMemoryOrderBy(dataInputQ, new int[] {0}, new boolean[] {true});
/* leapfrog join. */
int[][][] fieldMap = new int[][][] {{{0, 1}, {1, 0}}, {{1, 1}, {2, 0}}};
int[][] outputMap = new int[][] {{0, 0}};
final ImmutableList<String> outputColumnNames = ImmutableList.of("k");
final Schema outputSchema = new Schema(ImmutableList.of(Type.LONG_TYPE), outputColumnNames);
LeapFrogJoin join =
new LeapFrogJoin(
new Operator[] {orderO, orderP, orderQ},
fieldMap,
outputMap,
outputColumnNames,
new boolean[] {false, false, false});
join.open(TestEnvVars.get());
TupleBatch tb;
TupleBatchBuffer batches = new TupleBatchBuffer(outputSchema);
while (!join.eos()) {
tb = join.nextReady();
if (tb != null) {
batches.appendTB(tb);
}
}
join.close();
assertEquals(9, batches.numTuples());
}
}