/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.join;
import org.apache.drill.BaseTestQuery;
import org.apache.drill.TestBuilder;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.junit.Ignore;
import org.junit.Test;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
public class TestMergeJoinWithSchemaChanges extends BaseTestQuery {
@Test
//@Ignore
public void testNumericTypes() throws Exception {
final File left_dir = new File(BaseTestQuery.getTempDir("mergejoin-schemachanges-left"));
final File right_dir = new File(BaseTestQuery.getTempDir("mergejoin-schemachanges-right"));
left_dir.mkdirs();
right_dir.mkdirs();
// First create data for numeric types.
// left side int and float vs right side float
BufferedWriter writer = new BufferedWriter(new FileWriter(new File(left_dir, "l1.json")));
for (int i = 0; i < 5000; ++i) {
writer.write(String.format("{ \"kl\" : %d , \"vl\": %d }\n", i, i));
}
writer.close();
writer = new BufferedWriter(new FileWriter(new File(left_dir, "l2.json")));
for (int i = 1000; i < 6000; ++i) {
writer.write(String.format("{ \"kl\" : %f , \"vl\": %f }\n", (float) i, (float) i));
}
writer.close();
// right side is int and float
writer = new BufferedWriter(new FileWriter(new File(right_dir, "r1.json")));
for (int i = 2000; i < 7000; ++i) {
writer.write(String.format("{ \"kr\" : %d , \"vr\": %d }\n", i, i));
}
writer.close();
writer = new BufferedWriter(new FileWriter(new File(right_dir, "r2.json")));
for (int i = 3000; i < 8000; ++i) {
writer.write(String.format("{ \"kr\" : %f, \"vr\": %f }\n", (float) i, (float) i));
}
writer.close();
// INNER JOIN
String query = String.format("select * from dfs_test.`%s` L %s join dfs_test.`%s` R on L.kl=R.kr",
left_dir.toPath().toString(), "inner", right_dir.toPath().toString());
TestBuilder builder = testBuilder()
.sqlQuery(query)
.optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = false; alter session set `exec.enable_union_type` = true")
.unOrdered()
.baselineColumns("kl", "vl", "kr", "vr");
for (long i = 2000; i < 3000; ++i) {
builder.baselineValues(i, i, i, i);
builder.baselineValues((double)i, (double)i, i, i);
}
for (long i = 3000; i < 5000; ++i) {
builder.baselineValues(i, i, i, i);
builder.baselineValues(i, i, (double)i, (double)i);
builder.baselineValues((double)i, (double)i, i, i);
builder.baselineValues((double)i, (double)i, (double)i, (double)i);
}
for (long i = 5000; i < 6000; ++i) {
builder.baselineValues((double)i, (double)i, i, i);
builder.baselineValues((double) i, (double) i, (double) i, (double) i);
}
builder.go();
// LEFT JOIN
query = String.format("select * from dfs_test.`%s` L %s join dfs_test.`%s` R on L.kl=R.kr",
left_dir.toPath().toString(), "left", right_dir.toPath().toString());
builder = testBuilder()
.sqlQuery(query)
.optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = false; alter session set `exec.enable_union_type` = true")
.unOrdered()
.baselineColumns("kl", "vl", "kr", "vr");
for (long i = 0; i < 2000; ++i) {
builder.baselineValues(i, i, null, null);
}
for (long i = 1000; i < 2000; ++i) {
builder.baselineValues((double)i, (double)i, null, null);
}
for (long i = 2000; i < 3000; ++i) {
builder.baselineValues(i, i, i, i);
builder.baselineValues((double)i, (double)i, i, i);
}
for (long i = 3000; i < 5000; ++i) {
builder.baselineValues(i, i, i, i);
builder.baselineValues(i, i, (double)i, (double)i);
builder.baselineValues((double)i, (double)i, i, i);
builder.baselineValues((double)i, (double)i, (double)i, (double)i);
}
for (long i = 5000; i < 6000; ++i) {
builder.baselineValues((double) i, (double)i, i, i);
builder.baselineValues((double)i, (double)i, (double)i, (double)i);
}
builder.go();
}
@Test
//@Ignore
public void testNumericStringTypes() throws Exception {
final File left_dir = new File(BaseTestQuery.getTempDir("mergejoin-schemachanges-left"));
final File right_dir = new File(BaseTestQuery.getTempDir("mergejoin-schemachanges-right"));
left_dir.mkdirs();
right_dir.mkdirs();
// left side int and strings
BufferedWriter writer = new BufferedWriter(new FileWriter(new File(left_dir, "l1.json")));
for (int i = 0; i < 5000; ++i) {
writer.write(String.format("{ \"kl\" : %d , \"vl\": %d }\n", i, i));
}
writer.close();
writer = new BufferedWriter(new FileWriter(new File(left_dir, "l2.json")));
for (int i = 1000; i < 6000; ++i) {
writer.write(String.format("{ \"kl\" : \"%s\" , \"vl\": \"%s\" }\n", i, i));
}
writer.close();
// right side is float and strings
writer = new BufferedWriter(new FileWriter(new File(right_dir, "r1.json")));
for (int i = 2000; i < 7000; ++i) {
writer.write(String.format("{ \"kr\" : %f , \"vr\": %f }\n", (float)i, (float)i));
}
writer.close();
writer = new BufferedWriter(new FileWriter(new File(right_dir, "r2.json")));
for (int i = 3000; i < 8000; ++i) {
writer.write(String.format("{ \"kr\" : \"%s\", \"vr\": \"%s\" }\n", i, i));
}
writer.close();
// INNER JOIN
String query = String.format("select * from dfs_test.`%s` L %s join dfs_test.`%s` R on L.kl=R.kr",
left_dir.toPath().toString(), "inner", right_dir.toPath().toString());
TestBuilder builder = testBuilder()
.sqlQuery(query)
.optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = false; alter session set `exec.enable_union_type` = true")
.unOrdered()
.baselineColumns("kl", "vl", "kr", "vr");
for (long i = 2000; i < 5000; ++i) {
builder.baselineValues(i, i, (double)i, (double)i);
}
for (long i = 3000; i < 6000; ++i) {
final String d = Long.toString(i);
builder.baselineValues(d, d, d, d);
}
builder.go();
// RIGHT JOIN
query = String.format("select * from dfs_test.`%s` L %s join dfs_test.`%s` R on L.kl=R.kr",
left_dir.toPath().toString(), "right", right_dir.toPath().toString());
builder = testBuilder()
.sqlQuery(query)
.optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = false; alter session set `exec.enable_union_type` = true")
.unOrdered()
.baselineColumns("kl", "vl", "kr", "vr");
for (long i = 2000; i < 5000; ++i) {
builder.baselineValues(i, i, (double)i, (double)i);
}
for (long i = 3000; i < 6000; ++i) {
final String d = Long.toString(i);
builder.baselineValues(d, d, d, d);
}
for (long i = 5000; i < 7000; ++i) {
builder.baselineValues(null, null, (double)i, (double)i);
}
for (long i = 6000; i < 8000; ++i) {
final String d = Long.toString(i);
builder.baselineValues(null, null, d, d);
}
builder.go();
}
@Test
//@Ignore
public void testMissingAndNewColumns() throws Exception {
final File left_dir = new File(BaseTestQuery.getTempDir("mergejoin-schemachanges-left"));
final File right_dir = new File(BaseTestQuery.getTempDir("mergejoin-schemachanges-right"));
left_dir.mkdirs();
right_dir.mkdirs();
System.out.println(left_dir);
System.out.println(right_dir);
// missing column kl
BufferedWriter writer = new BufferedWriter(new FileWriter(new File(left_dir, "l1.json")));
for (int i = 0; i < 50; ++i) {
writer.write(String.format("{ \"kl1\" : %d , \"vl1\": %d }\n", i, i));
}
writer.close();
writer = new BufferedWriter(new FileWriter(new File(left_dir, "l2.json")));
for (int i = 50; i < 100; ++i) {
writer.write(String.format("{ \"kl\" : %d , \"vl\": %d }\n", i, i));
}
writer.close();
writer = new BufferedWriter(new FileWriter(new File(left_dir, "l3.json")));
for (int i = 100; i < 150; ++i) {
writer.write(String.format("{ \"kl2\" : %d , \"vl2\": %d }\n", i, i));
}
writer.close();
// right missing column kr
writer = new BufferedWriter(new FileWriter(new File(right_dir, "r1.json")));
for (int i = 0; i < 50; ++i) {
writer.write(String.format("{ \"kr1\" : %f , \"vr1\": %f }\n", (float)i, (float)i));
}
writer.close();
writer = new BufferedWriter(new FileWriter(new File(right_dir, "r2.json")));
for (int i = 50; i < 100; ++i) {
writer.write(String.format("{ \"kr\" : %f , \"vr\": %f }\n", (float)i, (float)i));
}
writer.close();
writer = new BufferedWriter(new FileWriter(new File(right_dir, "r3.json")));
for (int i = 100; i < 150; ++i) {
writer.write(String.format("{ \"kr2\" : %f , \"vr2\": %f }\n", (float)i, (float)i));
}
writer.close();
// INNER JOIN
String query = String.format("select * from dfs_test.`%s` L %s join dfs_test.`%s` R on L.kl=R.kr",
left_dir.toPath().toString(), "inner", right_dir.toPath().toString());
TestBuilder builder = testBuilder()
.sqlQuery(query)
.optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = false; alter session set `exec.enable_union_type` = true")
.unOrdered()
.baselineColumns("kl", "vl", "kr", "vr", "kl1", "vl1", "kl2", "vl2", "kr1", "vr1", "kr2", "vr2");
for (long i = 50; i < 100; ++i) {
builder.baselineValues(i, i, (double)i, (double)i, null, null, null, null, null, null, null, null);
}
builder.go();
// LEFT JOIN
query = String.format("select * from dfs_test.`%s` L %s join dfs_test.`%s` R on L.kl=R.kr",
left_dir.toPath().toString(), "left", right_dir.toPath().toString());
builder = testBuilder()
.sqlQuery(query)
.optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = false; alter session set `exec.enable_union_type` = true")
.unOrdered()
.baselineColumns("kl", "vl", "kr", "vr", "kl1", "vl1", "kl2", "vl2", "kr1", "vr1", "kr2", "vr2");
for (long i = 0; i < 50; ++i) {
builder.baselineValues(null, null, null, null, i, i, null, null, null, null, null, null);
}
for (long i = 50; i < 100; ++i) {
builder.baselineValues(i, i, (double)i, (double)i, null, null, null, null, null, null, null, null);
}
for (long i = 100; i < 150; ++i) {
builder.baselineValues(null, null, null, null, null, null, i, i, null, null, null, null);
}
builder.go();
// RIGHT JOIN
query = String.format("select * from dfs_test.`%s` L %s join dfs_test.`%s` R on L.kl=R.kr",
left_dir.toPath().toString(), "right", right_dir.toPath().toString());
builder = testBuilder()
.sqlQuery(query)
.optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = false; alter session set `exec.enable_union_type` = true")
.unOrdered()
.baselineColumns("kl", "vl", "kr", "vr", "kl1", "vl1", "kl2", "vl2", "kr1", "vr1", "kr2", "vr2");
for (long i = 0; i < 50; ++i) {
builder.baselineValues(null, null, null, null, null, null, null, null, (double)i, (double)i, null, null);
}
for (long i = 50; i < 100; ++i) {
builder.baselineValues(i, i, (double)i, (double)i, null, null, null, null, null, null, null, null);
}
for (long i = 100; i < 150; ++i) {
builder.baselineValues(null, null, null, null, null, null, null, null, null, null, (double)i, (double)i);
}
builder.go();
}
@Test
//@Ignore
public void testOneSideSchemaChanges() throws Exception {
final File left_dir = new File(BaseTestQuery.getTempDir("mergejoin-schemachanges-left"));
final File right_dir = new File(BaseTestQuery.getTempDir("mergejoin-schemachanges-right"));
left_dir.mkdirs();
right_dir.mkdirs();
System.out.println(left_dir);
System.out.println(right_dir);
BufferedWriter writer = new BufferedWriter(new FileWriter(new File(left_dir, "l1.json")));
for (int i = 0; i < 50; ++i) {
writer.write(String.format("{ \"kl\" : %d , \"vl\": %d }\n", i, i));
}
for (int i = 50; i < 100; ++i) {
writer.write(String.format("{ \"kl\" : %f , \"vl\": %f }\n", (float) i, (float) i));
}
writer.close();
writer = new BufferedWriter(new FileWriter(new File(right_dir, "r1.json")));
for (int i = 0; i < 50; ++i) {
writer.write(String.format("{ \"kl\" : %d , \"vl\": %d }\n", i, i));
}
writer.close();
String query = String.format("select * from dfs_test.`%s` L %s join dfs_test.`%s` R on L.kl=R.kl",
left_dir.toPath().toString(), "inner", right_dir.toPath().toString());
TestBuilder builder = testBuilder()
.sqlQuery(query)
.optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = false; alter session set `exec.enable_union_type` = true")
.unOrdered()
.baselineColumns("kl", "vl", "kl0", "vl0");
for (long i = 0; i < 50; ++i) {
builder.baselineValues(i, i, i, i);
}
builder.go();
}
}