/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.physical.impl.join;

import org.apache.drill.BaseTestQuery;
import org.apache.drill.common.util.TestTools;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestRule;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Random;

public class TestMergeJoinAdvanced extends BaseTestQuery {
  @Rule
  public final TestRule TIMEOUT = TestTools.getTimeoutRule(120000); // Longer timeout than usual.

  // Hash join has to be disabled so that the planner picks merge join for the tests in this class.
  @BeforeClass
  public static void disableHashJoin() throws Exception {
    test("alter session set `planner.enable_hashjoin` = false");
  }

  @AfterClass
  public static void enableHashJoin() throws Exception {
    test("alter session set `planner.enable_hashjoin` = true");
  }

  @Test
  public void testJoinWithDifferentTypesInCondition() throws Exception {
    String query = "select t1.full_name from cp.`employee.json` t1, cp.`department.json` t2 " +
        "where cast(t1.department_id as double) = t2.department_id and t1.employee_id = 1";

    testBuilder()
        .sqlQuery(query)
        .optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = true")
        .unOrdered()
        .baselineColumns("full_name")
        .baselineValues("Sheri Nowmer")
        .go();

    query = "select t1.bigint_col from cp.`jsoninput/implicit_cast_join_1.json` t1, cp.`jsoninput/implicit_cast_join_1.json` t2 " +
        " where t1.bigint_col = cast(t2.bigint_col as int) and" + // join condition with bigint and int
        " t1.double_col = cast(t2.double_col as float) and" +     // join condition with double and float
        " t1.bigint_col = cast(t2.bigint_col as double)";         // join condition with bigint and double

    testBuilder()
        .sqlQuery(query)
        .optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = true")
        .unOrdered()
        .baselineColumns("bigint_col")
        .baselineValues(1L)
        .go();

    query = "select count(*) col1 from " +
        "(select t1.date_opt from cp.`parquet/date_dictionary.parquet` t1, cp.`parquet/timestamp_table.parquet` t2 " +
        "where t1.date_opt = t2.timestamp_col)"; // join condition contains date and timestamp

    testBuilder()
        .sqlQuery(query)
        .unOrdered()
        .baselineColumns("col1")
        .baselineValues(4L)
        .go();
  }
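  // The baselines above are verified with hash join explicitly re-enabled. Since this class
  // disables hash join by default, the same query can also be checked through the merge join
  // path. A minimal sketch (illustrative, not part of the original suite; the method name is
  // hypothetical and it relies on the class-level `planner.enable_hashjoin` = false setting):
  @Test
  public void testImplicitCastJoinViaMergeJoin() throws Exception {
    final String query = "select t1.full_name from cp.`employee.json` t1, cp.`department.json` t2 " +
        "where cast(t1.department_id as double) = t2.department_id and t1.employee_id = 1";
    testBuilder()
        .sqlQuery(query)
        .unOrdered()
        .baselineColumns("full_name")
        .baselineValues("Sheri Nowmer") // same baseline as the hash join variant above
        .go();
  }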
  @Test
  @Ignore // TODO file JIRA to fix this
  public void testFix2967() throws Exception {
    setSessionOption(PlannerSettings.BROADCAST.getOptionName(), "false");
    setSessionOption(PlannerSettings.HASHJOIN.getOptionName(), "false");
    setSessionOption(ExecConstants.SLICE_TARGET, "1");
    setSessionOption(ExecConstants.MAX_WIDTH_PER_NODE_KEY, "23");

    final String TEST_RES_PATH = TestTools.getWorkingPath() + "/src/test/resources";

    try {
      test("select * from dfs_test.`%s/join/j1` j1 left outer join dfs_test.`%s/join/j2` j2 on (j1.c_varchar = j2.c_varchar)",
          TEST_RES_PATH, TEST_RES_PATH);
    } finally {
      setSessionOption(PlannerSettings.BROADCAST.getOptionName(), String.valueOf(PlannerSettings.BROADCAST.getDefault().bool_val));
      setSessionOption(PlannerSettings.HASHJOIN.getOptionName(), String.valueOf(PlannerSettings.HASHJOIN.getDefault().bool_val));
      setSessionOption(ExecConstants.SLICE_TARGET, String.valueOf(ExecConstants.SLICE_TARGET_DEFAULT));
      setSessionOption(ExecConstants.MAX_WIDTH_PER_NODE_KEY, String.valueOf(ExecConstants.MAX_WIDTH_PER_NODE.getDefault().num_val));
    }
  }

  private static void generateData(final BufferedWriter leftWriter, final BufferedWriter rightWriter,
                                   final long left, final long right) throws IOException {
    // Every generated row shares the key 10000, so each left row joins with each right row.
    for (int i = 0; i < left; ++i) {
      leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 10000, i));
    }
    // Two left rows with unmatched keys; they show up only in left outer joins.
    leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 10001, 10001));
    leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 10002, 10002));

    for (int i = 0; i < right; ++i) {
      rightWriter.write(String.format("{ \"k1\" : %d , \"v1\": %d }", 10000, i));
    }
    // Three right rows with unmatched keys; they show up only in right outer joins.
    rightWriter.write(String.format("{ \"k1\" : %d , \"v1\": %d }", 10004, 10004));
    rightWriter.write(String.format("{ \"k1\" : %d , \"v1\": %d }", 10005, 10005));
    rightWriter.write(String.format("{ \"k1\" : %d , \"v1\": %d }", 10006, 10006));

    leftWriter.close();
    rightWriter.close();
  }

  private static void testMultipleBatchJoin(final long left, final long right,
                                            final String joinType, final long expected) throws Exception {
    final String leftSide = BaseTestQuery.getTempDir("merge-join-left.json");
    final String rightSide = BaseTestQuery.getTempDir("merge-join-right.json");
    final BufferedWriter leftWriter = new BufferedWriter(new FileWriter(new File(leftSide)));
    final BufferedWriter rightWriter = new BufferedWriter(new FileWriter(new File(rightSide)));
    generateData(leftWriter, rightWriter, left, right);
    final String query1 = String.format("select count(*) c1 from dfs_test.`%s` L %s join dfs_test.`%s` R on L.k=R.k1",
        leftSide, joinType, rightSide);
    testBuilder()
        .sqlQuery(query1)
        .optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = false")
        .unOrdered()
        .baselineColumns("c1")
        .baselineValues(expected)
        .go();
  }

  @Test
  public void testMergeInnerJoinLargeRight() throws Exception {
    testMultipleBatchJoin(1000L, 5000L, "inner", 5000L * 1000L);
  }

  @Test
  public void testMergeLeftJoinLargeRight() throws Exception {
    testMultipleBatchJoin(1000L, 5000L, "left", 5000L * 1000L + 2L);
  }

  @Test
  public void testMergeRightJoinLargeRight() throws Exception {
    testMultipleBatchJoin(1000L, 5000L, "right", 5000L * 1000L + 3L);
  }

  @Test
  public void testMergeInnerJoinLargeLeft() throws Exception {
    testMultipleBatchJoin(5000L, 1000L, "inner", 5000L * 1000L);
  }

  @Test
  public void testMergeLeftJoinLargeLeft() throws Exception {
    testMultipleBatchJoin(5000L, 1000L, "left", 5000L * 1000L + 2L);
  }

  @Test
  public void testMergeRightJoinLargeLeft() throws Exception {
    testMultipleBatchJoin(5000L, 1000L, "right", 5000L * 1000L + 3L);
  }
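  // Why the expected counts above hold: generateData gives all `left` left rows and all `right`
  // right rows the same key (10000), so an inner join yields left * right matches. The two extra
  // left rows (keys 10001, 10002) add 2 under a left outer join, and the three extra right rows
  // (keys 10004-10006) add 3 under a right outer join. A hedged sketch of the counting rule with
  // small, fast inputs (method name and sizes are illustrative, not part of the original suite):
  @Test
  public void testMergeJoinCountingRule() throws Exception {
    testMultipleBatchJoin(3L, 4L, "inner", 3L * 4L);      // 12 matched rows on key 10000
    testMultipleBatchJoin(3L, 4L, "left", 3L * 4L + 2L);  // plus 2 unmatched left rows
    testMultipleBatchJoin(3L, 4L, "right", 3L * 4L + 3L); // plus 3 unmatched right rows
  }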
  // The following tests can take some time.
  @Test
  @Ignore
  public void testMergeInnerJoinRandomized() throws Exception {
    final Random r = new Random();
    final long right = r.nextInt(10001) + 1L;
    final long left = r.nextInt(10001) + 1L;
    testMultipleBatchJoin(left, right, "inner", left * right);
  }

  @Test
  @Ignore
  public void testMergeLeftJoinRandomized() throws Exception {
    final Random r = new Random();
    final long right = r.nextInt(10001) + 1L;
    final long left = r.nextInt(10001) + 1L;
    testMultipleBatchJoin(left, right, "left", left * right + 2L);
  }

  @Test
  @Ignore
  public void testMergeRightJoinRandomized() throws Exception {
    final Random r = new Random();
    final long right = r.nextInt(10001) + 1L;
    final long left = r.nextInt(10001) + 1L;
    testMultipleBatchJoin(left, right, "right", left * right + 3L);
  }

  @Test
  public void testDrill4165() throws Exception {
    final String query1 = "select count(*) cnt from cp.`tpch/lineitem.parquet` l1, cp.`tpch/lineitem.parquet` l2 " +
        "where l1.l_partkey = l2.l_partkey and l1.l_suppkey < 30 and l2.l_suppkey < 30";
    testBuilder()
        .sqlQuery(query1)
        .unOrdered()
        .baselineColumns("cnt")
        .baselineValues(202452L)
        .go();
  }

  @Test
  public void testDrill4196() throws Exception {
    final String leftSide = BaseTestQuery.getTempDir("merge-join-left.json");
    final String rightSide = BaseTestQuery.getTempDir("merge-join-right.json");
    final BufferedWriter leftWriter = new BufferedWriter(new FileWriter(new File(leftSide)));
    final BufferedWriter rightWriter = new BufferedWriter(new FileWriter(new File(rightSide)));

    // Output batch is 32k; create a left batch of 6000+ rows so one key group spans multiple batches.
    leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 9999, 9999));
    for (int i = 0; i < 6000; ++i) {
      leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 10000, 10000));
    }
    leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 10001, 10001));
    leftWriter.write(String.format("{ \"k\" : %d , \"v\": %d }", 10002, 10002));

    // Keep all values the same. The join will consume the entire right side.
    for (int i = 0; i < 800; ++i) {
      rightWriter.write(String.format("{ \"k1\" : %d , \"v1\": %d }", 10000, 10000));
    }
    leftWriter.close();
    rightWriter.close();

    final String query1 = String.format("select count(*) c1 from dfs_test.`%s` L %s join dfs_test.`%s` R on L.k=R.k1",
        leftSide, "inner", rightSide);
    testBuilder()
        .sqlQuery(query1)
        .optionSettingQueriesForTestQuery("alter session set `planner.enable_hashjoin` = false")
        .unOrdered()
        .baselineColumns("c1")
        .baselineValues(6000 * 800L)
        .go();
  }
}