/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.engine.planner.global;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.tajo.LocalTajoTestingUtility;
import org.apache.tajo.QueryId;
import org.apache.tajo.QueryIdFactory;
import org.apache.tajo.TajoTestingCluster;
import org.apache.tajo.algebra.Expr;
import org.apache.tajo.catalog.*;
import org.apache.tajo.catalog.proto.CatalogProtos;
import org.apache.tajo.catalog.statistics.TableStats;
import org.apache.tajo.common.TajoDataTypes;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.datum.Datum;
import org.apache.tajo.datum.DatumFactory;
import org.apache.tajo.datum.TextDatum;
import org.apache.tajo.engine.parser.SQLAnalyzer;
import org.apache.tajo.engine.planner.LogicalOptimizer;
import org.apache.tajo.engine.planner.LogicalPlan;
import org.apache.tajo.engine.planner.LogicalPlanner;
import org.apache.tajo.engine.planner.PlanningException;
import org.apache.tajo.engine.planner.logical.GroupbyNode;
import org.apache.tajo.engine.planner.logical.JoinNode;
import org.apache.tajo.engine.planner.logical.LogicalNode;
import org.apache.tajo.engine.planner.logical.NodeType;
import org.apache.tajo.engine.query.QueryContext;
import org.apache.tajo.master.TajoMaster;
import org.apache.tajo.storage.*;
import org.apache.tajo.util.CommonTestingUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.Collection;
import static junit.framework.Assert.assertNotNull;
import static org.apache.tajo.TajoConstants.DEFAULT_DATABASE_NAME;
import static org.apache.tajo.TajoConstants.DEFAULT_TABLESPACE_NAME;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class TestBroadcastJoinPlan {
private TajoConf conf;
private final String TEST_PATH = "target/test-data/TestBroadcastJoinPlan";
private TajoTestingCluster util;
private CatalogService catalog;
private SQLAnalyzer analyzer;
private Path testDir;
private TableDesc smallTable1;
private TableDesc smallTable2;
private TableDesc smallTable3;
private TableDesc largeTable1;
private TableDesc largeTable2;
@Before
public void setUp() throws Exception {
util = new TajoTestingCluster();
conf = util.getConfiguration();
conf.setLongVar(TajoConf.ConfVars.DIST_QUERY_BROADCAST_JOIN_THRESHOLD, 500 * 1024);
conf.setBoolVar(TajoConf.ConfVars.DIST_QUERY_BROADCAST_JOIN_AUTO, true);
testDir = CommonTestingUtil.getTestDir(TEST_PATH);
catalog = util.startCatalogCluster().getCatalog();
catalog.createTablespace(DEFAULT_TABLESPACE_NAME, testDir.toUri().toString());
catalog.createDatabase(DEFAULT_DATABASE_NAME, DEFAULT_TABLESPACE_NAME);
util.getMiniCatalogCluster().getCatalogServer().reloadBuiltinFunctions(TajoMaster.initBuiltinFunctions());
Schema smallTable1Schema = new Schema();
smallTable1Schema.addColumn("small1_id", TajoDataTypes.Type.INT4);
smallTable1Schema.addColumn("small1_contents", TajoDataTypes.Type.TEXT);
smallTable1 = makeTestData("default.small1", smallTable1Schema, 10 * 1024);
Schema smallTable2Schema = new Schema();
smallTable2Schema.addColumn("small2_id", TajoDataTypes.Type.INT4);
smallTable2Schema.addColumn("small2_contents", TajoDataTypes.Type.TEXT);
smallTable2 = makeTestData("default.small2", smallTable2Schema, 10 * 1024);
Schema smallTable3Schema = new Schema();
smallTable3Schema.addColumn("small3_id", TajoDataTypes.Type.INT4);
smallTable3Schema.addColumn("small3_contents", TajoDataTypes.Type.TEXT);
smallTable3 = makeTestData("default.small3", smallTable3Schema, 10 * 1024);
Schema largeTable1Schema = new Schema();
largeTable1Schema.addColumn("large1_id", TajoDataTypes.Type.INT4);
largeTable1Schema.addColumn("large1_contents", TajoDataTypes.Type.TEXT);
largeTable1 = makeTestData("default.large1", largeTable1Schema, 1024 * 1024); //1M
Schema largeTable2Schema = new Schema();
largeTable2Schema.addColumn("large2_id", TajoDataTypes.Type.INT4);
largeTable2Schema.addColumn("large2_contents", TajoDataTypes.Type.TEXT);
largeTable2 = makeTestData("default.large2", largeTable2Schema, 1024 * 1024); //1M
catalog.createTable(smallTable1);
catalog.createTable(smallTable2);
catalog.createTable(largeTable1);
catalog.createTable(largeTable2);
analyzer = new SQLAnalyzer();
}
private TableDesc makeTestData(String tableName, Schema schema, int dataSize) throws Exception {
TableMeta tableMeta = CatalogUtil.newTableMeta(CatalogProtos.StoreType.CSV);
Path dataPath = new Path(testDir, tableName + ".csv");
String contentsData = "";
for (int i = 0; i < 1000; i++) {
for (int j = 0; j < 10; j++) {
contentsData += j;
}
}
Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(tableMeta, schema,
dataPath);
appender.init();
Tuple tuple = new VTuple(schema.size());
int writtenSize = 0;
int count = 0;
while (true) {
TextDatum textDatum = DatumFactory.createText(count + "_" + contentsData);
tuple.put(new Datum[] {
DatumFactory.createInt4(count), textDatum });
appender.addTuple(tuple);
writtenSize += textDatum.size();
if (writtenSize >= dataSize) {
break;
}
}
appender.flush();
appender.close();
TableDesc tableDesc = CatalogUtil.newTableDesc(tableName, schema, tableMeta, dataPath);
TableStats tableStats = new TableStats();
FileSystem fs = dataPath.getFileSystem(conf);
tableStats.setNumBytes(fs.getFileStatus(dataPath).getLen());
tableDesc.setStats(tableStats);
return tableDesc;
}
@After
public void tearDown() throws Exception {
util.shutdownCatalogCluster();
}
@Test
public final void testBroadcastJoin() throws IOException, PlanningException {
String query = "select count(*) from large1 " +
"join small1 on large1_id = small1_id " +
"join small2 on small1_id = small2_id";
LogicalPlanner planner = new LogicalPlanner(catalog);
LogicalOptimizer optimizer = new LogicalOptimizer(conf);
Expr expr = analyzer.parse(query);
LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummySession(), expr);
optimizer.optimize(plan);
QueryId queryId = QueryIdFactory.newQueryId(System.currentTimeMillis(), 0);
QueryContext queryContext = new QueryContext();
MasterPlan masterPlan = new MasterPlan(queryId, queryContext, plan);
GlobalPlanner globalPlanner = new GlobalPlanner(conf, catalog);
globalPlanner.build(masterPlan);
/*
|-eb_1395714781593_0000_000007 (TERMINAL)
|-eb_1395714781593_0000_000006 (ROOT)
|-eb_1395714781593_0000_000005 (LEAF)
*/
ExecutionBlock terminalEB = masterPlan.getRoot();
assertEquals(1, masterPlan.getChildCount(terminalEB.getId()));
ExecutionBlock rootEB = masterPlan.getChild(terminalEB.getId(), 0);
assertEquals(1, masterPlan.getChildCount(rootEB.getId()));
ExecutionBlock leafEB = masterPlan.getChild(rootEB.getId(), 0);
assertNotNull(leafEB);
assertEquals(0, masterPlan.getChildCount(leafEB.getId()));
Collection<String> broadcastTables = leafEB.getBroadcastTables();
assertEquals(2, broadcastTables.size());
assertTrue(broadcastTables.contains("default.small1"));
assertTrue(broadcastTables.contains("default.small2"));
assertTrue(!broadcastTables.contains("default.large1"));
LogicalNode leafNode = leafEB.getPlan();
assertEquals(NodeType.GROUP_BY, leafNode.getType());
LogicalNode joinNode = ((GroupbyNode)leafNode).getChild();
assertEquals(NodeType.JOIN, joinNode.getType());
LogicalNode leftNode = ((JoinNode)joinNode).getLeftChild();
LogicalNode rightNode = ((JoinNode)joinNode).getRightChild();
assertEquals(NodeType.JOIN, leftNode.getType());
assertEquals(NodeType.SCAN, rightNode.getType());
LogicalNode lastLeftNode = ((JoinNode)leftNode).getLeftChild();
LogicalNode lastRightNode = ((JoinNode)leftNode).getRightChild();
assertEquals(NodeType.SCAN, lastLeftNode.getType());
assertEquals(NodeType.SCAN, lastRightNode.getType());
}
@Test
public final void testNotBroadcastJoinTwoLargeTable() throws IOException, PlanningException {
// This query is not broadcast join
String query = "select count(*) from large1 " +
"join large2 on large1_id = large2_id ";
LogicalPlanner planner = new LogicalPlanner(catalog);
LogicalOptimizer optimizer = new LogicalOptimizer(conf);
Expr expr = analyzer.parse(query);
LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummySession(), expr);
optimizer.optimize(plan);
QueryId queryId = QueryIdFactory.newQueryId(System.currentTimeMillis(), 0);
QueryContext queryContext = new QueryContext();
MasterPlan masterPlan = new MasterPlan(queryId, queryContext, plan);
GlobalPlanner globalPlanner = new GlobalPlanner(conf, catalog);
globalPlanner.build(masterPlan);
ExecutionBlockCursor ebCursor = new ExecutionBlockCursor(masterPlan);
while (ebCursor.hasNext()) {
ExecutionBlock eb = ebCursor.nextBlock();
Collection<String> broadcastTables = eb.getBroadcastTables();
assertTrue(broadcastTables == null || broadcastTables.isEmpty());
}
}
@Test
public final void testTwoBroadcastJoin() throws IOException, PlanningException {
String query = "select count(*) from large1 " +
"join small1 on large1_id = small1_id " +
"join large2 on large1_id = large2_id " +
"join small2 on large2_id = small2_id";
LogicalPlanner planner = new LogicalPlanner(catalog);
LogicalOptimizer optimizer = new LogicalOptimizer(conf);
Expr expr = analyzer.parse(query);
LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummySession(), expr);
optimizer.optimize(plan);
QueryId queryId = QueryIdFactory.newQueryId(System.currentTimeMillis(), 0);
QueryContext queryContext = new QueryContext();
MasterPlan masterPlan = new MasterPlan(queryId, queryContext, plan);
GlobalPlanner globalPlanner = new GlobalPlanner(conf, catalog);
globalPlanner.build(masterPlan);
/*
|-eb_1395736346625_0000_000009
|-eb_1395736346625_0000_000008 (GROUP-BY)
|-eb_1395736346625_0000_000007 (GROUP-BY, JOIN)
|-eb_1395736346625_0000_000006 (LEAF, JOIN)
|-eb_1395736346625_0000_000003 (LEAF, JOIN)
*/
ExecutionBlockCursor ebCursor = new ExecutionBlockCursor(masterPlan);
int index = 0;
while (ebCursor.hasNext()) {
ExecutionBlock eb = ebCursor.nextBlock();
if(index == 0) {
Collection<String> broadcastTables = eb.getBroadcastTables();
assertEquals(1, broadcastTables.size());
assertTrue(!broadcastTables.contains("default.large1"));
assertTrue(broadcastTables.contains("default.small1"));
} else if(index == 1) {
Collection<String> broadcastTables = eb.getBroadcastTables();
assertEquals(1, broadcastTables.size());
assertTrue(!broadcastTables.contains("default.large2"));
assertTrue(broadcastTables.contains("default.small2"));
}
index++;
}
assertEquals(5, index);
}
@Test
public final void testNotBroadcastJoinSubquery() throws IOException, PlanningException {
// This query is not broadcast join;
String query = "select count(*) from large1 " +
"join (select * from small1) a on large1_id = a.small1_id " +
"join small2 on a.small1_id = small2_id";
LogicalPlanner planner = new LogicalPlanner(catalog);
LogicalOptimizer optimizer = new LogicalOptimizer(conf);
Expr expr = analyzer.parse(query);
LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummySession(), expr);
optimizer.optimize(plan);
QueryId queryId = QueryIdFactory.newQueryId(System.currentTimeMillis(), 0);
QueryContext queryContext = new QueryContext();
MasterPlan masterPlan = new MasterPlan(queryId, queryContext, plan);
GlobalPlanner globalPlanner = new GlobalPlanner(conf, catalog);
globalPlanner.build(masterPlan);
/*
|-eb_1395749810370_0000_000007
|-eb_1395749810370_0000_000006 (GROUP-BY)
|-eb_1395749810370_0000_000005 (GROUP-BY, JOIN)
|-eb_1395749810370_0000_000004 (LEAF, SCAN, large1)
|-eb_1395749810370_0000_000003 (JOIN)
|-eb_1395749810370_0000_000002 (LEAF, SCAN, small2)
|-eb_1395749810370_0000_000001 (LEAF, TABLE_SUBQUERY, small1)
*/
ExecutionBlockCursor ebCursor = new ExecutionBlockCursor(masterPlan);
int index = 0;
while (ebCursor.hasNext()) {
ExecutionBlock eb = ebCursor.nextBlock();
Collection<String> broadcastTables = eb.getBroadcastTables();
assertTrue(broadcastTables == null || broadcastTables.isEmpty());
index++;
}
assertEquals(7, index);
}
@Test
public final void testBroadcastJoinSubquery() throws IOException, PlanningException {
String query = "select count(*) from large1 " +
"join (select * from small1) a on large1_id = a.small1_id " +
"join small2 on large1_id = small2_id";
LogicalPlanner planner = new LogicalPlanner(catalog);
LogicalOptimizer optimizer = new LogicalOptimizer(conf);
Expr expr = analyzer.parse(query);
LogicalPlan plan = planner.createPlan(LocalTajoTestingUtility.createDummySession(), expr);
optimizer.optimize(plan);
QueryId queryId = QueryIdFactory.newQueryId(System.currentTimeMillis(), 0);
QueryContext queryContext = new QueryContext();
MasterPlan masterPlan = new MasterPlan(queryId, queryContext, plan);
GlobalPlanner globalPlanner = new GlobalPlanner(conf, catalog);
globalPlanner.build(masterPlan);
/*
|-eb_1395794091662_0000_000007
|-eb_1395794091662_0000_000006
|-eb_1395794091662_0000_000005 (JOIN)
|-eb_1395794091662_0000_000004 (LEAF, SUBQUERY)
|-eb_1395794091662_0000_000003 (LEAF, JOIN)
*/
ExecutionBlockCursor ebCursor = new ExecutionBlockCursor(masterPlan);
int index = 0;
while (ebCursor.hasNext()) {
ExecutionBlock eb = ebCursor.nextBlock();
if(index == 0) {
//LEAF, JOIN
Collection<String> broadcastTables = eb.getBroadcastTables();
assertEquals(1, broadcastTables.size());
assertTrue(!broadcastTables.contains("default.large1"));
assertTrue(broadcastTables.contains("default.small2"));
} else if(index == 1) {
//LEAF, SUBQUERY
Collection<String> broadcastTables = eb.getBroadcastTables();
assertTrue(broadcastTables == null || broadcastTables.isEmpty());
} else if(index == 2) {
//JOIN
Collection<String> broadcastTables = eb.getBroadcastTables();
assertTrue(broadcastTables == null || broadcastTables.isEmpty());
}
index++;
}
assertEquals(5, index);
}
}