/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.hep.HepRelVertex;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinInfo;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.List;
/**
* Planner rule that creates a {@code SemiJoinRule} from a
* {@link org.apache.calcite.rel.core.Join} on top of a
* {@link org.apache.calcite.rel.logical.LogicalAggregate}.
*
* TODO Remove this rule and use Calcite's SemiJoinRule. Not possible currently
* since Calcite doesnt use RelBuilder for this rule and we want to generate HiveSemiJoin rel here.
*/
public class HiveSemiJoinRule extends RelOptRule {
public static final HiveSemiJoinRule INSTANCE = new HiveSemiJoinRule(HiveRelFactories.HIVE_BUILDER);
protected static final Logger LOG = LoggerFactory.getLogger(HiveSemiJoinRule.class);
private HiveSemiJoinRule(RelBuilderFactory relBuilder) {
super(
operand(Project.class,
some(
operand(Join.class,
some(operand(RelNode.class, any()),
operand(Aggregate.class, any()))))), relBuilder, null);
}
@Override public void onMatch(RelOptRuleCall call) {
LOG.debug("Matched HiveSemiJoinRule");
final Project project = call.rel(0);
final Join join = call.rel(1);
final RelNode left = call.rel(2);
final Aggregate aggregate = call.rel(3);
final RelOptCluster cluster = join.getCluster();
final RexBuilder rexBuilder = cluster.getRexBuilder();
final ImmutableBitSet bits =
RelOptUtil.InputFinder.bits(project.getProjects(), null);
final ImmutableBitSet rightBits =
ImmutableBitSet.range(left.getRowType().getFieldCount(),
join.getRowType().getFieldCount());
if (bits.intersects(rightBits)) {
return;
}
final JoinInfo joinInfo = join.analyzeCondition();
if (!joinInfo.rightSet().equals(
ImmutableBitSet.range(aggregate.getGroupCount()))) {
// Rule requires that aggregate key to be the same as the join key.
// By the way, neither a super-set nor a sub-set would work.
return;
}
if(join.getJoinType() == JoinRelType.LEFT) {
// since for LEFT join we are only interested in rows from LEFT we can get rid of right side
call.transformTo(call.builder().push(left).project(project.getProjects(), project.getRowType().getFieldNames()).build());
return;
}
if (join.getJoinType() != JoinRelType.INNER) {
return;
}
if (!joinInfo.isEqui()) {
return;
}
LOG.debug("All conditions matched for HiveSemiJoinRule. Going to apply transformation.");
final List<Integer> newRightKeyBuilder = Lists.newArrayList();
final List<Integer> aggregateKeys = aggregate.getGroupSet().asList();
for (int key : joinInfo.rightKeys) {
newRightKeyBuilder.add(aggregateKeys.get(key));
}
final ImmutableIntList newRightKeys =
ImmutableIntList.copyOf(newRightKeyBuilder);
final RelNode newRight = aggregate.getInput();
final RexNode newCondition =
RelOptUtil.createEquiJoinCondition(left, joinInfo.leftKeys, newRight,
newRightKeys, rexBuilder);
RelNode semi = null;
//HIVE-15458: we need to add a Project on top of Join since SemiJoin with Join as it's right input
// is not expected further down the pipeline. see jira for more details
if(aggregate.getInput() instanceof HepRelVertex
&& ((HepRelVertex)aggregate.getInput()).getCurrentRel() instanceof Join) {
Join rightJoin = (Join)(((HepRelVertex)aggregate.getInput()).getCurrentRel());
List<RexNode> projects = new ArrayList<>();
for(int i=0; i<rightJoin.getRowType().getFieldCount(); i++){
projects.add(rexBuilder.makeInputRef(rightJoin, i));
}
RelNode topProject = call.builder().push(rightJoin).project(projects, rightJoin.getRowType().getFieldNames(), true).build();
semi = call.builder().push(left).push(topProject).semiJoin(newCondition).build();
}
else {
semi = call.builder().push(left).push(aggregate.getInput()).semiJoin(newCondition).build();
}
call.transformTo(call.builder().push(semi).project(project.getProjects(), project.getRowType().getFieldNames()).build());
}
}
// End SemiJoinRule.java