/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.hep.HepRelVertex;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.metadata.BuiltInMetadata;
import org.apache.calcite.rel.metadata.MetadataDef;
import org.apache.calcite.rel.metadata.MetadataHandler;
import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.util.BitSets;
import org.apache.calcite.util.BuiltInMethod;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.plan.ColStatistics;
/**
 * Metadata handler that supplies unique-key information for {@link Project}
 * operators in Hive's Calcite plans.
 *
 * <p>When the project sits on top of a {@link HiveTableScan} (possibly through
 * {@link Filter}s and {@link HepRelVertex} wrappers) uniqueness is inferred
 * from the scan's column statistics; otherwise the unique keys of the input
 * are translated to the project's output positions.
 */
public class HiveRelMdUniqueKeys implements MetadataHandler<BuiltInMetadata.UniqueKeys> {

  /** Provider that registers this handler for the unique-keys metadata method. */
  public static final RelMetadataProvider SOURCE =
      ReflectiveRelMetadataProvider.reflectiveSource(
          BuiltInMethod.UNIQUE_KEYS.method, new HiveRelMdUniqueKeys());

  @Override
  public MetadataDef<BuiltInMetadata.UniqueKeys> getDef() {
    return BuiltInMetadata.UniqueKeys.DEF;
  }

  /*
   * Infer Uniqueness if: - rowCount(col) = ndv(col) - TBD for numerics: max(col)
   * - min(col) = rowCount(col)
   *
   * Why are we intercepting Project and not TableScan? Because if we
   * have a method for TableScan, it will not know which columns to check for.
   * Inferring Uniqueness for all columns is very expensive right now. The flip
   * side of doing this is, it only works post Field Trimming.
   */
  /**
   * Computes the unique keys of a {@link Project}.
   *
   * @param rel the project whose unique keys are requested
   * @param mq metadata query used to obtain the input's unique keys when no
   *        table scan is reachable below the project
   * @param ignoreNulls forwarded unchanged to the input's unique-key query
   * @return a set of keys expressed in the project's output positions; empty
   *         when nothing can be derived
   */
  public Set<ImmutableBitSet> getUniqueKeys(Project rel, RelMetadataQuery mq, boolean ignoreNulls) {
    HiveTableScan tScan = getTableScan(rel.getInput(), false);
    if (tScan == null) {
      // If HiveTableScan is not found, e.g., not sequence of Project and
      // Filter operators, execute the original getUniqueKeys method
      return deriveKeysFromInput(rel, mq, ignoreNulls);
    }
    return deriveKeysFromColumnStats(rel, tScan);
  }

  /**
   * Translates the unique keys of the project's input into output positions.
   * Only keys whose columns are all projected as plain input references are kept.
   */
  private static Set<ImmutableBitSet> deriveKeysFromInput(Project rel, RelMetadataQuery mq,
      boolean ignoreNulls) {
    // LogicalProject maps a set of rows to a different set;
    // Without knowledge of the mapping function(whether it
    // preserves uniqueness), it is only safe to derive uniqueness
    // info from the child of a project when the mapping is f(a) => a.
    //
    // Furthermore, the unique bitset coming from the child needs
    // to be mapped to match the output of the project.
    final Map<Integer, Integer> mapInToOutPos = new HashMap<>();
    final List<RexNode> projExprs = rel.getProjects();
    final Set<ImmutableBitSet> projUniqueKeySet = new HashSet<>();

    // Build an input to output position map. When an input column is
    // projected more than once, the last output position wins.
    for (int i = 0; i < projExprs.size(); i++) {
      RexNode projExpr = projExprs.get(i);
      if (projExpr instanceof RexInputRef) {
        mapInToOutPos.put(((RexInputRef) projExpr).getIndex(), i);
      }
    }

    if (mapInToOutPos.isEmpty()) {
      // if there's no RexInputRef in the projected expressions
      // return empty set.
      return projUniqueKeySet;
    }

    Set<ImmutableBitSet> childUniqueKeySet = mq.getUniqueKeys(rel.getInput(), ignoreNulls);
    if (childUniqueKeySet == null) {
      return projUniqueKeySet;
    }

    // Now add to the projUniqueKeySet the child keys that are fully
    // projected.
    for (ImmutableBitSet colMask : childUniqueKeySet) {
      ImmutableBitSet.Builder tmpMask = ImmutableBitSet.builder();
      boolean completeKeyProjected = true;
      for (int bit : colMask) {
        Integer outPos = mapInToOutPos.get(bit);
        if (outPos == null) {
          // Skip the child unique key if part of it is not
          // projected.
          completeKeyProjected = false;
          break;
        }
        tmpMask.set(outPos);
      }
      if (completeKeyProjected) {
        projUniqueKeySet.add(tmpMask.build());
      }
    }
    return projUniqueKeySet;
  }

  /**
   * Infers single-column unique keys from the table scan's column statistics.
   *
   * <p>Statistics are requested for the distinct input columns referenced by
   * the project, in ascending column-index order. Each statistic is therefore
   * matched back to the project through the input column index rather than
   * through the order of appearance in the project list; the two orders differ
   * whenever the project reorders or repeats columns.
   */
  private static Set<ImmutableBitSet> deriveKeysFromColumnStats(Project rel, HiveTableScan tScan) {
    // Input column index -> every output position that projects that column
    // as a bare input reference.
    final Map<Integer, BitSet> outPositionsByInput = new HashMap<>();
    final BitSet projectedCols = new BitSet();
    final List<RexNode> projExprs = rel.getProjects();
    for (int outPos = 0; outPos < projExprs.size(); outPos++) {
      RexNode projExpr = projExprs.get(outPos);
      if (projExpr instanceof RexInputRef) {
        int inPos = ((RexInputRef) projExpr).getIndex();
        projectedCols.set(inPos);
        BitSet outPositions = outPositionsByInput.get(inPos);
        if (outPositions == null) {
          outPositions = new BitSet();
          outPositionsByInput.put(inPos, outPositions);
        }
        outPositions.set(outPos);
      }
    }

    final double numRows = tScan.getRows();
    final List<Integer> requestedCols = BitSets.toList(projectedCols);
    // NOTE(review): assumes getColStat returns one entry per requested column,
    // in request order -- the original code relied on the same positional
    // alignment. Confirm against HiveTableScan.
    final List<ColStatistics> colStats = tScan.getColStat(requestedCols);
    final Set<ImmutableBitSet> keys = new HashSet<>();
    if (colStats == null) {
      return keys;
    }

    final int statCount = Math.min(requestedCols.size(), colStats.size());
    for (int i = 0; i < statCount; i++) {
      ColStatistics cStat = colStats.get(i);
      // A missing statistic means uniqueness is unknown for that column.
      if (cStat == null || !isUniqueColumn(cStat, numRows)) {
        continue;
      }
      // Every output position that projects this column is a key on its own.
      BitSet outPositions = outPositionsByInput.get(requestedCols.get(i));
      for (int outPos = outPositions.nextSetBit(0); outPos >= 0;
          outPos = outPositions.nextSetBit(outPos + 1)) {
        keys.add(ImmutableBitSet.of(outPos));
      }
    }
    return keys;
  }

  /**
   * Decides whether a column is treated as unique given the scan's row count:
   * either its distinct-value count is at least the row count, or its value
   * range (max - min + 1) equals the row count within
   * {@link RelOptUtil#EPSILON}.
   */
  private static boolean isUniqueColumn(ColStatistics cStat, double numRows) {
    if (cStat.getCountDistint() >= numRows) {
      return true;
    }
    if (cStat.getRange() != null
        && cStat.getRange().maxValue != null
        && cStat.getRange().minValue != null) {
      double span = cStat.getRange().maxValue.doubleValue()
          - cStat.getRange().minValue.doubleValue() + 1;
      return Math.abs(numRows - span) < RelOptUtil.EPSILON;
    }
    return false;
  }

  /*
   * traverse a path of Filter, Projects to get to the TableScan.
   * In case of Unique keys, stop if you reach a Project, it will be handled
   * by the invocation on the Project.
   * In case of getting the base rowCount of a Path, keep going past a Project.
   */
  /**
   * Walks down from {@code r} through {@link HepRelVertex} wrappers and
   * {@link Filter}s (and {@link Project}s when {@code traverseProject} is set)
   * looking for a {@link HiveTableScan}.
   *
   * @param r the node to start from; may be {@code null}
   * @param traverseProject whether to continue past {@link Project} operators
   * @return the table scan reached, or {@code null} if any other kind of
   *         operator is encountered first
   */
  static HiveTableScan getTableScan(RelNode r, boolean traverseProject) {
    while (r != null && !(r instanceof HiveTableScan)) {
      if (r instanceof HepRelVertex) {
        r = ((HepRelVertex) r).getCurrentRel();
      } else if (r instanceof Filter) {
        r = ((Filter) r).getInput();
      } else if (traverseProject && r instanceof Project) {
        r = ((Project) r).getInput();
      } else {
        r = null;
      }
    }
    return r == null ? null : (HiveTableScan) r;
  }
}