/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lens.cube.parse;
import static java.util.stream.Collectors.toSet;
import static org.apache.lens.cube.parse.CubeQueryConfUtil.*;
import static org.apache.hadoop.hive.ql.parse.HiveParser.*;
import static com.google.common.base.Preconditions.checkArgument;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Predicate;
import org.apache.lens.cube.error.LensCubeErrorCode;
import org.apache.lens.cube.error.NoCandidateDimAvailableException;
import org.apache.lens.cube.error.NoCandidateFactAvailableException;
import org.apache.lens.cube.metadata.*;
import org.apache.lens.cube.metadata.join.TableRelationship;
import org.apache.lens.cube.parse.join.AutoJoinContext;
import org.apache.lens.cube.parse.join.JoinClause;
import org.apache.lens.cube.parse.join.JoinTree;
import org.apache.lens.cube.parse.join.JoinUtils;
import org.apache.lens.server.api.error.LensException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.*;
import org.apache.hadoop.util.ReflectionUtils;
import org.codehaus.jackson.map.ObjectMapper;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import lombok.*;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public class CubeQueryContext extends TracksQueriedColumns implements QueryAST, TrackDenormContext {
// Name of the query function that marks a time range, e.g. time_range_in(dt, '2017', '2018').
static final String TIME_RANGE_FUNC = "time_range_in";
// The literal "now"; presumably the keyword for the current time in time range arguments -- TODO confirm with callers.
public static final String NOW = "now";
// NOTE(review): not referenced in the visible part of this class; looks like a placeholder table name -- confirm.
static final String DEFAULT_TABLE = "_default_";
// Root AST node of the query, as passed to the constructor.
@Getter
private final ASTNode ast;
// Hive query block of the query, as passed to the constructor.
@Getter
private final QB qb;
// Name of the query clause this context works on; getClause() resolves it to the first clause name in sorted order.
private String clauseName = null;
// Query configuration, as passed to the constructor.
@Getter
private final Configuration conf;
// Time ranges of the query; the constructor creates this list empty.
@Getter
private final List<TimeRange> timeRanges;
// metadata
// The cube accessed in the query; stays null when no cube has been registered (see hasCubeInQuery()).
@Getter
private CubeInterface cube;
// Dimensions accessed in the query, contains dimensions that are joinchain destinations
// of the joinchains used. Dimensions registered as optional are not added to this set.
@Getter
protected Set<Dimension> dimensions = new HashSet<Dimension>();
// The dimensions accessed by name in the query directly, via tablename.columname
@Getter
protected Set<Dimension> nonChainedDimensions = new HashSet<Dimension>();
// Joinchains accessed in the query, keyed by the lower-cased alias
@Getter
protected Map<String, JoinChain> joinchains = new HashMap<String, JoinChain>();
// Queried measure names; presumably filled by resolvers outside this part of the file -- TODO confirm.
@Getter
private final Set<String> queriedMsrs = new HashSet<String>();
// Queried expression column names; consulted by isCubeMeasure(String) to decide how a column is classified.
@Getter
private final Set<String> queriedExprs = new HashSet<String>();
// Queried time dimension columns, kept in insertion order (LinkedHashSet).
private final Set<String> queriedTimeDimCols = new LinkedHashSet<String>();
// Queried expressions that involve measures; presumably filled by resolvers outside this part of the file.
@Getter
private final Set<String> queriedExprsWithMeasures = new HashSet<String>();
@Getter
// Mapping of a qualified column name to its table alias
private final Map<String, String> colToTableAlias = new HashMap<>();
/**
* This is the set of working Candidates that gets updated during different phases of
* query resolution. Each {@link ContextRewriter} may add/remove/update Candidates in
* this working set, and from the final set of Candidates a single {@link #pickedCandidate}
* is chosen.
*/
@Getter
private final Set<Candidate> candidates = new HashSet<>();
@Getter
// would be added through join chains and de-normalized resolver
protected Map<Aliased<Dimension>, OptionalDimCtx> optionalDimensionMap = new HashMap<>();
// Alias to table object mapping of tables accessed in this query; aliases are lower-cased before insertion
@Getter
private final Map<String, AbstractCubeTable> cubeTbls = new HashMap<>();
/**
* Records a select phrase: appends it to the select phrase list and also registers it as a queried phrase
* through {@link #addQueriedPhrase(QueriedPhraseContext)}, which assigns its position.
*
* @param sel the select phrase to record
*/
void addSelectPhrase(SelectPhraseContext sel) {
selectPhrases.add(sel);
addQueriedPhrase(sel);
}
/**
 * Tells whether the given name equals the actual alias of any recorded select phrase.
 *
 * @param col name to look for; a null value matches only a null alias
 * @return true if some select phrase has {@code col} as its actual alias
 */
boolean isColumnAnAlias(String col) {
  Predicate<Object> sameAsCol = Predicate.isEqual(col);
  for (SelectPhraseContext phrase : selectPhrases) {
    if (sameAsCol.test(phrase.getActualAlias())) {
      return true;
    }
  }
  return false;
}
/**
 * Appends a queried phrase and stamps it with its index in the queried phrase list.
 *
 * @param qur the phrase to record
 */
void addQueriedPhrase(QueriedPhraseContext qur) {
  // The phrase is appended at the end, so its position is the list size before the insertion.
  int position = queriedPhrases.size();
  queriedPhrases.add(qur);
  qur.setPosition(position);
}
// Select phrases in the order they were passed to addSelectPhrase().
@Getter
private final List<SelectPhraseContext> selectPhrases = new ArrayList<>();
// All queried phrases; a phrase's position (set by addQueriedPhrase()) is its index in this list.
@Getter
private final List<QueriedPhraseContext> queriedPhrases = new ArrayList<>();
// Join conditions used in all join expressions
@Getter
private final Map<QBJoinTree, String> joinConds = new HashMap<>();
// Candidate dimension tables per dimension; pickCandidateDimsToQuery() picks the first one found for each.
@Getter
protected final Map<Dimension, Set<CandidateDim>> candidateDims = new HashMap<>();
// query trees
// The constructor takes each of the trees below from the parse info of the selected clause.
@Getter
@Setter
private ASTNode havingAST;
@Getter
@Setter
private ASTNode selectAST;
// Will be set after the Fact is picked and time ranges replaced
@Getter
@Setter
private ASTNode whereAST;
@Getter
@Setter
private ASTNode orderByAST;
// Setter is used in promoting the select when promotion is on.
@Getter
@Setter
private ASTNode groupByAST;
// Metastore client obtained in the constructor; used to classify tables as cubes or dimensions.
@Getter
private CubeMetastoreClient metastoreClient;
// Auto-join context; null until set. isAutoJoinResolved() and getQueryWriter() check it for null.
@Getter
@Setter
private AutoJoinContext autoJoinCtx;
// Expression resolution context; read by isCubeMeasure() and hasAggregates().
@Getter
@Setter
private ExpressionResolver.ExpressionResolverContext exprCtx;
// Denormalization context; set from outside this part of the file.
@Getter
@Setter
private DenormalizationResolver.DenormalizationContext deNormCtx;
// Prune causes recorded per candidate through addStoragePruningMsg().
@Getter
private PruneCauses<Candidate> storagePruningMsgs = new PruneCauses<>();
// Prune causes recorded per dimension through addDimPruningMsgs().
@Getter
private Map<Dimension, PruneCauses<CubeDimensionTable>> dimPruningMsgs =
new HashMap<Dimension, PruneCauses<CubeDimensionTable>>();
// FROM clause string; not assigned in the visible part of this class, only exposed through getter/setter.
@Setter
@Getter
private String fromString;
// Writer for time range clauses; the constructor instantiates the class configured under TIME_RANGE_WRITER_CLASS.
@Getter
private TimeRangeWriter rangeWriter = null;
/**
 * Builds the context for one query: stores the AST, query block and configuration, obtains the metastore
 * client, picks up the clause trees from the parse info, registers the queried tables and instantiates the
 * configured time range writer.
 *
 * @param ast root AST node of the query
 * @param qb Hive query block of the query
 * @param queryConf query configuration
 * @param metastoreConf configuration used to obtain the metastore client
 * @throws LensException if the metastore client cannot be obtained or a queried table cannot be resolved
 */
public CubeQueryContext(ASTNode ast, QB qb, Configuration queryConf, HiveConf metastoreConf)
  throws LensException {
  this.ast = ast;
  this.qb = qb;
  this.conf = queryConf;
  this.clauseName = getClause();
  this.timeRanges = new ArrayList<>();
  try {
    metastoreClient = CubeMetastoreClient.getInstance(metastoreConf);
  } catch (HiveException metastoreError) {
    throw new LensException(metastoreError);
  }
  // The AST fields are still null at this point, so assigning a missing (null) clause tree changes nothing.
  QBParseInfo parseInfo = qb.getParseInfo();
  this.whereAST = parseInfo.getWhrForClause(clauseName);
  this.havingAST = parseInfo.getHavingForClause(clauseName);
  this.orderByAST = parseInfo.getOrderByForClause(clauseName);
  this.groupByAST = parseInfo.getGroupByForClause(clauseName);
  this.selectAST = parseInfo.getSelForClause(clauseName);
  extractMetaTables();
  this.rangeWriter = ReflectionUtils.newInstance(conf.getClass(CubeQueryConfUtil.TIME_RANGE_WRITER_CLASS,
    CubeQueryConfUtil.DEFAULT_TIME_RANGE_WRITER, TimeRangeWriter.class), conf);
}
/**
* @return true if a cube has been registered for this query, i.e. the cube field is non-null
*/
boolean hasCubeInQuery() {
return cube != null;
}
/**
 * Registers every table alias of the query block. Aliases that cannot be registered in the first pass are
 * retried as join chains once all other tables are known; an alias that still cannot be resolved fails the
 * query.
 *
 * @throws LensException with NEITHER_CUBE_NOR_DIMENSION if an alias is neither a table nor a join chain
 */
private void extractMetaTables() throws LensException {
  List<String> queryAliases = new ArrayList<String>(qb.getTabAliases());
  Set<String> unresolved = new HashSet<String>();
  for (String queryAlias : queryAliases) {
    if (!addQueriedTable(queryAlias)) {
      unresolved.add(queryAlias);
    }
  }
  // Second pass: join chains hang off the cube and dimensions registered above.
  for (String pendingAlias : unresolved) {
    if (addJoinChain(pendingAlias, false)) {
      continue;
    }
    log.info("Queried tables do not exist. Missing table:{}", pendingAlias);
    throw new LensException(LensCubeErrorCode.NEITHER_CUBE_NOR_DIMENSION.getLensErrorInfo());
  }
}
/**
 * Tries to resolve the alias as a join chain, looking first at the cube and then at the queried dimensions.
 * When a chain is found, a copy of it is stored under the lower-cased alias and the chain's destination
 * table is registered under the alias as a chained dimension.
 *
 * @param alias alias to resolve; matched against chain names in lower case
 * @param isOptional passed on when registering the destination table
 * @return true if a join chain with that name was found and its destination registered, false if no chain
 *         matches
 * @throws LensException with NEITHER_CUBE_NOR_DIMENSION if the chain's destination table cannot be registered
 */
private boolean addJoinChain(String alias, boolean isOptional) throws LensException {
  String chainName = alias.toLowerCase();
  JoinChain matchedChain = null;
  if (getCube() != null) {
    matchedChain = getCube().getChainByName(chainName);
  }
  if (matchedChain == null) {
    for (Dimension queriedDim : dimensions) {
      matchedChain = queriedDim.getChainByName(chainName);
      if (matchedChain != null) {
        break;
      }
    }
  }
  if (matchedChain == null) {
    return false;
  }
  joinchains.put(chainName, new JoinChain(matchedChain));
  String destTable = matchedChain.getDestTable();
  if (!addQueriedTable(alias, destTable, isOptional, true)) {
    log.info("Queried tables do not exist. Missing tables:{}", destTable);
    throw new LensException(LensCubeErrorCode.NEITHER_CUBE_NOR_DIMENSION.getLensErrorInfo());
  }
  log.info("Added join chain for {}", destTable);
  return true;
}
/**
* Registers the table behind the given alias as a non-optional queried table.
*
* @param alias table alias from the query
* @return true if the alias could be registered as a cube, a dimension or a join chain
* @throws LensException propagated from the two-argument overload this delegates to
*/
boolean addQueriedTable(String alias) throws LensException {
return addQueriedTable(alias, false);
}
/**
 * Registers the table behind the alias, falling back to a join chain of that name when the alias does not
 * name a cube or dimension. The table name is taken from the query block; when the query block has no name
 * for the alias, the alias itself is used as the table name.
 *
 * @param alias table alias from the query
 * @param isOptional whether the table is added as an optional table
 * @return true if the alias was registered either as a table or as a join chain
 * @throws LensException propagated from table or join chain registration
 */
private boolean addQueriedTable(String alias, boolean isOptional) throws LensException {
  String nameFromQuery = qb.getTabNameForAlias(alias);
  String tblName = nameFromQuery != null ? nameFromQuery : alias;
  // The join chain lookup only runs when the direct registration did not succeed.
  return addQueriedTable(alias, tblName, isOptional, false) || addJoinChain(alias, isOptional);
}
/**
 * Registers a cube or dimension under the given alias.
 * <p>
 * Terminology: a <em>destination table</em> is a table whose columns are queried; an <em>intermediate
 * table</em> is only used as a link between the cube and a destination table.
 *
 * @param alias alias to register the table under; lower-cased before use
 * @param tblName name of the table to look up in the metastore
 * @param isOptional pass false for a destination table and true for an intermediate table; a dimension is
 *                   added to {@code dimensions} only when this is false
 * @param isChainedDimension pass true when adding the dimension as a joinchain destination, false when the
 *                           table is mentioned by name in the user query; only in the latter case is the
 *                           dimension added to {@code nonChainedDimensions}
 * @return true if the alias is (or already was) registered; false if the table is neither a cube nor a
 *         dimension, or the metastore lookup failed
 * @throws LensException with MORE_THAN_ONE_CUBE if a cube is already registered and {@code tblName} names a
 *                       different cube
 */
private boolean addQueriedTable(String alias, String tblName, boolean isOptional, boolean isChainedDimension)
  throws LensException {
  alias = alias.toLowerCase();
  if (cubeTbls.containsKey(alias)) {
    return true;
  }
  boolean secondCubeQueried = false;
  try {
    if (metastoreClient.isCube(tblName)) {
      if (cube != null && !cube.getName().equalsIgnoreCase(tblName)) {
        // Remember the conflict and report it after the try block: thrown in here, the exception would be
        // swallowed by the catch below and callers would see a misleading "not a cube or dimension" error.
        secondCubeQueried = true;
      } else {
        cube = metastoreClient.getCube(tblName);
        cubeTbls.put(alias, (AbstractCubeTable) cube);
      }
    } else if (metastoreClient.isDimension(tblName)) {
      Dimension dim = metastoreClient.getDimension(tblName);
      if (!isOptional) {
        dimensions.add(dim);
      }
      if (!isChainedDimension) {
        nonChainedDimensions.add(dim);
      }
      cubeTbls.put(alias, dim);
    } else {
      return false;
    }
  } catch (LensException e) {
    // Best effort: a failed metastore lookup is reported as "not added" so callers can try join chains.
    log.debug("Could not resolve table {} for alias {} from the metastore", tblName, alias, e);
    return false;
  }
  if (secondCubeQueried) {
    throw new LensException(LensCubeErrorCode.MORE_THAN_ONE_CUBE.getLensErrorInfo(), cube.getName(), tblName);
  }
  return true;
}
/**
 * @return true if an auto-join context exists and reports its joins as resolved
 */
boolean isAutoJoinResolved() {
  if (autoJoinCtx == null) {
    return false;
  }
  return autoJoinCtx.isJoinsResolved();
}
/**
 * @return the queried cube itself when it is a {@link Cube}; otherwise the cube is cast to
 *         {@link DerivedCube} and its parent is returned
 */
Cube getBaseCube() {
  if (cube instanceof Cube) {
    return (Cube) cube;
  }
  DerivedCube derived = (DerivedCube) cube;
  return derived.getParent();
}
/**
 * @return the distinct partition columns of all time ranges of the query
 */
Set<String> getPartitionColumnsQueried() {
  Set<String> partitionColumns = new HashSet<>();
  for (TimeRange range : getTimeRanges()) {
    partitionColumns.add(range.getPartitionColumn());
  }
  return partitionColumns;
}
// map of ref column in query to set of Dimension that have the column - which are added as optional dims.
// Keys are the lower-cased column names (see updateRefColDim).
@Getter
private Map<String, Set<Aliased<Dimension>>> refColToDim = Maps.newHashMap();
/**
 * Records that the given aliased dimension provides the reference column, keyed by the lower-cased column
 * name.
 */
private void updateRefColDim(String col, Aliased<Dimension> dim) {
  String key = col.toLowerCase();
  Set<Aliased<Dimension>> dimsForCol = refColToDim.get(key);
  if (dimsForCol == null) {
    dimsForCol = Sets.newHashSet();
    refColToDim.put(key, dimsForCol);
  }
  dimsForCol.add(dim);
}
/**
* Key identifying an expression column as queried through a particular table alias. Lombok generates the
* accessors, equals/hashCode and the all-arguments constructor, whose parameter order follows the field
* order below: expression column first, alias second.
*/
@Data
@AllArgsConstructor
static class QueriedExprColumn {
// name of the expression column
private String exprCol;
// alias of the table the expression column was queried through
private String alias;
}
// map of expression column in query to set of Dimension that are accessed in the expression column - which are added
// as optional dims. Keys pair the expression column with the alias of the table it was queried through.
@Getter
private Map<QueriedExprColumn, Set<Aliased<Dimension>>> exprColToDim = Maps.newHashMap();
/**
 * Records that the given aliased dimension is accessed by the expression column queried through
 * {@code tblAlias}.
 */
private void updateExprColDim(String tblAlias, String col, Aliased<Dimension> dim) {
  QueriedExprColumn key = new QueriedExprColumn(col, tblAlias);
  Set<Aliased<Dimension>> dimsForExpr = exprColToDim.get(key);
  if (dimsForExpr == null) {
    dimsForExpr = Sets.newHashSet();
    exprColToDim.put(key, dimsForExpr);
  }
  dimsForExpr.add(dim);
}
/**
 * Context of an optional dimension. A dimension is optional when the user does not query it directly but
 * it is needed either by a candidate table to fetch a denormalized field through a reference, or as part of
 * a join chain.
 */
@ToString
static class OptionalDimCtx {
  // columns of the dimension needed by the candidates below
  Set<String> colQueried = new HashSet<>();
  // candidate tables that need this dimension
  Set<CandidateTable> requiredForCandidates = new HashSet<>();
  // whether the dimension is required in a join chain; starts out false
  boolean isRequiredInJoinChain;
}
/**
* Adds an optional dimension that is needed only for joining: no candidate, cube column or queried columns
* are associated with it.
*
* @param alias alias of the dimension
* @param isRequired whether the dimension is required in a join chain
* @throws LensException if the alias cannot be registered as a queried table
*/
void addOptionalJoinDimTable(String alias, boolean isRequired) throws LensException {
// The explicit (String[]) cast passes null as the whole varargs array of the six-argument overload.
addOptionalDimTable(alias, null, isRequired, null, false, (String[]) null);
}
/**
* Adds an optional dimension that is accessed by a queried expression column. The dimension is recorded as
* not required in a join chain and the expression is tracked in the expression-column map, not the
* reference-column map.
*
* @param dimAlias alias of the dimension to add
* @param queriedExpr the expression column that accesses the dimension
* @param srcTableAlias alias of the table the expression column was queried through
* @param candidate candidate table that needs the dimension, may be null
* @param cols columns of the dimension that are needed
* @throws LensException if the alias cannot be registered as a queried table
*/
void addOptionalExprDimTable(String dimAlias, String queriedExpr, String srcTableAlias,
CandidateTable candidate, String... cols) throws LensException {
addOptionalDimTable(dimAlias, candidate, false, queriedExpr, false, srcTableAlias, cols);
}
/**
* Adds an optional dimension without a source table alias; delegates to the private overload with a null
* table alias.
*
* @param alias alias of the dimension to add
* @param candidate candidate table that needs the dimension, may be null
* @param isRequiredInJoin whether the dimension is required in a join chain
* @param cubeCol cube column that led to this dimension, may be null
* @param isRef true if {@code cubeCol} is a reference column, false if it is an expression column
* @param cols columns of the dimension that are needed, may be null
* @throws LensException if the alias cannot be registered as a queried table
*/
void addOptionalDimTable(String alias, CandidateTable candidate, boolean isRequiredInJoin, String cubeCol,
boolean isRef, String... cols) throws LensException {
addOptionalDimTable(alias, candidate, isRequiredInJoin, cubeCol, isRef, null, cols);
}
/**
 * Registers the dimension behind {@code alias} as an optional table and updates its optional-dimension
 * context.
 *
 * @param alias alias of the dimension; lower-cased before use
 * @param candidate candidate table that needs the dimension; recorded together with {@code cols} only when
 *                  both are non-null
 * @param isRequiredInJoin whether the dimension is required in a join chain; once recorded as required, the
 *                         flag is never reset
 * @param cubeCol cube column that led to this dimension, may be null
 * @param isRef true to track {@code cubeCol} as a reference column, false to track it as an expression
 *              column queried through {@code tableAlias}
 * @param tableAlias alias of the table the expression column was queried through
 * @param cols columns of the dimension that are needed, may be null
 * @throws LensException with QUERIED_TABLE_NOT_FOUND if the alias cannot be registered
 */
private void addOptionalDimTable(String alias, CandidateTable candidate, boolean isRequiredInJoin, String cubeCol,
  boolean isRef, String tableAlias, String... cols) throws LensException {
  alias = alias.toLowerCase();
  if (!addQueriedTable(alias, true)) {
    throw new LensException(LensCubeErrorCode.QUERIED_TABLE_NOT_FOUND.getLensErrorInfo(), alias);
  }
  Dimension optionalDim = (Dimension) cubeTbls.get(alias);
  Aliased<Dimension> aliasedDim = Aliased.create(optionalDim, alias);
  OptionalDimCtx optDim = optionalDimensionMap.get(aliasedDim);
  if (optDim == null) {
    optDim = new OptionalDimCtx();
    optionalDimensionMap.put(aliasedDim, optDim);
  }
  if (candidate != null && cols != null) {
    optDim.colQueried.addAll(Arrays.asList(cols));
    optDim.requiredForCandidates.add(candidate);
  }
  if (cubeCol != null && isRef) {
    updateRefColDim(cubeCol, aliasedDim);
  } else if (cubeCol != null) {
    updateExprColDim(tableAlias, cubeCol, aliasedDim);
  }
  optDim.isRequiredInJoinChain = optDim.isRequiredInJoinChain || isRequiredInJoin;
  if (log.isDebugEnabled()) {
    String columnSuffix = cubeCol == null ? "" : " for column:" + cubeCol;
    log.debug("Adding optional dimension:{} optDim:{} {} isRef:{}", aliasedDim, optDim, columnSuffix, isRef);
  }
}
/**
* Looks up the table registered under the given alias. Aliases are stored lower-cased, while this lookup
* uses the alias exactly as passed in.
*
* @param alias table alias
* @return the registered table, or null if nothing is registered under that alias
*/
public AbstractCubeTable getCubeTableForAlias(String alias) {
return cubeTbls.get(alias);
}
/**
 * Returns the name of the clause this context works on. On first use the name is resolved to the first
 * clause name of the parse info in sorted order and cached.
 */
private String getClause() {
  if (clauseName != null) {
    return clauseName;
  }
  TreeSet<String> sortedClauseNames = new TreeSet<String>(qb.getParseInfo().getClauseNames());
  clauseName = sortedClauseNames.first();
  return clauseName;
}
/**
* @return the candidate dimension tables per dimension; this is the internal map itself, not a copy
*/
public Map<Dimension, Set<CandidateDim>> getCandidateDimTables() {
return candidateDims;
}
/**
 * Records the same prune cause for every candidate of the collection.
 *
 * @param candidateCollection candidates being pruned
 * @param pruneCause the cause to record for each of them
 */
void addCandidatePruningMsg(Collection<Candidate> candidateCollection, CandidateTablePruneCause pruneCause) {
  candidateCollection.forEach(pruned -> addCandidatePruningMsg(pruned, pruneCause));
}
/**
 * Records a prune cause for a candidate. A segmentation candidate is recorded as such; for any other
 * candidate the cause is recorded against each of its storage candidates.
 *
 * @param cand candidate being pruned
 * @param pruneCause the cause to record
 */
void addCandidatePruningMsg(Candidate cand, CandidateTablePruneCause pruneCause) {
  if (!(cand instanceof SegmentationCandidate)) {
    Set<StorageCandidate> storages = CandidateUtil.getStorageCandidates(cand);
    for (StorageCandidate storage : storages) {
      addStoragePruningMsg(storage, pruneCause);
    }
    return;
  }
  addStoragePruningMsg(cand, pruneCause);
}
/**
 * Logs and records each of the given prune causes against the candidate, in the order given.
 *
 * @param sc candidate being pruned
 * @param factPruningMsgs causes to record
 */
void addStoragePruningMsg(Candidate sc, CandidateTablePruneCause... factPruningMsgs) {
  for (int i = 0; i < factPruningMsgs.length; i++) {
    CandidateTablePruneCause cause = factPruningMsgs[i];
    log.info("Pruning Storage {} with cause: {}", sc, cause);
    storagePruningMsgs.addPruningMsg(sc, cause);
  }
}
/**
 * Records a prune cause for a dimension table, creating the per-dimension prune cause holder on first use.
 *
 * @param dim dimension the table belongs to
 * @param dimtable dimension table being pruned
 * @param msg the cause to record
 */
public void addDimPruningMsgs(Dimension dim, CubeDimensionTable dimtable, CandidateTablePruneCause msg) {
  PruneCauses<CubeDimensionTable> causesForDim = dimPruningMsgs.get(dim);
  if (causesForDim == null) {
    causesForDim = new PruneCauses<>();
    dimPruningMsgs.put(dim, causesForDim);
  }
  causesForDim.addPruningMsg(dimtable, msg);
}
/**
* Convenience overload that resolves the alias for the name of the given object.
*
* @param named object whose name is looked up
* @return the alias as resolved by {@link #getAliasForTableName(String)}
*/
public String getAliasForTableName(Named named) {
return getAliasForTableName(named.getName());
}
/**
 * Resolves the alias under which a table is known in this query. Table names are compared ignoring case.
 * The aliases of the query block are searched first, then the tables registered in this context; when
 * neither knows the table, the lower-cased table name is used as the alias.
 *
 * @param tableName name of the table
 * @return the alias for the table
 */
public String getAliasForTableName(String tableName) {
  for (String queryAlias : qb.getTabAliases()) {
    String aliasedTable = qb.getTabNameForAlias(queryAlias);
    if (aliasedTable == null) {
      continue;
    }
    if (aliasedTable.equalsIgnoreCase(tableName)) {
      return queryAlias;
    }
  }
  // get alias from cubeTbls
  for (Map.Entry<String, AbstractCubeTable> registered : cubeTbls.entrySet()) {
    String registeredName = registered.getValue().getName();
    if (registeredName.equalsIgnoreCase(tableName)) {
      return registered.getKey();
    }
  }
  return tableName.toLowerCase();
}
/**
* Dumps the query AST, the query block and its parse info to the debug log. Does nothing when debug logging
* is disabled, so the dump is only built when it will actually be written.
*/
public void print() {
if (!log.isDebugEnabled()) {
return;
}
// Query-block level counters.
StringBuilder builder = new StringBuilder()
.append("ASTNode:").append(ast.dump()).append("\n")
.append("QB:")
.append("\n numJoins:").append(qb.getNumJoins())
.append("\n numGbys:").append(qb.getNumGbys())
.append("\n numSels:").append(qb.getNumSels())
.append("\n numSelDis:").append(qb.getNumSelDi())
.append("\n aliasToTabs:");
Set<String> tabAliases = qb.getTabAliases();
for (String alias : tabAliases) {
builder.append("\n\t").append(alias).append(":").append(qb.getTabNameForAlias(alias));
}
builder.append("\n aliases:");
for (String alias : qb.getAliases()) {
builder.append(alias);
builder.append(", ");
}
// NOTE(review): "id:" and "isQuery:" are appended without a separator, so they run on after the alias list.
builder
.append("id:").append(qb.getId())
.append("isQuery:").append(qb.getIsQuery())
.append("\n QBParseInfo");
QBParseInfo parseInfo = qb.getParseInfo();
builder
.append("\n isSubQ: ").append(parseInfo.getIsSubQ())
.append("\n alias: ").append(parseInfo.getAlias());
if (parseInfo.getJoinExpr() != null) {
builder.append("\n joinExpr: ").append(parseInfo.getJoinExpr().dump());
}
builder.append("\n hints: ").append(parseInfo.getHints());
builder.append("\n aliasToSrc: ");
for (String alias : tabAliases) {
builder.append("\n\t").append(alias).append(": ").append(parseInfo.getSrcForAlias(alias).dump());
}
TreeSet<String> clauses = new TreeSet<String>(parseInfo.getClauseNames());
// NOTE(review): getClauseNamesForDest() takes no clause argument, so the same value is printed for each clause.
for (String clause : clauses) {
builder.append("\n\t").append(clause).append(": ").append(parseInfo.getClauseNamesForDest());
}
// Only the first clause in sorted order is dumped in detail below.
String clause = clauses.first();
if (parseInfo.getWhrForClause(clause) != null) {
builder.append("\n whereexpr: ").append(parseInfo.getWhrForClause(clause).dump());
}
if (parseInfo.getGroupByForClause(clause) != null) {
builder.append("\n groupby expr: ").append(parseInfo.getGroupByForClause(clause).dump());
}
if (parseInfo.getSelForClause(clause) != null) {
builder.append("\n sel expr: ").append(parseInfo.getSelForClause(clause).dump());
}
if (parseInfo.getHavingForClause(clause) != null) {
builder.append("\n having expr: ").append(parseInfo.getHavingForClause(clause).dump());
}
if (parseInfo.getDestLimit(clause) != null) {
builder.append("\n limit: ").append(parseInfo.getDestLimit(clause));
}
if (parseInfo.getAllExprToColumnAlias() != null && !parseInfo.getAllExprToColumnAlias().isEmpty()) {
builder.append("\n exprToColumnAlias:");
for (Map.Entry<ASTNode, String> entry : parseInfo.getAllExprToColumnAlias().entrySet()) {
builder.append("\n\t expr: ").append(entry.getKey().dump()).append(" ColumnAlias: ").append(entry.getValue());
}
}
if (parseInfo.getAggregationExprsForClause(clause) != null) {
builder.append("\n aggregateexprs:");
for (Map.Entry<String, ASTNode> entry : parseInfo.getAggregationExprsForClause(clause).entrySet()) {
builder.append("\n\t key: ").append(entry.getKey()).append(" expr: ").append(entry.getValue().dump());
}
}
if (parseInfo.getDistinctFuncExprsForClause(clause) != null) {
builder.append("\n distinctFuncExprs:");
for (ASTNode entry : parseInfo.getDistinctFuncExprsForClause(clause)) {
builder.append("\n\t expr: ").append(entry.dump());
}
}
// The join tree, when present, is dumped recursively by printJoinTree.
if (qb.getQbJoinTree() != null) {
builder.append("\n\n JoinTree");
QBJoinTree joinTree = qb.getQbJoinTree();
printJoinTree(joinTree, builder);
}
if (qb.getParseInfo().getDestForClause(clause) != null) {
builder.append("\n Destination:")
.append("\n\t dest expr:").append(qb.getParseInfo().getDestForClause(clause).dump());
}
log.debug(builder.toString());
}
/**
 * Appends a textual dump of the join tree to the builder, recursing into the join source when there is
 * one.
 *
 * @param joinTree the join tree to dump
 * @param builder receives the dump
 */
private void printJoinTree(QBJoinTree joinTree, StringBuilder builder) {
  builder.append("leftAlias:").append(joinTree.getLeftAlias());
  String[] leftAliases = joinTree.getLeftAliases();
  if (leftAliases != null) {
    builder.append("\n leftAliases:");
    for (String leftAlias : leftAliases) {
      builder.append("\n\t ").append(leftAlias);
    }
  }
  String[] rightAliases = joinTree.getRightAliases();
  if (rightAliases != null) {
    builder.append("\n rightAliases:");
    for (String rightAlias : rightAliases) {
      builder.append("\n\t ").append(rightAlias);
    }
  }
  QBJoinTree nestedSource = joinTree.getJoinSrc();
  if (nestedSource != null) {
    builder.append("\n JoinSrc: {");
    printJoinTree(nestedSource, builder);
    builder.append("\n }");
  }
  String[] baseSources = joinTree.getBaseSrc();
  if (baseSources != null) {
    builder.append("\n baseSrcs:");
    for (String baseSource : baseSources) {
      builder.append("\n\t ").append(baseSource);
    }
  }
  builder.append("\n noOuterJoin: ").append(joinTree.getNoOuterJoin());
  builder.append("\n noSemiJoin: ").append(joinTree.getNoSemiJoin());
  builder.append("\n mapSideJoin: ").append(joinTree.isMapSideJoin());
  JoinCond[] conditions = joinTree.getJoinCond();
  if (conditions == null) {
    return;
  }
  builder.append("\n joinConds:");
  for (JoinCond condition : conditions) {
    builder.append("\n\t left: ").append(condition.getLeft());
    builder.append(" right: ").append(condition.getRight());
    builder.append(" type:").append(condition.getJoinType());
    builder.append(" preserved:").append(condition.getPreserved());
  }
}
/**
* @return the select AST rendered by {@code HQLParser.getString}; unlike the other clause getters of this
*         class, the AST is passed on without a null check
*/
public String getSelectString() {
return HQLParser.getString(selectAST);
}
/**
* Intentionally does nothing: the where string is always derived from the where AST (see
* {@link #getWhereString()}), so the argument is ignored.
*
* @param whereString ignored
*/
public void setWhereString(String whereString) {
//NO OP
}
/**
 * @return the where AST rendered as a string, or null when the query has no where AST
 */
public String getWhereString() {
  return whereAST == null ? null : HQLParser.getString(whereAST);
}
/**
 * @return the group-by AST rendered as a string, or null when the query has no group-by AST
 */
public String getGroupByString() {
  return groupByAST == null ? null : HQLParser.getString(groupByAST);
}
/**
 * @return the having AST rendered as a string, or null when the query has no having AST
 */
public String getHavingString() {
  return havingAST == null ? null : HQLParser.getString(havingAST);
}
/**
* @return the join expression of the query block's parse info; getQBFromString treats a null value as
*         "the query has no join"
*/
public ASTNode getJoinAST() {
return qb.getParseInfo().getJoinExpr();
}
/**
* Intentionally does nothing: the join AST is always read from the query block's parse info (see
* {@link #getJoinAST()}), so the argument is ignored.
*
* @param node ignored
*/
@Override
public void setJoinAST(ASTNode node) {
//NO-OP
}
/**
 * @return the order-by AST rendered as a string, or null when the query has no order-by AST
 */
public String getOrderByString() {
  return orderByAST == null ? null : HQLParser.getString(orderByAST);
}
/**
* @return the destination limit stored in the parse info for this context's clause; print() treats a null
*         value as "no limit"
*/
public Integer getLimitValue() {
return qb.getParseInfo().getDestLimit(getClause());
}
/**
* Stores the destination limit for this context's clause in the parse info.
*
* @param value the limit to store
*/
public void setLimitValue(Integer value) {
// NOTE(review): the literal 0 is presumably the offset argument of setDestLimit -- confirm against Hive's QBParseInfo.
qb.getParseInfo().setDestLimit(getClause(), 0, value);
}
/**
 * Returns the storage string to use in the FROM clause for the table behind the alias: taken from the
 * storage candidate when the alias refers to a cube, otherwise from the candidate picked for the
 * dimension.
 */
private String getStorageStringWithAlias(StorageCandidate candidate, Map<Dimension,
  CandidateDim> dimsToQuery, String alias) {
  AbstractCubeTable aliasedTable = cubeTbls.get(alias);
  if (aliasedTable instanceof CubeInterface) {
    return candidate.getAliasForTable(alias);
  }
  return dimsToQuery.get(aliasedTable).getStorageString(alias);
}
/**
 * Returns the storage where clause of the candidate picked for the dimension registered under the alias.
 */
private String getWhereClauseWithAlias(Map<Dimension, CandidateDim> dimsToQuery, String alias) {
  CandidateDim pickedDim = dimsToQuery.get(cubeTbls.get(alias));
  return StorageUtil.getWhereClause(pickedDim, alias);
}
/**
 * Builds the FROM clause string for the query.
 * <ul>
 * <li>With a join expression, the string is built from the query block's join tree.</li>
 * <li>Without a join and with a storage candidate, no dimension may be queried and the candidate's table
 * string for the cube alias is used.</li>
 * <li>Without a join and without a candidate, exactly one dimension must be queried and its picked
 * candidate's storage string is used.</li>
 * </ul>
 *
 * @param candidate storage candidate picked for the cube, or null for a dimension-only query
 * @param dimsToQuery candidate picked for each dimension
 * @return the FROM clause string
 * @throws LensException with NO_JOIN_CONDITION_AVAILABLE when several tables are queried without a join
 */
String getQBFromString(StorageCandidate candidate, Map<Dimension, CandidateDim> dimsToQuery) throws LensException {
  if (getJoinAST() != null) {
    StringBuilder joinedFrom = new StringBuilder();
    getQLString(qb.getQbJoinTree(), joinedFrom, candidate, dimsToQuery);
    return joinedFrom.toString();
  }
  if (candidate == null) {
    if (dimensions.size() != 1) {
      throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo());
    }
    Dimension onlyDim = dimensions.iterator().next();
    return dimsToQuery.get(onlyDim).getStorageString(getAliasForTableName(onlyDim.getName()));
  }
  if (!dimensions.isEmpty()) {
    throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo());
  }
  return candidate.getAliasForTable(getAliasForTableName(cube.getName()));
}
/**
 * Appends the join expression for the given join tree to the builder: left side, join type, right side
 * and the ON condition. A side without a base source is rendered by recursing into the join source.
 * Storage filters of dimensions joined directly at this level are pushed into the ON condition, and the
 * dimension candidate is told that its where clause has been added.
 *
 * @throws LensException with NO_JOIN_CONDITION_AVAILABLE if no join condition is recorded for the tree
 */
private void getQLString(QBJoinTree joinTree, StringBuilder builder, StorageCandidate candidate,
  Map<Dimension, CandidateDim> dimsToQuery) throws LensException {
  List<String> directlyJoined = new ArrayList<>();
  // left side of the join
  if (joinTree.getBaseSrc()[0] != null) {
    String leftAlias = joinTree.getBaseSrc()[0].toLowerCase();
    builder.append(getStorageStringWithAlias(candidate, dimsToQuery, leftAlias));
    directlyJoined.add(leftAlias);
  } else if (joinTree.getJoinSrc() != null) {
    getQLString(joinTree.getJoinSrc(), builder, candidate, dimsToQuery);
  }
  if (joinTree.getJoinCond() != null) {
    builder.append(JoinUtils.getJoinTypeStr(joinTree.getJoinCond()[0].getJoinType()));
    builder.append(" JOIN ");
  }
  // right side of the join
  if (joinTree.getBaseSrc()[1] != null) {
    String rightAlias = joinTree.getBaseSrc()[1].toLowerCase();
    builder.append(getStorageStringWithAlias(candidate, dimsToQuery, rightAlias));
    directlyJoined.add(rightAlias);
  } else if (joinTree.getJoinSrc() != null) {
    getQLString(joinTree.getJoinSrc(), builder, candidate, dimsToQuery);
  }
  String onCondition = joinConds.get(joinTree);
  if (onCondition == null) {
    throw new LensException(LensCubeErrorCode.NO_JOIN_CONDITION_AVAILABLE.getLensErrorInfo());
  }
  builder.append(" ON ");
  builder.append(onCondition);
  // Storage filters for dimensions have to be pushed into the join conditions; directlyJoined holds the
  // aliases joined at this level of the tree.
  for (String joinedAlias : directlyJoined) {
    AbstractCubeTable joinedTable = cubeTbls.get(joinedAlias);
    if (!(joinedTable instanceof Dimension)) {
      continue;
    }
    DimOnlyHQLContext.appendWhereClause(builder, getWhereClauseWithAlias(dimsToQuery, joinedAlias), true);
    dimsToQuery.get(joinedTable).setWhereClauseAdded(joinedAlias);
  }
}
/**
 * Stores the non-existing partitions as a JSON string in the query configuration, or removes the
 * configuration entry when the map is empty.
 *
 * @param nonExistingParts the non-existing partitions to store
 * @throws LensException if the map cannot be serialized
 */
void setNonexistingParts(Map<String, Set<String>> nonExistingParts) throws LensException {
  if (nonExistingParts.isEmpty()) {
    conf.unset(NON_EXISTING_PARTITIONS);
    return;
  }
  ObjectMapper jsonMapper = new ObjectMapper();
  try (ByteArrayOutputStream jsonBytes = new ByteArrayOutputStream()) {
    jsonMapper.writeValue(jsonBytes, nonExistingParts);
    conf.set(NON_EXISTING_PARTITIONS, jsonBytes.toString("UTF-8"));
  } catch (Exception e) {
    throw new LensException("Error writing non existing parts", e);
  }
}
/**
* @return the value stored under NON_EXISTING_PARTITIONS in the query configuration, which
*         setNonexistingParts writes as a JSON string
*/
String getNonExistingParts() {
return conf.get(NON_EXISTING_PARTITIONS);
}
/**
 * Picks, for each of the given dimensions, the first candidate dimension table available.
 *
 * @param dimensions dimensions that need a table to query
 * @return the picked candidate per dimension; empty when no dimensions are given
 * @throws NoCandidateDimAvailableException if some dimension has no candidate left; it carries the prune
 *         causes recorded for that dimension (empty when none were recorded)
 * @throws LensException if the prune causes cannot be serialized for logging
 */
Map<Dimension, CandidateDim> pickCandidateDimsToQuery(Set<Dimension> dimensions) throws LensException {
  Map<Dimension, CandidateDim> dimsToQuery = new HashMap<Dimension, CandidateDim>();
  for (Dimension dim : dimensions) {
    Set<CandidateDim> availableDims = candidateDims.get(dim);
    if (availableDims != null && !availableDims.isEmpty()) {
      CandidateDim cdim = availableDims.iterator().next();
      log.info("Available candidate dims are:{}, picking up {} for querying.", availableDims,
        cdim.dimtable);
      dimsToQuery.put(dim, cdim);
      continue;
    }
    // A dimension may have run out of candidates without any prune cause having been recorded. Treat that
    // like an empty set of causes so the caller gets the intended exception, not a NullPointerException.
    PruneCauses<CubeDimensionTable> causes = dimPruningMsgs.get(dim);
    if (causes == null) {
      causes = new PruneCauses<>();
    }
    if (!causes.isEmpty()) {
      try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
        ObjectMapper mapper = new ObjectMapper();
        mapper.writeValue(out, causes.getJsonObject());
        log.info("No candidate dim found because: {}", out.toString("UTF-8"));
      } catch (Exception e) {
        throw new LensException("Error writing dim pruning messages", e);
      }
    }
    log.error("Query rewrite failed due to NO_CANDIDATE_DIM_AVAILABLE, Cause {}", causes.toJsonObject());
    throw new NoCandidateDimAvailableException(causes);
  }
  return dimsToQuery;
}
/**
 * Picks the candidate to query. For a query without a cube nothing is picked. Otherwise, unless a
 * candidate was picked earlier, the first candidate of the working set is picked and remembered.
 *
 * @return the picked candidate; null for a query without a cube unless one was picked earlier
 * @throws LensException NoCandidateFactAvailableException if the query has a cube but no candidate is left
 */
Candidate pickCandidateToQuery() throws LensException {
  if (!hasCubeInQuery()) {
    return pickedCandidate;
  }
  if (pickedCandidate == null) {
    Iterator<Candidate> remaining = candidates.iterator();
    if (remaining.hasNext()) {
      Candidate first = remaining.next();
      log.info("Available Candidates:{}, picking up Candidate: {} for querying", candidates, first);
      pickedCandidate = first;
    }
  }
  if (pickedCandidate == null) {
    throwNoCandidateFactException();
  }
  return pickedCandidate;
}
/**
* Logs the recorded storage prune causes as JSON and fails the rewrite.
*
* @throws LensException always; the thrown exception is a NoCandidateFactAvailableException built from
*         this context
*/
void throwNoCandidateFactException() throws LensException {
log.error("Query rewrite failed due to NO_CANDIDATE_FACT_AVAILABLE, Cause {}", storagePruningMsgs.toJsonString());
throw new NoCandidateFactAvailableException(this);
}
// Writer context created by getQueryWriter() for the picked candidate and dimensions.
@Getter
private QueryWriterContext queryWriterContext;
// Query writer built once by getQueryWriter() and reused afterwards.
private QueryWriter queryWriter;
// Candidate chosen by pickCandidateToQuery(); null until a candidate has been picked.
@Getter
private Candidate pickedCandidate;
// Dimension candidates chosen for the query; assigned in getQueryWriter().
@Getter
private Collection<CandidateDim> pickedDimTables;
/**
 * Replaces every time range function of the where AST of the given context with the where clause that the
 * storage candidate produces for that range. Ranges for which the candidate produces a blank clause are
 * left untouched. Does nothing when no context is given.
 *
 * @param sc HQL context of the storage candidate, may be null
 * @throws LensException if a range clause cannot be produced or parsed
 */
void addRangeClauses(StorageCandidateHQLContext sc) throws LensException {
  if (sc == null) {
    return;
  }
  // resolve timerange positions and replace it by corresponding where clause
  for (TimeRange range : getTimeRanges()) {
    String rangeWhere = sc.getStorageCandidate().getTimeRangeWhereClasue(rangeWriter, range);
    if (StringUtils.isNotBlank(rangeWhere)) {
      ASTNode rangeClauseAST = HQLParser.parseExpr(rangeWhere, conf);
      updateTimeRangeNode(sc.getQueryAst().getWhereAST(), range.getAstNode(), rangeClauseAST);
    }
  }
}
/**
 * Finds the matching time range node below {@code root} and replaces it with {@code updatedTimeRange}.
 * A time range node looks like this:
 * <pre>
 * time_range_in(dt, '2017', '2018') ->
 * TOK_FUNCTION [TOK_FUNCTION] (l5c2p37) {
 *   time_range_in [Identifier] (l6c1p37)$
 *   TOK_TABLE_OR_COL [TOK_TABLE_OR_COL] (l6c2p51) {
 *     dt [Identifier] (l7c1p51)$
 *   }
 *   '2017' [StringLiteral] (l6c3p55)$
 *   '2018' [StringLiteral] (l6c4p63)$
 * }
 * </pre>
 * A child is a match when its type, child count and first child's text agree with the time range function
 * node and both render to the same string (ignoring case). The search of a node's children stops at the
 * first match; children that do not match are searched recursively.
 *
 * @param root node whose descendants are searched
 * @param timeRangeFuncNode the time range function node to look for
 * @param updatedTimeRange the node to put in its place
 */
private void updateTimeRangeNode(ASTNode root, ASTNode timeRangeFuncNode, ASTNode updatedTimeRange) {
  if (root.getChildCount() == 0) {
    return;
  }
  for (Node child : root.getChildren()) {
    ASTNode current = (ASTNode) child;
    // Cheap structural comparison first; there can be more than one time range, hence the string check.
    boolean sameShape = current.getType() == timeRangeFuncNode.getType()
      && current.getChildCount() == timeRangeFuncNode.getChildCount()
      && current.getChild(0).getText().equalsIgnoreCase(timeRangeFuncNode.getChild(0).getText());
    if (sameShape && HQLParser.getString(timeRangeFuncNode).equalsIgnoreCase(HQLParser.getString(current))) {
      current.getParent().setChild(current.getChildIndex(), updatedTimeRange);
      return;
    }
    updateTimeRangeNode(current, timeRangeFuncNode, updatedTimeRange);
  }
}
/**
 * Creates the writer context for the picked candidate and dimensions.
 *
 * @param cand the picked candidate, or null for a dimension-only query
 * @param dimsToQuery candidate picked for each dimension
 * @return the candidate's own writer context, or a dimension-only context when there is no candidate
 * @throws LensException if the context cannot be created
 */
public QueryWriterContext getQueryWriterContext(Candidate cand, Map<Dimension, CandidateDim> dimsToQuery)
  throws LensException {
  if (cand != null) {
    return cand.toQueryWriterContext(dimsToQuery, this);
  }
  return new DimOnlyHQLContext(dimsToQuery, this);
}
/**
 * Returns the query writer for this query, building it on first use: picks the candidate and dimension
 * tables, creates the writer context, adds the dimensions needed for expressions, denormalized columns and
 * auto joins (pruning join paths before and after the denormalization tables are picked), and finally
 * updates the from string and dimension filters.
 *
 * @return the query writer, built once and reused on later calls
 * @throws LensException if no candidate is available or any of the steps fails
 */
QueryWriter getQueryWriter() throws LensException {
  if (queryWriter != null) {
    return queryWriter;
  }
  Candidate picked = pickCandidateToQuery();
  Map<Dimension, CandidateDim> dimsToQuery = pickCandidateDimsToQuery(dimensions);
  log.info("Candidate: {}, DimsToQuery: {}", picked, dimsToQuery);
  queryWriterContext = getQueryWriterContext(picked, dimsToQuery);
  // prune join paths for picked fact and dimensions
  if (picked != null && autoJoinCtx != null) {
    autoJoinCtx.pruneAllPaths(cube, picked.getColumns(), dimsToQuery);
  }
  // pick dimension tables required during expression expansion for the picked fact and dimensions
  queryWriterContext.addExpressionDims();
  // pick denorm tables for the picked fact and dimensions
  queryWriterContext.addDenormDims();
  // Prune join paths once more, now that denorm tables are picked
  if (picked != null && autoJoinCtx != null) {
    autoJoinCtx.pruneAllPaths(cube, picked.getColumns(), dimsToQuery);
  }
  queryWriterContext.addAutoJoinDims();
  pickedDimTables = dimsToQuery.values();
  //Set From string and time range clause
  queryWriterContext.updateFromString();
  //update dim filter with fact filter
  queryWriterContext.updateDimFilterWithFactFilter();
  queryWriter = queryWriterContext.toQueryWriter();
  return queryWriter;
}
// Rewritten HQL, computed on the first call to toHQL() and reused afterwards.
private String asHQL = null;
/**
 * Returns the rewritten query as HQL. The string is produced by the query writer on first use and cached.
 *
 * @return the rewritten HQL
 * @throws LensException if the query writer cannot be built or fails to write the query
 */
public String toHQL() throws LensException {
  if (asHQL != null) {
    return asHQL;
  }
  asHQL = getQueryWriter().toHQL();
  return asHQL;
}
/**
 * Rewrites the query to HQL, parses it and returns the root non-null token of the resulting tree.
 *
 * @param ctx Hive context passed to the parser
 * @return the root non-null token of the parsed rewritten query
 * @throws LensException if the rewrite fails or the rewritten HQL cannot be parsed
 */
public ASTNode toAST(Context ctx) throws LensException {
  String rewrittenHql = toHQL();
  ParseDriver parser = new ParseDriver();
  log.info("HQL:{}", rewrittenHql);
  ASTNode parsed;
  try {
    parsed = parser.parse(rewrittenHql, ctx);
  } catch (ParseException parseError) {
    throw new LensException(parseError);
  }
  return ParseUtils.findRootNonNullToken(parsed);
}
/**
 * Returns the columns queried for a table, looked up through the alias the table has in this query.
 *
 * @param tblName name of the table
 * @return the columns queried under the table's alias
 */
Set<String> getColumnsQueriedForTable(String tblName) {
  String tableAlias = getAliasForTableName(tblName);
  return getColumnsQueried(tableAlias);
}
/**
 * Adds the time dimension column to the columns queried by the phrase, unless time dimensions are to be
 * replaced with partition columns.
 *
 * @param qur the queried phrase
 * @param alias alias of the table the column belongs to
 * @param timeDimColumn the time dimension column
 */
void addColumnsQueriedWithTimeDimCheck(QueriedPhraseContext qur, String alias, String timeDimColumn) {
  if (shouldReplaceTimeDimWithPart()) {
    return;
  }
  qur.addColumnsQueried(alias, timeDimColumn);
}
/**
 * Tells whether a column, given by name, is a measure of the queried cube. The name may be wrapped in one
 * pair of brackets and may be qualified as {@code table.column}; comparison is done in lower case. A
 * queried expression column counts as a measure when its expression context has measures. A qualified name
 * is a measure only if the qualifier is the cube's name or alias.
 *
 * @param col column name, may be null
 * @return true if the column is a measure of the cube; false for null
 */
boolean isCubeMeasure(String col) {
  if (col == null) {
    return false;
  }
  String name = col.trim();
  // Take care of brackets added around col names in HQLParser.getString
  if (name.startsWith("(") && name.endsWith(")") && name.length() > 2) {
    name = name.substring(1, name.length() - 1);
  }
  String[] parts = StringUtils.split(name, ".");
  if (parts.length > 1) {
    String qualifier = parts[0].trim().toLowerCase();
    String colName = parts[1].trim().toLowerCase();
    boolean refersToCube = qualifier.equalsIgnoreCase(cube.getName())
      || qualifier.equals(getAliasForTableName(cube.getName()));
    if (!refersToCube) {
      return false;
    }
    if (queriedExprs.contains(colName)) {
      return exprCtx.getExpressionContext(colName, qualifier).hasMeasures();
    }
    return cube.getMeasureNames().contains(colName);
  }
  String unqualified = name.trim().toLowerCase();
  if (queriedExprs.contains(unqualified)) {
    return exprCtx.getExpressionContext(unqualified, getAliasForTableName(cube.getName())).hasMeasures();
  }
  return cube.getMeasureNames().contains(unqualified);
}
/**
 * Tells whether an AST node is a reference to a measure of the queried cube.
 *
 * <p>Only {@code TOK_TABLE_OR_COL} nodes (bare column) and {@code DOT} nodes
 * ({@code alias.column}) are considered; the reference text is rebuilt from the node and
 * handed to {@link #isCubeMeasure(String)}.
 *
 * @param node AST node to inspect
 * @return {@code true} if the node resolves to a cube measure; {@code false} for any other
 *         node type, and for a {@code DOT} node that has no direct
 *         {@code TOK_TABLE_OR_COL -> Identifier} child
 */
boolean isCubeMeasure(ASTNode node) {
  int nodeType = node.getToken().getType();
  if (nodeType == HiveParser.TOK_TABLE_OR_COL) {
    // bare column: the identifier is the first child
    return isCubeMeasure(((ASTNode) node.getChild(0)).getText());
  }
  if (nodeType != HiveParser.DOT) {
    return false;
  }
  // node in 'alias.column' format
  ASTNode tabident = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier);
  if (tabident == null) {
    // The original guarded this with an assert, which does nothing when assertions are
    // disabled and then fails with a NullPointerException. A DOT without a table identifier
    // directly beneath it is not an 'alias.column' reference, so it cannot be a measure.
    return false;
  }
  String colname = ((ASTNode) node.getChild(1)).getText();
  String tabname = tabident.getText();
  String msrname = StringUtils.isBlank(tabname) ? colname : tabname + "." + colname;
  return isCubeMeasure(msrname);
}
/**
 * Reports whether the query involves aggregation: either the expression context says so,
 * or at least one queried phrase is an aggregate.
 *
 * @return {@code true} if any aggregate is present
 */
boolean hasAggregates() {
  boolean aggregateFound = getExprCtx().hasAggregates();
  if (!aggregateFound) {
    for (QueriedPhraseContext phrase : queriedPhrases) {
      if (phrase.isAggregate()) {
        aggregateFound = true;
        break;
      }
    }
  }
  return aggregateFound;
}
/**
 * Stores the join condition for a join tree in {@code joinConds}, replacing any
 * condition previously stored under the same key.
 *
 * @param qb   join tree used as the key (note: this parameter shadows the {@code qb} field)
 * @param cond join condition text to associate with it
 */
void setJoinCond(QBJoinTree qb, String cond) {
  this.joinConds.put(qb, cond);
}
/**
 * Resolves a query alias to the cube or dimension it stands for.
 *
 * <p>The alias is translated to a table name through {@code qb.getTabNameForAlias} and compared,
 * ignoring case, first with the cube (when one is present) and then with each queried dimension.
 *
 * @param alias alias used in the query
 * @return the matching cube or dimension, or {@code null} when none matches
 */
public AbstractCubeTable getQueriedTable(String alias) {
  if (cube != null) {
    final String cubeTableName = cube.getName();
    if (cubeTableName.equalsIgnoreCase(qb.getTabNameForAlias(alias))) {
      return (AbstractCubeTable) cube;
    }
  }
  AbstractCubeTable match = null;
  for (Dimension candidate : dimensions) {
    if (candidate.getName().equalsIgnoreCase(qb.getTabNameForAlias(alias))) {
      match = candidate;
      break;
    }
  }
  return match;
}
/**
 * Builds the {@code INSERT OVERWRITE} prefix for the query's destination.
 *
 * @return {@code "INSERT OVERWRITE <destination> "} when the parse info holds a destination for
 *         {@code clauseName} whose first child is not a {@code TOK_TMP_FILE} token; the empty
 *         string otherwise
 */
String getInsertClause() {
  final ASTNode destination = qb.getParseInfo().getDestForClause(clauseName);
  if (destination == null) {
    return "";
  }
  final int destinationKind = ((ASTNode) (destination.getChild(0))).getToken().getType();
  if (destinationKind == TOK_TMP_FILE) {
    return "";
  }
  return "INSERT OVERWRITE " + HQLParser.getString(destination) + " ";
}
/**
 * Returns the key set of {@code optionalDimensionMap}.
 *
 * @return the aliased dimensions currently registered as optional; this is the map's own
 *         key-set view, not a copy
 */
Set<Aliased<Dimension>> getOptionalDimensions() {
  return this.optionalDimensionMap.keySet();
}
/**
 * Reads the {@code REPLACE_TIMEDIM_WITH_PART_COL} flag from the query configuration.
 *
 * @return the configured value, or {@code DEFAULT_REPLACE_TIMEDIM_WITH_PART_COL} when unset
 */
public boolean shouldReplaceTimeDimWithPart() {
  final boolean replaceWithPartCol =
    getConf().getBoolean(REPLACE_TIMEDIM_WITH_PART_COL, DEFAULT_REPLACE_TIMEDIM_WITH_PART_COL);
  return replaceWithPartCol;
}
/**
 * Reads the {@code REWRITE_DIM_FILTER_TO_FACT_FILTER} flag from the query configuration.
 *
 * @return the configured value, or {@code DEFAULT_REWRITE_DIM_FILTER_TO_FACT_FILTER} when unset
 */
boolean shouldReplaceDimFilterWithFactFilter() {
  final boolean rewriteToFactFilter =
    getConf().getBoolean(REWRITE_DIM_FILTER_TO_FACT_FILTER, DEFAULT_REWRITE_DIM_FILTER_TO_FACT_FILTER);
  return rewriteToFactFilter;
}
/**
 * Maps a time dimension name to its partition column for the cube of this query.
 *
 * @param timeDimName time dimension name
 * @return the partition column reported for the cube, or {@code timeDimName} itself when
 *         no cube is set
 */
String getPartitionColumnOfTimeDim(String timeDimName) {
  return getPartitionColumnOfTimeDim(this.cube, timeDimName);
}
/**
 * Maps a time dimension name to its partition column for the given cube.
 *
 * @param cube        cube to consult; may be {@code null}
 * @param timeDimName time dimension name
 * @return {@code timeDimName} unchanged when {@code cube} is {@code null}; otherwise the answer
 *         of the cube itself, or of its parent when the cube is a {@code DerivedCube}
 */
private static String getPartitionColumnOfTimeDim(CubeInterface cube, String timeDimName) {
  if (cube == null) {
    return timeDimName;
  }
  if (!(cube instanceof DerivedCube)) {
    return ((Cube) cube).getPartitionColumnOfTimeDim(timeDimName);
  }
  // derived cubes delegate to their parent
  return ((DerivedCube) cube).getParent().getPartitionColumnOfTimeDim(timeDimName);
}
/**
 * Maps a partition column to its time dimension name for the cube of this query.
 *
 * @param partCol partition column name
 * @return the time dimension reported for the cube, or {@code partCol} itself when no cube is set
 */
String getTimeDimOfPartitionColumn(String partCol) {
  return getTimeDimOfPartitionColumn(this.cube, partCol);
}
/**
 * Maps a partition column to its time dimension name for the given cube.
 *
 * @param cube    cube to consult; may be {@code null}
 * @param partCol partition column name
 * @return {@code partCol} unchanged when {@code cube} is {@code null}; otherwise the answer of
 *         the cube itself, or of its parent when the cube is a {@code DerivedCube}
 */
private static String getTimeDimOfPartitionColumn(CubeInterface cube, String partCol) {
  if (cube == null) {
    return partCol;
  }
  if (!(cube instanceof DerivedCube)) {
    return ((Cube) cube).getTimeDimOfPartitionColumn(partCol);
  }
  // derived cubes delegate to their parent
  return ((DerivedCube) cube).getParent().getTimeDimOfPartitionColumn(partCol);
}
/**
 * Adds every name in the given set to {@code queriedMsrs}.
 *
 * @param msrs measure names to add
 */
void addQueriedMsrs(Set<String> msrs) {
  this.queriedMsrs.addAll(msrs);
}
/**
 * Adds every name in the given set to {@code queriedExprs}.
 *
 * @param exprs expression names to add
 */
void addQueriedExprs(Set<String> exprs) {
  this.queriedExprs.addAll(exprs);
}
/**
 * Adds every name in the given set to {@code queriedExprsWithMeasures}.
 *
 * @param exprs expression names to add
 */
void addQueriedExprsWithMeasures(Set<String> exprs) {
  this.queriedExprsWithMeasures.addAll(exprs);
}
/**
 * Records a queried time dimension column.
 *
 * @param timeDimColName column name; must not be blank
 * @throws IllegalArgumentException from {@code checkArgument} when the name is blank
 */
void addQueriedTimeDimensionCols(final String timeDimColName) {
  final boolean nameIsUsable = StringUtils.isNotBlank(timeDimColName);
  checkArgument(nameIsUsable);
  queriedTimeDimCols.add(timeDimColName);
}
/**
 * Returns the queried time dimension columns as an immutable copy.
 *
 * @return an {@code ImmutableSet} built from the current contents of {@code queriedTimeDimCols}
 */
ImmutableSet<String> getQueriedTimeDimCols() {
  return ImmutableSet.copyOf(queriedTimeDimCols);
}
/**
 * Produces the where-clause text for a storage candidate.
 *
 * <p>When an auto-join context is supplied and dimension-filter replacement is requested, the
 * filters found under {@code node} are collected (star-joined dimension filters being rewritten
 * as fact sub-queries) and joined with {@code " and "}. Otherwise the candidate's own where AST
 * is rendered as is.
 *
 * @param sc                     storage candidate context
 * @param autoJoinCtx            auto-join context; may be {@code null} (shadows the field of the same name)
 * @param node                   root of the filter AST to walk when rewriting
 * @param cubeAlias              alias of the cube in the query
 * @param shouldReplaceDimFilter whether dimension filters should be rewritten to fact filters
 * @param storageTable           not used by this method
 * @param dimToQuery             picked candidate for each dimension
 * @return the where-clause string
 * @throws LensException propagated from filter collection
 */
String getWhere(StorageCandidateHQLContext sc, AutoJoinContext autoJoinCtx,
  ASTNode node, String cubeAlias,
  boolean shouldReplaceDimFilter, String storageTable,
  Map<Dimension, CandidateDim> dimToQuery) throws LensException {
  if (autoJoinCtx == null || !shouldReplaceDimFilter) {
    // no rewriting requested or possible: render the candidate's where AST directly
    return HQLParser.getString(sc.getQueryAst().getWhereAST());
  }
  List<String> collectedFilters = new ArrayList<>();
  JoinClause candidateJoinClause = autoJoinCtx.getJoinClause(sc.getStorageCandidate());
  getAllFilters(node, cubeAlias, collectedFilters, candidateJoinClause, dimToQuery);
  return StringUtils.join(collectedFilters, " and ");
}
/**
 * Walks a filter AST depth-first and appends the filter strings it finds to {@code allFilters}.
 *
 * <p>For an {@code AND} node whose first child is itself an {@code AND}, only the second child
 * is turned into a filter here (the first is handled by the recursion). For any other
 * {@code AND} node with more than one child, every child is turned into a filter. A non-AND
 * node without a parent is rendered whole. Every child of {@code node} is then visited
 * recursively, whatever the node type.
 *
 * @param node       current AST node
 * @param cubeAlias  alias of the cube in the query
 * @param allFilters output list that receives the filter strings
 * @param joinClause join clause used to detect star joins
 * @param dimToQuery picked candidate for each dimension
 * @throws LensException propagated from {@code getFilter}
 */
private void getAllFilters(ASTNode node, String cubeAlias, List<String> allFilters,
  JoinClause joinClause, Map<Dimension, CandidateDim> dimToQuery)
  throws LensException {
  final boolean isConjunction = node.getToken().getType() == HiveParser.KW_AND;
  if (isConjunction) {
    final boolean leftIsConjunction = node.getChild(0).getType() == HiveParser.KW_AND;
    if (leftIsConjunction) {
      // left side is a nested AND: only the right operand is a filter at this level
      String rightTable = getTableFromFilterAST((ASTNode) node.getChild(1));
      allFilters.add(getFilter(rightTable, cubeAlias, node, joinClause, 1, dimToQuery));
    } else if (node.getChildCount() > 1) {
      int operand = 0;
      while (operand < node.getChildCount()) {
        String operandTable = getTableFromFilterAST((ASTNode) node.getChild(operand));
        allFilters.add(getFilter(operandTable, cubeAlias, node, joinClause, operand, dimToQuery));
        operand++;
      }
    }
  } else if (node.getParent() == null) {
    // a root that is not an AND is the only filter there is
    allFilters.add(HQLParser.getString(node));
  }
  int childIdx = 0;
  while (childIdx < node.getChildCount()) {
    getAllFilters((ASTNode) node.getChild(childIdx), cubeAlias, allFilters, joinClause, dimToQuery);
    childIdx++;
  }
}
/**
 * Renders one child of a filter node as a filter string.
 *
 * <p>If the filter refers to a table other than the cube and that table is star-joined to the
 * root of the join tree, the filter is rewritten as a sub-query on the fact; otherwise the
 * child AST is rendered unchanged.
 *
 * @param table      table alias the filter refers to; may be {@code null}
 * @param cubeAlias  alias of the cube in the query
 * @param node       parent filter node
 * @param joinClause join clause used to look up the star join
 * @param index      index of the child of {@code node} to render
 * @param dimToQuery picked candidate for each dimension
 * @return the filter string
 * @throws LensException propagated from the sub-query construction
 */
private String getFilter(String table, String cubeAlias, ASTNode node, JoinClause joinClause,
  int index, Map<Dimension, CandidateDim> dimToQuery)
  throws LensException{
  // Resolve the star join once; the original walked the join tree a second time to fetch
  // the relationship it had just found.
  TableRelationship starJoin = null;
  if (table != null && !table.equals(cubeAlias)) {
    starJoin = getStarJoin(joinClause, table);
  }
  ASTNode filterAst = (ASTNode) node.getChild(index);
  if (starJoin == null) {
    return HQLParser.getString(filterAst);
  }
  //rewrite dim filter to fact filter if its a star join with fact
  return buildFactSubqueryFromDimFilter(starJoin, filterAst, table, dimToQuery, cubeAlias);
}
/**
 * Finds the relationship that joins the given alias directly beneath the root of the join tree.
 *
 * @param joinClause join clause whose tree is searched
 * @param table      table alias to look for
 * @return the relationship of the first subtree at depth 1 whose alias equals {@code table},
 *         or {@code null} when there is none
 */
private TableRelationship getStarJoin(JoinClause joinClause, String table) {
  TableRelationship found = null;
  for (Map.Entry<TableRelationship, JoinTree> edge : joinClause.getJoinTree().getSubtrees().entrySet()) {
    JoinTree subtree = edge.getValue();
    if (subtree.getDepthFromRoot() == 1 && table.equals(subtree.getAlias())) {
      found = edge.getKey();
      break;
    }
  }
  return found;
}
/**
 * Finds the table alias referenced by a filter expression.
 *
 * <p>The tree is searched depth-first; the first {@code DOT} node that has a direct
 * {@code TOK_TABLE_OR_COL -> Identifier} child supplies the alias.
 *
 * @param node filter AST to search
 * @return the text of the table identifier, or {@code null} when the expression contains no
 *         qualified column reference
 */
private String getTableFromFilterAST(ASTNode node) {
  if (node.getToken().getType() == HiveParser.DOT) {
    ASTNode tableIdent = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier);
    if (tableIdent != null) {
      return tableIdent.getText();
    }
    // The original asserted non-null here, which does nothing when assertions are disabled
    // and then fails with a NullPointerException. A DOT without a table identifier directly
    // beneath it (presumably a nested field access) is searched like any other node instead.
  }
  // recurse down
  for (int i = 0; i < node.getChildCount(); i++) {
    String fromChild = getTableFromFilterAST((ASTNode) node.getChild(i));
    if (fromChild != null) {
      return fromChild;
    }
  }
  return null;
}
/**
 * Rewrites a dimension filter as an {@code IN} sub-query on the fact's join column.
 *
 * <p>The result has the shape
 * {@code <cubeAlias>.<fromColumn> in ( select <toColumn> from <dim storage> where <dimFilter>
 * [and <storage where clause>] ) }. The candidate's where clause, when present, is used as a
 * format string with {@code dimAlias} as its argument.
 *
 * @param tabRelation relationship between the fact and the dimension
 * @param dimFilter   filter AST on the dimension
 * @param dimAlias    alias of the dimension in the query
 * @param dimToQuery  picked candidate for each dimension; it is expected to hold an entry for
 *                    the relationship's to-table, since the looked-up candidate is dereferenced
 *                    without a null check
 * @param cubeAlias   alias of the cube in the query
 * @return the sub-query filter text
 * @throws LensException declared by the signature; presumably raised by the calls made here
 */
private String buildFactSubqueryFromDimFilter(TableRelationship tabRelation, ASTNode dimFilter,
  String dimAlias, Map<Dimension, CandidateDim> dimToQuery,
  String cubeAlias)
  throws LensException {
  StringBuilder subquery = new StringBuilder();
  CandidateDim candidateDim = dimToQuery.get(tabRelation.getToTable());
  String storageFilter = candidateDim.getWhereClause();
  subquery.append(cubeAlias).append(".").append(tabRelation.getFromColumn());
  subquery.append(" in ( ").append("select ").append(tabRelation.getToColumn());
  subquery.append(" from ").append(candidateDim.getStorageString(dimAlias));
  subquery.append(" where ").append(HQLParser.getString(dimFilter));
  if (storageFilter != null) {
    subquery.append(" and ").append(String.format(storageFilter, dimAlias));
  }
  // the closing bracket is the same whether or not a storage filter was appended
  subquery.append(" ) ");
  return subquery.toString();
}
}