/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lens.cube.parse;
import static org.apache.hadoop.hive.ql.parse.HiveParser.*;
import java.util.*;
import org.apache.lens.cube.error.ColUnAvailableInTimeRange;
import org.apache.lens.cube.error.ColUnAvailableInTimeRangeException;
import org.apache.lens.cube.error.LensCubeErrorCode;
import org.apache.lens.cube.metadata.*;
import org.apache.lens.cube.metadata.join.JoinPath;
import org.apache.lens.cube.parse.join.AutoJoinContext;
import org.apache.lens.server.api.LensConfConstants;
import org.apache.lens.server.api.error.LensException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import com.google.common.collect.Lists;
import lombok.extern.slf4j.Slf4j;
/**
 * Context rewriter that validates the time ranges of a cube query.
 *
 * <p>For a query on a cube it checks that every queried cube column is available in each queried
 * time range, removes join paths that go through cube columns which are not available in a queried
 * time range, and prunes candidate facts that are not valid for one or more of the time ranges.
 */
@Slf4j
public class TimeRangeChecker implements ContextRewriter {

  /**
   * Creates the checker.
   *
   * @param conf configuration; currently not read by this class
   */
  public TimeRangeChecker(Configuration conf) {
  }

  /**
   * Runs column life validation followed by fact range validation. Does nothing when the query
   * context has no cube.
   *
   * @param cubeql the query context to validate and prune
   * @throws LensException if a column is unavailable in a queried time range, or a dimension is
   *                       left without any join path
   */
  @Override
  public void rewriteContext(CubeQueryContext cubeql) throws LensException {
    if (cubeql.getCube() == null) {
      return;
    }
    doColLifeValidation(cubeql);
    doFactRangeValidation(cubeql);
  }

  /**
   * Walks the where clause of the query and registers every time range function found in it.
   * This method is not invoked from {@link #rewriteContext(CubeQueryContext)} in this class.
   *
   * @param cubeql the query context whose where AST is searched
   * @throws LensException with {@code NO_TIMERANGE_FILTER} when the query has no where clause or
   *                       the where clause has no children
   */
  private void extractTimeRange(CubeQueryContext cubeql) throws LensException {
    // get time range -
    // Time range should be direct child of where condition
    // TOK_WHERE.TOK_FUNCTION.Identifier Or, it should be right hand child of
    // AND condition TOK_WHERE.KW_AND.TOK_FUNCTION.Identifier
    if (cubeql.getWhereAST() == null || cubeql.getWhereAST().getChildCount() < 1) {
      throw new LensException(LensCubeErrorCode.NO_TIMERANGE_FILTER.getLensErrorInfo());
    }
    searchTimeRanges(cubeql.getWhereAST(), cubeql, null, 0);
  }

  /**
   * Recursively searches the AST rooted at {@code root} for time range function calls.
   *
   * <p>A {@code TOK_FUNCTION} node is never descended into: it is either processed as a time range
   * function or ignored. Every other node type has all of its children searched.
   *
   * @param root       node to inspect; {@code null} is ignored
   * @param cubeql     the query context that collects the time ranges
   * @param parent     parent of {@code root}, or {@code null} for the top-level call
   * @param childIndex index of {@code root} among the children of {@code parent}
   * @throws LensException if a time range function cannot be processed
   */
  private void searchTimeRanges(ASTNode root, CubeQueryContext cubeql, ASTNode parent, int childIndex)
    throws LensException {
    if (root == null) {
      return;
    }
    if (root.getToken().getType() == TOK_FUNCTION) {
      ASTNode fname = HQLParser.findNodeByPath(root, Identifier);
      if (fname != null && CubeQueryContext.TIME_RANGE_FUNC.equalsIgnoreCase(fname.getText())) {
        processTimeRangeFunction(cubeql, root, parent, childIndex);
      }
      return;
    }
    for (int i = 0; i < root.getChildCount(); i++) {
      searchTimeRanges((ASTNode) root.getChild(i), cubeql, root, i);
    }
  }

  /**
   * Extracts the lower-cased column name from a column reference node.
   *
   * @param node a {@code DOT} node (the name is taken from its second child) or a
   *             {@code TOK_TABLE_OR_COL} node (the name is taken from its first child)
   * @return the lower-cased column name, or {@code null} when the node is of any other type
   */
  private String getColumnName(ASTNode node) {
    String column = null;
    if (node.getToken().getType() == DOT) {
      ASTNode colIdent = (ASTNode) node.getChild(1);
      column = colIdent.getText().toLowerCase();
    } else if (node.getToken().getType() == TOK_TABLE_OR_COL) {
      // Take child ident.totext
      ASTNode ident = (ASTNode) node.getChild(0);
      column = ident.getText().toLowerCase();
    }
    return column;
  }

  /**
   * Builds a {@link TimeRange} from a time range function node, validates it and adds it to the
   * time ranges of the query context.
   *
   * <p>Child 1 of the function node is read as the time dimension column, child 2 as the "from"
   * date and the optional child 3 as the "to" date. When no "to" date is given, or it is blank,
   * the range ends at the current time.
   *
   * @param cubeql     the query context that receives the time range
   * @param timenode   the time range function node
   * @param parent     parent of {@code timenode}; recorded in the built range
   * @param childIndex index of {@code timenode} within {@code parent}; recorded in the built range
   * @throws LensException with {@code NOT_A_TIMED_DIMENSION} when the referenced column is not a
   *                       timed dimension of the cube, or when date resolution or range validation
   *                       fails
   */
  private void processTimeRangeFunction(CubeQueryContext cubeql, ASTNode timenode, ASTNode parent, int childIndex)
    throws LensException {
    TimeRange.TimeRangeBuilder builder = TimeRange.getBuilder();
    builder.astNode(timenode);
    builder.parent(parent);
    builder.childIndex(childIndex);

    String timeDimName = getColumnName((ASTNode) timenode.getChild(1));
    if (!cubeql.getCube().getTimedDimensions().contains(timeDimName)) {
      throw new LensException(LensCubeErrorCode.NOT_A_TIMED_DIMENSION.getLensErrorInfo(), timeDimName);
    }
    // Replace timeDimName with column which is used for partitioning. Assume
    // the same column
    // is used as a partition column in all storages of the fact
    timeDimName = cubeql.getPartitionColumnOfTimeDim(timeDimName);
    builder.partitionColumn(timeDimName);

    String fromDateRaw = PlanUtils.stripQuotes(timenode.getChild(2).getText());
    String toDateRaw = null;
    if (timenode.getChildCount() > 3) {
      ASTNode toDateNode = (ASTNode) timenode.getChild(3);
      if (toDateNode != null) {
        toDateRaw = PlanUtils.stripQuotes(toDateNode.getText());
      }
    }

    // A configured value of 0 (also the default used here) means "use the wall clock".
    long currentTime = cubeql.getConf().getLong(LensConfConstants.QUERY_CURRENT_TIME_IN_MILLIS, 0);
    Date now = (currentTime != 0) ? new Date(currentTime) : new Date();

    builder.fromDate(DateUtil.resolveDate(fromDateRaw, now));
    if (StringUtils.isNotBlank(toDateRaw)) {
      builder.toDate(DateUtil.resolveDate(toDateRaw, now));
    } else {
      builder.toDate(now);
    }

    TimeRange range = builder.build();
    range.validate();
    cubeql.getTimeRanges().add(range);
  }

  /**
   * Validates the life span of cube columns against the queried time ranges.
   *
   * <p>First, every column queried from the cube must be available in every time range. Then, for
   * cube columns that take part in join paths, the join paths containing a column that is not
   * available in a time range are removed.
   *
   * @param cubeql the query context to validate
   * @throws ColUnAvailableInTimeRangeException if a queried cube column is not available in one of
   *                                            the time ranges
   * @throws LensException with {@code NOT_A_CUBE_COLUMN} when a queried name is neither a cube
   *                       column nor a timed dimension, or with {@code NO_JOIN_PATH} when a
   *                       dimension is left without any join path
   */
  private void doColLifeValidation(CubeQueryContext cubeql) throws LensException,
    ColUnAvailableInTimeRangeException {
    Set<String> cubeColumns = cubeql.getColumnsQueriedForTable(cubeql.getCube().getName());
    if (cubeColumns == null || cubeColumns.isEmpty()) {
      // Query doesn't have any columns from cube
      return;
    }

    for (String col : cubeColumns) {
      CubeColumn column = cubeql.getCube().getColumnByName(col);
      if (column == null) {
        // The unknown-column check only fires when at least one time range is queried.
        if (!cubeql.getTimeRanges().isEmpty() && !cubeql.getCube().getTimedDimensions().contains(col)) {
          throw new LensException(LensCubeErrorCode.NOT_A_CUBE_COLUMN.getLensErrorInfo(), col);
        }
        continue;
      }
      for (TimeRange range : cubeql.getTimeRanges()) {
        if (!column.isColumnAvailableInTimeRange(range)) {
          throwException(column);
        }
      }
    }

    // Remove join paths that have columns with invalid life span
    AutoJoinContext joinContext = cubeql.getAutoJoinCtx();
    if (joinContext == null) {
      return;
    }
    AbstractCubeTable cubeTable = (AbstractCubeTable) cubeql.getCube();
    // Get cube columns which are part of join chain
    Set<String> joinColumns = joinContext.getAllJoinPathColumnsOfTable(cubeTable);
    if (joinColumns == null || joinColumns.isEmpty()) {
      return;
    }

    // Loop over all cube columns part of join paths
    for (String col : joinColumns) {
      CubeColumn column = cubeql.getCube().getColumnByName(col);
      if (column == null) {
        // No column definition to check a life span against; skip instead of dereferencing null.
        continue;
      }
      for (TimeRange range : cubeql.getTimeRanges()) {
        if (!column.isColumnAvailableInTimeRange(range)) {
          log.info("Timerange queried is not in column life for {}, Removing join paths containing the column", column);
          removeJoinPathsContainingColumn(joinContext, cubeTable, col, range);
        }
      } // End time range loop
    } // End column loop
  }

  /**
   * Removes, for every dimension, the join paths that contain the given column of the cube table.
   *
   * @param joinContext the auto join context whose paths are pruned in place
   * @param cubeTable   the cube table that owns the column
   * @param col         name of the column that is not available in {@code range}
   * @param range       the time range in which the column is unavailable; used in the error message
   * @throws LensException with {@code NO_JOIN_PATH} as soon as a dimension has no join path left
   */
  private void removeJoinPathsContainingColumn(AutoJoinContext joinContext, AbstractCubeTable cubeTable,
    String col, TimeRange range) throws LensException {
    Map<Aliased<Dimension>, List<JoinPath>> allPaths = joinContext.getAllPaths();
    for (Map.Entry<Aliased<Dimension>, List<JoinPath>> entry : allPaths.entrySet()) {
      List<JoinPath> joinPaths = entry.getValue();
      Iterator<JoinPath> joinPathIterator = joinPaths.iterator();
      while (joinPathIterator.hasNext()) {
        JoinPath path = joinPathIterator.next();
        if (path.containsColumnOfTable(col, cubeTable)) {
          log.info("Removing join path: {} as columns :{} is not available in the range", path, col);
          joinPathIterator.remove();
          if (joinPaths.isEmpty()) {
            // This dimension doesn't have any paths left
            throw new LensException(LensCubeErrorCode.NO_JOIN_PATH.getLensErrorInfo(),
              "No valid join path available for dimension " + entry.getKey() + " which would satisfy time range "
                + range.getFromDate() + "-" + range.getToDate());
          }
        }
      } // End loop to remove path
    } // End loop for all paths
  }

  /**
   * Always throws a {@link ColUnAvailableInTimeRangeException} describing the given column.
   *
   * @param column the column that is not available; its start and end times are reported as
   *               {@code null} when absent
   * @throws ColUnAvailableInTimeRangeException always
   */
  private void throwException(CubeColumn column) throws ColUnAvailableInTimeRangeException {
    final Long availabilityStartTime = column.getStartTimeMillisSinceEpoch().isPresent()
      ? column.getStartTimeMillisSinceEpoch().get() : null;
    final Long availabilityEndTime = column.getEndTimeMillisSinceEpoch().isPresent()
      ? column.getEndTimeMillisSinceEpoch().get() : null;

    ColUnAvailableInTimeRange col = new ColUnAvailableInTimeRange(column.getName(), availabilityStartTime,
      availabilityEndTime);
    throw new ColUnAvailableInTimeRangeException(col);
  }

  /**
   * Removes candidate facts that are not valid for at least one queried time range, recording a
   * pruning message for each removed fact, and then prunes the candidate fact sets accordingly.
   *
   * @param cubeql the query context whose candidate facts are pruned in place
   */
  private void doFactRangeValidation(CubeQueryContext cubeql) {
    Iterator<CandidateFact> iter = cubeql.getCandidateFacts().iterator();
    while (iter.hasNext()) {
      CandidateFact cfact = iter.next();
      List<TimeRange> invalidTimeRanges = Lists.newArrayList();
      for (TimeRange timeRange : cubeql.getTimeRanges()) {
        if (!cfact.isValidForTimeRange(timeRange)) {
          invalidTimeRanges.add(timeRange);
        }
      }
      if (!invalidTimeRanges.isEmpty()) {
        cubeql.addFactPruningMsgs(cfact.fact, CandidateTablePruneCause.factNotAvailableInRange(invalidTimeRanges));
        log.info("Not considering {} as it's not available for time ranges: {}", cfact, invalidTimeRanges);
        iter.remove();
      }
    }
    cubeql.pruneCandidateFactSet(CandidateTablePruneCause.CandidateTablePruneCode.FACT_NOT_AVAILABLE_IN_RANGE);
  }
}