/*
* Copyright (C) 2014 Indeed Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.indeed.imhotep.sql.parser;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.indeed.imhotep.sql.ast.*;
import com.indeed.imhotep.sql.ast2.*;
import com.indeed.imhotep.web.IQLParseException;
import com.indeed.imhotep.web.ImhotepMetadataCache;
import org.codehaus.jparsec.*;
import org.codehaus.jparsec.functors.Map;
import org.codehaus.jparsec.misc.Mapper;
import org.joda.time.DateTime;
import org.joda.time.Period;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.indeed.imhotep.sql.parser.TerminalParser.term;
/**
 * Static entry points that turn raw IQL statement text (or the individual clauses of a
 * select query) into the corresponding statement / clause objects.
 *
 * <p>All methods are static and the class keeps no mutable state of its own.
 *
 * @author vladimir
 */
public class StatementParser {
    // The (?s) flag makes '.' match line terminators as well, so statements that span several
    // lines (or merely end with a newline) are still recognized by matches().
    private static final Pattern selectPattern = Pattern.compile("(?is)\\s*(?:select|from)\\s.*");
    private static final Pattern showPattern = Pattern.compile("(?is)\\s*show\\s+(?:tables|datasets).*");
    private static final Pattern describePattern = Pattern.compile("(?is)\\s*(?:describe|explain|desc)\\s+(\\w+)(?:(?:\\s+|\\.)(\\w+))?.*");

    /**
     * Parses a statement without dataset metadata, i.e. without any alias substitution.
     *
     * @param statement raw statement text
     * @return the parsed statement, or {@code null} if the text is not a recognized statement
     * @see #parse(String, ImhotepMetadataCache)
     */
    @Nullable
    public static IQLStatement parse(String statement) {
        return parse(statement, null);
    }

    /**
     * Classifies the statement by its leading keyword and parses it.
     *
     * <ul>
     * <li>{@code select ...} / {@code from ...}: split into clauses and parsed into a select
     * statement;</li>
     * <li>{@code show tables} / {@code show datasets}: a {@link ShowStatement};</li>
     * <li>{@code describe|explain|desc a [b]} (or {@code a.b}): a {@link DescribeStatement}
     * built from the one or two captured words (the second is {@code null} when absent).</li>
     * </ul>
     *
     * @param statement raw statement text; must not be {@code null}
     * @param metadata  optional metadata used to look up field aliases for the queried dataset;
     *                  may be {@code null}
     * @return the parsed statement, or {@code null} if none of the statement forms matched
     * @throws IQLParseException if a select statement was recognized but splitting it or parsing
     *                           one of its clauses failed
     */
    @Nullable
    public static IQLStatement parse(String statement, ImhotepMetadataCache metadata) {
        if (selectPattern.matcher(statement).matches()) {
            final QueryParts parts;
            try {
                parts = QuerySplitter.splitQuery(statement);
            } catch (Exception e) {
                throw new IQLParseException(e, "splitter");
            }
            return parseSelectStatement(parts, metadata);
        }
        if (showPattern.matcher(statement).matches()) {
            return new ShowStatement();
        }
        final Matcher describeMatcher = describePattern.matcher(statement);
        if (describeMatcher.matches()) {
            return new DescribeStatement(describeMatcher.group(1), describeMatcher.group(2));
        }
        return null;
    }

    /**
     * Parses the already separated clauses of a select query.
     *
     * <p>The FROM clause is parsed first because its dataset name selects the alias map that is
     * applied to the SELECT, WHERE and GROUP BY clauses. A failure in any of those clauses is
     * rethrown as an {@link IQLParseException} carrying the original exception and the name of
     * the clause that failed.
     *
     * @param parts    the clause texts of the query
     * @param metadata optional metadata for alias lookup; when {@code null} no aliases are applied
     * @return the assembled select statement
     * @throws IQLParseException if the from, select, where or group by clause cannot be parsed
     */
    public static SelectStatement parseSelectStatement(QueryParts parts, ImhotepMetadataCache metadata) {
        final SelectClause select;
        final FromClause from;
        final WhereClause where;
        final GroupByClause groupBy;

        try {
            from = parseFromClause(parts.from);
        } catch (Exception e) {
            throw new IQLParseException(e, "from");
        }

        final String dataset = from.getDataset();
        // NOTE(review): if getDataset() can return null for an unknown dataset this line throws a
        // bare NullPointerException outside of any clause handler - confirm against the cache.
        final java.util.Map<String, String> aliases = metadata != null
                ? metadata.getDataset(dataset).getAliases()
                : Collections.<String, String>emptyMap();

        try {
            select = parseSelectClause(parts.select, aliases);
        } catch (Exception e) {
            throw new IQLParseException(e, "select");
        }
        try {
            where = parseWhereClause(parts.where, aliases);
        } catch (Exception e) {
            throw new IQLParseException(e, "where");
        }
        try {
            groupBy = parseGroupByClause(parts.groupBy, aliases);
        } catch (Exception e) {
            throw new IQLParseException(e, "groupBy");
        }

        final int limit = parseLimit(parts.limit);
        return new SelectStatement(select, from, where, groupBy, limit);
    }

    /**
     * Converts the limit text to a row limit.
     *
     * @param limit limit text; may be {@code null}
     * @return the parsed value if it is a positive integer, otherwise {@link Integer#MAX_VALUE}
     */
    private static int parseLimit(String limit) {
        try {
            final int parsed = Integer.parseInt(limit);
            if (parsed > 0) {
                return parsed;
            }
        } catch (NumberFormatException ignored) {
            // Deliberate best effort: a missing (null) or non-numeric limit means "no limit".
        }
        return Integer.MAX_VALUE;
    }

    /**
     * Parses a GROUP BY clause without applying any aliases.
     *
     * @param text clause text; may be {@code null} or empty
     * @return the parsed clause, or {@code null} when {@code text} is null or empty
     */
    static GroupByClause parseGroupByClause(String text) {
        return parseGroupByClause(text, Collections.<String, String>emptyMap());
    }

    /**
     * Parses a GROUP BY clause: one or more group-by expressions separated by commas.
     *
     * @param text    clause text; may be {@code null} or empty
     * @param aliases aliases substituted into the text before parsing
     * @return the parsed clause, or {@code null} when {@code text} is null or empty
     */
    static GroupByClause parseGroupByClause(String text, java.util.Map<String, String> aliases) {
        if (Strings.isNullOrEmpty(text)) {
            return null;
        }
        final String expanded = Preprocessor.applyAliases(text, aliases);
        final Parser<Expression> groupingParser = ExpressionParser.groupByExpression();
        final Parser<GroupByClause> clauseParser =
                Mapper.curry(GroupByClause.class).sequence(groupingParser.sepBy1(term(",")));
        return TerminalParser.parse(clauseParser, expanded);
    }

    /**
     * Parses a WHERE clause without applying any aliases.
     *
     * @param text clause text; may be {@code null} or empty
     * @return the parsed clause, or {@code null} when {@code text} is null or empty
     */
    public static WhereClause parseWhereClause(String text) {
        return parseWhereClause(text, Collections.<String, String>emptyMap());
    }

    /**
     * Parses a WHERE clause after substituting aliases into its text.
     *
     * @param text    clause text; may be {@code null} or empty
     * @param aliases aliases substituted into the text before parsing
     * @return the parsed clause, or {@code null} when {@code text} is null or empty
     */
    public static WhereClause parseWhereClause(String text, java.util.Map<String, String> aliases) {
        if (Strings.isNullOrEmpty(text)) {
            return null;
        }
        final String expanded = Preprocessor.applyAliases(text, aliases);
        return new WhereClause(ExpressionParser.parseWhereExpression(expanded));
    }

    /**
     * Parses a FROM clause of the form {@code dataset start end}.
     *
     * <p>The text is tokenized into quoted strings and words; at least three tokens are required.
     * The first token is the dataset, the remaining ones are distributed between start and end:
     * <ul>
     * <li>3 tokens: start and end are one token each;</li>
     * <li>5 tokens: start and end are two tokens each (joined with a space);</li>
     * <li>4 tokens: if the third token contains {@code '-'} the start is one token and the end
     * two, otherwise the start is two tokens and the end one.</li>
     * </ul>
     * Each of start and end is then resolved, in this order, as a value accepted by the
     * {@link DateTime} string constructor (spaces replaced by {@code 'T'}), a relative date, or a
     * unix timestamp.
     *
     * <p>The mapping step throws {@link IllegalArgumentException} when a date cannot be resolved,
     * when start is not strictly before end, or when start lies before year 0.
     *
     * @param text FROM clause text
     * @return the parsed clause holding the dataset, both resolved times and both raw date texts
     */
    public static FromClause parseFromClause(String text) {
        final Parser<String> tokenizer = Parsers.or(Terminals.StringLiteral.SINGLE_QUOTE_TOKENIZER,
                Terminals.StringLiteral.DOUBLE_QUOTE_TOKENIZER, QuerySplitter.wordParser);
        final Parser<FromClause> fromParser = TerminalParser.STRING.atLeast(3).map(new Map<List<String>, FromClause>() {
            @Override
            public FromClause map(List<String> parts) {
                final String dataset = parts.get(0);
                String start = "";
                String end = "";
                final int tokenCount = parts.size();
                if (tokenCount == 3) {
                    start = parts.get(1);
                    end = parts.get(2);
                } else if (tokenCount == 5) {
                    start = parts.get(1) + " " + parts.get(2);
                    end = parts.get(3) + " " + parts.get(4);
                } else if (tokenCount == 4) {
                    if (parts.get(2).contains("-")) {
                        // start is 1 word date, end is 2 word date+time
                        start = parts.get(1);
                        end = parts.get(2) + " " + parts.get(3);
                    } else {
                        // start is 2 word date+time, end is 1 word date
                        start = parts.get(1) + " " + parts.get(2);
                        end = parts.get(3);
                    }
                }
                // NOTE(review): with more than five tokens start/end stay empty and silently fall
                // back to today / tomorrow below - confirm whether that should be rejected instead.

                final DateTime startTime = resolveDate(start, 0);
                final DateTime endTime = resolveDate(end, 1);
                if (startTime == null) {
                    throw new IllegalArgumentException("Start date parsing failed: " + start);
                }
                if (endTime == null) {
                    throw new IllegalArgumentException("End date parsing failed: " + end);
                }
                if (!startTime.isBefore(endTime)) {
                    throw new IllegalArgumentException("Start date has to be before the end date. start: " + startTime + ", end: " + endTime);
                }
                if (startTime.isBefore(new DateTime(0, 1, 1, 0, 0))) {
                    throw new IllegalArgumentException("The start date appears to be too low. Check for a typo: " + startTime);
                }
                return new FromClause(dataset, startTime, endTime, start, end);
            }
        });
        return fromParser.from(tokenizer, Scanners.SQL_DELIMITER).parse(text);
    }

    /**
     * Resolves one date of a FROM clause.
     *
     * <p>Tries the {@link DateTime} string constructor first (with spaces replaced by
     * {@code 'T'}); if that rejects the text, an empty text yields the start of the day
     * {@code emptyDayOffset} days from now, and anything else is tried as a relative date and
     * finally as a unix timestamp.
     *
     * @param text           date text; never {@code null} when called from the FROM parser
     * @param emptyDayOffset days added to today when {@code text} is empty
     * @return the resolved time, or {@code null} if no interpretation succeeded
     */
    @Nullable
    private static DateTime resolveDate(String text, int emptyDayOffset) {
        DateTime resolved;
        try {
            resolved = new DateTime(text.replace(' ', 'T'));
        } catch (IllegalArgumentException ignored) {
            // Not in a format the DateTime constructor accepts; fall back to the other notations.
            if (Strings.isNullOrEmpty(text)) {
                return DateTime.now().plusDays(emptyDayOffset).withTimeAtStartOfDay();
            }
            resolved = tryParseRelativeDate(text);
        }
        if (resolved == null) {
            // check if it's a unix timestamp
            resolved = tryParseUnixTimestamp(text);
        }
        return resolved;
    }

    /**
     * Interprets a relative date.
     *
     * <p>A null or empty value means the start of today. Otherwise the lowercased value is
     * accepted when it is a prefix of {@code "yesterday"}, {@code "today"} or {@code "tomorrow"},
     * checked in that order (so {@code "t"} and {@code "to"} resolve to today). Any other value is
     * handed to {@code PeriodParser.parseString}; a non-null period is subtracted from the start
     * of today.
     *
     * @param value relative date text
     * @return the resolved time at start of day, or {@code null} if the value was not recognized
     */
    @Nullable
    private static DateTime tryParseRelativeDate(String value) {
        final DateTime startOfToday = DateTime.now().withTimeAtStartOfDay();
        if (Strings.isNullOrEmpty(value)) {
            return startOfToday;
        }
        final String lowercasedValue = value.toLowerCase();
        if ("yesterday".startsWith(lowercasedValue)) {
            return DateTime.now().minusDays(1).withTimeAtStartOfDay();
        }
        if ("today".startsWith(lowercasedValue)) {
            return startOfToday;
        }
        if ("tomorrow".startsWith(lowercasedValue)) {
            return DateTime.now().plusDays(1).withTimeAtStartOfDay();
        }
        final Period period = PeriodParser.parseString(value);
        if (period == null) {
            return null;
        }
        return startOfToday.minus(period);
    }

    /**
     * Interprets a value as a unix timestamp.
     *
     * <p>Values below {@link Integer#MAX_VALUE} are taken to be in seconds and are converted to
     * milliseconds; larger values are used as milliseconds directly.
     *
     * @param value timestamp text
     * @return the corresponding time, or {@code null} if the value is not a valid long
     */
    @Nullable
    private static DateTime tryParseUnixTimestamp(String value) {
        try {
            long timestamp = Long.parseLong(value);
            if (timestamp < Integer.MAX_VALUE) {
                timestamp *= 1000; // seconds to milliseconds
            }
            return new DateTime(timestamp);
        } catch (NumberFormatException ignored) {
            // Not numeric (or null): simply not a timestamp.
            return null;
        }
    }

    /**
     * Parses a SELECT clause without applying any aliases.
     *
     * @param text clause text; may be {@code null} or empty
     * @return the parsed clause; a single {@code count} function call when nothing is selected
     */
    public static SelectClause parseSelectClause(String text) {
        return parseSelectClause(text, Collections.<String, String>emptyMap());
    }

    /**
     * Parses a SELECT clause: one or more expressions separated by commas.
     *
     * @param text    clause text; may be {@code null} or empty
     * @param aliases aliases substituted into the text before parsing
     * @return the parsed clause; when the text is null/empty or yields no expressions, a clause
     *         holding a single {@code count} function call with no arguments
     */
    public static SelectClause parseSelectClause(String text, java.util.Map<String, String> aliases) {
        if (Strings.isNullOrEmpty(text)) {
            return defaultSelect();
        }
        final String expanded = Preprocessor.applyAliases(text, aliases);
        final Parser<List<Expression>> selectParser = ExpressionParser.expression().sepBy1(term(","));
        final List<Expression> projections = TerminalParser.parse(selectParser, expanded);
        if (projections == null || projections.isEmpty()) {
            return defaultSelect();
        }
        return new SelectClause(projections);
    }

    /**
     * Builds the clause used when the query selects nothing explicitly.
     *
     * @return a select clause containing only a {@code count} function call without arguments
     */
    private static SelectClause defaultSelect() {
        // default to counts()
        return new SelectClause(Lists.newArrayList((Expression) new FunctionExpression("count", Collections.<Expression>emptyList())));
    }
}