/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.rakam.presto.analysis;
import com.facebook.presto.sql.RakamSqlFormatter;
import com.facebook.presto.sql.tree.DefaultExpressionTraversalVisitor;
import com.facebook.presto.sql.tree.Expression;
import com.facebook.presto.sql.tree.QualifiedNameReference;
import io.netty.handler.codec.http.HttpResponseStatus;
import org.rakam.analysis.CalculatedUserSet;
import org.rakam.analysis.ContinuousQueryService;
import org.rakam.analysis.AbstractFunnelQueryExecutor;
import org.rakam.analysis.MaterializedViewService;
import org.rakam.analysis.metadata.Metastore;
import org.rakam.config.ProjectConfig;
import org.rakam.plugin.user.UserPluginConfig;
import org.rakam.report.DelegateQueryExecution;
import org.rakam.report.PreComputedTableSubQueryVisitor;
import org.rakam.report.QueryExecution;
import org.rakam.report.QueryExecutor;
import org.rakam.report.QueryExecutorService;
import org.rakam.util.RakamException;
import org.rakam.util.ValidationUtil;
import javax.inject.Inject;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static com.facebook.presto.sql.RakamExpressionFormatter.formatIdentifier;
import static java.lang.String.format;
import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
import static org.rakam.presto.analysis.PrestoUserService.ANONYMOUS_ID_MAPPING;
import static org.rakam.util.ValidationUtil.checkCollection;
import static org.rakam.util.ValidationUtil.checkTableColumn;
public class PrestoFunnelQueryExecutor
extends AbstractFunnelQueryExecutor
{
private final QueryExecutorService executorService;
private static final String CONNECTOR_FIELD = "_user";
private final MaterializedViewService materializedViewService;
private final ContinuousQueryService continuousQueryService;
private final boolean userMappingEnabled;
@Inject
public PrestoFunnelQueryExecutor(
ProjectConfig projectConfig,
Metastore metastore,
QueryExecutorService executorService,
QueryExecutor executor,
MaterializedViewService materializedViewService,
ContinuousQueryService continuousQueryService,
UserPluginConfig userPluginConfig)
{
super(projectConfig, metastore, executor);
this.materializedViewService = materializedViewService;
this.continuousQueryService = continuousQueryService;
this.executorService = executorService;
this.userMappingEnabled = userPluginConfig.getEnableUserMapping();
}
@Override
public String getTemplate(List<FunnelStep> steps, Optional<String> dimension, Optional<FunnelWindow> window)
{
return "select %s get_funnel_step(steps) step, count(*) total from (\n" +
"select %s array_agg(step) as steps from (select * from (%s) order by "
+ checkTableColumn(projectConfig.getTimeColumn()) + ") t WHERE "
+ checkTableColumn(projectConfig.getTimeColumn()) + " between timestamp '%s' and timestamp '%s'\n" +
"group by %s %s\n" +
") t group by 1 %s order by 1";
}
@Override
public QueryExecution query(String project, List<FunnelStep> steps, Optional<String> dimension, LocalDate startDate, LocalDate endDate, Optional<FunnelWindow> window, ZoneId zoneId, Optional<List<String>> connectors)
{
if (dimension.isPresent() && CONNECTOR_FIELD.equals(dimension.get())) {
throw new RakamException("Dimension and connector field cannot be equal", HttpResponseStatus.BAD_REQUEST);
}
Set<CalculatedUserSet> calculatedUserSets = new HashSet<>();
String stepQueries = IntStream.range(0, steps.size())
.mapToObj(i -> convertFunnel(calculatedUserSets, project, CONNECTOR_FIELD, i, window, steps.get(i), dimension, startDate, endDate))
.collect(Collectors.joining(", "));
if (calculatedUserSets.size() == steps.size()) {
return super.query(project, steps, dimension, startDate, endDate, window, zoneId, connectors);
}
String query;
if (dimension.isPresent()) {
query = IntStream.range(0, steps.size())
.mapToObj(i -> format("(SELECT step, (CASE WHEN rank > 15 THEN 'Others' ELSE cast(dimension as varchar) END) as %s," +
" sum(count) FROM (select 'Step %d' as step, dimension, cardinality(merge_sets(%s_set)) count, row_number() OVER(ORDER BY 3 DESC) rank from " +
"step%s %s ORDER BY 4 ASC) GROUP BY 1, 2)",
dimension.get(), i + 1, CONNECTOR_FIELD, i, dimension.map(v -> "GROUP BY 2").orElse("")))
.collect(Collectors.joining(" UNION ALL ")) + " ORDER BY 1 ASC";
}
else {
query = IntStream.range(0, steps.size())
.mapToObj(i -> format("(SELECT 'Step %d' as step, coalesce(cardinality(merge_sets(%s_set)), 0) count FROM step%d)",
i + 1, CONNECTOR_FIELD, i))
.collect(Collectors.joining(" UNION ALL ")) + " ORDER BY 1 ASC";
}
return new DelegateQueryExecution(executorService.executeQuery(project, "WITH \n" + stepQueries + " " + query),
result -> {
result.setProperty("calculatedUserSets", calculatedUserSets);
return result;
});
}
private String convertFunnel(Set<CalculatedUserSet> calculatedUserSets, String project,
String connectorField,
int idx,
Optional<FunnelWindow> window,
FunnelStep funnelStep,
Optional<String> dimension,
LocalDate startDate, LocalDate endDate)
{
String timePredicate = format("BETWEEN timestamp '%s' and timestamp '%s' + interval '1' day",
startDate.format(ISO_LOCAL_DATE), endDate.format(ISO_LOCAL_DATE));
Optional<String> joinPreviousStep = idx == 0 ?
Optional.empty() :
Optional.of(format("JOIN step%d ON (step%d.date >= step%d.date %s %s)",
idx - 1, idx, idx - 1,
window.map(v -> format("AND step%d.date - interval '%d' %s < step%d.date", idx, v.value, v.type.name().toLowerCase(), idx - 1)).orElse(""),
dimension.map(value -> format("AND step%d.dimension = step%d.dimension", idx, idx - 1)).orElse("")));
Optional<String> preComputedTable = getPreComputedTable(calculatedUserSets, project, funnelStep.getCollection(), connectorField,
joinPreviousStep, timePredicate, dimension, funnelStep.getExpression(), idx);
if (preComputedTable.isPresent()) {
return format("step%s AS (%s)", idx, preComputedTable.get());
}
else {
Optional<String> filterExp = funnelStep.getExpression().map(value -> "AND " + RakamSqlFormatter.formatExpression(value,
name -> name.getParts().stream().map(e -> formatIdentifier(e, '"')).collect(Collectors.joining(".")),
name -> checkCollection(funnelStep.getCollection()) + "." + name.getParts().stream()
.map(e -> formatIdentifier(e, '"')).collect(Collectors.joining(".")), '"'));
String merged = format("step%d.date, %s intersection(merge_sets(step%d.\"%s_set\"), merge_sets(step%d.\"%s_set\")) as %s_set",
idx, dimension.map(v -> "step" + idx + ".dimension, ").orElse(""), idx, connectorField, idx - 1, connectorField, connectorField);
return format("step%d AS (select %s FROM (%s) step%d %s)",
idx, idx == 0 ? ("step" + idx + ".*") : merged,
format("SELECT cast(%s as date) as date, %s set(%s) as %s_set from %s where %s %s %s group by 1 %s",
checkTableColumn(projectConfig.getTimeColumn()),
dimension.map(value -> value + " as dimension,").orElse(""),
connectorField, connectorField,
checkCollection(funnelStep.getCollection()),
checkTableColumn(projectConfig.getTimeColumn()),
timePredicate, filterExp.orElse(""), dimension.map(value -> ", 2").orElse("")),
idx,
joinPreviousStep.map(v -> v + " GROUP BY 1" + dimension.map(val -> ", 2").orElse("")).orElse(""));
}
}
private Optional<String> getPreComputedTable(
Set<CalculatedUserSet> calculatedUserSets, String project,
String collection, String connectorField,
Optional<String> joinPart,
String timePredicate, Optional<String> dimension,
Optional<Expression> filterExpression, int stepIdx)
{
String tableNameForCollection = connectorField + "s_daily_" + collection;
if (filterExpression.isPresent()) {
try {
String query = new PreComputedTableSubQueryVisitor(columnName -> {
String tableRef = tableNameForCollection + "_by_" + columnName;
if (continuousQueryService.list(project).stream().anyMatch(e -> e.tableName.equals(tableRef))) {
return Optional.of("continuous." + checkCollection(tableRef));
}
else if (materializedViewService.list(project).stream().anyMatch(e -> e.tableName.equals(tableRef))) {
return Optional.of("materialized." + checkCollection(tableRef));
}
calculatedUserSets.add(new CalculatedUserSet(Optional.of(collection), Optional.of(columnName)));
return Optional.empty();
}).process(filterExpression.get(), false);
if (dimension.isPresent()) {
final boolean[] referenced = {false};
new DefaultExpressionTraversalVisitor<Void, Void>()
{
@Override
protected Void visitQualifiedNameReference(QualifiedNameReference node, Void context)
{
if (node.getName().toString().equals(dimension.get())) {
referenced[0] = true;
}
return null;
}
}.process(filterExpression.get(), null);
if (referenced[0]) {
return Optional.of(query);
}
String tableName = tableNameForCollection + dimension.map(value -> "_by_" + value).orElse("");
Optional<String> schema = getSchemaForPreCalculatedTable(project, connectorField, tableName, dimension);
if (!schema.isPresent()) {
calculatedUserSets.add(new CalculatedUserSet(Optional.of(collection), dimension));
return Optional.empty();
}
return Optional.of("SELECT data.date, data.dimension, data." + CONNECTOR_FIELD + "_set FROM " + schema.get() + "." + tableName + " data " +
"JOIN (" + query + ") filter ON (filter.date = data.date)");
}
else {
return Optional.of(query);
}
}
catch (UnsupportedOperationException e) {
return Optional.empty();
}
}
String refTable = dimension.map(value -> tableNameForCollection + "_by_" + value).orElse(tableNameForCollection);
Optional<String> table = getSchemaForPreCalculatedTable(project, connectorField, collection, dimension);
if (!table.isPresent()) {
calculatedUserSets.add(new CalculatedUserSet(Optional.of(collection), dimension));
}
return table
.map(value -> generatePreCalculatedTableSql(refTable, value, connectorField,
dimension, joinPart, timePredicate, stepIdx));
}
private Optional<String> getSchemaForPreCalculatedTable(String project, String connectorField, String collection, Optional<String> dimension)
{
String refTable = connectorField + "s_daily_" + collection + dimension.map(value -> "_by_" + value).orElse("");
if (continuousQueryService.list(project).stream().anyMatch(e -> e.tableName.equals(refTable))) {
return Optional.of("continuous");
}
else if (materializedViewService.list(project).stream().anyMatch(e -> e.tableName.equals(refTable))) {
return Optional.of("materialized");
}
return Optional.empty();
}
private String generatePreCalculatedTableSql(String table, String schema, String connectorField, Optional<String> dimensionColumn, Optional<String> joinPart, String timePredicate, int stepIdx)
{
return format("select step%d.date, %s step%d.%s_set from %s.%s as step%d %s where step%d.date %s",
stepIdx, dimensionColumn.map(v -> format("step%d.dimension,", stepIdx)).orElse(""),
stepIdx, connectorField, schema, table, stepIdx,
joinPart.orElse(""), stepIdx, timePredicate);
}
public String convertFunnel(String project, String connectorField, int idx, FunnelStep funnelStep, Optional<String> dimension, LocalDate startDate, LocalDate endDate)
{
Optional<String> filterExp = funnelStep.getExpression().map(value -> RakamSqlFormatter.formatExpression(value,
name -> name.getParts().stream().map(e -> formatIdentifier(e, '"')).collect(Collectors.joining(".")),
name -> formatIdentifier("step" + idx, '"') + "." + name.getParts().stream()
.map(e -> formatIdentifier(e, '"')).collect(Collectors.joining(".")), '"'));
String format = format("SELECT %s %s, %d as step, %s.%s from %s %s %s %s",
dimension.map(ValidationUtil::checkTableColumn).map(v -> "step" + idx + "." + v + ",").orElse(""),
userMappingEnabled ? format("coalesce(mapping._user, %s._user, %s) as _user", "step" + idx, format(connectorField, "step" + idx)) : ("step" + idx + "._user"),
idx + 1,
"step" + idx,
checkTableColumn(projectConfig.getTimeColumn()),
project + "." + checkCollection(funnelStep.getCollection()),
"step" + idx,
userMappingEnabled ? format("left join %s.%s mapping on (%s.%s is null and mapping.created_at >= date '%s' and mapping.merged_at <= date '%s' and mapping.id = %s.%s)",
project, checkCollection(ANONYMOUS_ID_MAPPING),
"step" + idx, checkTableColumn(projectConfig.getUserColumn()), startDate.format(ISO_LOCAL_DATE), endDate.format(ISO_LOCAL_DATE),
"step" + idx, checkTableColumn(projectConfig.getUserColumn())) : "",
filterExp.map(v -> "where " + v).orElse(""));
return format;
}
}