package org.rakam.report.eventexplorer; import com.facebook.presto.sql.parser.SqlParser; import com.facebook.presto.sql.tree.DefaultExpressionTraversalVisitor; import com.facebook.presto.sql.tree.Expression; import com.facebook.presto.sql.tree.QualifiedNameReference; import io.airlift.log.Logger; import org.rakam.analysis.ContinuousQueryService; import org.rakam.analysis.EventExplorer; import org.rakam.analysis.EventExplorerListener; import org.rakam.analysis.MaterializedViewService; import org.rakam.analysis.metadata.Metastore; import org.rakam.config.ProjectConfig; import org.rakam.plugin.MaterializedView; import org.rakam.plugin.SystemEvents; import org.rakam.report.DelegateQueryExecution; import org.rakam.report.QueryExecution; import org.rakam.report.QueryExecutorService; import org.rakam.report.QueryResult; import org.rakam.report.realtime.AggregationType; import org.rakam.util.JsonHelper; import org.rakam.util.MaterializedViewNotExists; import org.rakam.util.RakamException; import java.time.Instant; import java.time.ZoneOffset; import java.time.temporal.ChronoUnit; import java.util.AbstractMap; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST; import static java.lang.String.format; import static java.time.temporal.ChronoUnit.DAYS; import static org.rakam.analysis.EventExplorer.ReferenceType.COLUMN; import static org.rakam.analysis.EventExplorer.ReferenceType.REFERENCE; import static org.rakam.analysis.EventExplorer.TimestampTransformation.HOUR; import static org.rakam.analysis.EventExplorer.TimestampTransformation.fromString; import static org.rakam.collection.SchemaField.stripName; import static org.rakam.report.realtime.AggregationType.COUNT; import static org.rakam.util.DateTimeUtils.TIMESTAMP_FORMATTER; import static org.rakam.util.ValidationUtil.checkCollection; import static org.rakam.util.ValidationUtil.checkLiteral; import static org.rakam.util.ValidationUtil.checkProject; import static org.rakam.util.ValidationUtil.checkTableColumn; public abstract class AbstractEventExplorer implements EventExplorer { private final static Logger LOGGER = Logger.get(AbstractEventExplorer.class); protected final static String TIME_INTERVAL_ERROR_MESSAGE = "Date interval is too big. Please narrow the date range or use different date dimension."; protected final Reference DEFAULT_SEGMENT = new Reference(COLUMN, "_collection"); protected static SqlParser sqlParser = new SqlParser(); private final QueryExecutorService executor; private final Map<TimestampTransformation, String> timestampMapping; private final MaterializedViewService materializedViewService; private final ContinuousQueryService continuousQueryService; private final ProjectConfig projectConfig; private final Metastore metastore; public AbstractEventExplorer( ProjectConfig projectConfig, QueryExecutorService executor, Metastore metastore, MaterializedViewService materializedViewService, ContinuousQueryService continuousQueryService, Map<TimestampTransformation, String> timestampMapping) { this.projectConfig = projectConfig; this.executor = executor; this.metastore = metastore; this.timestampMapping = timestampMapping; this.materializedViewService = materializedViewService; this.continuousQueryService = continuousQueryService; } public static void checkReference(String refValue, Instant startDate, Instant endDate, int size) { switch (fromString(refValue.replace(" ", "_"))) { case HOUR_OF_DAY: case DAY_OF_MONTH: case WEEK_OF_YEAR: case MONTH_OF_YEAR: case QUARTER_OF_YEAR: case DAY_OF_WEEK: return; case HOUR: if (startDate.atZone(ZoneOffset.UTC).until(endDate.atZone(ZoneOffset.UTC), ChronoUnit.HOURS) > 30000 / size) { throw new RakamException(TIME_INTERVAL_ERROR_MESSAGE, BAD_REQUEST); } break; case DAY: if (startDate.atZone(ZoneOffset.UTC).until(endDate.atZone(ZoneOffset.UTC), DAYS) > 30000 / size) { throw new RakamException(TIME_INTERVAL_ERROR_MESSAGE, BAD_REQUEST); } break; case MONTH: if (startDate.atZone(ZoneOffset.UTC).until(endDate.atZone(ZoneOffset.UTC), ChronoUnit.MONTHS) > 30000 / size) { throw new RakamException(TIME_INTERVAL_ERROR_MESSAGE, BAD_REQUEST); } break; case YEAR: if (startDate.atZone(ZoneOffset.UTC).until(endDate.atZone(ZoneOffset.UTC), ChronoUnit.YEARS) > 30000 / size) { throw new RakamException(TIME_INTERVAL_ERROR_MESSAGE, BAD_REQUEST); } break; } } public String getColumnValue(Map<TimestampTransformation, String> timestampMapping, Reference ref, boolean format) { switch (ref.type) { case COLUMN: return format ? checkTableColumn(ref.value) : ref.value; case REFERENCE: return format(timestampMapping.get(fromString(ref.value.replace(" ", "_"))), projectConfig.getTimeColumn()); default: throw new IllegalArgumentException("Unknown reference type: " + ref.value); } } public String getColumnReference(Reference ref) { switch (ref.type) { case COLUMN: return ref.value; case REFERENCE: return projectConfig.getTimeColumn(); default: throw new IllegalArgumentException("Unknown reference type: " + ref.value); } } private Optional<AggregationType> getIntermediateAggregation(AggregationType aggregationType) { switch (aggregationType) { case COUNT: case SUM: return Optional.of(AggregationType.SUM); case MINIMUM: return Optional.of(AggregationType.MINIMUM); case MAXIMUM: return Optional.of(AggregationType.MAXIMUM); default: return Optional.empty(); } } @Override public QueryExecution analyze( String project, List<String> collections, Measure measure, Reference grouping, Reference segmentValue2, String filterExpression, Instant startDate, Instant endDate) { Reference segment = segmentValue2 == null ? DEFAULT_SEGMENT : segmentValue2; if (grouping != null && grouping.type == REFERENCE) { checkReference(grouping.value, startDate, endDate, collections.size()); } if (segment != null && segment.type == REFERENCE) { checkReference(segment.value, startDate, endDate, collections.size()); } Predicate<OLAPTable> groupedMetricsPredicate = options -> { Expression filterExp; if (filterExpression != null) { synchronized (sqlParser) { filterExp = sqlParser.createExpression(filterExpression); } } else { filterExp = null; } if (options.collections.containsAll(collections)) { if (options.aggregations.contains(measure.aggregation) && measure.column != null && options.measures.contains(measure.column) && (grouping == null || (grouping.type == REFERENCE || (grouping.type == COLUMN && options.dimensions.contains(grouping.value)))) && (segment == null || (segment.value.equals("_collection") && segment.type == COLUMN && options.collections.size() == 1) || (segment.type == REFERENCE || (segment.type == COLUMN && options.dimensions.contains(segment.value)))) && (filterExp == null || testFilterExpressionForPerComputedTable(filterExp, options))) { return true; } } return false; }; Optional<Map.Entry<OLAPTable, String>> preComputedTable = materializedViewService.list(project).stream() .filter(view -> view.options != null && view.options.containsKey("olap_table")) .map(view -> JsonHelper.convert(view.options.get("olap_table"), OLAPTable.class)) .filter(table -> groupedMetricsPredicate.test(table)).findAny() .map(view -> new AbstractMap.SimpleImmutableEntry<>(view, "materialized." + checkCollection(view.tableName))); if (!preComputedTable.isPresent()) { preComputedTable = continuousQueryService.list(project).stream() .filter(view -> view.options != null && view.options.containsKey("olap_table")) .map(view -> JsonHelper.convert(view.options.get("olap_table"), OLAPTable.class)) .filter(table -> groupedMetricsPredicate.test(table)).findAny() .map(view -> new AbstractMap.SimpleImmutableEntry<>(view, "continuous." + checkCollection(view.tableName))); } String timeFilter = format(" %s between timestamp '%s' and timestamp '%s' + interval '1' day", checkTableColumn(projectConfig.getTimeColumn()), TIMESTAMP_FORMATTER.format(startDate), TIMESTAMP_FORMATTER.format(endDate)); String groupBy; boolean bothActive = segment != null && grouping != null; if (bothActive) { groupBy = "GROUP BY 1, 2"; } else if (segment != null || grouping != null) { groupBy = "GROUP BY 1"; } else { groupBy = ""; } String computeQuery; if (preComputedTable.isPresent()) { String filters = preComputedTable.get().getKey().dimensions.stream() .filter(dim -> (grouping == null || grouping.type == REFERENCE && !grouping.value.equals(dim) && (segment == null || segment.type == REFERENCE && !segment.value.equals(dim)))) .map(dim -> format("%s is null", dim)) .collect(Collectors.joining(" and ")); computeQuery = format("SELECT %s %s %s as value FROM %s WHERE %s %s", grouping != null ? (getColumnValue(timestampMapping, grouping, true) + " as " + checkTableColumn(getColumnReference(grouping) + "_group") + " ,") : "", segment != null ? (getColumnValue(timestampMapping, segment, true) + " as " + checkTableColumn(getColumnReference(segment) + "_segment") + " ,") : "", format(getFinalForAggregationFunction(measure), measure.column + "_" + measure.aggregation.name().toLowerCase()), preComputedTable.get().getValue(), Stream.of( collections.size() > 1 ? format("collection IN (%s)", collections.stream().map(c -> "'" + c + "'").collect(Collectors.joining(","))) : "", filters, filterExpression, timeFilter ).filter(e -> e != null && !e.isEmpty()).collect(Collectors.joining(" AND ")), groupBy); } else { String where = timeFilter + (filterExpression == null ? "" : (" AND " + filterExpression)); String measureAgg = convertSqlFunction(measure != null && measure.aggregation != null ? measure.aggregation : COUNT); String measureColumn = measure != null && measure.column != null ? checkTableColumn(measure.column) : "1"; if (collections.size() == 1) { String select = generateComputeQuery(grouping, segment, collections.get(0)); computeQuery = format("select %s %s as value from %s where %s %s", select.isEmpty() ? select : select + ",", format(measureAgg, measureColumn), checkCollection(collections.get(0)), where, groupBy); } else { String selectPart = (grouping == null ? "" : checkTableColumn(getColumnReference(grouping) + "_group")) + (grouping == null ? "" : ", ") + checkTableColumn(getColumnReference(segment) + "_segment"); String queries = collections.size() == 1 ? collections.get(0) : collections.stream() .map(collection -> { String select = generateComputeQuery(grouping, segment, collection); String format = format("select %s %s from %s where %s", select.isEmpty() ? select : select + ",", measureColumn, checkCollection(collection), where); return format; }) .collect(Collectors.joining(" union all ")); computeQuery = format("select %s %s as value from (%s) as data %s", selectPart.isEmpty() ? "" : selectPart + ",", format(measureAgg, measureColumn), queries, groupBy); } } String query = null; Optional<AggregationType> intermediateAggregation = getIntermediateAggregation(measure.aggregation); if (intermediateAggregation.isPresent()) { if (grouping != null && grouping.type == COLUMN && segment.type == COLUMN) { query = format(" SELECT " + " CASE WHEN group_rank > 15 THEN 'Others' ELSE cast(%s as varchar) END,\n" + " CASE WHEN segment_rank > 20 THEN 'Others' ELSE cast(%s as varchar) END,\n" + " %s FROM (\n" + " SELECT *,\n" + " row_number() OVER (ORDER BY %s DESC) AS group_rank,\n" + " row_number() OVER (PARTITION BY %s ORDER BY value DESC) AS segment_rank\n" + " FROM (%s) as data GROUP BY 1, 2, 3) as data GROUP BY 1, 2 ORDER BY 3 DESC", checkTableColumn(getColumnReference(grouping) + "_group"), checkTableColumn(getColumnReference(segment) + "_segment"), format(convertSqlFunction(intermediateAggregation.get(), measure.aggregation), "value"), format(convertSqlFunction(intermediateAggregation.get(), measure.aggregation), "value"), checkCollection(format(getColumnReference(grouping), "value") + "_group"), computeQuery); } else { if ((grouping != null && grouping.type == COLUMN) || (segment != null && segment.type == COLUMN)) { String windowColumn = checkTableColumn(getColumnValue(timestampMapping, (grouping != null && grouping.type == COLUMN) ? grouping : segment, false) + ((grouping != null && grouping.type == COLUMN) ? "_group" : "_segment")); query = format(" SELECT " + " %s CASE WHEN group_rank > 50 THEN 'Others' ELSE CAST(%s as varchar) END, %s FROM (\n" + " SELECT *, row_number() OVER (ORDER BY %s DESC) AS group_rank\n" + " FROM (%s) as data GROUP BY 1, 2 %s) as data GROUP BY 1 %s ORDER BY %d DESC", bothActive ? checkTableColumn(getColumnReference(grouping.type == COLUMN ? segment : grouping) + (grouping.type == COLUMN ? "_segment" : "_group")) + ", " : "", windowColumn, format(convertSqlFunction(intermediateAggregation.get(), measure.aggregation), "value"), format(convertSqlFunction(intermediateAggregation.get(), measure.aggregation), "value"), computeQuery, bothActive ? ", 3" : "", bothActive ? ", 2" : "", bothActive ? 3 : 2); } else { query = computeQuery + " ORDER BY 1 DESC"; } } } if (query == null) { query = format("select %s %s %s value from (%s) data ORDER BY %s DESC", grouping == null ? "" : format(grouping.type == COLUMN ? "cast(" + checkTableColumn("%s_group") + " as varchar)" : checkTableColumn("%s_group"), getColumnReference(grouping)), segment == null ? "" : ((grouping == null ? "" : ",") + format(segment.type == COLUMN ? "cast(" + checkTableColumn("%s_segment") + " as varchar)" : checkTableColumn("%s_segment"), getColumnReference(segment))), grouping != null || segment != null ? "," : "", computeQuery, bothActive ? 3 : 2); } String table = preComputedTable.map(e -> e.getValue()).orElse(null); return new DelegateQueryExecution(executor.executeQuery(project, query), result -> { if (table != null) { result.setProperty("olapTable", table); } if (result.isFailed()) { LOGGER.error(new RuntimeException(result.getError().toString()), "Error while running event explorer query"); } return result; }); } protected String generateComputeQuery(Reference grouping, Reference segment, String collection) { StringBuilder selectBuilder = new StringBuilder(); if (grouping != null) { selectBuilder.append(getColumnValue(timestampMapping, grouping, true) + " as " + checkTableColumn(getColumnReference(grouping) + "_group")); if (segment != null) { selectBuilder.append(", "); } } if (segment != null) { selectBuilder.append((!segment.equals(DEFAULT_SEGMENT) ? getColumnValue(timestampMapping, segment, true) : "'" + stripName(collection, "collection") + "'") + " as " + checkTableColumn(getColumnReference(segment) + "_segment")); } return selectBuilder.toString(); } private boolean testFilterExpressionForPerComputedTable(Expression filterExp, OLAPTable options) { final boolean[] columnExists = {true}; new DefaultExpressionTraversalVisitor<Void, Void>() { @Override protected Void visitQualifiedNameReference(QualifiedNameReference node, Void context) { if (node.getName().getParts().size() != 1) { columnExists[0] = false; } if (!options.dimensions.contains(node.getName().getParts().get(0))) { columnExists[0] = false; } return null; } }.process(filterExp, null); return columnExists[0]; } private String getFinalForAggregationFunction(Measure aggregation) { switch (aggregation.aggregation) { case AVERAGE: return "cast(sum(%1$s) as double) / count(%1$s)"; case MAXIMUM: return "max(%s)"; case MINIMUM: return "min(%s)"; case COUNT: return "count(%s)"; case SUM: return "sum(%s)"; case COUNT_UNIQUE: throw new UnsupportedOperationException(); case APPROXIMATE_UNIQUE: return getFinalForApproximateUniqueFunction(); default: throw new IllegalArgumentException("aggregation type is not supported"); } } @Override public CompletableFuture<QueryResult> getEventStatistics(String project, Optional<Set<String>> collections, Optional<String> dimension, Instant startDate, Instant endDate) { checkProject(project); if (collections.isPresent() && collections.get().isEmpty()) { return CompletableFuture.completedFuture(QueryResult.empty()); } if (dimension.isPresent()) { checkReference(dimension.get(), startDate, endDate, collections.map(v -> v.size()).orElse(10)); } String timePredicate = format("%s between timestamp '%s' and timestamp '%s' + interval '1' day", checkTableColumn(projectConfig.getTimeColumn()), TIMESTAMP_FORMATTER.format(startDate), TIMESTAMP_FORMATTER.format(endDate)); String query; if (dimension.isPresent()) { Optional<TimestampTransformation> aggregationMethod = TimestampTransformation.fromPrettyName(dimension.get()); if (!aggregationMethod.isPresent()) { throw new RakamException(BAD_REQUEST); } query = format("select collection, %s as %s, cast(sum(total) as bigint) from (%s) data where %s group by 1, 2 order by 2 desc", aggregationMethod.get() == HOUR ? projectConfig.getTimeColumn() : format(timestampMapping.get(aggregationMethod.get()), projectConfig.getTimeColumn()), aggregationMethod.get(), sourceTable(project, collections), timePredicate); } else { query = format("select collection, cast(coalesce(sum(total), 0) as bigint) as total \n" + " from (%s) data where %s group by 1", sourceTable(project, collections), timePredicate); } QueryExecution collection; try { collection = executor.executeQuery(project, query, Optional.empty(), "collection", 20000); } catch (MaterializedViewNotExists e) { List<MaterializedView> views = materializedViewService.list(project); EventExplorerListener eventExplorerListener = new EventExplorerListener(projectConfig, materializedViewService); collections.orElseGet(() -> metastore.getCollectionNames(project)) .stream() .filter(c -> !views.stream().anyMatch(t -> t.tableName.equals(c))) .forEach(c -> eventExplorerListener.createTable(project, c)); collection = executor.executeQuery(project, query, Optional.empty(), "collection", 20000); } collection.getResult().thenAccept(result -> { if (result.isFailed()) { LOGGER.error(new RuntimeException(result.getError().toString()), "An error occurred while executing event explorer statistics query."); } }); return collection.getResult(); } public String sourceTable(String project, Optional<Set<String>> collections) { String collect = collections.orElseGet(() -> metastore.getCollectionNames(project)) .stream() .map(c -> format("select _time, total, cast('%s' as varchar) as collection from materialized.%s", checkLiteral(c), checkCollection("_event_explorer_metrics - " + c))) .collect(Collectors.joining(" union all ")); if(collect.isEmpty()) { return "select now() as _time, 0 as total, cast(null as varchar) as collection"; } return collect; } // public String sourceTable(Optional<Set<String>> collections) { // return "continuous._event_explorer_metrics"; // } @Override public Map<String, List<String>> getExtraDimensions(String project) { Map<String, List<String>> builder = new HashMap<>(); for (TimestampTransformation transformation : timestampMapping.keySet()) { builder.computeIfAbsent(transformation.getCategory(), k -> new ArrayList<>()) .add(transformation.getPrettyName()); } return builder; } public abstract String convertSqlFunction(AggregationType aggType); public String convertSqlFunction(AggregationType intermediate, AggregationType main) { return convertSqlFunction(intermediate); } }