Aggregation.java example

Explorer
pinot-master
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.tools.scan.query;

import com.linkedin.pinot.common.request.AggregationInfo;
import com.linkedin.pinot.core.query.utils.Pair;
import com.linkedin.pinot.core.segment.index.IndexSegmentImpl;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.segment.index.readers.Dictionary;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;


/**
 * Evaluates aggregation functions, optionally grouped by one or more columns, over a {@link ResultTable}.
 *
 * <p>An instance can be built in two ways:
 * <ul>
 *   <li>with a segment, its metadata and a list of filtered doc ids, in which case {@link #run()} first projects the
 *       required columns and then aggregates them;</li>
 *   <li>with only the aggregation/group-by description, in which case the segment related fields stay {@code null}.
 *       NOTE(review): this form is presumably meant for callers that only invoke {@link #aggregate(ResultTable)} on
 *       an already projected table; {@link #run()} would hand {@code null}s to {@code Projection} - confirm.</li>
 * </ul>
 *
 * <p>Column layout of the produced tables: group-by columns first, then one column per (column, function) pair in
 * request order, and finally {@code count(*)} if any aggregation referred to {@code *} (or to an empty/null column).
 */
public class Aggregation {
  // Column name used to represent count(*) in the column/function lists.
  private static final String STAR_COLUMN = "*";
  // Function name paired with STAR_COLUMN.
  private static final String COUNT_FUNCTION = "count";

  private boolean _addCountStar;
  private IndexSegmentImpl _indexSegment;
  private SegmentMetadataImpl _metadata;
  private List<Integer> _filteredDocIds;
  private List<AggregationInfo> _aggregationsInfo;
  private List<String> _groupByColumns;
  // (column, lower-cased function) pairs to evaluate; count(*) is last when present.
  private List<Pair> _columnFunctionList;
  private Map<String, Dictionary> _dictionaryMap;
  // Columns that have to be read from the segment: group-by columns followed by the non-star aggregation columns.
  private List<Pair> _projectionColumns;
  // Columns of the result tables: the projection columns plus a trailing count(*) when requested.
  private List<Pair> _allColumns;
  // Stored from the constructor argument; not read anywhere in this class.
  private long _topN = 10;

  /**
   * Derives the column/function, projection and result column lists from {@code _aggregationsInfo} and the given
   * group-by columns. {@code _aggregationsInfo} must be set before this is called.
   *
   * @param groupByColumns group-by column names, or {@code null} when the query has no group-by
   */
  private void init(List<String> groupByColumns) {
    _groupByColumns = groupByColumns;
    _columnFunctionList = new ArrayList<>();
    _addCountStar = false;

    for (AggregationInfo aggregationInfo : _aggregationsInfo) {
      Map<String, String> aggregationParams = aggregationInfo.getAggregationParams();
      for (Map.Entry<String, String> entry : aggregationParams.entrySet()) {
        String column = entry.getValue();
        // Apparently in case of multiple group by's '*' is replaced by empty/null in brokerRequest.
        if (column == null || column.isEmpty() || column.equals(STAR_COLUMN)) {
          _addCountStar = true;
          continue;
        }
        // Function names are identifiers, not natural-language text: lower-case them with Locale.ROOT so the
        // result does not depend on the JVM default locale (e.g. Turkish maps 'I' to a dotless 'i').
        String function = aggregationInfo.getAggregationType().toLowerCase(Locale.ROOT);
        _columnFunctionList.add(new Pair(column, function));
      }
    }

    // Count star is appended at the end of the result table.
    if (_addCountStar) {
      _columnFunctionList.add(new Pair(STAR_COLUMN, COUNT_FUNCTION));
    }

    _projectionColumns = new ArrayList<>();
    _allColumns = new ArrayList<>();

    if (_groupByColumns != null) {
      for (String column : _groupByColumns) {
        _projectionColumns.add(new Pair(column, null));
        _allColumns.add(new Pair(column, null));
      }
    }

    // count(*) needs no column from the segment, so it is left out of the projection here.
    for (Pair pair : _columnFunctionList) {
      String column = (String) pair.getFirst();
      if (!column.equals(STAR_COLUMN)) {
        _projectionColumns.add(pair);
        _allColumns.add(pair);
      }
    }

    // Count star is always the last column.
    if (_addCountStar) {
      _allColumns.add(new Pair(STAR_COLUMN, COUNT_FUNCTION));
    }
  }

  /**
   * Creates an aggregation that is not bound to a segment.
   *
   * @param aggregationsInfo aggregation functions and their parameters
   * @param groupByColumns group-by column names, or {@code null} for a plain aggregation
   * @param topN stored in {@code _topN}; not otherwise used by this class
   */
  public Aggregation(List<AggregationInfo> aggregationsInfo, List<String> groupByColumns, long topN) {
    _aggregationsInfo = aggregationsInfo;
    _topN = topN;
    init(groupByColumns);
  }

  /**
   * Creates an aggregation bound to a segment, and looks up the dictionary of every projected column.
   *
   * @param indexSegment segment to read from
   * @param metadata metadata of {@code indexSegment}
   * @param filteredDocIds doc ids handed to the projection in {@link #run()}
   * @param aggregationsInfo aggregation functions and their parameters
   * @param groupByColumns group-by column names, or {@code null} for a plain aggregation
   * @param topN stored in {@code _topN}; not otherwise used by this class
   */
  public Aggregation(IndexSegmentImpl indexSegment, SegmentMetadataImpl metadata, List<Integer> filteredDocIds,
      List<AggregationInfo> aggregationsInfo, List<String> groupByColumns, long topN) {
    _indexSegment = indexSegment;
    _metadata = metadata;
    _dictionaryMap = new HashMap<>();

    _filteredDocIds = filteredDocIds;
    _aggregationsInfo = aggregationsInfo;
    _topN = topN;
    init(groupByColumns);

    for (Pair pair : _projectionColumns) {
      String column = (String) pair.getFirst();
      _dictionaryMap.put(column, _indexSegment.getDictionaryFor(column));
    }
  }

  /**
   * Projects the required columns for the filtered doc ids and aggregates the projected table.
   *
   * @return the result of {@link #aggregate(ResultTable)} applied to the projection output
   */
  public ResultTable run() {
    Projection projection = new Projection(_indexSegment, _metadata, _filteredDocIds, _projectionColumns,
        _dictionaryMap, _addCountStar);
    return aggregate(projection.run());
  }

  /**
   * Aggregates the given table.
   *
   * <p>Without group-by columns the whole input is aggregated by {@code aggregateOne}. With group-by columns the
   * input rows are first partitioned by their group-by values, each partition is aggregated separately, and the
   * per-group rows (group-by values followed by the aggregated values) are collected into one table whose result
   * type is {@code AggregationGroupBy}.
   *
   * @param input table to aggregate
   * @return the aggregated table
   */
  public ResultTable aggregate(ResultTable input) {
    if (_groupByColumns == null) {
      return aggregateOne(input);
    }

    Map<GroupByOperator, ResultTable> resultsMap = new HashMap<>();
    for (ResultTable.Row row : input) {
      // For MV Columns we enumerate each value as a separate group, instead of all values of the column in one group.
      for (List<Object> groupByValues : enumerateGroups(row)) {
        GroupByOperator groupByOperator = new GroupByOperator(groupByValues);

        // The map never holds null values, so a single lookup is enough to tell whether the group exists.
        ResultTable resultTable = resultsMap.get(groupByOperator);
        if (resultTable == null) {
          resultTable = new ResultTable(_allColumns, 0);
          resultsMap.put(groupByOperator, resultTable);
        }

        resultTable.append(row);
      }
    }

    ResultTable results = new ResultTable(_allColumns, 0);

    for (Map.Entry<GroupByOperator, ResultTable> entry : resultsMap.entrySet()) {
      GroupByOperator groupByOperator = entry.getKey();
      ResultTable groupByTable = entry.getValue();
      ResultTable aggregationResult = new ResultTable(_allColumns, 1);

      // Group-by values come first in the row, matching the order of _allColumns.
      for (Object groupByColumn : groupByOperator._getGroupBys()) {
        aggregationResult.add(0, groupByColumn);
      }

      // Every table in the map had at least one row appended above, so aggregateOne yields a row here.
      ResultTable.Row row = aggregateOne(groupByTable).getRow(0);
      for (Object value : row) {
        aggregationResult.add(0, value);
      }
      results.append(aggregationResult);
    }

    results.setResultType(ResultTable.ResultType.AggregationGroupBy);
    return results;
  }

  /**
   * Lists every group a row belongs to.
   *
   * <p>For each group-by column the row value is read; an {@code Object[]} value contributes each of its elements,
   * any other value contributes itself. The groups are the cartesian product of these per-column value lists, as
   * computed by {@code Utils.cartesianProduct}.
   *
   * @param row row to classify
   * @return one list of group-by values per group
   */
  private List<List<Object>> enumerateGroups(ResultTable.Row row) {
    List<List<Object>> groups = new ArrayList<>();

    for (String groupByColumn : _groupByColumns) {
      Object value = row.get(groupByColumn, null);
      if (value instanceof Object[]) {
        groups.add(Arrays.asList((Object[]) value));
      } else {
        groups.add(Arrays.asList(value));
      }
    }
    return Utils.cartesianProduct(groups);
  }

  /**
   * Evaluates every (column, function) pair over the whole input.
   *
   * @param input table to aggregate as a single group
   * @return a zero-row table when {@code input} is empty; otherwise a table of result type {@code Aggregation} to
   *     which one value per (column, function) pair has been added at row index 0
   */
  private ResultTable aggregateOne(ResultTable input) {
    // Check for empty input first so that no result table is built just to be thrown away.
    if (input.isEmpty()) {
      return new ResultTable(_allColumns, 0);
    }

    ResultTable results = new ResultTable(_allColumns, 1);
    results.setResultType(ResultTable.ResultType.Aggregation);

    for (Pair pair : _columnFunctionList) {
      String column = (String) pair.getFirst();
      String function = (String) pair.getSecond();

      AggregationFunc aggregationFunc =
          AggregationFuncFactory.getAggregationFunc(input, column, function);
      ResultTable aggregationResult = aggregationFunc.run();
      results.add(0, aggregationResult.get(0, 0));
    }

    return results;
  }
}