// SelectionOperatorService.java example
//
// Explorer
// pinot-master
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.query.selection;

import com.linkedin.pinot.common.request.Selection;
import com.linkedin.pinot.common.request.SelectionSort;
import com.linkedin.pinot.common.response.ServerInstance;
import com.linkedin.pinot.common.response.broker.SelectionResults;
import com.linkedin.pinot.common.utils.DataSchema;
import com.linkedin.pinot.common.utils.DataTable;
import com.linkedin.pinot.core.common.Block;
import com.linkedin.pinot.core.common.BlockDocIdIterator;
import com.linkedin.pinot.core.common.Constants;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.query.selection.comparator.CompositeDocIdValComparator;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import javax.annotation.Nonnull;


/**
 * The <code>SelectionOperatorService</code> class provides the services for selection queries with
 * <code>ORDER BY</code>.
 * <p>Expected behavior:
 * <ul>
 *   <li>
 *     Return selection results with the same order of columns as user passed in.
 *     <ul>
 *       <li>Eg. SELECT colB, colA, colC FROM table -> [valB, valA, valC]</li>
 *     </ul>
 *   </li>
 *   <li>
 *     For 'SELECT *', return columns in alphabetical order.
 *     <ul>
 *       <li>Eg. SELECT * FROM table -> [valA, valB, valC]</li>
 *     </ul>
 *   </li>
 *   <li>
 *     Order by does not change the order of columns in selection results.
 *     <ul>
 *       <li>Eg. SELECT colB, colA, colC FROM table ORDER BY colC -> [valB, valA, valC]</li>
 *     </ul>
 *   </li>
 * </ul>
 * <p>Implementation notes:
 * <ul>
 *   <li>
 *     Rows are kept in a {@link PriorityQueue} whose comparator is the reverse of the requested order, so the head
 *     of the queue is the row that comes last in the requested order. Rendering polls the queue and prepends each
 *     row to the result list, which restores the requested order.
 *   </li>
 *   <li>
 *     Both row comparators index the row (and the data schema) by the position of the sort column in the sort
 *     sequence, i.e. they assume the sort columns occupy the leading positions of each row.
 *   </li>
 * </ul>
 */
public class SelectionOperatorService {
  /**
   * Upper bound for the initial capacity of the priority queues. The queues grow on demand, so this only bounds the
   * up-front allocation for queries with a very large <code>offset + size</code>.
   */
  private static final int MAX_INITIAL_CAPACITY = 10000;

  private final List<String> _selectionColumns;
  private final List<SelectionSort> _sortSequence;
  private final DataSchema _dataSchema;
  private final int _selectionOffset;
  // Number of rows to keep: offset + size, saturated to the range [0, Integer.MAX_VALUE].
  private final int _maxNumRows;
  private final PriorityQueue<Serializable[]> _rows;

  private long _numDocsScanned = 0;

  /**
   * Constructor for <code>SelectionOperatorService</code> with {@link IndexSegment}. (Inner segment)
   *
   * @param selection selection query.
   * @param indexSegment index segment.
   */
  public SelectionOperatorService(@Nonnull Selection selection, @Nonnull IndexSegment indexSegment) {
    _selectionColumns = SelectionOperatorUtils.getSelectionColumns(selection.getSelectionColumns(), indexSegment);
    _sortSequence = getSortSequence(selection.getSelectionSortSequence());
    _dataSchema = SelectionOperatorUtils.extractDataSchema(_sortSequence, _selectionColumns, indexSegment);
    // Select rows from offset to offset + size.
    _selectionOffset = selection.getOffset();
    _maxNumRows = computeMaxNumRows(_selectionOffset, selection.getSize());
    _rows = new PriorityQueue<>(getInitialCapacity(_maxNumRows), getStrictComparator());
  }

  /**
   * Constructor for <code>SelectionOperatorService</code> with {@link DataSchema}. (Inter segment)
   *
   * @param selection selection query.
   * @param dataSchema data schema.
   */
  public SelectionOperatorService(@Nonnull Selection selection, @Nonnull DataSchema dataSchema) {
    _selectionColumns = SelectionOperatorUtils.getSelectionColumns(selection.getSelectionColumns(), dataSchema);
    _sortSequence = getSortSequence(selection.getSelectionSortSequence());
    _dataSchema = dataSchema;
    // Select rows from offset to offset + size.
    _selectionOffset = selection.getOffset();
    _maxNumRows = computeMaxNumRows(_selectionOffset, selection.getSize());
    _rows = new PriorityQueue<>(getInitialCapacity(_maxNumRows), getTypeCompatibleComparator());
  }

  /**
   * Helper method to compute the number of rows to keep without integer overflow.
   *
   * @param offset selection offset.
   * @param size selection size.
   * @return <code>offset + size</code>, saturated to the range [0, {@link Integer#MAX_VALUE}].
   */
  private static int computeMaxNumRows(int offset, int size) {
    // Sum as long: the int sum of two large values would silently wrap to a negative number.
    long maxNumRows = (long) offset + size;
    return (int) Math.max(0L, Math.min(maxNumRows, (long) Integer.MAX_VALUE));
  }

  /**
   * Helper method to get a valid initial capacity for a {@link PriorityQueue} holding at most
   * <code>maxNumRows</code> elements.
   * <p>{@link PriorityQueue} rejects an initial capacity smaller than 1, and allocates its backing array eagerly, so
   * the value is clamped to the range [1, {@link #MAX_INITIAL_CAPACITY}].
   *
   * @param maxNumRows maximum number of rows to keep.
   * @return initial capacity to use.
   */
  private static int getInitialCapacity(int maxNumRows) {
    return Math.max(1, Math.min(maxNumRows, MAX_INITIAL_CAPACITY));
  }

  /**
   * Helper method to handle duplicate sort columns. Only the first sort sequence of each column is kept, and the
   * original order is preserved.
   *
   * @param selectionSorts sort sequences from the selection query.
   * @return de-duplicated list of sort sequences.
   */
  @Nonnull
  private List<SelectionSort> getSortSequence(List<SelectionSort> selectionSorts) {
    List<SelectionSort> deDupedSelectionSorts = new ArrayList<>();
    Set<String> sortColumns = new HashSet<>();
    for (SelectionSort selectionSort : selectionSorts) {
      // Set.add() returns false when the column has already been seen.
      if (sortColumns.add(selectionSort.getColumn())) {
        deDupedSelectionSorts.add(selectionSort);
      }
    }
    return deDupedSelectionSorts;
  }

  /**
   * Helper method to get the strict {@link Comparator} for selection rows. (Inner segment)
   * <p>Strict comparator does not allow any schema mismatch (more performance driven): values are cast to the type
   * declared in the data schema.
   * <p>The comparator is the reverse of the requested order (see class documentation). Column types other than
   * INT, LONG, FLOAT, DOUBLE and STRING are treated as equal.
   *
   * @return strict {@link Comparator} for selection rows.
   */
  @Nonnull
  private Comparator<Serializable[]> getStrictComparator() {
    return new Comparator<Serializable[]>() {
      @Override
      public int compare(Serializable[] o1, Serializable[] o2) {
        int numSortColumns = _sortSequence.size();
        for (int i = 0; i < numSortColumns; i++) {
          // Ascending sort compares o2 against o1, descending sort compares o1 against o2.
          boolean isAsc = _sortSequence.get(i).isIsAsc();
          Serializable first = isAsc ? o2[i] : o1[i];
          Serializable second = isAsc ? o1[i] : o2[i];

          // Only compare single-value columns.
          int ret;
          switch (_dataSchema.getColumnType(i)) {
            case INT:
              ret = ((Integer) first).compareTo((Integer) second);
              break;
            case LONG:
              ret = ((Long) first).compareTo((Long) second);
              break;
            case FLOAT:
              ret = ((Float) first).compareTo((Float) second);
              break;
            case DOUBLE:
              ret = ((Double) first).compareTo((Double) second);
              break;
            case STRING:
              ret = ((String) first).compareTo((String) second);
              break;
            default:
              ret = 0;
              break;
          }

          if (ret != 0) {
            return ret;
          }
        }
        return 0;
      }
    };
  }

  /**
   * Helper method to get the type-compatible {@link Comparator} for selection rows. (Inter segment)
   * <p>Type-compatible comparator allows compatible types to compare with each other: any two {@link Number}s can
   * be compared, and {@link String}s are compared with {@link String}s. The kind of comparison is chosen from the
   * runtime type of the value in the first row; values that are neither {@link Number} nor {@link String} are
   * treated as equal.
   * <p>The comparator is the reverse of the requested order (see class documentation).
   *
   * @return flexible {@link Comparator} for selection rows.
   */
  @Nonnull
  private Comparator<Serializable[]> getTypeCompatibleComparator() {
    return new Comparator<Serializable[]>() {
      @Override
      public int compare(Serializable[] o1, Serializable[] o2) {
        int numSortColumns = _sortSequence.size();
        for (int i = 0; i < numSortColumns; i++) {
          boolean isAsc = _sortSequence.get(i).isIsAsc();
          Serializable v1 = o1[i];
          Serializable v2 = o2[i];

          // Only compare single-value columns.
          // Ascending sort compares v2 against v1, descending sort compares v1 against v2.
          int ret = 0;
          if (v1 instanceof Number) {
            if (isAsc) {
              ret = compareNumbers((Number) v2, (Number) v1);
            } else {
              ret = compareNumbers((Number) v1, (Number) v2);
            }
          } else if (v1 instanceof String) {
            if (isAsc) {
              ret = ((String) v2).compareTo((String) v1);
            } else {
              ret = ((String) v1).compareTo((String) v2);
            }
          }

          if (ret != 0) {
            return ret;
          }
        }
        return 0;
      }
    };
  }

  /**
   * Helper method to compare two numbers of possibly different types.
   * <p>When both values are integral they are compared as <code>long</code>, because converting a
   * <code>long</code> with magnitude above 2^53 to <code>double</code> loses precision and would make distinct
   * values compare as equal. Any other combination is compared as <code>double</code>.
   *
   * @param n1 first number.
   * @param n2 second number.
   * @return negative, zero or positive value if <code>n1</code> is less than, equal to or greater than
   *         <code>n2</code>.
   */
  private static int compareNumbers(Number n1, Number n2) {
    if (isIntegral(n1) && isIntegral(n2)) {
      return Long.compare(n1.longValue(), n2.longValue());
    }
    return Double.compare(n1.doubleValue(), n2.doubleValue());
  }

  /**
   * Helper method to check whether a number is of a boxed integral type whose value always fits in a
   * <code>long</code>.
   *
   * @param number number to check (may be <code>null</code>).
   * @return <code>true</code> for {@link Long}, {@link Integer}, {@link Short} and {@link Byte}.
   */
  private static boolean isIntegral(Number number) {
    return number instanceof Long || number instanceof Integer || number instanceof Short || number instanceof Byte;
  }

  /**
   * Get the {@link DataSchema}.
   *
   * @return data schema.
   */
  @Nonnull
  public DataSchema getDataSchema() {
    return _dataSchema;
  }

  /**
   * Get the selection results.
   * <p>The returned queue is the internal row holder of this service (not a copy).
   *
   * @return selection results.
   */
  @Nonnull
  public PriorityQueue<Serializable[]> getRows() {
    return _rows;
  }

  /**
   * Get number of documents scanned. (Inner segment)
   *
   * @return number of documents scanned.
   */
  public long getNumDocsScanned() {
    return _numDocsScanned;
  }

  /**
   * Iterate over {@link Block}s, extract values from them and merge the values to the selection results for selection
   * queries with <code>ORDER BY</code>. (Inner segment)
   * <p>Document ids are first collected into a bounded priority queue ordered by the sort columns; only the rows of
   * the retained document ids are then fetched and merged into the selection results.
   *
   * @param blockDocIdIterator block document id iterator.
   * @param blocks {@link Block} array.
   */
  public void iterateOnBlocksWithOrdering(@Nonnull BlockDocIdIterator blockDocIdIterator, @Nonnull Block[] blocks) {
    if (_maxNumRows <= 0) {
      // No row can be selected. Still drain the iterator so that the number of documents scanned is reported.
      while (blockDocIdIterator.next() != Constants.EOF) {
        _numDocsScanned++;
      }
      return;
    }

    Comparator<Integer> rowDocIdComparator = new CompositeDocIdValComparator(_sortSequence, blocks);
    PriorityQueue<Integer> rowDocIdPriorityQueue =
        new PriorityQueue<>(getInitialCapacity(_maxNumRows), rowDocIdComparator);
    int docId;
    while ((docId = blockDocIdIterator.next()) != Constants.EOF) {
      _numDocsScanned++;
      SelectionOperatorUtils.addToPriorityQueue(docId, rowDocIdPriorityQueue, _maxNumRows);
    }

    // Fetch the full rows only for the document ids that were retained.
    SelectionFetcher selectionFetcher = new SelectionFetcher(blocks, _dataSchema);
    Collection<Serializable[]> rows = new ArrayList<>(rowDocIdPriorityQueue.size());
    for (int rowDocId : rowDocIdPriorityQueue) {
      rows.add(selectionFetcher.getRow(rowDocId));
    }
    SelectionOperatorUtils.mergeWithOrdering(_rows, rows, _maxNumRows);
  }

  /**
   * Reduce a collection of {@link DataTable}s to selection rows for selection queries with <code>ORDER BY</code>.
   * (Broker side)
   *
   * @param selectionResults {@link Map} from {@link ServerInstance} to {@link DataTable}.
   */
  public void reduceWithOrdering(@Nonnull Map<ServerInstance, DataTable> selectionResults) {
    if (_maxNumRows <= 0) {
      // No row can be selected, nothing to reduce.
      return;
    }

    for (DataTable dataTable : selectionResults.values()) {
      int numRows = dataTable.getNumberOfRows();
      for (int rowId = 0; rowId < numRows; rowId++) {
        Serializable[] row = SelectionOperatorUtils.extractRowFromDataTable(dataTable, rowId);
        SelectionOperatorUtils.addToPriorityQueue(row, _rows, _maxNumRows);
      }
    }
  }

  /**
   * Render the unformatted selection rows to a formatted {@link SelectionResults} object for selection queries with
   * <code>ORDER BY</code>. (Broker side)
   * <p>{@link SelectionResults} object will be used to build the broker response.
   * <p>Should be called after method "reduceWithOrdering()".
   * <p>This method consumes the internal row holder: rows are polled from it until only <code>offset</code> rows
   * (the ones skipped by the offset) remain.
   *
   * @return {@link SelectionResults} object results.
   */
  @Nonnull
  public SelectionResults renderSelectionResultsWithOrdering() {
    LinkedList<Serializable[]> rowsInSelectionResults = new LinkedList<>();

    int[] columnIndices = getColumnIndices();
    // The queue head is the last row in the requested order, so prepend each polled row.
    while (_rows.size() > _selectionOffset) {
      rowsInSelectionResults.addFirst(getFormattedRowWithOrdering(_rows.poll(), columnIndices));
    }

    return new SelectionResults(_selectionColumns, rowsInSelectionResults);
  }

  /**
   * Helper method to get each selection column index in data schema.
   *
   * @return column indices, in the order of the selection columns.
   * @throws IllegalStateException if a selection column does not exist in the data schema.
   */
  private int[] getColumnIndices() {
    int numSelectionColumns = _selectionColumns.size();
    int[] columnIndices = new int[numSelectionColumns];

    int numColumnsInDataSchema = _dataSchema.size();
    Map<String, Integer> dataSchemaIndices = new HashMap<>(numColumnsInDataSchema);
    for (int i = 0; i < numColumnsInDataSchema; i++) {
      dataSchemaIndices.put(_dataSchema.getColumnName(i), i);
    }

    for (int i = 0; i < numSelectionColumns; i++) {
      String selectionColumn = _selectionColumns.get(i);
      Integer columnIndex = dataSchemaIndices.get(selectionColumn);
      if (columnIndex == null) {
        // Fail with the column name instead of an anonymous NullPointerException from auto-unboxing.
        throw new IllegalStateException(
            "Selection column: " + selectionColumn + " does not exist in the data schema");
      }
      columnIndices[i] = columnIndex;
    }

    return columnIndices;
  }

  /**
   * Helper method to format a selection row, make all values string or string array type based on data schema passed in
   * for selection queries with <code>ORDER BY</code>. (Broker side)
   * <p>Formatted row is used to build the {@link SelectionResults}.
   *
   * @param row selection row to be formatted.
   * @param columnIndices column indices of original rows.
   * @return formatted selection row, with values in the order of <code>columnIndices</code>.
   */
  @Nonnull
  private Serializable[] getFormattedRowWithOrdering(@Nonnull Serializable[] row, @Nonnull int[] columnIndices) {
    int numColumns = columnIndices.length;
    Serializable[] formattedRow = new Serializable[numColumns];
    for (int i = 0; i < numColumns; i++) {
      int columnIndex = columnIndices[i];
      formattedRow[i] =
          SelectionOperatorUtils.getFormattedValue(row[columnIndex], _dataSchema.getColumnType(columnIndex));
    }
    return formattedRow;
  }
}