/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.query.selection; import com.linkedin.pinot.common.request.Selection; import com.linkedin.pinot.common.request.SelectionSort; import com.linkedin.pinot.common.response.ServerInstance; import com.linkedin.pinot.common.response.broker.SelectionResults; import com.linkedin.pinot.common.utils.DataSchema; import com.linkedin.pinot.common.utils.DataTable; import com.linkedin.pinot.core.common.Block; import com.linkedin.pinot.core.common.BlockDocIdIterator; import com.linkedin.pinot.core.common.Constants; import com.linkedin.pinot.core.indexsegment.IndexSegment; import com.linkedin.pinot.core.query.selection.comparator.CompositeDocIdValComparator; import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.PriorityQueue; import java.util.Set; import javax.annotation.Nonnull; /** * The <code>SelectionOperatorService</code> class provides the services for selection queries with * <code>ORDER BY</code>. * <p>Expected behavior: * <ul> * <li> * Return selection results with the same order of columns as user passed in. * <ul> * <li>Eg. SELECT colB, colA, colC FROM table -> [valB, valA, valC]</li> * </ul> * </li> * <li> * For 'SELECT *', return columns with alphabetically order. * <ul> * <li>Eg. SELECT * FROM table -> [valA, valB, valC]</li> * </ul> * </li> * <li> * Order by does not change the order of columns in selection results. * <ul> * <li>Eg. SELECT colB, colA, colC FROM table ORDER BY calC -> [valB, valA, valC]</li> * </ul> * </li> * </ul> */ public class SelectionOperatorService { private final List<String> _selectionColumns; private final List<SelectionSort> _sortSequence; private final DataSchema _dataSchema; private final int _selectionOffset; private final int _maxNumRows; private final PriorityQueue<Serializable[]> _rows; private long _numDocsScanned = 0; /** * Constructor for <code>SelectionOperatorService</code> with {@link IndexSegment}. (Inner segment) * * @param selection selection query. * @param indexSegment index segment. */ public SelectionOperatorService(@Nonnull Selection selection, @Nonnull IndexSegment indexSegment) { _selectionColumns = SelectionOperatorUtils.getSelectionColumns(selection.getSelectionColumns(), indexSegment); _sortSequence = getSortSequence(selection.getSelectionSortSequence()); _dataSchema = SelectionOperatorUtils.extractDataSchema(_sortSequence, _selectionColumns, indexSegment); // Select rows from offset to offset + size. _selectionOffset = selection.getOffset(); _maxNumRows = _selectionOffset + selection.getSize(); _rows = new PriorityQueue<>(_maxNumRows, getStrictComparator()); } /** * Constructor for <code>SelectionOperatorService</code> with {@link DataSchema}. (Inter segment) * * @param selection selection query. * @param dataSchema data schema. */ public SelectionOperatorService(@Nonnull Selection selection, @Nonnull DataSchema dataSchema) { _selectionColumns = SelectionOperatorUtils.getSelectionColumns(selection.getSelectionColumns(), dataSchema); _sortSequence = getSortSequence(selection.getSelectionSortSequence()); _dataSchema = dataSchema; // Select rows from offset to offset + size. _selectionOffset = selection.getOffset(); _maxNumRows = _selectionOffset + selection.getSize(); _rows = new PriorityQueue<>(_maxNumRows, getTypeCompatibleComparator()); } /** * Helper method to handle duplicate sort columns. * * @return de-duplicated list of sort sequences. */ @Nonnull private List<SelectionSort> getSortSequence(List<SelectionSort> selectionSorts) { List<SelectionSort> deDupedSelectionSorts = new ArrayList<>(); Set<String> sortColumns = new HashSet<>(); for (SelectionSort selectionSort : selectionSorts) { String sortColumn = selectionSort.getColumn(); if (!sortColumns.contains(sortColumn)) { deDupedSelectionSorts.add(selectionSort); sortColumns.add(sortColumn); } } return deDupedSelectionSorts; } /** * Helper method to get the strict {@link Comparator} for selection rows. (Inner segment) * <p>Strict comparator does not allow any schema mismatch (more performance driven). * * @return strict {@link Comparator} for selection rows. */ @Nonnull private Comparator<Serializable[]> getStrictComparator() { return new Comparator<Serializable[]>() { @Override public int compare(Serializable[] o1, Serializable[] o2) { int numSortColumns = _sortSequence.size(); for (int i = 0; i < numSortColumns; i++) { int ret = 0; SelectionSort selectionSort = _sortSequence.get(i); Serializable v1 = o1[i]; Serializable v2 = o2[i]; // Only compare single-value columns. switch (_dataSchema.getColumnType(i)) { case INT: if (!selectionSort.isIsAsc()) { ret = ((Integer) v1).compareTo((Integer) v2); } else { ret = ((Integer) v2).compareTo((Integer) v1); } break; case LONG: if (!selectionSort.isIsAsc()) { ret = ((Long) v1).compareTo((Long) v2); } else { ret = ((Long) v2).compareTo((Long) v1); } break; case FLOAT: if (!selectionSort.isIsAsc()) { ret = ((Float) v1).compareTo((Float) v2); } else { ret = ((Float) v2).compareTo((Float) v1); } break; case DOUBLE: if (!selectionSort.isIsAsc()) { ret = ((Double) v1).compareTo((Double) v2); } else { ret = ((Double) v2).compareTo((Double) v1); } break; case STRING: if (!selectionSort.isIsAsc()) { ret = ((String) v1).compareTo((String) v2); } else { ret = ((String) v2).compareTo((String) v1); } break; default: break; } if (ret != 0) { return ret; } } return 0; } }; } /** * Helper method to get the type-compatible {@link Comparator} for selection rows. (Inter segment) * <p>Type-compatible comparator allows compatible types to compare with each other. * * @return flexible {@link Comparator} for selection rows. */ @Nonnull private Comparator<Serializable[]> getTypeCompatibleComparator() { return new Comparator<Serializable[]>() { @Override public int compare(Serializable[] o1, Serializable[] o2) { int numSortColumns = _sortSequence.size(); for (int i = 0; i < numSortColumns; i++) { int ret = 0; SelectionSort selectionSort = _sortSequence.get(i); Serializable v1 = o1[i]; Serializable v2 = o2[i]; // Only compare single-value columns. if (v1 instanceof Number) { if (!selectionSort.isIsAsc()) { ret = Double.compare(((Number) v1).doubleValue(), ((Number) v2).doubleValue()); } else { ret = Double.compare(((Number) v2).doubleValue(), ((Number) v1).doubleValue()); } } else if (v1 instanceof String) { if (!selectionSort.isIsAsc()) { ret = ((String) v1).compareTo((String) v2); } else { ret = ((String) v2).compareTo((String) v1); } } if (ret != 0) { return ret; } } return 0; } }; } /** * Get the {@link DataSchema}. * * @return data schema. */ @Nonnull public DataSchema getDataSchema() { return _dataSchema; } /** * Get the selection results. * * @return selection results. */ @Nonnull public PriorityQueue<Serializable[]> getRows() { return _rows; } /** * Get number of documents scanned. (Inner segment) * * @return number of documents scanned. */ public long getNumDocsScanned() { return _numDocsScanned; } /** * Iterate over {@link Block}s, extract values from them and merge the values to the selection results for selection * queries with <code>ORDER BY</code>. (Inner segment) * * @param blockDocIdIterator block document id iterator. * @param blocks {@link Block} array. */ public void iterateOnBlocksWithOrdering(@Nonnull BlockDocIdIterator blockDocIdIterator, @Nonnull Block[] blocks) { Comparator<Integer> rowDocIdComparator = new CompositeDocIdValComparator(_sortSequence, blocks); PriorityQueue<Integer> rowDocIdPriorityQueue = new PriorityQueue<>(_maxNumRows, rowDocIdComparator); int docId; while ((docId = blockDocIdIterator.next()) != Constants.EOF) { _numDocsScanned++; SelectionOperatorUtils.addToPriorityQueue(docId, rowDocIdPriorityQueue, _maxNumRows); } SelectionFetcher selectionFetcher = new SelectionFetcher(blocks, _dataSchema); Collection<Serializable[]> rows = new ArrayList<>(rowDocIdPriorityQueue.size()); for (int rowDocId : rowDocIdPriorityQueue) { rows.add(selectionFetcher.getRow(rowDocId)); } SelectionOperatorUtils.mergeWithOrdering(_rows, rows, _maxNumRows); } /** * Reduce a collection of {@link DataTable}s to selection rows for selection queries with <code>ORDER BY</code>. * (Broker side) * * @param selectionResults {@link Map} from {@link ServerInstance} to {@link DataTable}. */ public void reduceWithOrdering(@Nonnull Map<ServerInstance, DataTable> selectionResults) { for (DataTable dataTable : selectionResults.values()) { int numRows = dataTable.getNumberOfRows(); for (int rowId = 0; rowId < numRows; rowId++) { Serializable[] row = SelectionOperatorUtils.extractRowFromDataTable(dataTable, rowId); SelectionOperatorUtils.addToPriorityQueue(row, _rows, _maxNumRows); } } } /** * Render the unformatted selection rows to a formatted {@link SelectionResults} object for selection queries with * <code>ORDER BY</code>. (Broker side) * <p>{@link SelectionResults} object will be used to build the broker response. * <p>Should be called after method "reduceWithOrdering()". * * @return {@link SelectionResults} object results. */ @Nonnull public SelectionResults renderSelectionResultsWithOrdering() { LinkedList<Serializable[]> rowsInSelectionResults = new LinkedList<>(); int[] columnIndices = getColumnIndices(); while (_rows.size() > _selectionOffset) { rowsInSelectionResults.addFirst(getFormattedRowWithOrdering(_rows.poll(), columnIndices)); } return new SelectionResults(_selectionColumns, rowsInSelectionResults); } /** * Helper method to get each selection column index in data schema. * * @return column indices. */ private int[] getColumnIndices() { int numSelectionColumns = _selectionColumns.size(); int[] columnIndices = new int[numSelectionColumns]; int numColumnsInDataSchema = _dataSchema.size(); Map<String, Integer> dataSchemaIndices = new HashMap<>(numColumnsInDataSchema); for (int i = 0; i < numColumnsInDataSchema; i++) { dataSchemaIndices.put(_dataSchema.getColumnName(i), i); } for (int i = 0; i < numSelectionColumns; i++) { columnIndices[i] = dataSchemaIndices.get(_selectionColumns.get(i)); } return columnIndices; } /** * Helper method to format a selection row, make all values string or string array type based on data schema passed in * for selection queries with <code>ORDER BY</code>. (Broker side) * <p>Formatted row is used to build the {@link SelectionResults}. * * @param row selection row to be formatted. * @param columnIndices column indices of original rows. * @return formatted selection row. */ @Nonnull private Serializable[] getFormattedRowWithOrdering(@Nonnull Serializable[] row, @Nonnull int[] columnIndices) { int numColumns = columnIndices.length; Serializable[] formattedRow = new Serializable[numColumns]; for (int i = 0; i < numColumns; i++) { int columnIndex = columnIndices[i]; formattedRow[i] = SelectionOperatorUtils.getFormattedValue(row[columnIndex], _dataSchema.getColumnType(columnIndex)); } return formattedRow; } }