/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.query.selection;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.request.Selection;
import com.linkedin.pinot.common.request.SelectionSort;
import com.linkedin.pinot.common.response.ServerInstance;
import com.linkedin.pinot.common.response.broker.SelectionResults;
import com.linkedin.pinot.common.utils.DataSchema;
import com.linkedin.pinot.common.utils.DataTable;
import com.linkedin.pinot.core.common.DataSourceMetadata;
import com.linkedin.pinot.core.common.datatable.DataTableBuilder;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import java.io.Serializable;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/**
* The <code>SelectionOperatorUtils</code> class provides the utility methods for selection queries without
* <code>ORDER BY</code> and {@link SelectionOperatorService}.
* <p>Expected behavior:
* <ul>
* <li>
* Return selection results with the same order of columns as user passed in.
* <ul>
* <li>Eg. SELECT colB, colA, colC FROM table -> [valB, valA, valC]</li>
* </ul>
* </li>
* <li>
* For 'SELECT *', return columns with alphabetically order.
* <ul>
* <li>Eg. SELECT * FROM table -> [valA, valB, valC]</li>
* </ul>
* </li>
* </ul>
*/
public class SelectionOperatorUtils {
private SelectionOperatorUtils() {
}
private static final DecimalFormat INT_FORMAT =
new DecimalFormat("##########", DecimalFormatSymbols.getInstance(Locale.US));
private static final DecimalFormat LONG_FORMAT =
new DecimalFormat("####################", DecimalFormatSymbols.getInstance(Locale.US));
private static final DecimalFormat FLOAT_FORMAT =
new DecimalFormat("#########0.0####", DecimalFormatSymbols.getInstance(Locale.US));
private static final DecimalFormat DOUBLE_FORMAT =
new DecimalFormat("###################0.0#########", DecimalFormatSymbols.getInstance(Locale.US));
/**
* Expand <code>'SELECT *'</code> to select all columns with {@link IndexSegment}, order all columns alphabatically.
* (Inner segment)
*
* @param selectionColumns unexpanded selection columns (may contain '*').
* @param indexSegment index segment.
* @return expanded selection columns.
*/
@Nonnull
public static List<String> getSelectionColumns(@Nonnull List<String> selectionColumns,
@Nonnull IndexSegment indexSegment) {
if (selectionColumns.size() == 1 && selectionColumns.get(0).equals("*")) {
List<String> allColumns = Arrays.asList(indexSegment.getColumnNames());
Collections.sort(allColumns);
return allColumns;
} else {
return selectionColumns;
}
}
/**
* Extract all related columns for a selection query with {@link IndexSegment}. (Inner segment)
*
* @param selection selection query.
* @param indexSegment index segment.
* @return all related columns.
*/
@Nonnull
public static String[] extractSelectionRelatedColumns(@Nonnull Selection selection,
@Nonnull IndexSegment indexSegment) {
Set<String> selectionColumns = new HashSet<>(getSelectionColumns(selection.getSelectionColumns(), indexSegment));
if (selection.getSelectionSortSequence() != null) {
for (SelectionSort selectionSort : selection.getSelectionSortSequence()) {
selectionColumns.add(selectionSort.getColumn());
}
}
return selectionColumns.toArray(new String[selectionColumns.size()]);
}
/**
* Extract the {@link DataSchema} from sort sequence, selection columns and {@link IndexSegment}. (Inner segment)
* <p>Inside data schema, we just store each column once (de-duplicated).
*
* @param sortSequence sort sequence.
* @param selectionColumns selection columns.
* @param indexSegment index segment.
* @return data schema.
*/
@Nonnull
public static DataSchema extractDataSchema(@Nullable List<SelectionSort> sortSequence,
@Nonnull List<String> selectionColumns, @Nonnull IndexSegment indexSegment) {
List<String> columnList = new ArrayList<>();
Set<String> columnSet = new HashSet<>();
if (sortSequence != null) {
for (SelectionSort selectionSort : sortSequence) {
String column = selectionSort.getColumn();
columnList.add(column);
columnSet.add(column);
}
}
for (String column : selectionColumns) {
if (!columnSet.contains(column)) {
columnList.add(column);
columnSet.add(column);
}
}
int numColumns = columnList.size();
String[] columns = new String[numColumns];
DataType[] dataTypes = new DataType[numColumns];
for (int i = 0; i < numColumns; i++) {
String column = columnList.get(i);
columns[i] = column;
DataSourceMetadata columnMetadata = indexSegment.getDataSource(column).getDataSourceMetadata();
if (columnMetadata.isSingleValue()) {
dataTypes[i] = columnMetadata.getDataType();
} else {
dataTypes[i] = columnMetadata.getDataType().toMultiValue();
}
}
return new DataSchema(columns, dataTypes);
}
/**
* Expand <code>'SELECT *'</code> to select all columns with {@link DataSchema}, order all columns alphabatically.
* (Inter segment)
*
* @param selectionColumns unexpanded selection columns (may contain '*').
* @param dataSchema data schema.
* @return expanded selection columns.
*/
@Nonnull
public static List<String> getSelectionColumns(@Nonnull List<String> selectionColumns,
@Nonnull DataSchema dataSchema) {
if ((selectionColumns.size() == 1) && selectionColumns.get(0).equals("*")) {
int numColumns = dataSchema.size();
List<String> allColumns = new ArrayList<>(numColumns);
for (int i = 0; i < numColumns; i++) {
allColumns.add(dataSchema.getColumnName(i));
}
Collections.sort(allColumns);
return allColumns;
} else {
return selectionColumns;
}
}
/**
* Merge two partial results for selection queries without <code>ORDER BY</code>. (Server side)
*
* @param mergedRows partial results 1.
* @param rowsToMerge partial results 2.
* @param selectionSize size of the selection.
*/
public static void mergeWithoutOrdering(@Nonnull Collection<Serializable[]> mergedRows,
@Nonnull Collection<Serializable[]> rowsToMerge, int selectionSize) {
Iterator<Serializable[]> iterator = rowsToMerge.iterator();
while (mergedRows.size() < selectionSize && iterator.hasNext()) {
mergedRows.add(iterator.next());
}
}
/**
* Merge two partial results for selection queries with <code>ORDER BY</code>. (Server side)
*
* @param mergedRows partial results 1.
* @param rowsToMerge partial results 2.
* @param maxNumRows maximum number of rows need to be stored.
*/
public static void mergeWithOrdering(@Nonnull PriorityQueue<Serializable[]> mergedRows,
@Nonnull Collection<Serializable[]> rowsToMerge, int maxNumRows) {
for (Serializable[] row : rowsToMerge) {
addToPriorityQueue(row, mergedRows, maxNumRows);
}
}
/**
* Build a {@link DataTable} from a {@link Collection} of selection rows with {@link DataSchema}. (Server side)
* <p>The passed in data schema stored the column data type that can cover all actual data types for that column.
* <p>The actual data types for each column in rows can be different but must be compatible with each other.
* <p>Before write each row into the data table, first convert it to match the data types in data schema.
*
* @param rows {@link Collection} of selection rows.
* @param dataSchema data schema.
* @return data table.
* @throws Exception
*/
@Nonnull
public static DataTable getDataTableFromRows(@Nonnull Collection<Serializable[]> rows, @Nonnull DataSchema dataSchema)
throws Exception {
int numColumns = dataSchema.size();
DataTableBuilder dataTableBuilder = new DataTableBuilder(dataSchema);
for (Serializable[] row : rows) {
dataTableBuilder.startRow();
for (int i = 0; i < numColumns; i++) {
Serializable columnValue = row[i];
DataType columnType = dataSchema.getColumnType(i);
switch (columnType) {
// Single-value column.
case INT:
dataTableBuilder.setColumn(i, ((Number) columnValue).intValue());
break;
case LONG:
dataTableBuilder.setColumn(i, ((Number) columnValue).longValue());
break;
case FLOAT:
dataTableBuilder.setColumn(i, ((Number) columnValue).floatValue());
break;
case DOUBLE:
dataTableBuilder.setColumn(i, ((Number) columnValue).doubleValue());
break;
case STRING:
dataTableBuilder.setColumn(i, ((String) columnValue));
break;
// Multi-value column.
case INT_ARRAY:
dataTableBuilder.setColumn(i, (int[]) columnValue);
break;
case LONG_ARRAY:
// LONG_ARRAY type covers INT_ARRAY and LONG_ARRAY.
if (columnValue instanceof int[]) {
int[] ints = (int[]) columnValue;
int length = ints.length;
long[] longs = new long[length];
for (int j = 0; j < length; j++) {
longs[j] = ints[j];
}
dataTableBuilder.setColumn(i, longs);
} else {
dataTableBuilder.setColumn(i, (long[]) columnValue);
}
break;
case FLOAT_ARRAY:
dataTableBuilder.setColumn(i, (float[]) columnValue);
break;
case DOUBLE_ARRAY:
// DOUBLE_ARRAY type covers INT_ARRAY, LONG_ARRAY, FLOAT_ARRAY and DOUBLE_ARRAY.
if (columnValue instanceof int[]) {
int[] ints = (int[]) columnValue;
int length = ints.length;
double[] doubles = new double[length];
for (int j = 0; j < length; j++) {
doubles[j] = ints[j];
}
dataTableBuilder.setColumn(i, doubles);
} else if (columnValue instanceof long[]) {
long[] longs = (long[]) columnValue;
int length = longs.length;
double[] doubles = new double[length];
for (int j = 0; j < length; j++) {
doubles[j] = longs[j];
}
dataTableBuilder.setColumn(i, doubles);
} else if (columnValue instanceof float[]) {
float[] floats = (float[]) columnValue;
int length = floats.length;
double[] doubles = new double[length];
for (int j = 0; j < length; j++) {
doubles[j] = floats[j];
}
dataTableBuilder.setColumn(i, doubles);
} else {
dataTableBuilder.setColumn(i, (double[]) columnValue);
}
break;
case STRING_ARRAY:
dataTableBuilder.setColumn(i, (String[]) columnValue);
break;
default:
throw new UnsupportedOperationException(
"Unsupported data type: " + columnType + " for column: " + dataSchema.getColumnName(i));
}
}
dataTableBuilder.finishRow();
}
return dataTableBuilder.build();
}
/**
* Extract a selection row from {@link DataTable}. (Broker side)
*
* @param dataTable data table.
* @param rowId row id.
* @return selection row.
*/
@Nonnull
public static Serializable[] extractRowFromDataTable(@Nonnull DataTable dataTable, int rowId) {
DataSchema dataSchema = dataTable.getDataSchema();
int numColumns = dataSchema.size();
Serializable[] row = new Serializable[numColumns];
for (int i = 0; i < numColumns; i++) {
DataType columnType = dataSchema.getColumnType(i);
switch (columnType) {
// Single-value column.
case INT:
row[i] = dataTable.getInt(rowId, i);
break;
case LONG:
row[i] = dataTable.getLong(rowId, i);
break;
case FLOAT:
row[i] = dataTable.getFloat(rowId, i);
break;
case DOUBLE:
row[i] = dataTable.getDouble(rowId, i);
break;
case STRING:
row[i] = dataTable.getString(rowId, i);
break;
// Multi-value column.
case INT_ARRAY:
row[i] = dataTable.getIntArray(rowId, i);
break;
case LONG_ARRAY:
row[i] = dataTable.getLongArray(rowId, i);
break;
case FLOAT_ARRAY:
row[i] = dataTable.getFloatArray(rowId, i);
break;
case DOUBLE_ARRAY:
row[i] = dataTable.getDoubleArray(rowId, i);
break;
case STRING_ARRAY:
row[i] = dataTable.getStringArray(rowId, i);
break;
default:
throw new UnsupportedOperationException(
"Unsupported data type: " + columnType + " for column: " + dataSchema.getColumnName(i));
}
}
return row;
}
/**
* Reduce a collection of {@link DataTable}s to selection rows for selection queries without <code>ORDER BY</code>.
* (Broker side)
*
* @param selectionResults {@link Map} from {@link ServerInstance} to {@link DataTable}.
* @param selectionSize size of the selection.
* @return reduced results.
*/
@Nonnull
public static List<Serializable[]> reduceWithoutOrdering(@Nonnull Map<ServerInstance, DataTable> selectionResults,
int selectionSize) {
List<Serializable[]> rows = new ArrayList<>(selectionSize);
for (DataTable dataTable : selectionResults.values()) {
int numRows = dataTable.getNumberOfRows();
for (int rowId = 0; rowId < numRows; rowId++) {
if (rows.size() < selectionSize) {
rows.add(extractRowFromDataTable(dataTable, rowId));
} else {
return rows;
}
}
}
return rows;
}
/**
* Render the unformatted selection rows to a formatted {@link SelectionResults} object for selection queries without
* <code>ORDER BY</code>. (Broker side)
* <p>{@link SelectionResults} object will be used to build the broker response.
* <p>Should be called after method "reduceWithoutOrdering()".
*
* @param rows unformatted selection rows.
* @param dataSchema data schema.
* @param selectionColumns selection columns.
* @return {@link SelectionResults} object results.
*/
@Nonnull
public static SelectionResults renderSelectionResultsWithoutOrdering(@Nonnull List<Serializable[]> rows,
@Nonnull DataSchema dataSchema, @Nonnull List<String> selectionColumns) {
// TODO: remove the code for backward compatible after server updated to the latest code.
int numSelectionColumns = selectionColumns.size();
int[] columnIndices = new int[numSelectionColumns];
Map<String, Integer> dataSchemaIndices = new HashMap<>(numSelectionColumns);
for (int i = 0; i < numSelectionColumns; i++) {
dataSchemaIndices.put(dataSchema.getColumnName(i), i);
}
for (int i = 0; i < numSelectionColumns; i++) {
columnIndices[i] = dataSchemaIndices.get(selectionColumns.get(i));
}
int numRows = rows.size();
for (int i = 0; i < numRows; i++) {
rows.set(i, getFormattedRowWithoutOrdering(rows.get(i), dataSchema, columnIndices));
}
/* TODO: uncomment after server updated to the latest code.
for (Serializable[] row : rows) {
formatRowWithoutOrdering(row, dataSchema);
}*/
return new SelectionResults(selectionColumns, rows);
}
/**
* Helper method to format a selection row, make all values string or string array type based on data schema passed in
* for selection queries without <code>ORDER BY</code>. (Broker side)
* <p>Formatted row is used to build the {@link SelectionResults}.
*
* @param row selection row to be formatted.
* @param dataSchema data schema.
*/
private static void formatRowWithoutOrdering(@Nonnull Serializable[] row, @Nonnull DataSchema dataSchema) {
int numColumns = row.length;
for (int i = 0; i < numColumns; i++) {
row[i] = getFormattedValue(row[i], dataSchema.getColumnType(i));
}
}
// TODO: remove this method after server updated to the latest code.
private static Serializable[] getFormattedRowWithoutOrdering(@Nonnull Serializable[] row,
@Nonnull DataSchema dataSchema, @Nonnull int[] columnIndices) {
int numColumns = columnIndices.length;
Serializable[] formattedRow = new Serializable[numColumns];
for (int i = 0; i < numColumns; i++) {
int columnIndex = columnIndices[i];
formattedRow[i] =
SelectionOperatorUtils.getFormattedValue(row[columnIndex], dataSchema.getColumnType(columnIndex));
}
return formattedRow;
}
/**
* Format a {@link Serializable} value into a {@link String} or {@link String} array based on the data type.
* (Broker side)
* <p>Actual value type can be different with data type passed in, but they must be type compatible.
*
* @param value value to be formatted.
* @param dataType data type.
* @return formatted value.
*/
@Nonnull
public static Serializable getFormattedValue(@Nonnull Serializable value, @Nonnull DataType dataType) {
switch (dataType) {
// Single-value column.
case INT:
return INT_FORMAT.format(((Number) value).intValue());
case LONG:
return LONG_FORMAT.format(((Number) value).longValue());
case FLOAT:
return FLOAT_FORMAT.format(((Number) value).floatValue());
case DOUBLE:
return DOUBLE_FORMAT.format(((Number) value).doubleValue());
// Multi-value column.
case INT_ARRAY:
int[] ints = (int[]) value;
int length = ints.length;
String[] formattedValue = new String[length];
for (int i = 0; i < length; i++) {
formattedValue[i] = INT_FORMAT.format(ints[i]);
}
return formattedValue;
case LONG_ARRAY:
// LONG_ARRAY type covers INT_ARRAY and LONG_ARRAY.
if (value instanceof int[]) {
ints = (int[]) value;
length = ints.length;
formattedValue = new String[length];
for (int i = 0; i < length; i++) {
formattedValue[i] = LONG_FORMAT.format(ints[i]);
}
} else {
long[] longs = (long[]) value;
length = longs.length;
formattedValue = new String[length];
for (int i = 0; i < length; i++) {
formattedValue[i] = LONG_FORMAT.format(longs[i]);
}
}
return formattedValue;
case FLOAT_ARRAY:
float[] floats = (float[]) value;
length = floats.length;
formattedValue = new String[length];
for (int i = 0; i < length; i++) {
formattedValue[i] = FLOAT_FORMAT.format(floats[i]);
}
return formattedValue;
case DOUBLE_ARRAY:
// DOUBLE_ARRAY type covers INT_ARRAY, LONG_ARRAY, FLOAT_ARRAY and DOUBLE_ARRAY.
if (value instanceof int[]) {
ints = (int[]) value;
length = ints.length;
formattedValue = new String[length];
for (int i = 0; i < length; i++) {
formattedValue[i] = DOUBLE_FORMAT.format((double) ints[i]);
}
return formattedValue;
} else if (value instanceof long[]) {
long[] longs = (long[]) value;
length = longs.length;
formattedValue = new String[length];
for (int i = 0; i < length; i++) {
formattedValue[i] = DOUBLE_FORMAT.format((double) longs[i]);
}
return formattedValue;
} else if (value instanceof float[]) {
floats = (float[]) value;
length = floats.length;
formattedValue = new String[length];
for (int i = 0; i < length; i++) {
formattedValue[i] = DOUBLE_FORMAT.format(floats[i]);
}
return formattedValue;
} else {
double[] doubles = (double[]) value;
length = doubles.length;
formattedValue = new String[length];
for (int i = 0; i < length; i++) {
formattedValue[i] = DOUBLE_FORMAT.format(doubles[i]);
}
return formattedValue;
}
default:
// For STRING and STRING_ARRAY, no need to format.
return value;
}
}
/**
* Helper method to add a value to a {@link PriorityQueue}.
*
* @param value value to be added.
* @param queue priority queue.
* @param maxNumValues maximum number of values in the priority queue.
* @param <T> type for the value.
*/
public static <T> void addToPriorityQueue(@Nonnull T value, @Nonnull PriorityQueue<T> queue, int maxNumValues) {
if (queue.size() < maxNumValues) {
queue.add(value);
} else if (queue.comparator().compare(queue.peek(), value) < 0) {
queue.poll();
queue.offer(value);
}
}
}