/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.segment.index.loader.defaultcolumn; import com.google.common.base.Preconditions; import com.linkedin.pinot.common.data.FieldSpec; import com.linkedin.pinot.common.data.Schema; import com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo; import com.linkedin.pinot.core.segment.creator.ForwardIndexType; import com.linkedin.pinot.core.segment.creator.InvertedIndexType; import com.linkedin.pinot.core.segment.creator.impl.SegmentColumnarIndexCreator; import com.linkedin.pinot.core.segment.creator.impl.SegmentDictionaryCreator; import com.linkedin.pinot.core.segment.creator.impl.V1Constants; import com.linkedin.pinot.core.segment.creator.impl.fwd.MultiValueUnsortedForwardIndexCreator; import com.linkedin.pinot.core.segment.creator.impl.fwd.SingleValueSortedForwardIndexCreator; import com.linkedin.pinot.core.segment.index.ColumnMetadata; import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl; import com.linkedin.pinot.core.segment.index.loader.LoaderUtils; import java.io.File; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.commons.io.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public abstract class BaseDefaultColumnHandler implements DefaultColumnHandler { private static final Logger LOGGER = LoggerFactory.getLogger(BaseDefaultColumnHandler.class); protected enum DefaultColumnAction { // Present in schema but not in segment. ADD_DIMENSION, ADD_METRIC, // Present in schema & segment but default value doesn't match. UPDATE_DIMENSION, UPDATE_METRIC, // Present in segment but not in schema, auto-generated. REMOVE_DIMENSION, REMOVE_METRIC; boolean isRemoveAction() { return this == REMOVE_DIMENSION || this == REMOVE_METRIC; } } protected final File _indexDir; protected final Schema _schema; protected final SegmentMetadataImpl _segmentMetadata; private final PropertiesConfiguration _segmentProperties; protected BaseDefaultColumnHandler(File indexDir, Schema schema, SegmentMetadataImpl segmentMetadata) { _indexDir = indexDir; _schema = schema; _segmentMetadata = segmentMetadata; _segmentProperties = segmentMetadata.getSegmentMetadataPropertiesConfiguration(); } /** * {@inheritDoc} */ @Override public void updateDefaultColumns() throws Exception { // Compute the action needed for each column. Map<String, DefaultColumnAction> defaultColumnActionMap = computeDefaultColumnActionMap(); if (defaultColumnActionMap.isEmpty()) { return; } // Update each default column based on the default column action. for (Map.Entry<String, DefaultColumnAction> entry : defaultColumnActionMap.entrySet()) { // This method updates the metadata properties, need to save it later. updateDefaultColumn(entry.getKey(), entry.getValue()); } // Update the segment metadata. List<String> dimensionColumns = LoaderUtils.getStringListFromSegmentProperties(V1Constants.MetadataKeys.Segment.DIMENSIONS, _segmentProperties); List<String> metricColumns = LoaderUtils.getStringListFromSegmentProperties(V1Constants.MetadataKeys.Segment.METRICS, _segmentProperties); for (Map.Entry<String, DefaultColumnAction> entry : defaultColumnActionMap.entrySet()) { String column = entry.getKey(); DefaultColumnAction action = entry.getValue(); switch (action) { case ADD_DIMENSION: dimensionColumns.add(column); break; case ADD_METRIC: metricColumns.add(column); break; case REMOVE_DIMENSION: dimensionColumns.remove(column); break; case REMOVE_METRIC: metricColumns.remove(column); break; default: break; } } _segmentProperties.setProperty(V1Constants.MetadataKeys.Segment.DIMENSIONS, dimensionColumns); _segmentProperties.setProperty(V1Constants.MetadataKeys.Segment.METRICS, metricColumns); // Create a back up for origin metadata. File metadataFile = new File(_indexDir, V1Constants.MetadataKeys.METADATA_FILE_NAME); File metadataBackUpFile = new File(metadataFile + ".bak"); if (!metadataBackUpFile.exists()) { FileUtils.copyFile(metadataFile, metadataBackUpFile); } // Save the new metadata. _segmentProperties.save(metadataFile); } /** * Compute the action needed for each column. * This method compares the column metadata across schema and segment. * * @return Action Map for each column. */ private Map<String, DefaultColumnAction> computeDefaultColumnActionMap() { Map<String, DefaultColumnAction> defaultColumnActionMap = new HashMap<>(); // Compute ADD and UPDATE actions. Collection<String> columnsInSchema = _schema.getColumnNames(); for (String column : columnsInSchema) { FieldSpec fieldSpecInSchema = _schema.getFieldSpecFor(column); Preconditions.checkNotNull(fieldSpecInSchema); FieldSpec.FieldType fieldTypeInSchema = fieldSpecInSchema.getFieldType(); ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(column); if (columnMetadata != null) { // Column exists in the segment, check if we need to update the value. // Only check for auto-generated column. if (!columnMetadata.isAutoGenerated()) { continue; } // Check the field type matches. FieldSpec.FieldType fieldTypeInMetadata = columnMetadata.getFieldType(); if (fieldTypeInMetadata != fieldTypeInSchema) { String failureMessage = "Field type: " + fieldTypeInMetadata + " for auto-generated column: " + column + " does not match field type: " + fieldTypeInSchema + " in schema, throw exception to drop and re-download the segment."; throw new RuntimeException(failureMessage); } // Check the data type and default value matches. FieldSpec.DataType dataTypeInMetadata = columnMetadata.getDataType(); FieldSpec.DataType dataTypeInSchema = fieldSpecInSchema.getDataType(); boolean isSingleValueInMetadata = columnMetadata.isSingleValue(); boolean isSingleValueInSchema = fieldSpecInSchema.isSingleValueField(); String defaultValueInMetadata = columnMetadata.getDefaultNullValueString(); String defaultValueInSchema = fieldSpecInSchema.getDefaultNullValue().toString(); if (dataTypeInMetadata != dataTypeInSchema || isSingleValueInMetadata != isSingleValueInSchema || !defaultValueInSchema.equals(defaultValueInMetadata)) { if (fieldTypeInMetadata == FieldSpec.FieldType.DIMENSION) { defaultColumnActionMap.put(column, DefaultColumnAction.UPDATE_DIMENSION); } else { Preconditions.checkState(fieldTypeInMetadata == FieldSpec.FieldType.METRIC); defaultColumnActionMap.put(column, DefaultColumnAction.UPDATE_METRIC); } } } else { // Column does not exist in the segment, add default value for it. switch (fieldTypeInSchema) { case DIMENSION: defaultColumnActionMap.put(column, DefaultColumnAction.ADD_DIMENSION); break; case METRIC: defaultColumnActionMap.put(column, DefaultColumnAction.ADD_METRIC); break; default: LOGGER.warn("Skip adding default column for column: {} with field type: {}", column, fieldTypeInSchema); break; } } } // Compute REMOVE actions. Set<String> columnsInMetadata = _segmentMetadata.getAllColumns(); for (String column : columnsInMetadata) { if (!columnsInSchema.contains(column)) { ColumnMetadata columnMetadata = _segmentMetadata.getColumnMetadataFor(column); // Only remove auto-generated columns. if (columnMetadata.isAutoGenerated()) { FieldSpec.FieldType fieldTypeInMetadata = columnMetadata.getFieldType(); if (fieldTypeInMetadata == FieldSpec.FieldType.DIMENSION) { defaultColumnActionMap.put(column, DefaultColumnAction.REMOVE_DIMENSION); } else { Preconditions.checkState(fieldTypeInMetadata == FieldSpec.FieldType.METRIC); defaultColumnActionMap.put(column, DefaultColumnAction.REMOVE_METRIC); } } } } return defaultColumnActionMap; } /** * Helper method to update default column indices. * TODO: ADD SUPPORT TO STAR TREE INDEX. * * @param column column name. * @param action default column action. * @throws Exception */ protected abstract void updateDefaultColumn(String column, DefaultColumnAction action) throws Exception; /** * Helper method to remove the V1 indices (dictionary and forward index) for a column. * * @param column column name. */ protected void removeColumnV1Indices(String column) { // Delete existing dictionary and forward index for the column. FileUtils.deleteQuietly(new File(_indexDir, column + V1Constants.Dict.FILE_EXTENTION)); FileUtils.deleteQuietly(new File(_indexDir, column + V1Constants.Indexes.SORTED_FWD_IDX_FILE_EXTENTION)); FileUtils.deleteQuietly(new File(_indexDir, column + V1Constants.Indexes.UN_SORTED_MV_FWD_IDX_FILE_EXTENTION)); // Remove the column metadata information if exists. SegmentColumnarIndexCreator.removeColumnMetadataInfo(_segmentProperties, column); } /** * Helper method to create the V1 indices (dictionary and forward index) for a column. * * @param column column name. */ protected void createColumnV1Indices(String column) throws Exception { FieldSpec fieldSpec = _schema.getFieldSpecFor(column); Preconditions.checkNotNull(fieldSpec); // Generate column index creation information. int totalDocs = _segmentMetadata.getTotalDocs(); int totalRawDocs = _segmentMetadata.getTotalRawDocs(); int totalAggDocs = totalDocs - totalRawDocs; FieldSpec.DataType dataType = fieldSpec.getDataType(); Object defaultValue = fieldSpec.getDefaultNullValue(); boolean isSingleValue = fieldSpec.isSingleValueField(); int maxNumberOfMultiValueElements = isSingleValue ? 0 : 1; int dictionaryElementSize = 0; Object sortedArray; switch (dataType) { case STRING: Preconditions.checkState(defaultValue instanceof String); String stringDefaultValue = (String) defaultValue; // Length of the UTF-8 encoded byte array. dictionaryElementSize = stringDefaultValue.getBytes("UTF8").length; sortedArray = new String[]{stringDefaultValue}; break; case INT: Preconditions.checkState(defaultValue instanceof Integer); sortedArray = new int[]{(Integer) defaultValue}; break; case LONG: Preconditions.checkState(defaultValue instanceof Long); sortedArray = new long[]{(Long) defaultValue}; break; case FLOAT: Preconditions.checkState(defaultValue instanceof Float); sortedArray = new float[]{(Float) defaultValue}; break; case DOUBLE: Preconditions.checkState(defaultValue instanceof Double); sortedArray = new double[]{(Double) defaultValue}; break; default: throw new UnsupportedOperationException("Unsupported data type: " + dataType + " for column: " + column); } ColumnIndexCreationInfo columnIndexCreationInfo = new ColumnIndexCreationInfo(true/*createDictionary*/, defaultValue/*min*/, defaultValue/*max*/, sortedArray, ForwardIndexType.FIXED_BIT_COMPRESSED, InvertedIndexType.SORTED_INDEX, isSingleValue/*isSortedColumn*/, false/*hasNulls*/, totalDocs/*totalNumberOfEntries*/, maxNumberOfMultiValueElements, -1 /* Unused max length*/, true/*isAutoGenerated*/, null/*partitionFunction*/, -1/*numPartitions*/, null/*partitionValue*/, defaultValue/*defaultNullValue*/); // Create dictionary. // We will have only one value in the dictionary. SegmentDictionaryCreator segmentDictionaryCreator = new SegmentDictionaryCreator(false/*hasNulls*/, sortedArray, fieldSpec, _indexDir, V1Constants.Str.DEFAULT_STRING_PAD_CHAR); segmentDictionaryCreator.build(new boolean[]{true}/*isSorted*/); segmentDictionaryCreator.close(); // Create forward index. if (isSingleValue) { // Single-value column. SingleValueSortedForwardIndexCreator svFwdIndexCreator = new SingleValueSortedForwardIndexCreator(_indexDir, 1/*cardinality*/, fieldSpec); for (int docId = 0; docId < totalDocs; docId++) { svFwdIndexCreator.add(0/*dictionaryId*/, docId); } svFwdIndexCreator.close(); } else { // Multi-value column. MultiValueUnsortedForwardIndexCreator mvFwdIndexCreator = new MultiValueUnsortedForwardIndexCreator(fieldSpec, _indexDir, 1/*cardinality*/, totalDocs/*numDocs*/, totalDocs/*totalNumberOfValues*/, false/*hasNulls*/); int[] dictionaryIds = {0}; for (int docId = 0; docId < totalDocs; docId++) { mvFwdIndexCreator.index(docId, dictionaryIds); } mvFwdIndexCreator.close(); } // Add the column metadata information to the metadata properties. SegmentColumnarIndexCreator.addColumnMetadataInfo(_segmentProperties, column, columnIndexCreationInfo, totalDocs, totalRawDocs, totalAggDocs, fieldSpec, true/*hasDictionary*/, dictionaryElementSize, true/*hasInvertedIndex*/, null/*hllOriginColumn*/); } }