/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.segment.index.loader.invertedindex;

import com.linkedin.pinot.core.indexsegment.generator.SegmentVersion;
import com.linkedin.pinot.core.io.reader.DataFileReader;
import com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader;
import com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader;
import com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader;
import com.linkedin.pinot.core.segment.creator.impl.V1Constants;
import com.linkedin.pinot.core.segment.creator.impl.inv.OffHeapBitmapInvertedIndexCreator;
import com.linkedin.pinot.core.segment.index.ColumnMetadata;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.segment.index.loader.IndexLoadingConfig;
import com.linkedin.pinot.core.segment.index.loader.LoaderUtils;
import com.linkedin.pinot.core.segment.memory.PinotDataBuffer;
import com.linkedin.pinot.core.segment.store.ColumnIndexType;
import com.linkedin.pinot.core.segment.store.SegmentDirectory;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Builds bitmap inverted indices for the columns requested by an {@link IndexLoadingConfig}.
 *
 * <p>A per-column marker file ({@code <column>.inv.inprogress}) placed in the index directory is used to detect
 * a previous run that was interrupted: when the marker is found, any leftover inverted index file is removed and
 * the index is built again from the column's forward index.
 */
public class InvertedIndexHandler {
  private static final Logger LOGGER = LoggerFactory.getLogger(InvertedIndexHandler.class);

  private final File indexDir;
  private final SegmentMetadataImpl segmentMetadata;
  private final String segmentName;
  private final SegmentVersion segmentVersion;
  private final IndexLoadingConfig indexConfig;
  private final SegmentDirectory.Writer segmentWriter;

  /**
   * Captures the collaborators needed to build inverted indices; the segment name and version are read from
   * the given metadata up front.
   *
   * @param indexDir directory in which the marker files and generated inverted index files are placed
   * @param segmentMetadata metadata of the segment being processed
   * @param indexConfig index loading config naming the inverted index columns; may be {@code null}, in which
   *     case no column is selected
   * @param segmentWriter writer used to look up existing indices and to read the forward index
   */
  public InvertedIndexHandler(File indexDir, SegmentMetadataImpl segmentMetadata, IndexLoadingConfig indexConfig,
      SegmentDirectory.Writer segmentWriter) {
    this.indexDir = indexDir;
    this.segmentMetadata = segmentMetadata;
    this.segmentName = segmentMetadata.getName();
    this.segmentVersion = SegmentVersion.valueOf(segmentMetadata.getVersion());
    this.indexConfig = indexConfig;
    this.segmentWriter = segmentWriter;
  }

  /**
   * Creates an inverted index for every eligible column named in the index config.
   *
   * @throws IOException propagated from the marker-file, forward-index or inverted-index operations
   */
  public void createInvertedIndices()
      throws IOException {
    for (String column : getInvertedIndexColumns()) {
      createInvertedIndexForColumn(segmentMetadata.getColumnMetadataFor(column));
    }
  }

  /**
   * Selects the configured columns that have metadata in this segment and are not sorted.
   *
   * @return the columns to build an inverted index for; empty when there is no index config
   */
  private Set<String> getInvertedIndexColumns() {
    Set<String> eligibleColumns = new HashSet<>();
    if (indexConfig != null) {
      for (String configuredColumn : indexConfig.getInvertedIndexColumns()) {
        ColumnMetadata metadata = segmentMetadata.getColumnMetadataFor(configuredColumn);
        // Columns without metadata and sorted columns are left out.
        if (metadata == null || metadata.isSorted()) {
          continue;
        }
        eligibleColumns.add(configuredColumn);
      }
    }
    return eligibleColumns;
  }

  /**
   * Builds the inverted index of one column, unless a size limit is exceeded or the index is already there.
   *
   * @param columnMetadata metadata of the column to index
   * @throws IOException propagated from the marker-file, forward-index or inverted-index operations
   */
  private void createInvertedIndexForColumn(ColumnMetadata columnMetadata)
      throws IOException {
    String column = columnMetadata.getColumnName();

    // Size limits: each of the checks below skips the column (with a warning) when the creator's limit is exceeded.
    int totalDocs = columnMetadata.getTotalDocs();
    if (totalDocs > OffHeapBitmapInvertedIndexCreator.MAX_NUM_ENTRIES) {
      LOGGER.warn(
          "Skip creating inverted index for segment: {}, column: {} because totalDocs: {} exceeds the limit: {}",
          segmentName, column, totalDocs, OffHeapBitmapInvertedIndexCreator.MAX_NUM_ENTRIES);
      return;
    }

    int cardinality = columnMetadata.getCardinality();
    if (cardinality > OffHeapBitmapInvertedIndexCreator.MAX_NUM_ENTRIES) {
      LOGGER.warn(
          "Skip creating inverted index for segment: {}, column: {} because cardinality: {} exceeds the limit: {}",
          segmentName, column, cardinality, OffHeapBitmapInvertedIndexCreator.MAX_NUM_ENTRIES);
      return;
    }

    // The total-number-of-entries limit is only enforced for multi-value columns.
    boolean singleValue = columnMetadata.isSingleValue();
    int totalNumberOfEntries = columnMetadata.getTotalNumberOfEntries();
    if (!singleValue && totalNumberOfEntries > OffHeapBitmapInvertedIndexCreator.MAX_NUM_ENTRIES) {
      LOGGER.warn(
          "Skip creating inverted index for segment: {}, multi-value column: {} because totalNumberOfEntries: {} exceeds the limit: {}",
          segmentName, column, totalNumberOfEntries, OffHeapBitmapInvertedIndexCreator.MAX_NUM_ENTRIES);
      return;
    }

    File inProgress = new File(indexDir, column + ".inv.inprogress");
    File invertedIndexFile = new File(indexDir, column + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
    if (!prepareForBuild(column, inProgress, invertedIndexFile)) {
      return;
    }

    // Build the index from scratch out of the forward index.
    LOGGER.info("Creating new inverted index for segment: {}, column: {}", segmentName, column);
    OffHeapBitmapInvertedIndexCreator creator =
        new OffHeapBitmapInvertedIndexCreator(indexDir, cardinality, totalDocs, totalNumberOfEntries,
            columnMetadata.getFieldSpec());
    populateInvertedIndex(creator, columnMetadata, singleValue, totalDocs);
    creator.seal();

    // In v3 the generated file is written into the single index file and then removed.
    if (segmentVersion == SegmentVersion.v3) {
      LoaderUtils.writeIndexToV3Format(segmentWriter, column, invertedIndexFile, ColumnIndexType.INVERTED_INDEX);
    }

    // The build completed, so the marker is no longer needed.
    FileUtils.deleteQuietly(inProgress);

    LOGGER.info("Created inverted index for segment: {}, column: {}", segmentName, column);
  }

  /**
   * Applies the marker-file protocol and reports whether the inverted index has to be (re)built.
   *
   * @param column name of the column being processed
   * @param inProgress marker file signalling an unfinished build
   * @param invertedIndexFile inverted index file that the build generates for the column
   * @return {@code false} when the last run ended normally and the index already exists, {@code true} otherwise
   * @throws IOException if the marker file cannot be created
   */
  private boolean prepareForBuild(String column, File inProgress, File invertedIndexFile)
      throws IOException {
    if (inProgress.exists()) {
      // The marker survived, so the last run was interrupted: drop whatever inverted index file it left.
      // For v1 and v2 that file is the actual inverted index; for v3 it is the temporary inverted index.
      FileUtils.deleteQuietly(invertedIndexFile);
      return true;
    }

    // No marker: the last run ended normally, so an existing index can be kept as is.
    if (segmentWriter.hasIndexFor(column, ColumnIndexType.INVERTED_INDEX)) {
      LOGGER.info("Found inverted index for segment: {}, column: {}", segmentName, column);
      return false;
    }

    // Leave a marker behind before starting the build.
    FileUtils.touch(inProgress);
    return true;
  }

  /**
   * Feeds the dictionary ids of every document, read from the column's forward index, into the creator.
   *
   * @param creator inverted index creator that receives the entries
   * @param columnMetadata metadata of the column being indexed
   * @param singleValue whether the column is single-value
   * @param totalDocs number of documents to read from the forward index
   * @throws IOException propagated from opening, reading or closing the forward index reader
   */
  private void populateInvertedIndex(OffHeapBitmapInvertedIndexCreator creator, ColumnMetadata columnMetadata,
      boolean singleValue, int totalDocs)
      throws IOException {
    try (DataFileReader fwdIndex = getForwardIndexReader(columnMetadata, segmentWriter)) {
      if (!singleValue) {
        // Multi-value column: one reusable buffer sized for the largest entry.
        SingleColumnMultiValueReader multiValueReader = (SingleColumnMultiValueReader) fwdIndex;
        int[] dictIdBuffer = new int[columnMetadata.getMaxNumberOfMultiValues()];
        for (int docId = 0; docId < totalDocs; docId++) {
          int numValues = multiValueReader.getIntArray(docId, dictIdBuffer);
          creator.add(docId, dictIdBuffer, numValues);
        }
      } else {
        // Single-value column: exactly one dictionary id per document.
        FixedBitSingleValueReader singleValueReader = (FixedBitSingleValueReader) fwdIndex;
        for (int docId = 0; docId < totalDocs; docId++) {
          creator.add(docId, singleValueReader.getInt(docId));
        }
      }
    }
  }

  /**
   * Opens a fixed-bit reader over the forward index buffer of the given column.
   *
   * @param columnMetadata metadata of the column whose forward index is read
   * @param segmentWriter writer that supplies the forward index buffer
   * @return a single-value or multi-value forward index reader, depending on the column
   * @throws IOException propagated from obtaining the buffer or constructing the reader
   */
  private DataFileReader getForwardIndexReader(ColumnMetadata columnMetadata, SegmentDirectory.Writer segmentWriter)
      throws IOException {
    String column = columnMetadata.getColumnName();
    PinotDataBuffer fwdIndexBuffer = segmentWriter.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
    if (!columnMetadata.isSingleValue()) {
      return new FixedBitMultiValueReader(fwdIndexBuffer, columnMetadata.getTotalDocs(),
          columnMetadata.getTotalNumberOfEntries(), columnMetadata.getBitsPerElement(), false);
    }
    return new FixedBitSingleValueReader(fwdIndexBuffer, columnMetadata.getTotalDocs(),
        columnMetadata.getBitsPerElement(), columnMetadata.hasNulls());
  }
}