/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.segment.creator.impl;

import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.HashBiMap;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.data.StarTreeIndexSpec;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.extractors.FieldExtractorFactory;
import com.linkedin.pinot.core.data.extractors.PlainFieldExtractor;
import com.linkedin.pinot.core.data.readers.RecordReader;
import com.linkedin.pinot.core.data.readers.RecordReaderFactory;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.indexsegment.generator.SegmentVersion;
import com.linkedin.pinot.core.segment.creator.ColumnIndexCreationInfo;
import com.linkedin.pinot.core.segment.creator.ColumnStatistics;
import com.linkedin.pinot.core.segment.creator.ForwardIndexType;
import com.linkedin.pinot.core.segment.creator.InvertedIndexType;
import com.linkedin.pinot.core.segment.creator.RecordReaderSegmentCreationDataSource;
import com.linkedin.pinot.core.segment.creator.SegmentCreationDataSource;
import com.linkedin.pinot.core.segment.creator.SegmentCreator;
import com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver;
import com.linkedin.pinot.core.segment.creator.SegmentIndexCreationInfo;
import com.linkedin.pinot.core.segment.creator.SegmentPreIndexStatsContainer;
import com.linkedin.pinot.core.segment.creator.StatsCollectorConfig;
import com.linkedin.pinot.core.segment.creator.impl.stats.SegmentPreIndexStatsCollectorImpl;
import com.linkedin.pinot.core.segment.index.converter.SegmentFormatConverter;
import com.linkedin.pinot.core.segment.index.converter.SegmentFormatConverterFactory;
import com.linkedin.pinot.core.startree.OffHeapStarTreeBuilder;
import com.linkedin.pinot.core.startree.StarTree;
import com.linkedin.pinot.core.startree.StarTreeBuilder;
import com.linkedin.pinot.core.startree.StarTreeBuilderConfig;
import com.linkedin.pinot.core.startree.StarTreeIndexNode;
import com.linkedin.pinot.core.startree.StarTreeIndexNodeInterf;
import com.linkedin.pinot.core.startree.StarTreeSerDe;
import com.linkedin.pinot.core.startree.hll.HllConfig;
import com.linkedin.pinot.core.startree.hll.HllUtil;
import com.linkedin.pinot.core.util.CrcUtils;
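
/*
 * A minimal usage sketch for this driver. The SegmentGeneratorConfig constructor and setter
 * shown are illustrative assumptions; only the driver calls are confirmed by this class:
 *
 *   SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);  // assumed ctor
 *   config.setOutDir("/tmp/pinot-segments");                             // assumed setter
 *
 *   SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
 *   driver.init(config);   // wires up the record reader and gathers column statistics
 *   driver.build();        // writes the indexes and seals the segment
 *   File segmentDir = driver.getOutputDirectory();
 */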
/**
 * Implementation of an index segment creator.
 */
public class SegmentIndexCreationDriverImpl implements SegmentIndexCreationDriver {
  private static final Logger LOGGER = LoggerFactory.getLogger(SegmentIndexCreationDriverImpl.class);

  private SegmentGeneratorConfig config;
  private RecordReader recordReader;
  private SegmentPreIndexStatsContainer segmentStats;
  private Map<String, ColumnIndexCreationInfo> indexCreationInfoMap;
  private SegmentCreator indexCreator;
  private SegmentIndexCreationInfo segmentIndexCreationInfo;
  private Schema dataSchema;
  private PlainFieldExtractor extractor;
  private int totalDocs = 0;
  private int totalRawDocs = 0;
  private int totalAggDocs = 0;
  private File tempIndexDir;
  private String segmentName;
  private long totalRecordReadTime = 0;
  private long totalIndexTime = 0;
  private long totalStatsCollectorTime = 0;
  private boolean createStarTree = false;
  // Flag that indicates whether this segment generator will create the HLL index for the given
  // columns. This is false if an HLL column is provided to us.
  private boolean createHllIndex = false;
  private File starTreeTempDir;

  @Override
  public void init(SegmentGeneratorConfig config) throws Exception {
    init(config, new RecordReaderSegmentCreationDataSource(RecordReaderFactory.get(config)));
  }

  public void init(SegmentGeneratorConfig config, SegmentCreationDataSource dataSource) throws Exception {
    this.config = config;
    this.createStarTree = config.isEnableStarTreeIndex();
    recordReader = dataSource.getRecordReader();
    dataSchema = recordReader.getSchema();

    if (config.getHllConfig() != null) {
      HllConfig hllConfig = config.getHllConfig();
      // createHllIndex is true only if we are provided with columns from which to derive HLL fields
      if (hllConfig.getColumnsToDeriveHllFields() != null && !hllConfig.getColumnsToDeriveHllFields().isEmpty()) {
        if (!createStarTree) {
          throw new IllegalArgumentException(
              "Derived HLL field generation will not work if the star tree index is not enabled.");
        } else {
          createHllIndex = true;
        }
      }
      // Otherwise columnsToDeriveHllFields is null; don't do anything in this case.
      // Segment seal() will write the log2m value to the metadata.
    }
    addDerivedFieldsInSchema();

    extractor = FieldExtractorFactory.getPlainFieldExtractor(dataSchema);

    // Initialize stats collection
    if (!createStarTree) { // For star tree, the stats are gathered in buildStarTree()
      segmentStats = dataSource.gatherStats(new StatsCollectorConfig(dataSchema, config.getSegmentPartitionConfig()));
      totalDocs = segmentStats.getTotalDocCount();
      totalRawDocs = segmentStats.getRawDocCount();
      totalAggDocs = segmentStats.getAggregatedDocCount();
    }

    // Initialize index creation; the columnar index creator is used whether or not a star tree is built
    segmentIndexCreationInfo = new SegmentIndexCreationInfo();
    indexCreationInfoMap = new HashMap<>();
    indexCreator = new SegmentColumnarIndexCreator();

    // Ensure that the output directory exists
    final File indexDir = new File(config.getOutDir());
    if (!indexDir.exists()) {
      indexDir.mkdirs();
    }

    // Create the temporary directories used in segment creation
    tempIndexDir = new File(indexDir, com.linkedin.pinot.common.utils.FileUtils.getRandomFileName());
    starTreeTempDir = new File(indexDir, com.linkedin.pinot.common.utils.FileUtils.getRandomFileName());
    LOGGER.debug("tempIndexDir:{}", tempIndexDir);
    LOGGER.debug("starTreeTempDir:{}", starTreeTempDir);
  }

  @Deprecated
  public void init(SegmentGeneratorConfig config, RecordReader reader) throws Exception {
    init(config, new RecordReaderSegmentCreationDataSource(reader));
  }
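
  /*
   * Sketch of how derived HLL fields are typically requested. The HllConfig constructor and the
   * setters shown are assumptions (only the getters used in this class are confirmed here); note
   * that init() rejects this configuration unless the star tree index is also enabled:
   *
   *   HllConfig hllConfig = new HllConfig(12);  // assumed ctor taking log2m
   *   hllConfig.setColumnsToDeriveHllFields(Collections.singleton("memberId"));  // assumed setter
   *   config.setHllConfig(hllConfig);           // assumed setter
   *   config.setEnableStarTreeIndex(true);      // assumed setter
   */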

  private void addDerivedFieldsInSchema() {
    if (createHllIndex) {
      Collection<String> columnNames = dataSchema.getColumnNames();
      HllConfig hllConfig = config.getHllConfig();
      for (String derivedFieldName : hllConfig.getDerivedHllFieldToOriginMap().keySet()) {
        if (columnNames.contains(derivedFieldName)) {
          throw new IllegalArgumentException(
              "Cannot add derived field: " + derivedFieldName + " since it already exists in schema.");
        } else {
          dataSchema.addField(new MetricFieldSpec(derivedFieldName, FieldSpec.DataType.STRING,
              hllConfig.getHllFieldSize(), MetricFieldSpec.DerivedMetricType.HLL));
        }
      }
    }
  }

  private void populateDefaultDerivedColumnValues(GenericRow row) throws IOException {
    // Add the default HLL value to each row
    if (createHllIndex) {
      HllConfig hllConfig = config.getHllConfig();
      for (Entry<String, String> entry : hllConfig.getDerivedHllFieldToOriginMap().entrySet()) {
        String derivedFieldName = entry.getKey();
        String originFieldName = entry.getValue();
        row.putField(derivedFieldName,
            HllUtil.singleValueHllAsString(hllConfig.getHllLog2m(), row.getValue(originFieldName)));
      }
    }
  }

  @Override
  public void build() throws Exception {
    if (createStarTree) {
      buildStarTree();
    } else {
      buildRaw();
    }
  }

  private void buildStarTree() throws Exception {
    // Create the stats collector
    StatsCollectorConfig statsCollectorConfig =
        new StatsCollectorConfig(dataSchema, config.getSegmentPartitionConfig());
    SegmentPreIndexStatsCollectorImpl statsCollector = new SegmentPreIndexStatsCollectorImpl(statsCollectorConfig);
    statsCollector.init();
    segmentStats = statsCollector;

    long start = System.currentTimeMillis();

    // Construct the star tree builder config
    StarTreeIndexSpec starTreeIndexSpec = config.getStarTreeIndexSpec();
    if (starTreeIndexSpec == null) {
      starTreeIndexSpec = new StarTreeIndexSpec();
      starTreeIndexSpec.setMaxLeafRecords(StarTreeIndexSpec.DEFAULT_MAX_LEAF_RECORDS);
      config.setStarTreeIndexSpec(starTreeIndexSpec);
    }
    List<String> dimensionsSplitOrder = starTreeIndexSpec.getDimensionsSplitOrder();
    if (dimensionsSplitOrder != null && !dimensionsSplitOrder.isEmpty()) {
      final String timeColumnName = config.getTimeColumnName();
      if (timeColumnName != null) {
        dimensionsSplitOrder.remove(timeColumnName);
      }
    }

    // Create the star tree builder config from the StarTreeIndexSpec. TODO: Merge these two into one.
    StarTreeBuilderConfig starTreeBuilderConfig = new StarTreeBuilderConfig();
    starTreeBuilderConfig.setSchema(dataSchema);
    starTreeBuilderConfig.setDimensionsSplitOrder(dimensionsSplitOrder);
    starTreeBuilderConfig.setMaxLeafRecords(starTreeIndexSpec.getMaxLeafRecords());
    starTreeBuilderConfig.setSkipStarNodeCreationForDimensions(
        starTreeIndexSpec.getSkipStarNodeCreationForDimensions());
    Set<String> skipMaterializationForDimensions = starTreeIndexSpec.getskipMaterializationForDimensions();
    starTreeBuilderConfig.setSkipMaterializationForDimensions(skipMaterializationForDimensions);
    starTreeBuilderConfig.setSkipMaterializationCardinalityThreshold(
        starTreeIndexSpec.getskipMaterializationCardinalityThreshold());
    starTreeBuilderConfig.setOutDir(starTreeTempDir);
    boolean enableOffHeapFormat = starTreeIndexSpec.isEnableOffHeapFormat();
    starTreeBuilderConfig.setEnableOffHealpFormat(enableOffHeapFormat); // (sic: "Healp" matches the method name)

    // Initialize the star tree builder
    StarTreeBuilder starTreeBuilder = new OffHeapStarTreeBuilder();
    starTreeBuilder.init(starTreeBuilderConfig);

    // Build the star tree while collecting stats
    recordReader.rewind();
    LOGGER.info("Start appending raw data to the star tree builder!");
    totalDocs = 0;
    GenericRow readRow = new GenericRow();
    GenericRow transformedRow = new GenericRow();
    while (recordReader.hasNext()) {
      // PlainFieldExtractor performs the necessary type conversions
      transformedRow = readNextRowSanitized(readRow, transformedRow);
      // Must be called after the previous step, since type conversion is unnecessary for derived values
      populateDefaultDerivedColumnValues(transformedRow);
      starTreeBuilder.append(transformedRow);
      statsCollector.collectRow(transformedRow);
      totalRawDocs++;
      totalDocs++;
    }
    recordReader.close();
    LOGGER.info("Start building star tree!");
    starTreeBuilder.build();
    LOGGER.info("Finished building star tree!");
    long starTreeBuildFinishTime = System.currentTimeMillis();

    // Build stats: count the number of documents and gather per-column statistics
    LOGGER.info("Start building StatsCollector!");
    Iterator<GenericRow> aggregatedRowsIterator = starTreeBuilder.iterator(starTreeBuilder.getTotalRawDocumentCount(),
        starTreeBuilder.getTotalRawDocumentCount() + starTreeBuilder.getTotalAggregateDocumentCount());
    while (aggregatedRowsIterator.hasNext()) {
      GenericRow genericRow = aggregatedRowsIterator.next();
      statsCollector.collectRow(genericRow, true /* isAggregated */);
      totalAggDocs++;
      totalDocs++;
    }
    statsCollector.build();
    buildIndexCreationInfo();
    LOGGER.info("Collected stats for {} raw documents, {} aggregated documents", totalRawDocs, totalAggDocs);
    long statCollectionFinishTime = System.currentTimeMillis();

    // Initialize the index creation using the per-column statistics information
    indexCreator.init(config, segmentIndexCreationInfo, indexCreationInfoMap, dataSchema, tempIndexDir);

    // Iterate over the data again, indexing every document (raw and aggregated)
    Iterator<GenericRow> allRowsIterator = starTreeBuilder.iterator(0,
        starTreeBuilder.getTotalRawDocumentCount() + starTreeBuilder.getTotalAggregateDocumentCount());
    while (allRowsIterator.hasNext()) {
      GenericRow genericRow = allRowsIterator.next();
      indexCreator.indexRow(genericRow);
    }
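
    // Illustration of the two iterator windows used above, with hypothetical counts: given 1000
    // raw documents and 200 aggregated documents, the builder exposes
    //   iterator(0, 1200)    -> every document, raw rows first, then aggregated rows
    //   iterator(1000, 1200) -> the aggregated rows only
    // which is why the stats pass walked [rawCount, rawCount + aggCount) with isAggregated = true,
    // while the indexing pass above walks the full range.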

    // If no dimensionsSplitOrder was specified in the StarTreeIndexSpec, set the order used by the
    // starTreeBuilder. This is required so that the dimensionsSplitOrder used by the builder can be
    // written into the segment metadata.
    if (dimensionsSplitOrder == null || dimensionsSplitOrder.isEmpty()) {
      starTreeIndexSpec.setDimensionsSplitOrder(starTreeBuilder.getDimensionsSplitOrder());
    }
    if (skipMaterializationForDimensions == null || skipMaterializationForDimensions.isEmpty()) {
      starTreeIndexSpec.setSkipMaterializationForDimensions(starTreeBuilder.getSkipMaterializationForDimensions());
    }

    serializeTree(starTreeBuilder, enableOffHeapFormat);

    // Post-creation steps
    handlePostCreation();

    starTreeBuilder.cleanup();
    long end = System.currentTimeMillis();
    LOGGER.info("Total time:{} \n star tree build time:{} \n stat collection time:{} \n column index build time:{}",
        (end - start), (starTreeBuildFinishTime - start), statCollectionFinishTime - starTreeBuildFinishTime,
        end - statCollectionFinishTime);
  }

  private void serializeTree(StarTreeBuilder starTreeBuilder, boolean enableOffHeapFormat) throws Exception {
    // The star tree was built with its own dictionary, so we need to re-map each dimension value id
    Map<String, HashBiMap<Object, Integer>> dictionaryMap = starTreeBuilder.getDictionaryMap();
    StarTree tree = starTreeBuilder.getTree();
    HashBiMap<String, Integer> dimensionNameToIndexMap = starTreeBuilder.getDimensionNameToIndexMap();
    StarTreeIndexNode node = (StarTreeIndexNode) tree.getRoot();
    updateTree(node, dictionaryMap, dimensionNameToIndexMap);

    File starTreeFile = new File(tempIndexDir, V1Constants.STAR_TREE_INDEX_FILE);
    if (enableOffHeapFormat) {
      StarTreeSerDe.writeTreeOffHeapFormat(tree, starTreeFile);
    } else {
      StarTreeSerDe.writeTreeOnHeapFormat(tree, starTreeFile);
    }
  }

  /**
   * The star tree is built with its own dictionary, which differs from the columnar segment
   * dictionary. This method updates the tree with the new mapping.
   *
   * @param node Node to update, recursively along with its children
   * @param dictionaryMap Per-dimension dictionaries used by the star tree builder
   * @param dimensionNameToIndexMap Mapping from dimension name to dimension index
   */
  private void updateTree(StarTreeIndexNode node, Map<String, HashBiMap<Object, Integer>> dictionaryMap,
      HashBiMap<String, Integer> dimensionNameToIndexMap) {
    // The current node needs an update only if it is not a star node
    if (node.getDimensionName() != StarTreeIndexNodeInterf.ALL) {
      String dimName = dimensionNameToIndexMap.inverse().get(node.getDimensionName());
      int dimensionValue = node.getDimensionValue();
      if (dimensionValue != StarTreeIndexNodeInterf.ALL) {
        Object sortedValuesForDim = indexCreationInfoMap.get(dimName).getSortedUniqueElementsArray();
        int indexForDimValue =
            searchValueInArray(sortedValuesForDim, dictionaryMap.get(dimName).inverse().get(dimensionValue));
        node.setDimensionValue(indexForDimValue);
      }
    }

    // Update the children map
    Iterator<StarTreeIndexNode> childrenIterator = node.getChildrenIterator();
    if (childrenIterator.hasNext()) {
      Map<Integer, StarTreeIndexNode> newChildren = new HashMap<>();
      String childDimName = dimensionNameToIndexMap.inverse().get(node.getChildDimensionName());
      Object sortedValuesForDim = indexCreationInfoMap.get(childDimName).getSortedUniqueElementsArray();

      while (childrenIterator.hasNext()) {
        StarTreeIndexNode child = childrenIterator.next();
        int childDimValue = child.getDimensionValue();
        int childMappedDimValue = StarTreeIndexNodeInterf.ALL;
        if (childDimValue != StarTreeIndexNodeInterf.ALL) {
          childMappedDimValue =
              searchValueInArray(sortedValuesForDim, dictionaryMap.get(childDimName).inverse().get(childDimValue));
        }
        newChildren.put(childMappedDimValue, child);
        updateTree(child, dictionaryMap, dimensionNameToIndexMap);
      }
      node.setChildren(newChildren);
    }
  }
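
  /*
   * A small worked example of the remapping performed by updateTree(), with made-up values:
   * suppose the star tree builder's dictionary for dimension "country" assigned
   * {"us" -> 0, "uk" -> 1} in arrival order, while the columnar segment stores the sorted
   * unique values ["uk", "us"]. A node carrying builder id 0 ("us") is rewritten via
   * searchValueInArray(...) to hold 1, the position of "us" in the sorted array, so the node
   * agrees with the segment dictionary ids.
   */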

  private void buildRaw() throws Exception {
    // Count the number of documents and gather per-column statistics
    LOGGER.debug("Start building StatsCollector!");
    buildIndexCreationInfo();
    LOGGER.info("Finished building StatsCollector!");
    LOGGER.info("Collected stats for {} documents", totalDocs);

    // Initialize the index creation using the per-column statistics information
    indexCreator.init(config, segmentIndexCreationInfo, indexCreationInfoMap, dataSchema, tempIndexDir);

    // Build the index
    recordReader.rewind();
    LOGGER.info("Start building IndexCreator!");
    GenericRow readRow = new GenericRow();
    GenericRow transformedRow = new GenericRow();
    while (recordReader.hasNext()) {
      long start = System.currentTimeMillis();
      transformedRow = readNextRowSanitized(readRow, transformedRow);
      long stop = System.currentTimeMillis();
      indexCreator.indexRow(transformedRow);
      long stop1 = System.currentTimeMillis();
      totalRecordReadTime += (stop - start);
      totalIndexTime += (stop1 - stop);
    }
    recordReader.close();
    LOGGER.info("Finished records indexing in IndexCreator!");

    int numErrors, numConversions, numNulls, numNullCols;
    if ((numErrors = extractor.getTotalErrors()) > 0) {
      LOGGER.warn("Index creator for schema {} had {} rows with errors", dataSchema.getSchemaName(), numErrors);
    }
    Map<String, Integer> errorCount = extractor.getErrorCount();
    for (String column : errorCount.keySet()) {
      if ((numErrors = errorCount.get(column)) > 0) {
        LOGGER.info("Column {} had {} rows with errors", column, numErrors);
      }
    }
    if ((numConversions = extractor.getTotalConversions()) > 0) {
      LOGGER.info("Index creator for schema {} had {} rows with type conversions", dataSchema.getSchemaName(),
          numConversions);
    }
    if ((numNulls = extractor.getTotalNulls()) > 0) {
      LOGGER.info("Index creator for schema {} had {} rows with null columns", dataSchema.getSchemaName(), numNulls);
    }
    if ((numNullCols = extractor.getTotalNullCols()) > 0) {
      LOGGER.info("Index creator for schema {} had {} null columns", dataSchema.getSchemaName(), numNullCols);
    }

    handlePostCreation();
  }

  private void handlePostCreation() throws Exception {
    final String timeColumn = config.getTimeColumnName();
    segmentName = config.getSegmentNameGenerator().generateSegmentName(segmentStats.getColumnProfileFor(timeColumn));
    updateSegmentStartEndTimeIfNecessary(segmentStats.getColumnProfileFor(timeColumn));

    // Write the index files to disk
    indexCreator.setSegmentName(segmentName);
    indexCreator.seal();
    LOGGER.info("Finished segment seal!");

    // Delete the directory named after the segment name, if it exists
    final File outputDir = new File(config.getOutDir());
    final File segmentOutputDir = new File(outputDir, segmentName);
    if (segmentOutputDir.exists()) {
      FileUtils.deleteDirectory(segmentOutputDir);
    }

    // Move the temporary directory into its final location
    FileUtils.moveDirectory(tempIndexDir, segmentOutputDir);

    // Delete the temporary directory
    FileUtils.deleteQuietly(tempIndexDir);

    // Compute CRC
    final long crc = CrcUtils.forAllFilesInFolder(segmentOutputDir).computeCrc();

    // Persist creation metadata to disk
    persistCreationMeta(segmentOutputDir, crc);

    convertFormatIfNeeded(segmentOutputDir);
    LOGGER.info("Driver, record read time : {}", totalRecordReadTime);
    LOGGER.info("Driver, stats collector time : {}", totalStatsCollectorTime);
    LOGGER.info("Driver, indexing time : {}", totalIndexTime);
  }
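
  /*
   * Example of the SIMPLE_DATE conversion handled below, with an assumed pattern: if
   * config.getSimpleDateFormat() returns "yyyyMMdd" and the time column's min value is
   * "20160701", convertSDFToMillis(...) parses it with Joda-Time and returns the epoch millis
   * of 2016-07-01T00:00:00 in the JVM's default time zone.
   */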

  private void updateSegmentStartEndTimeIfNecessary(ColumnStatistics timeColumnStats) {
    switch (config.getTimeColumnType()) {
      case EPOCH:
        break;
      case SIMPLE_DATE:
        long startTime = convertStartTimeSDFToMillis(timeColumnStats);
        config.getCustomProperties()
            .put(V1Constants.MetadataKeys.Segment.SEGMENT_START_TIME, String.valueOf(startTime));
        long endTime = convertEndTimeSDFToMillis(timeColumnStats);
        config.getCustomProperties().put(V1Constants.MetadataKeys.Segment.SEGMENT_END_TIME, String.valueOf(endTime));
        break;
    }
  }

  public long convertStartTimeSDFToMillis(ColumnStatistics timeColumnStats) {
    final String minTimeStr = timeColumnStats.getMinValue().toString();
    return convertSDFToMillis(minTimeStr);
  }

  public long convertEndTimeSDFToMillis(ColumnStatistics timeColumnStats) {
    final String maxTimeStr = timeColumnStats.getMaxValue().toString();
    return convertSDFToMillis(maxTimeStr);
  }

  private long convertSDFToMillis(final String colValue) {
    final String sdfFormatStr = config.getSimpleDateFormat();
    DateTimeFormatter sdfFormatter = DateTimeFormat.forPattern(sdfFormatStr);
    DateTime dateTime = DateTime.parse(colValue, sdfFormatter);
    return dateTime.getMillis();
  }

  // Explanation of why we are using the format converter:
  // There are three options for correctly generating segments in the v3 format:
  // 1. Generate v3 directly: This is efficient, but the v3 index writer needs to know the buffer
  //    size up front, and the inverted, star tree and raw indexes don't know their sizes up front.
  //    This is also the least flexible approach if we add more indexes in the future.
  // 2. Hold the data in memory: One way to work around pre-declaring sizes in (1) is to allocate a
  //    "large" buffer (2GB?), hold the data in memory, and write the buffer at the end. The memory
  //    requirement in this case increases linearly with the number of columns. A variation of that
  //    is to mmap the data to separate files, which is what we are doing here.
  // 3. Generate the dictionary and forward indexes in v3, generate the inverted, star tree and raw
  //    indexes in separate files, then add those files to the v3 index file. This leads to a lot of
  //    hodgepodge code to handle multiple segment formats.
  // Using the converter is similar to option (2), plus it's battle-tested code. We will roll out
  // with this change to keep the changes limited. Once we've migrated, we can implement approach
  // (1) with the option to copy for indexes whose sizes we don't know up front.
  private void convertFormatIfNeeded(File segmentDirectory) throws Exception {
    SegmentVersion versionToGenerate = config.getSegmentVersion();
    if (versionToGenerate.equals(SegmentVersion.v1)) {
      // v1 by default
      return;
    }
    SegmentFormatConverter converter =
        SegmentFormatConverterFactory.getConverter(SegmentVersion.v1, SegmentVersion.v3);
    converter.convert(segmentDirectory);
  }

  public ColumnStatistics getColumnStatisticsCollector(final String columnName) throws Exception {
    return segmentStats.getColumnProfileFor(columnName);
  }

  public void overWriteSegmentName(String segmentName) {
    this.segmentName = segmentName;
  }

  /**
   * Writes the segment creation metadata to disk.
   */
  void persistCreationMeta(File outputDir, long crc) throws IOException {
    final File crcFile = new File(outputDir, V1Constants.SEGMENT_CREATION_META);
    final DataOutputStream out = new DataOutputStream(new FileOutputStream(crcFile));
    out.writeLong(crc);

    long creationTime = System.currentTimeMillis();

    // Use the creation time from the configuration if it exists and is not -1
    try {
      long configCreationTime = Long.parseLong(config.getCreationTime());
      if (0L < configCreationTime) {
        creationTime = configCreationTime;
      }
    } catch (Exception nfe) {
      // Ignore NPE and NFE, use the current time
    }

    out.writeLong(creationTime);
    out.close();
  }
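
  /*
   * The creation.meta file written by persistCreationMeta() is simply two big-endian longs
   * (the format DataOutputStream.writeLong produces): the CRC followed by the creation time.
   * A reader-side sketch, not part of this class:
   *
   *   try (DataInputStream in = new DataInputStream(new FileInputStream(crcFile))) {
   *     long crc = in.readLong();
   *     long creationTimeMillis = in.readLong();
   *   }
   */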

  /**
   * Completes the stats gathering process and stores the stats information in indexCreationInfoMap.
   */
  void buildIndexCreationInfo() throws Exception {
    for (FieldSpec spec : dataSchema.getAllFieldSpecs()) {
      String column = spec.getName();
      ColumnStatistics columnProfile = segmentStats.getColumnProfileFor(column);
      indexCreationInfoMap.put(column,
          new ColumnIndexCreationInfo(true /* createDictionary */, columnProfile.getMinValue(),
              columnProfile.getMaxValue(), columnProfile.getUniqueValuesSet(),
              ForwardIndexType.FIXED_BIT_COMPRESSED, InvertedIndexType.ROARING_BITMAPS, columnProfile.isSorted(),
              columnProfile.hasNull(), columnProfile.getTotalNumberOfEntries(),
              columnProfile.getMaxNumberOfMultiValues(), columnProfile.getLengthOfLargestElement(),
              false /* isAutoGenerated */, columnProfile.getPartitionFunction(), columnProfile.getNumPartitions(),
              columnProfile.getPartitionRanges(), dataSchema.getFieldSpecFor(column).getDefaultNullValue()));
    }
    segmentIndexCreationInfo.setTotalDocs(totalDocs);
    segmentIndexCreationInfo.setTotalRawDocs(totalRawDocs);
    segmentIndexCreationInfo.setTotalAggDocs(totalAggDocs);
    segmentIndexCreationInfo.setStarTreeEnabled(createStarTree);
    segmentIndexCreationInfo.setTotalConversions(extractor.getTotalConversions());
    segmentIndexCreationInfo.setTotalErrors(extractor.getTotalErrors());
    segmentIndexCreationInfo.setTotalNullCols(extractor.getTotalNullCols());
    segmentIndexCreationInfo.setTotalNulls(extractor.getTotalNulls());
  }

  /**
   * Returns the name of the segment associated with this index creation driver.
   */
  @Override
  public String getSegmentName() {
    return segmentName;
  }

  /**
   * Returns the path of the output directory.
   */
  @Override
  public File getOutputDirectory() {
    return new File(new File(config.getOutDir()), segmentName);
  }

  private GenericRow readNextRowSanitized(GenericRow readRow, GenericRow transformedRow) {
    readRow = GenericRow.createOrReuseRow(readRow);
    readRow = recordReader.next(readRow);
    transformedRow = GenericRow.createOrReuseRow(transformedRow);
    return extractor.transform(readRow, transformedRow);
  }

  /**
   * Helper method to binary-search a given key in an input array.
   * Both the input array and the key to search for are passed in as 'Object'.
   *
   * - Supported data types are int, long, float, double and String.
   * - Throws an exception for any other data type.
   *
   * @param inputArray Input array to search
   * @param key Key to search for
   * @return Index of the key in the input array, as returned by Arrays.binarySearch
   */
  private int searchValueInArray(Object inputArray, Object key) {
    if (inputArray instanceof int[]) {
      return Arrays.binarySearch((int[]) inputArray, (Integer) key);
    } else if (inputArray instanceof long[]) {
      return Arrays.binarySearch((long[]) inputArray, (Long) key);
    } else if (inputArray instanceof float[]) {
      return Arrays.binarySearch((float[]) inputArray, (Float) key);
    } else if (inputArray instanceof double[]) {
      return Arrays.binarySearch((double[]) inputArray, (Double) key);
    } else if (inputArray instanceof String[]) {
      return Arrays.binarySearch((String[]) inputArray, key);
    } else if (inputArray instanceof Object[]) {
      return Arrays.binarySearch((Object[]) inputArray, key);
    } else {
      throw new RuntimeException(
          "Unexpected data type encountered while updating StarTree node: " + inputArray.getClass().getName());
    }
  }
}