/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.tools.query.comparison;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.segment.index.loader.Loaders;
import com.linkedin.pinot.core.segment.index.readers.Dictionary;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.FileUtils;
/**
* Given a segments directory, pick all segments and read the dictionaries for all single-value dimension columns.
* Here we will treat time column (if exists) as a single-value dimension column.
*/
public class SegmentInfoProvider {
private static final String TMP_DIR = System.getProperty("java.io.tmpdir");
private static final String SEGMENT_INFO_PROVIDER = "segmentInfoProvider";
private final List<String> _singleValueDimensionColumns;
private final List<String> _metricColumns;
private final Map<String, List<Object>> _singleValueDimensionValuesMap;
/**
* Assume that segments directory has at least one segment.
* - Gets all single-value dimension/metric columns from the directory.
* - Reads dictionaries for all single-value dimension columns.
*
* @param segmentDirName Name of directory containing tarred/untarred segments.
* @throws Exception
*/
public SegmentInfoProvider(String segmentDirName)
throws Exception {
Set<String> uniqueMetrics = new HashSet<>();
Set<String> uniqueSingleValueDimensions = new HashSet<>();
Map<String, Set<Object>> uniqueSingleValueDimensionValues = new HashMap<>();
File segmentsDir = new File(segmentDirName);
for (File segment : segmentsDir.listFiles()) {
readOneSegment(segment, uniqueMetrics, uniqueSingleValueDimensions, uniqueSingleValueDimensionValues);
}
_singleValueDimensionColumns = new ArrayList<>(uniqueSingleValueDimensions);
_metricColumns = new ArrayList<>(uniqueMetrics);
_singleValueDimensionValuesMap = new HashMap<>(uniqueSingleValueDimensionValues.size());
for (Map.Entry<String, Set<Object>> entry : uniqueSingleValueDimensionValues.entrySet()) {
_singleValueDimensionValuesMap.put(entry.getKey(), new ArrayList<>(entry.getValue()));
}
}
/**
* Read the metadata of the given segmentFile and collect:
* - Unique metric columns
* - Unique single-value dimension columns
* - Unique values for each single-value dimension columns
*
* @param segmentFile segment file.
* @param uniqueMetrics unique metric columns buffer.
* @param uniqueSingleValueDimensions unique single-value dimension columns buffer.
* @param singleValueDimensionValuesMap single-value dimension columns to unique values map buffer.
* @throws Exception
*/
private void readOneSegment(File segmentFile, Set<String> uniqueMetrics, Set<String> uniqueSingleValueDimensions,
Map<String, Set<Object>> singleValueDimensionValuesMap)
throws Exception {
// Get segment directory from segment file (decompress if necessary).
File segmentDir;
File tmpDir = null;
if (segmentFile.isFile()) {
tmpDir = File.createTempFile(SEGMENT_INFO_PROVIDER, null, new File(TMP_DIR));
FileUtils.deleteQuietly(tmpDir);
tmpDir.mkdir();
TarGzCompressionUtils.unTar(segmentFile, tmpDir);
segmentDir = tmpDir.listFiles()[0];
} else {
segmentDir = segmentFile;
}
IndexSegment indexSegment = Loaders.IndexSegment.load(segmentDir, ReadMode.heap);
Schema schema = indexSegment.getSegmentMetadata().getSchema();
// Add time column if exists.
String timeColumn = schema.getTimeColumnName();
if (timeColumn != null) {
uniqueSingleValueDimensions.add(timeColumn);
loadValuesForSingleValueDimension(indexSegment, singleValueDimensionValuesMap, timeColumn);
}
// Add all metric columns.
uniqueMetrics.addAll(schema.getMetricNames());
// Add all single-value dimension columns.
for (DimensionFieldSpec fieldSpec : schema.getDimensionFieldSpecs()) {
if (!fieldSpec.isSingleValueField()) {
continue;
}
String column = fieldSpec.getName();
uniqueSingleValueDimensions.add(column);
loadValuesForSingleValueDimension(indexSegment, singleValueDimensionValuesMap, column);
}
if (tmpDir != null) {
FileUtils.deleteQuietly(tmpDir);
}
}
/**
* Helper method to load values for a single-value dimension.
*
* @param indexSegment index segment.
* @param singleValueDimensionValuesMap single-value dimension columns to unique values map buffer.
* @param column single-value dimension name.
*/
private void loadValuesForSingleValueDimension(IndexSegment indexSegment,
Map<String, Set<Object>> singleValueDimensionValuesMap, String column) {
Dictionary dictionary = indexSegment.getDataSource(column).getDictionary();
Set<Object> values = singleValueDimensionValuesMap.get(column);
if (values == null) {
values = new HashSet<>();
singleValueDimensionValuesMap.put(column, values);
}
int length = dictionary.length();
for (int i = 0; i < length; i++) {
values.add(dictionary.get(i));
}
}
/**
* Return the list of single-value dimension columns.
*
* @return single-value dimension columns.
*/
public List<String> getSingleValueDimensionColumns() {
return _singleValueDimensionColumns;
}
/**
* Return the list of metric columns
*
* @return metric columns.
*/
public List<String> getMetricColumns() {
return _metricColumns;
}
/**
* Return the map from single-value dimension names to values list for the column.
*
* @return map from single-value dimension names to values list for the column.
*/
public Map<String, List<Object>> getSingleValueDimensionValuesMap() {
return _singleValueDimensionValuesMap;
}
}