/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.tools.segment.converter;
import com.linkedin.pinot.common.segment.SegmentMetadata;
import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
import com.linkedin.pinot.core.data.readers.FileFormat;
import com.linkedin.pinot.core.data.readers.PinotSegmentRecordReader;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.segment.DefaultSegmentNameGenerator;
import com.linkedin.pinot.core.segment.SegmentNameGenerator;
import com.linkedin.pinot.core.segment.creator.SegmentIndexCreationDriver;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import java.io.File;
import java.lang.reflect.Field;
import org.apache.commons.io.FileUtils;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
/**
* Class to convert Pinot Columnar Segment to Pinot Star Tree Segment
*/
public class ColumnarToStarTreeConverter {
private static final String TMP_DIR_PREFIX = "_tmp_";
@Option(name = "-inputDir", required = true, usage = "Path to input directory containing Pinot segments")
private String _inputDirName = null;
@Option(name = "-outputDir", required = true, usage = "Path to output directory")
private String _outputDirName = null;
@Option(name = "-starTreeConfigFile", required = false, usage = "Path to Star Tree configuration file")
private String _starTreeConfigFileName = null;
@SuppressWarnings("FieldCanBeLocal")
@Option(name = "-overwrite", required = false, usage = "Overwrite existing output directory.")
private boolean _overwrite = false;
@Option(name = "-help", required = false, help = true, aliases = {"-h"}, usage = "print this message")
private boolean _help = false;
/**
* Convert the specified set of columnar segments into star tree segments.
* @throws Exception
*/
public void convert()
throws Exception {
File inputDir = new File(_inputDirName);
if (!inputDir.exists()) {
System.out.println("Error: Input directory " + _inputDirName + " does not exist.");
return;
}
File outputDir = new File(_outputDirName);
if (!outputDir.exists()) {
System.out.println("Error: Output directory " + _outputDirName + " does not exist");
return;
}
File[] files = inputDir.listFiles();
if (files == null || files.length == 0) {
System.out.println("Error: Input directory " + _inputDirName + " is empty");
return;
}
for (File file : files) {
String fileName = file.getName();
File segment;
boolean cleanupTempFile = false;
if (fileName.endsWith("tar.gz") || fileName.endsWith("tgz")) {
File untarredSegment = new File(outputDir, TMP_DIR_PREFIX + System.currentTimeMillis());
segment = TarGzCompressionUtils.unTar(file, untarredSegment).get(0);
cleanupTempFile = true;
} else {
segment = file;
}
convertSegment(segment);
if (cleanupTempFile) {
FileUtils.deleteQuietly(segment.getParentFile());
}
}
}
/**
* Helper method to perform the conversion.
* @param columnarSegment Columnar segment directory to convert
* @throws Exception
*/
private void convertSegment(File columnarSegment)
throws Exception {
PinotSegmentRecordReader pinotSegmentRecordReader = new PinotSegmentRecordReader(columnarSegment);
SegmentGeneratorConfig config = new SegmentGeneratorConfig(pinotSegmentRecordReader.getSchema());
config.setDataDir(_inputDirName);
config.setInputFilePath(columnarSegment.getAbsolutePath());
config.setFormat(FileFormat.PINOT);
config.setEnableStarTreeIndex(true);
config.setOutDir(_outputDirName);
config.setStarTreeIndexSpecFile(_starTreeConfigFileName);
config.setOverwrite(_overwrite);
// Read the segment and table name from the segment's metadata.
SegmentMetadata metadata = new SegmentMetadataImpl(columnarSegment);
SegmentNameGenerator nameGenerator = new DefaultSegmentNameGenerator(metadata.getName());
config.setSegmentNameGenerator(nameGenerator);
config.setTableName(metadata.getTableName());
SegmentIndexCreationDriver indexCreator = new SegmentIndexCreationDriverImpl();
indexCreator.init(config);
indexCreator.build();
}
/**
* Helper method to print usage at the command line interface.
*/
private static void printUsage() {
System.out.println("Usage: ColumnarToStarTreeConverter");
for (Field field : ColumnarToStarTreeConverter.class.getDeclaredFields()) {
if (field.isAnnotationPresent(Option.class)) {
Option option = field.getAnnotation(Option.class);
System.out.println(
String.format("\t%-15s: %s (required=%s)", option.name(), option.usage(), option.required()));
}
}
}
/**
* Main driver for the converter class.
*
* @param args Command line arguments
* @throws Exception
*/
public static void main(String[] args)
throws Exception {
ColumnarToStarTreeConverter converter = new ColumnarToStarTreeConverter();
CmdLineParser parser = new CmdLineParser(converter);
parser.parseArgument(args);
if (converter._help) {
printUsage();
return;
}
converter.convert();
}
}