/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.tools.admin.command;
import com.linkedin.pinot.core.data.readers.FileFormat;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import com.linkedin.pinot.tools.Command;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.commons.io.FileUtils;
import org.codehaus.jackson.map.ObjectMapper;
import org.kohsuke.args4j.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Admin command that creates Pinot segments from the input data files found in a directory.
 *
 * <p>Options can be supplied on the command line and/or through a segment generator config file.
 * When both supply a value, the command line value is used and a warning is logged. One segment is
 * generated per matching input file and is named {@code <segmentName>_<index>}, where the index is
 * the position of the file in the directory listing.
 */
public class CreateSegmentCommand extends AbstractBaseAdminCommand implements Command {
  private static final Logger LOGGER = LoggerFactory.getLogger(CreateSegmentCommand.class);

  @Option(name = "-generatorConfigFile", required = false, metaVar = "<string>",
      usage = "Config file for segment generator.")
  private String _generatorConfigFile;

  @Option(name = "-dataDir", required = false, metaVar = "<string>", usage = "Directory containing the data.")
  private String _dataDir;

  @Option(name = "-format", required = false, metaVar = "<AVRO/CSV/JSON>", usage = "Input data format.")
  private FileFormat _format;

  @Option(name = "-outDir", required = false, metaVar = "<string>", usage = "Name of output directory.")
  private String _outDir;

  @Option(name = "-overwrite", required = false, usage = "Overwrite existing output directory.")
  private boolean _overwrite = false;

  @Option(name = "-tableName", required = false, metaVar = "<string>", usage = "Name of the table.")
  private String _tableName;

  @Option(name = "-segmentName", required = false, metaVar = "<string>", usage = "Name of the segment.")
  private String _segmentName;

  @Option(name = "-schemaFile", required = false, metaVar = "<string>", usage = "File containing schema for data.")
  private String _schemaFile;

  @Option(name = "-readerConfigFile", required = false, metaVar = "<string>", usage = "Config file for record reader.")
  private String _readerConfigFile;

  // NOTE(review): unlike its siblings this field is package-private; left as is because other
  // classes in this package may read it directly.
  @Option(name = "-enableStarTreeIndex", required = false, usage = "Enable Star Tree Index.")
  boolean _enableStarTreeIndex = false;

  @Option(name = "-starTreeIndexSpecFile", required = false, metaVar = "<string>",
      usage = "Config file for star tree index.")
  private String _starTreeIndexSpecFile;

  @Option(name = "-numThreads", required = false, metaVar = "<int>",
      usage = "Parallelism while generating segments, default is 1.")
  private int _numThreads = 1;

  @Option(name = "-help", required = false, help = true, aliases = {"-h", "--h", "--help"},
      usage = "Print this message.")
  private boolean _help = false;

  // Fluent setters: each one stores the value and returns this command so calls can be chained.

  public CreateSegmentCommand setGeneratorConfigFile(String generatorConfigFile) {
    _generatorConfigFile = generatorConfigFile;
    return this;
  }

  public CreateSegmentCommand setDataDir(String dataDir) {
    _dataDir = dataDir;
    return this;
  }

  public CreateSegmentCommand setFormat(FileFormat format) {
    _format = format;
    return this;
  }

  public CreateSegmentCommand setOutDir(String outDir) {
    _outDir = outDir;
    return this;
  }

  public CreateSegmentCommand setOverwrite(boolean overwrite) {
    _overwrite = overwrite;
    return this;
  }

  public CreateSegmentCommand setTableName(String tableName) {
    _tableName = tableName;
    return this;
  }

  public CreateSegmentCommand setSegmentName(String segmentName) {
    _segmentName = segmentName;
    return this;
  }

  public CreateSegmentCommand setSchemaFile(String schemaFile) {
    _schemaFile = schemaFile;
    return this;
  }

  public CreateSegmentCommand setReaderConfigFile(String readerConfigFile) {
    _readerConfigFile = readerConfigFile;
    return this;
  }

  public CreateSegmentCommand setEnableStarTreeIndex(boolean enableStarTreeIndex) {
    _enableStarTreeIndex = enableStarTreeIndex;
    return this;
  }

  public CreateSegmentCommand setStarTreeIndexSpecFile(String starTreeIndexSpecFile) {
    _starTreeIndexSpecFile = starTreeIndexSpecFile;
    return this;
  }

  public CreateSegmentCommand setNumThreads(int numThreads) {
    _numThreads = numThreads;
    return this;
  }

  /**
   * Renders the command with its current option values in command line form (used for logging).
   */
  @Override
  public String toString() {
    return ("CreateSegment -generatorConfigFile " + _generatorConfigFile + " -dataDir " + _dataDir + " -format "
        + _format + " -outDir " + _outDir + " -overwrite " + _overwrite + " -tableName " + _tableName + " -segmentName "
        + _segmentName + " -schemaFile " + _schemaFile + " -readerConfigFile " + _readerConfigFile
        + " -enableStarTreeIndex " + _enableStarTreeIndex + " -starTreeIndexSpecFile " + _starTreeIndexSpecFile
        + " -numThreads " + _numThreads);
  }

  @Override
  public final String getName() {
    return "CreateSegment";
  }

  @Override
  public String description() {
    return "Create pinot segments from provided avro/csv/json input data.";
  }

  @Override
  public boolean getHelp() {
    return _help;
  }

  /**
   * Resolves the effective options, validates the input and output directories and generates one
   * segment per input file, using up to {@code numThreads} worker threads.
   *
   * @return {@code true} when every segment was generated; {@code false} when generation did not
   *         finish within one hour
   * @throws RuntimeException if a required option is missing, the data directory is missing or has
   *         no matching files, or generating any segment failed (the first failure is the cause)
   * @throws IOException if the output directory exists and overwrite is not enabled
   * @throws Exception on any other failure, e.g. reading the generator config file or interruption
   */
  @Override
  public boolean execute()
      throws Exception {
    LOGGER.info("Executing command: {}", toString());

    // Load generator config if exist.
    final SegmentGeneratorConfig segmentGeneratorConfig;
    if (_generatorConfigFile != null) {
      segmentGeneratorConfig =
          new ObjectMapper().readValue(new File(_generatorConfigFile), SegmentGeneratorConfig.class);
    } else {
      segmentGeneratorConfig = new SegmentGeneratorConfig();
    }

    // Merge command line options with the generator config; the command line wins on conflict.
    _dataDir = resolveRequiredOption("dataDir", _dataDir, segmentGeneratorConfig.getDataDir());

    FileFormat configFormat = segmentGeneratorConfig.getFormat();
    if (_format == null) {
      if (configFormat == null) {
        throw new RuntimeException("Format cannot be null in config file.");
      }
      _format = configFormat;
    } else {
      // NOTE(review): AVRO in the config is not reported as a conflict - presumably because it is
      // the config's default value rather than an explicit choice; confirm against
      // SegmentGeneratorConfig.
      if (configFormat != _format && configFormat != FileFormat.AVRO) {
        LOGGER.warn("Find format conflict in command line and config file, use config in command line: {}", _format);
      }
    }

    _outDir = resolveRequiredOption("outDir", _outDir, segmentGeneratorConfig.getOutDir());

    // Overwrite is enabled when either the command line or the config file asks for it.
    if (segmentGeneratorConfig.isOverwrite()) {
      _overwrite = true;
    }

    _tableName = resolveRequiredOption("tableName", _tableName, segmentGeneratorConfig.getTableName());
    _segmentName = resolveRequiredOption("segmentName", _segmentName, segmentGeneratorConfig.getSegmentName());

    // Validate the input before touching the output directory.
    File[] files = listInputFiles();
    prepareOutputDirectory();

    // Set other generator configs from command line.
    segmentGeneratorConfig.setDataDir(_dataDir);
    segmentGeneratorConfig.setFormat(_format);
    segmentGeneratorConfig.setOutDir(_outDir);
    segmentGeneratorConfig.setOverwrite(_overwrite);
    segmentGeneratorConfig.setTableName(_tableName);
    segmentGeneratorConfig.setSegmentName(_segmentName);
    if (_schemaFile != null) {
      warnOnConflict("schemaFile", _schemaFile, segmentGeneratorConfig.getSchemaFile());
      segmentGeneratorConfig.setSchemaFile(_schemaFile);
    }
    if (_readerConfigFile != null) {
      warnOnConflict("readerConfigFile", _readerConfigFile, segmentGeneratorConfig.getReaderConfigFile());
      segmentGeneratorConfig.setReaderConfigFile(_readerConfigFile);
    }
    if (_enableStarTreeIndex) {
      segmentGeneratorConfig.setEnableStarTreeIndex(true);
    }
    if (_starTreeIndexSpecFile != null) {
      warnOnConflict("starTreeIndexSpecFile", _starTreeIndexSpecFile,
          segmentGeneratorConfig.getStarTreeIndexSpecFile());
      segmentGeneratorConfig.setStarTreeIndexSpecFile(_starTreeIndexSpecFile);
    }

    return generateSegments(segmentGeneratorConfig, files);
  }

  /**
   * Picks the effective value of an option that must be set on the command line or in the config.
   *
   * @return the command line value when present (warning if the config disagrees), otherwise the
   *         config value
   * @throws RuntimeException if neither source provides a value
   */
  private static String resolveRequiredOption(String optionName, String commandLineValue, String configValue) {
    if (commandLineValue == null) {
      if (configValue == null) {
        throw new RuntimeException("Must specify " + optionName + ".");
      }
      return configValue;
    }
    warnOnConflict(optionName, commandLineValue, configValue);
    return commandLineValue;
  }

  /** Logs a warning when the config file holds a value that differs from the command line value. */
  private static void warnOnConflict(String optionName, String commandLineValue, String configValue) {
    if (configValue != null && !configValue.equals(commandLineValue)) {
      LOGGER.warn("Find {} conflict in command line and config file, use config in command line: {}", optionName,
          commandLineValue);
    }
  }

  /**
   * Lists the files in the data directory whose name ends with the input format name (case
   * insensitive).
   *
   * @throws RuntimeException if the data directory does not exist or contains no matching file
   */
  private File[] listInputFiles() {
    File dir = new File(_dataDir);
    if (!dir.exists() || !dir.isDirectory()) {
      throw new RuntimeException("Data directory " + _dataDir + " not found.");
    }

    // Locale.ROOT keeps the case folding independent of the JVM default locale; the suffix is
    // computed once instead of once per directory entry.
    final String formatSuffix = _format.toString().toLowerCase(Locale.ROOT);
    File[] files = dir.listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        return name.toLowerCase(Locale.ROOT).endsWith(formatSuffix);
      }
    });

    if ((files == null) || (files.length == 0)) {
      throw new RuntimeException(
          "Data directory " + _dataDir + " does not contain " + _format.toString().toUpperCase(Locale.ROOT)
              + " files.");
    }
    return files;
  }

  /**
   * Makes sure the output directory does not already exist, deleting it when overwrite is enabled.
   *
   * @throws IOException if the directory exists and overwrite is disabled, or deletion fails
   */
  private void prepareOutputDirectory()
      throws IOException {
    File outDir = new File(_outDir);
    if (outDir.exists()) {
      if (!_overwrite) {
        throw new IOException("Output directory " + _outDir + " already exists.");
      }
      FileUtils.deleteDirectory(outDir);
    }
  }

  /**
   * Generates one segment per input file on a fixed size thread pool and waits for completion.
   *
   * <p>Tasks are submitted as {@link Callable}s and their {@link Future}s are inspected afterwards,
   * so that a failure inside a worker thread is reported to the caller instead of being lost.
   *
   * @return {@code true} when all segments were generated, {@code false} on timeout (one hour)
   * @throws RuntimeException if generating any segment failed; the first failure is the cause
   * @throws InterruptedException if the calling thread is interrupted while waiting
   */
  private boolean generateSegments(final SegmentGeneratorConfig baseConfig, File[] inputFiles)
      throws InterruptedException {
    if (_numThreads < 1) {
      throw new IllegalArgumentException("numThreads must be positive, got: " + _numThreads);
    }

    ExecutorService executor = Executors.newFixedThreadPool(_numThreads);
    List<Future<Void>> futures = new ArrayList<Future<Void>>(inputFiles.length);
    try {
      for (int i = 0; i < inputFiles.length; i++) {
        final File inputFile = inputFiles[i];
        final String segmentName = _segmentName + "_" + i;
        futures.add(executor.submit(new Callable<Void>() {
          @Override
          public Void call()
              throws Exception {
            // Each task works on its own copy of the shared base config.
            SegmentGeneratorConfig config = new SegmentGeneratorConfig(baseConfig);
            config.setInputFilePath(inputFile.getAbsolutePath());
            config.setSegmentName(segmentName);
            config.loadConfigFiles();

            final SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
            driver.init(config);
            driver.build();
            return null;
          }
        }));
      }
    } finally {
      // Always stop accepting work so the pool threads can exit, even if submission failed.
      executor.shutdown();
    }

    if (!executor.awaitTermination(1, TimeUnit.HOURS)) {
      LOGGER.error("Segment generation did not finish within 1 hour.");
      return false;
    }

    // The pool has terminated, so every future is done and get() returns immediately.
    Throwable firstFailure = null;
    for (int i = 0; i < inputFiles.length; i++) {
      try {
        futures.get(i).get();
      } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        LOGGER.error("Failed to generate segment from file: " + inputFiles[i], cause);
        if (firstFailure == null) {
          firstFailure = cause;
        }
      }
    }
    if (firstFailure != null) {
      throw new RuntimeException("Failed to generate segment for at least one input file.", firstFailure);
    }
    return true;
  }
}