Java Examples for com.streamsets.datacollector.store.PipelineInfo
The following Java examples will help you to understand the usage of com.streamsets.datacollector.store.PipelineInfo. These source code samples are taken from different open-source projects.
Example 1
| Project: datacollector-master File: ClusterProviderImpl.java View source code |
/**
 * Prepares all staging artifacts for a cluster pipeline run and launches the submit
 * process, then polls its output until exactly one application id appears.
 *
 * Visible phases in this method:
 *   1. Build pipeline beans and collect stage configurations (error stage, stats
 *      aggregator if present, origin, then remaining stages).
 *   2. For input-lane-less stages (i.e. sources), flatten their configs into
 *      {@code sourceConfigs} and collect extra jars matching {@code libJarsRegex};
 *      also resolve the pipeline execution mode.
 *   3. Create libs.tar.gz, resources.tar.gz and etc.tar.gz under {@code stagingDir},
 *      writing the pipeline/info/rules/acl JSON files into a cloned etc directory.
 *   4. Copy a cluster-specific log4j.properties into the staging directory.
 *   5. Build the submit-command arguments per execution mode (MapReduce,
 *      Spark-on-YARN, or Spark-on-Mesos) and start the external process.
 *   6. Poll the process stdout/stderr for a YARN application id or Mesos driver id
 *      (and for Kerberos credential errors), up to {@code timeToWaitForFailure} seconds.
 *
 * @return ApplicationState carrying the discovered application id, the generated
 *         cluster token, and (for Mesos) the hosting-jar directory id
 * @throws IOException if a Kerberos "no valid credentials" error is seen in the
 *         submit output (and potentially from declared-but-unwrapped I/O paths)
 * @throws TimeoutException declared on the signature; not thrown directly in the
 *         visible body — presumably thrown by callees. TODO confirm.
 * @throws IllegalStateException on pipeline config errors, unknown library type,
 *         more than one application id, interruption, or startup timeout
 */
@SuppressWarnings("unchecked")
private ApplicationState startPipelineInternal(SystemProcessFactory systemProcessFactory, File clusterManager, File outputDir, Map<String, String> environment, Map<String, String> sourceInfo, PipelineConfiguration pipelineConfiguration, StageLibraryTask stageLibrary, File etcDir, File resourcesDir, File staticWebDir, File bootstrapDir, URLClassLoader apiCL, URLClassLoader containerCL, long timeToWaitForFailure, File stagingDir, RuleDefinitions ruleDefinitions, Acl acl) throws IOException, TimeoutException {
// Defensive copy: cluster-type/staging entries are added to this map further down.
environment = Maps.newHashMap(environment);
// create libs.tar.gz file for pipeline
Map<String, List<URL>> streamsetsLibsCl = new HashMap<>();
Map<String, List<URL>> userLibsCL = new HashMap<>();
Map<String, String> sourceConfigs = new HashMap<>();
ImmutableList.Builder<StageConfiguration> pipelineConfigurations = ImmutableList.builder();
// order is important here as we don't want error stage
// configs overriding source stage configs
String clusterToken = UUID.randomUUID().toString();
Set<String> jarsToShip = new LinkedHashSet<>();
List<Issue> errors = new ArrayList<>();
// Materialize the pipeline beans; any configuration issue aborts the submit.
PipelineBean pipelineBean = PipelineBeanCreator.get().create(false, stageLibrary, pipelineConfiguration, errors);
if (!errors.isEmpty()) {
String msg = Utils.format("Found '{}' configuration errors: {}", errors.size(), errors);
throw new IllegalStateException(msg);
}
// Collect stage configs: error stage first, stats aggregator (may be absent),
// then origin, then the remaining pipeline stages — see the ordering comment above.
pipelineConfigurations.add(pipelineBean.getErrorStage().getConfiguration());
StageBean statsStage = pipelineBean.getStatsAggregatorStage();
// statsStage is null for pre 1.3 pipelines
if (statsStage != null) {
pipelineConfigurations.add(statsStage.getConfiguration());
}
pipelineConfigurations.add(pipelineBean.getOrigin().getConfiguration());
for (StageBean stageBean : pipelineBean.getPipelineStageBeans().getStages()) {
pipelineConfigurations.add(stageBean.getConfiguration());
}
ExecutionMode executionMode = ExecutionMode.STANDALONE;
for (StageConfiguration stageConf : pipelineConfigurations.build()) {
StageDefinition stageDef = stageLibrary.getStage(stageConf.getLibrary(), stageConf.getStageName(), false);
// A stage with no input lanes is a source; its configs are exported to the
// cluster job as flat properties in sourceConfigs.
if (stageConf.getInputLanes().isEmpty()) {
for (Config conf : stageConf.getConfiguration()) {
if (conf.getValue() != null) {
Object value = conf.getValue();
if (value instanceof List) {
List values = (List) value;
if (values.isEmpty()) {
LOG.debug("Conf value for " + conf.getName() + " is empty");
} else {
Object first = values.get(0);
if (canCastToString(first)) {
// List of scalars -> comma-joined property value.
sourceConfigs.put(conf.getName(), Joiner.on(",").join(values));
} else if (first instanceof Map) {
addToSourceConfigs(sourceConfigs, (List<Map<String, Object>>) values);
} else {
LOG.info("List is of type '{}' which cannot be converted to property value.", first.getClass().getName());
}
}
} else if (canCastToString(conf.getValue())) {
LOG.debug("Adding to source configs " + conf.getName() + "=" + value);
sourceConfigs.put(conf.getName(), String.valueOf(value));
} else if (value instanceof Enum) {
value = ((Enum) value).name();
LOG.debug("Adding to source configs " + conf.getName() + "=" + value);
sourceConfigs.put(conf.getName(), String.valueOf(value));
} else {
LOG.warn("Conf value is of unknown type " + conf.getValue());
}
}
}
// NOTE(review): executionMode is recomputed for every input-lane-less stage even
// though the value is pipeline-level; the last computation wins. Redundant but
// presumably harmless — confirm before restructuring.
executionMode = PipelineBeanCreator.get().getExecutionMode(pipelineConfiguration, new ArrayList<Issue>());
// Ship any stage-library jars whose file names match the stage's libJarsRegex.
List<String> libJarsRegex = stageDef.getLibJarsRegex();
if (!libJarsRegex.isEmpty()) {
for (URL jarUrl : ((URLClassLoader) stageDef.getStageClassLoader()).getURLs()) {
File jarFile = new File(jarUrl.getPath());
for (String libJar : libJarsRegex) {
Pattern pattern = Pattern.compile(libJar);
Matcher matcher = pattern.matcher(jarFile.getName());
if (matcher.matches()) {
jarsToShip.add(jarFile.getAbsolutePath());
}
}
}
}
}
// Bucket the stage's jars by library type; anything other than streamsets-libs or
// user-libs is a hard error.
String type = StageLibraryUtils.getLibraryType(stageDef.getStageClassLoader());
String name = StageLibraryUtils.getLibraryName(stageDef.getStageClassLoader());
if (ClusterModeConstants.STREAMSETS_LIBS.equals(type)) {
streamsetsLibsCl.put(name, findJars(name, (URLClassLoader) stageDef.getStageClassLoader(), stageDef.getClassName()));
} else if (ClusterModeConstants.USER_LIBS.equals(type)) {
userLibsCL.put(name, findJars(name, (URLClassLoader) stageDef.getStageClassLoader(), stageDef.getClassName()));
} else {
throw new IllegalStateException(Utils.format("Error unknown stage library type: '{}'", type));
}
// Add all jars of stagelib to --jars. We only really need stuff from the extras directory.
if (stageDef.getClassName().equals(SPARK_PROCESSOR_STAGE)) {
LOG.info("Spark processor found in pipeline, adding to spark-submit");
// NOTE(review): if STREAMSETS_LIBRARIES_EXTRA_DIR is unset, getenv returns null and
// new File(null) throws NPE — assumes the env var is always set in cluster mode.
File extras = new File(System.getenv("STREAMSETS_LIBRARIES_EXTRA_DIR"));
LOG.info("Found extras dir: " + extras.toString());
File stageLibExtras = new File(extras.toString() + "/" + stageConf.getLibrary() + "/" + "lib");
LOG.info("StageLib Extras dir: " + stageLibExtras.toString());
File[] extraJarsForStageLib = stageLibExtras.listFiles();
if (extraJarsForStageLib != null) {
stream(extraJarsForStageLib).map(File::toString).forEach(jarsToShip::add);
}
addJarsToJarsList((URLClassLoader) stageDef.getStageClassLoader(), jarsToShip, "streamsets-datacollector-spark-api-[0-9]+.*");
}
}
// Streaming modes additionally ship the container and API jars to the cluster.
if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING || executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
LOG.info("Execution Mode is CLUSTER_STREAMING. Adding container jar and API jar to spark-submit");
addJarsToJarsList(containerCL, jarsToShip, "streamsets-datacollector-container-[0-9]+.*");
addJarsToJarsList(apiCL, jarsToShip, "streamsets-datacollector-api-[0-9]+.*");
}
LOG.info("stagingDir = '{}'", stagingDir);
LOG.info("bootstrapDir = '{}'", bootstrapDir);
LOG.info("etcDir = '{}'", etcDir);
LOG.info("resourcesDir = '{}'", resourcesDir);
LOG.info("staticWebDir = '{}'", staticWebDir);
Utils.checkState(staticWebDir.isDirectory(), Utils.format("Expected '{}' to be a directory", staticWebDir));
// Package api/container/stage-library jars plus the static web dir into libs.tar.gz.
File libsTarGz = new File(stagingDir, "libs.tar.gz");
try {
TarFileCreator.createLibsTarGz(findJars("api", apiCL, null), findJars("container", containerCL, null), streamsetsLibsCl, userLibsCL, staticWebDir, libsTarGz);
} catch (Exception ex) {
String msg = errorString("Serializing classpath: '{}'", ex);
throw new RuntimeException(msg, ex);
}
// Clone the resources directory into staging and package it as resources.tar.gz.
File resourcesTarGz = new File(stagingDir, "resources.tar.gz");
try {
resourcesDir = createDirectoryClone(resourcesDir, "resources", stagingDir);
TarFileCreator.createTarGz(resourcesDir, resourcesTarGz);
} catch (Exception ex) {
String msg = errorString("Serializing resources directory: '{}': {}", resourcesDir.getName(), ex);
throw new RuntimeException(msg, ex);
}
File etcTarGz = new File(stagingDir, "etc.tar.gz");
File sdcPropertiesFile;
File bootstrapJar = getBootstrapMainJar(bootstrapDir, BOOTSTRAP_MAIN_JAR_PATTERN);
File clusterBootstrapJar;
String mesosHostingJarDir = null;
String mesosURL = null;
Pattern clusterBootstrapJarFile = findClusterBootstrapJar(executionMode, pipelineConfiguration, stageLibrary);
clusterBootstrapJar = getBootstrapClusterJar(bootstrapDir, clusterBootstrapJarFile);
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
// Mesos fetches the uber jar over HTTP from this Data Collector instance, so build
// a per-pipeline hosting path and URL from the topic + pipeline name hash.
String topic = sourceConfigs.get(TOPIC);
String pipelineName = sourceInfo.get(ClusterModeConstants.CLUSTER_PIPELINE_NAME);
mesosHostingJarDir = MESOS_HOSTING_DIR_PARENT + File.separatorChar + getSha256(getMesosHostingDir(topic, pipelineName));
mesosURL = runtimeInfo.getBaseHttpUrl() + File.separatorChar + mesosHostingJarDir + File.separatorChar + clusterBootstrapJar.getName();
} else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
jarsToShip.add(getBootstrapClusterJar(bootstrapDir, CLUSTER_BOOTSTRAP_API_JAR_PATTERN).getAbsolutePath());
}
try {
// Clone etc into staging, then write the pipeline definition files under its data dir.
etcDir = createDirectoryClone(etcDir, "etc", stagingDir);
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
// Replace the existing log4j config in the cloned etc dir with the cluster one.
try (InputStream clusterLog4jProperties = Utils.checkNotNull(getClass().getResourceAsStream("/cluster-spark-log4j.properties"), "Cluster Log4J Properties")) {
File log4jProperty = new File(etcDir, runtimeInfo.getLog4jPropertiesFileName());
if (!log4jProperty.isFile()) {
throw new IllegalStateException(Utils.format("Log4j config file doesn't exist: '{}'", log4jProperty.getAbsolutePath()));
}
LOG.info("Copying log4j properties for mesos cluster mode");
FileUtils.copyInputStreamToFile(clusterLog4jProperties, log4jProperty);
}
}
PipelineInfo pipelineInfo = Utils.checkNotNull(pipelineConfiguration.getInfo(), "Pipeline Info");
String pipelineName = pipelineInfo.getPipelineId();
File rootDataDir = new File(etcDir, "data");
File pipelineBaseDir = new File(rootDataDir, PipelineDirectoryUtil.PIPELINE_INFO_BASE_DIR);
File pipelineDir = new File(pipelineBaseDir, PipelineUtils.escapedPipelineName(pipelineName));
if (!pipelineDir.exists()) {
if (!pipelineDir.mkdirs()) {
throw new RuntimeException("Failed to create pipeline directory " + pipelineDir.getPath());
}
}
// Serialize pipeline, info, rules and (optionally) ACL JSON files for the slave SDC.
File pipelineFile = new File(pipelineDir, FilePipelineStoreTask.PIPELINE_FILE);
ObjectMapperFactory.getOneLine().writeValue(pipelineFile, BeanHelper.wrapPipelineConfiguration(pipelineConfiguration));
File infoFile = new File(pipelineDir, FilePipelineStoreTask.INFO_FILE);
ObjectMapperFactory.getOneLine().writeValue(infoFile, BeanHelper.wrapPipelineInfo(pipelineInfo));
Utils.checkNotNull(ruleDefinitions, "ruleDefinitions");
File rulesFile = new File(pipelineDir, FilePipelineStoreTask.RULES_FILE);
ObjectMapperFactory.getOneLine().writeValue(rulesFile, BeanHelper.wrapRuleDefinitions(ruleDefinitions));
if (null != acl) {
// acl could be null if permissions is not enabled
File aclFile = new File(pipelineDir, FileAclStoreTask.ACL_FILE);
ObjectMapperFactory.getOneLine().writeValue(aclFile, AclDtoJsonMapper.INSTANCE.toAclJson(acl));
}
sdcPropertiesFile = new File(etcDir, "sdc.properties");
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
// Mesos streaming requires an HDFS/S3 checkpoint config dir containing core-site.xml.
String hdfsS3ConfDirValue = PipelineBeanCreator.get().getHdfsS3ConfDirectory(pipelineConfiguration);
if (hdfsS3ConfDirValue != null && !hdfsS3ConfDirValue.isEmpty()) {
File hdfsS3ConfDir = new File(resourcesDir, hdfsS3ConfDirValue).getAbsoluteFile();
if (!hdfsS3ConfDir.exists()) {
String msg = Utils.format("HDFS/S3 Checkpoint Configuration Directory '{}' doesn't exist", hdfsS3ConfDir.getPath());
throw new IllegalArgumentException(msg);
} else {
File coreSite = new File(hdfsS3ConfDir, "core-site.xml");
if (!coreSite.exists()) {
String msg = Utils.format("HDFS/S3 Checkpoint Configuration file core-site.xml '{}' doesn't exist", coreSite.getPath());
throw new IllegalStateException(msg);
}
sourceConfigs.put("hdfsS3ConfDir", hdfsS3ConfDirValue);
}
} else {
throw new IllegalStateException("HDFS/S3 Checkpoint configuration directory is required");
}
}
rewriteProperties(sdcPropertiesFile, etcDir, sourceConfigs, sourceInfo, clusterToken, Optional.ofNullable(mesosURL));
TarFileCreator.createTarGz(etcDir, etcTarGz);
// NOTE(review): only RuntimeException is wrapped here; checked IOExceptions from the
// writeValue/copy calls above propagate unwrapped via the method's throws clause —
// confirm that is intended.
} catch (RuntimeException ex) {
String msg = errorString("serializing etc directory: {}", ex);
throw new RuntimeException(msg, ex);
}
// Stage the mode-appropriate log4j config (MR batch vs. YARN streaming); Mesos was
// handled above inside the etc clone.
File log4jProperties = new File(stagingDir, "log4j.properties");
InputStream clusterLog4jProperties = null;
try {
if (executionMode == ExecutionMode.CLUSTER_BATCH) {
clusterLog4jProperties = Utils.checkNotNull(getClass().getResourceAsStream("/cluster-mr-log4j.properties"), "Cluster Log4J Properties");
} else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
clusterLog4jProperties = Utils.checkNotNull(getClass().getResourceAsStream("/cluster-spark-log4j.properties"), "Cluster Log4J Properties");
}
if (clusterLog4jProperties != null) {
FileUtils.copyInputStreamToFile(clusterLog4jProperties, log4jProperties);
}
} catch (IOException ex) {
String msg = errorString("copying log4j configuration: {}", ex);
throw new RuntimeException(msg, ex);
} finally {
if (clusterLog4jProperties != null) {
IOUtils.closeQuietly(clusterLog4jProperties);
}
}
addKerberosConfiguration(environment);
// Re-validate the pipeline config bean (errors list is reused from above).
errors.clear();
PipelineConfigBean config = PipelineBeanCreator.get().create(pipelineConfiguration, errors);
Utils.checkArgument(config != null, Utils.formatL("Invalid pipeline configuration: {}", errors));
String numExecutors = sourceInfo.get(ClusterModeConstants.NUM_EXECUTORS_KEY);
List<String> args;
File hostingDir = null;
// Build the submit-command arguments and environment for the chosen execution mode.
if (executionMode == ExecutionMode.CLUSTER_BATCH) {
LOG.info("Submitting MapReduce Job");
environment.put(CLUSTER_TYPE, CLUSTER_TYPE_MAPREDUCE);
args = generateMRArgs(clusterManager.getAbsolutePath(), String.valueOf(config.clusterSlaveMemory), config.clusterSlaveJavaOpts, libsTarGz.getAbsolutePath(), etcTarGz.getAbsolutePath(), resourcesTarGz.getAbsolutePath(), log4jProperties.getAbsolutePath(), bootstrapJar.getAbsolutePath(), sdcPropertiesFile.getAbsolutePath(), clusterBootstrapJar.getAbsolutePath(), jarsToShip);
} else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
LOG.info("Submitting Spark Job on Yarn");
environment.put(CLUSTER_TYPE, CLUSTER_TYPE_YARN);
args = generateSparkArgs(clusterManager.getAbsolutePath(), String.valueOf(config.clusterSlaveMemory), config.clusterSlaveJavaOpts, numExecutors, libsTarGz.getAbsolutePath(), etcTarGz.getAbsolutePath(), resourcesTarGz.getAbsolutePath(), log4jProperties.getAbsolutePath(), bootstrapJar.getAbsolutePath(), jarsToShip, clusterBootstrapJar.getAbsolutePath());
} else if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
LOG.info("Submitting Spark Job on Mesos");
environment.put(CLUSTER_TYPE, CLUSTER_TYPE_MESOS);
environment.put(STAGING_DIR, stagingDir.getAbsolutePath());
environment.put(MESOS_UBER_JAR_PATH, clusterBootstrapJar.getAbsolutePath());
environment.put(MESOS_UBER_JAR, clusterBootstrapJar.getName());
environment.put(ETC_TAR_ARCHIVE, "etc.tar.gz");
environment.put(LIBS_TAR_ARCHIVE, "libs.tar.gz");
environment.put(RESOURCES_TAR_ARCHIVE, "resources.tar.gz");
hostingDir = new File(runtimeInfo.getDataDir(), Utils.checkNotNull(mesosHostingJarDir, "mesos jar dir cannot be null"));
// NOTE(review): mkdirs() returns false when the directory already exists, which would
// throw here — assumes the hosting dir is always freshly named. TODO confirm.
if (!hostingDir.mkdirs()) {
throw new RuntimeException("Couldn't create hosting dir: " + hostingDir.toString());
}
environment.put(MESOS_HOSTING_JAR_DIR, hostingDir.getAbsolutePath());
args = generateMesosArgs(clusterManager.getAbsolutePath(), config.mesosDispatcherURL, Utils.checkNotNull(mesosURL, "mesos jar url cannot be null"));
} else {
throw new IllegalStateException(Utils.format("Incorrect execution mode: {}", executionMode));
}
SystemProcess process = systemProcessFactory.create(ClusterProviderImpl.class.getSimpleName(), outputDir, args);
LOG.info("Starting: " + process);
try {
process.start(environment);
long start = System.currentTimeMillis();
Set<String> applicationIds = new HashSet<>();
// Poll loop: scan the process output once per second for an application id, a
// Kerberos error, or the failure timeout. Exits only via return or throw.
while (true) {
long elapsedSeconds = TimeUnit.SECONDS.convert(System.currentTimeMillis() - start, TimeUnit.MILLISECONDS);
LOG.debug("Waiting for application id, elapsed seconds: " + elapsedSeconds);
if (applicationIds.size() > 1) {
// More than one id means the output is ambiguous; dump it and fail.
logOutput("unknown", process);
throw new IllegalStateException(errorString("Found more than one application id: {}", applicationIds));
} else if (!applicationIds.isEmpty()) {
// Exactly one id found: the submit succeeded, build and return the state.
String appId = applicationIds.iterator().next();
logOutput(appId, process);
ApplicationState applicationState = new ApplicationState();
applicationState.setId(appId);
applicationState.setSdcToken(clusterToken);
if (mesosHostingJarDir != null) {
applicationState.setDirId(mesosHostingJarDir);
}
return applicationState;
}
if (!ThreadUtil.sleep(1000)) {
// Sleep was interrupted; clean up the Mesos hosting dir if we created one.
if (hostingDir != null) {
FileUtils.deleteQuietly(hostingDir);
}
throw new IllegalStateException("Interrupted while waiting for pipeline to start");
}
List<String> lines = new ArrayList<>();
lines.addAll(process.getOutput());
lines.addAll(process.getError());
Matcher m;
for (String line : lines) {
if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
m = MESOS_DRIVER_ID_REGEX.matcher(line);
} else {
m = YARN_APPLICATION_ID_REGEX.matcher(line);
}
if (m.find()) {
LOG.info("Found application id " + m.group(1));
applicationIds.add(m.group(1));
}
m = NO_VALID_CREDENTIALS.matcher(line);
if (m.find()) {
LOG.info("Kerberos Error found on line: " + line);
String msg = "Kerberos Error: " + m.group(1);
throw new IOException(msg);
}
}
if (elapsedSeconds > timeToWaitForFailure) {
logOutput("unknown", process);
// NOTE(review): "for for" is a typo in this message; left as-is since changing it
// would alter runtime output.
String msg = Utils.format("Timed out after waiting {} seconds for for cluster application to start. " + "Submit command {} alive.", elapsedSeconds, (process.isAlive() ? "is" : "is not"));
if (hostingDir != null) {
FileUtils.deleteQuietly(hostingDir);
}
throw new IllegalStateException(msg);
}
}
} finally {
// Always release process resources, whether we returned an id or threw.
process.cleanup();
}
}