Java Examples for com.streamsets.datacollector.store.PipelineInfo

The following Java examples will help you understand the usage of com.streamsets.datacollector.store.PipelineInfo. These source-code samples are taken from various open-source projects.

Example 1
Project: datacollector-master  File: ClusterProviderImpl.java View source code
@SuppressWarnings("unchecked")
private ApplicationState startPipelineInternal(SystemProcessFactory systemProcessFactory, File clusterManager, File outputDir, Map<String, String> environment, Map<String, String> sourceInfo, PipelineConfiguration pipelineConfiguration, StageLibraryTask stageLibrary, File etcDir, File resourcesDir, File staticWebDir, File bootstrapDir, URLClassLoader apiCL, URLClassLoader containerCL, long timeToWaitForFailure, File stagingDir, RuleDefinitions ruleDefinitions, Acl acl) throws IOException, TimeoutException {
    environment = Maps.newHashMap(environment);
    // create libs.tar.gz file for pipeline
    Map<String, List<URL>> streamsetsLibsCl = new HashMap<>();
    Map<String, List<URL>> userLibsCL = new HashMap<>();
    Map<String, String> sourceConfigs = new HashMap<>();
    ImmutableList.Builder<StageConfiguration> pipelineConfigurations = ImmutableList.builder();
    // order is important here as we don't want error stage
    // configs overriding source stage configs
    String clusterToken = UUID.randomUUID().toString();
    Set<String> jarsToShip = new LinkedHashSet<>();
    List<Issue> errors = new ArrayList<>();
    PipelineBean pipelineBean = PipelineBeanCreator.get().create(false, stageLibrary, pipelineConfiguration, errors);
    if (!errors.isEmpty()) {
        String msg = Utils.format("Found '{}' configuration errors: {}", errors.size(), errors);
        throw new IllegalStateException(msg);
    }
    pipelineConfigurations.add(pipelineBean.getErrorStage().getConfiguration());
    StageBean statsStage = pipelineBean.getStatsAggregatorStage();
    // statsStage is null for pre 1.3 pipelines
    if (statsStage != null) {
        pipelineConfigurations.add(statsStage.getConfiguration());
    }
    pipelineConfigurations.add(pipelineBean.getOrigin().getConfiguration());
    for (StageBean stageBean : pipelineBean.getPipelineStageBeans().getStages()) {
        pipelineConfigurations.add(stageBean.getConfiguration());
    }
    ExecutionMode executionMode = ExecutionMode.STANDALONE;
    for (StageConfiguration stageConf : pipelineConfigurations.build()) {
        StageDefinition stageDef = stageLibrary.getStage(stageConf.getLibrary(), stageConf.getStageName(), false);
        if (stageConf.getInputLanes().isEmpty()) {
            for (Config conf : stageConf.getConfiguration()) {
                if (conf.getValue() != null) {
                    Object value = conf.getValue();
                    if (value instanceof List) {
                        List values = (List) value;
                        if (values.isEmpty()) {
                            LOG.debug("Conf value for " + conf.getName() + " is empty");
                        } else {
                            Object first = values.get(0);
                            if (canCastToString(first)) {
                                sourceConfigs.put(conf.getName(), Joiner.on(",").join(values));
                            } else if (first instanceof Map) {
                                addToSourceConfigs(sourceConfigs, (List<Map<String, Object>>) values);
                            } else {
                                LOG.info("List is of type '{}' which cannot be converted to property value.", first.getClass().getName());
                            }
                        }
                    } else if (canCastToString(conf.getValue())) {
                        LOG.debug("Adding to source configs " + conf.getName() + "=" + value);
                        sourceConfigs.put(conf.getName(), String.valueOf(value));
                    } else if (value instanceof Enum) {
                        value = ((Enum) value).name();
                        LOG.debug("Adding to source configs " + conf.getName() + "=" + value);
                        sourceConfigs.put(conf.getName(), String.valueOf(value));
                    } else {
                        LOG.warn("Conf value is of unknown type " + conf.getValue());
                    }
                }
            }
            executionMode = PipelineBeanCreator.get().getExecutionMode(pipelineConfiguration, new ArrayList<Issue>());
            List<String> libJarsRegex = stageDef.getLibJarsRegex();
            if (!libJarsRegex.isEmpty()) {
                for (URL jarUrl : ((URLClassLoader) stageDef.getStageClassLoader()).getURLs()) {
                    File jarFile = new File(jarUrl.getPath());
                    for (String libJar : libJarsRegex) {
                        Pattern pattern = Pattern.compile(libJar);
                        Matcher matcher = pattern.matcher(jarFile.getName());
                        if (matcher.matches()) {
                            jarsToShip.add(jarFile.getAbsolutePath());
                        }
                    }
                }
            }
        }
        String type = StageLibraryUtils.getLibraryType(stageDef.getStageClassLoader());
        String name = StageLibraryUtils.getLibraryName(stageDef.getStageClassLoader());
        if (ClusterModeConstants.STREAMSETS_LIBS.equals(type)) {
            streamsetsLibsCl.put(name, findJars(name, (URLClassLoader) stageDef.getStageClassLoader(), stageDef.getClassName()));
        } else if (ClusterModeConstants.USER_LIBS.equals(type)) {
            userLibsCL.put(name, findJars(name, (URLClassLoader) stageDef.getStageClassLoader(), stageDef.getClassName()));
        } else {
            throw new IllegalStateException(Utils.format("Error unknown stage library type: '{}'", type));
        }
        // Add all jars of stagelib to --jars. We only really need stuff from the extras directory.
        if (stageDef.getClassName().equals(SPARK_PROCESSOR_STAGE)) {
            LOG.info("Spark processor found in pipeline, adding to spark-submit");
            File extras = new File(System.getenv("STREAMSETS_LIBRARIES_EXTRA_DIR"));
            LOG.info("Found extras dir: " + extras.toString());
            File stageLibExtras = new File(extras.toString() + "/" + stageConf.getLibrary() + "/" + "lib");
            LOG.info("StageLib Extras dir: " + stageLibExtras.toString());
            File[] extraJarsForStageLib = stageLibExtras.listFiles();
            if (extraJarsForStageLib != null) {
                stream(extraJarsForStageLib).map(File::toString).forEach(jarsToShip::add);
            }
            addJarsToJarsList((URLClassLoader) stageDef.getStageClassLoader(), jarsToShip, "streamsets-datacollector-spark-api-[0-9]+.*");
        }
    }
    if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING || executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
        LOG.info("Execution Mode is CLUSTER_STREAMING. Adding container jar and API jar to spark-submit");
        addJarsToJarsList(containerCL, jarsToShip, "streamsets-datacollector-container-[0-9]+.*");
        addJarsToJarsList(apiCL, jarsToShip, "streamsets-datacollector-api-[0-9]+.*");
    }
    LOG.info("stagingDir = '{}'", stagingDir);
    LOG.info("bootstrapDir = '{}'", bootstrapDir);
    LOG.info("etcDir = '{}'", etcDir);
    LOG.info("resourcesDir = '{}'", resourcesDir);
    LOG.info("staticWebDir = '{}'", staticWebDir);
    Utils.checkState(staticWebDir.isDirectory(), Utils.format("Expected '{}' to be a directory", staticWebDir));
    File libsTarGz = new File(stagingDir, "libs.tar.gz");
    try {
        TarFileCreator.createLibsTarGz(findJars("api", apiCL, null), findJars("container", containerCL, null), streamsetsLibsCl, userLibsCL, staticWebDir, libsTarGz);
    } catch (Exception ex) {
        String msg = errorString("Serializing classpath: '{}'", ex);
        throw new RuntimeException(msg, ex);
    }
    File resourcesTarGz = new File(stagingDir, "resources.tar.gz");
    try {
        resourcesDir = createDirectoryClone(resourcesDir, "resources", stagingDir);
        TarFileCreator.createTarGz(resourcesDir, resourcesTarGz);
    } catch (Exception ex) {
        String msg = errorString("Serializing resources directory: '{}': {}", resourcesDir.getName(), ex);
        throw new RuntimeException(msg, ex);
    }
    File etcTarGz = new File(stagingDir, "etc.tar.gz");
    File sdcPropertiesFile;
    File bootstrapJar = getBootstrapMainJar(bootstrapDir, BOOTSTRAP_MAIN_JAR_PATTERN);
    File clusterBootstrapJar;
    String mesosHostingJarDir = null;
    String mesosURL = null;
    Pattern clusterBootstrapJarFile = findClusterBootstrapJar(executionMode, pipelineConfiguration, stageLibrary);
    clusterBootstrapJar = getBootstrapClusterJar(bootstrapDir, clusterBootstrapJarFile);
    if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
        String topic = sourceConfigs.get(TOPIC);
        String pipelineName = sourceInfo.get(ClusterModeConstants.CLUSTER_PIPELINE_NAME);
        mesosHostingJarDir = MESOS_HOSTING_DIR_PARENT + File.separatorChar + getSha256(getMesosHostingDir(topic, pipelineName));
        mesosURL = runtimeInfo.getBaseHttpUrl() + File.separatorChar + mesosHostingJarDir + File.separatorChar + clusterBootstrapJar.getName();
    } else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
        jarsToShip.add(getBootstrapClusterJar(bootstrapDir, CLUSTER_BOOTSTRAP_API_JAR_PATTERN).getAbsolutePath());
    }
    try {
        etcDir = createDirectoryClone(etcDir, "etc", stagingDir);
        if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
            try (InputStream clusterLog4jProperties = Utils.checkNotNull(getClass().getResourceAsStream("/cluster-spark-log4j.properties"), "Cluster Log4J Properties")) {
                File log4jProperty = new File(etcDir, runtimeInfo.getLog4jPropertiesFileName());
                if (!log4jProperty.isFile()) {
                    throw new IllegalStateException(Utils.format("Log4j config file doesn't exist: '{}'", log4jProperty.getAbsolutePath()));
                }
                LOG.info("Copying log4j properties for mesos cluster mode");
                FileUtils.copyInputStreamToFile(clusterLog4jProperties, log4jProperty);
            }
        }
        PipelineInfo pipelineInfo = Utils.checkNotNull(pipelineConfiguration.getInfo(), "Pipeline Info");
        String pipelineName = pipelineInfo.getPipelineId();
        File rootDataDir = new File(etcDir, "data");
        File pipelineBaseDir = new File(rootDataDir, PipelineDirectoryUtil.PIPELINE_INFO_BASE_DIR);
        File pipelineDir = new File(pipelineBaseDir, PipelineUtils.escapedPipelineName(pipelineName));
        if (!pipelineDir.exists()) {
            if (!pipelineDir.mkdirs()) {
                throw new RuntimeException("Failed to create pipeline directory " + pipelineDir.getPath());
            }
        }
        File pipelineFile = new File(pipelineDir, FilePipelineStoreTask.PIPELINE_FILE);
        ObjectMapperFactory.getOneLine().writeValue(pipelineFile, BeanHelper.wrapPipelineConfiguration(pipelineConfiguration));
        File infoFile = new File(pipelineDir, FilePipelineStoreTask.INFO_FILE);
        ObjectMapperFactory.getOneLine().writeValue(infoFile, BeanHelper.wrapPipelineInfo(pipelineInfo));
        Utils.checkNotNull(ruleDefinitions, "ruleDefinitions");
        File rulesFile = new File(pipelineDir, FilePipelineStoreTask.RULES_FILE);
        ObjectMapperFactory.getOneLine().writeValue(rulesFile, BeanHelper.wrapRuleDefinitions(ruleDefinitions));
        if (null != acl) {
            // acl could be null if permissions is not enabled
            File aclFile = new File(pipelineDir, FileAclStoreTask.ACL_FILE);
            ObjectMapperFactory.getOneLine().writeValue(aclFile, AclDtoJsonMapper.INSTANCE.toAclJson(acl));
        }
        sdcPropertiesFile = new File(etcDir, "sdc.properties");
        if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
            String hdfsS3ConfDirValue = PipelineBeanCreator.get().getHdfsS3ConfDirectory(pipelineConfiguration);
            if (hdfsS3ConfDirValue != null && !hdfsS3ConfDirValue.isEmpty()) {
                File hdfsS3ConfDir = new File(resourcesDir, hdfsS3ConfDirValue).getAbsoluteFile();
                if (!hdfsS3ConfDir.exists()) {
                    String msg = Utils.format("HDFS/S3 Checkpoint Configuration Directory '{}' doesn't exist", hdfsS3ConfDir.getPath());
                    throw new IllegalArgumentException(msg);
                } else {
                    File coreSite = new File(hdfsS3ConfDir, "core-site.xml");
                    if (!coreSite.exists()) {
                        String msg = Utils.format("HDFS/S3 Checkpoint Configuration file core-site.xml '{}' doesn't exist", coreSite.getPath());
                        throw new IllegalStateException(msg);
                    }
                    sourceConfigs.put("hdfsS3ConfDir", hdfsS3ConfDirValue);
                }
            } else {
                throw new IllegalStateException("HDFS/S3 Checkpoint configuration directory is required");
            }
        }
        rewriteProperties(sdcPropertiesFile, etcDir, sourceConfigs, sourceInfo, clusterToken, Optional.ofNullable(mesosURL));
        TarFileCreator.createTarGz(etcDir, etcTarGz);
    } catch (RuntimeException ex) {
        String msg = errorString("serializing etc directory: {}", ex);
        throw new RuntimeException(msg, ex);
    }
    File log4jProperties = new File(stagingDir, "log4j.properties");
    InputStream clusterLog4jProperties = null;
    try {
        if (executionMode == ExecutionMode.CLUSTER_BATCH) {
            clusterLog4jProperties = Utils.checkNotNull(getClass().getResourceAsStream("/cluster-mr-log4j.properties"), "Cluster Log4J Properties");
        } else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
            clusterLog4jProperties = Utils.checkNotNull(getClass().getResourceAsStream("/cluster-spark-log4j.properties"), "Cluster Log4J Properties");
        }
        if (clusterLog4jProperties != null) {
            FileUtils.copyInputStreamToFile(clusterLog4jProperties, log4jProperties);
        }
    } catch (IOException ex) {
        String msg = errorString("copying log4j configuration: {}", ex);
        throw new RuntimeException(msg, ex);
    } finally {
        if (clusterLog4jProperties != null) {
            IOUtils.closeQuietly(clusterLog4jProperties);
        }
    }
    addKerberosConfiguration(environment);
    errors.clear();
    PipelineConfigBean config = PipelineBeanCreator.get().create(pipelineConfiguration, errors);
    Utils.checkArgument(config != null, Utils.formatL("Invalid pipeline configuration: {}", errors));
    String numExecutors = sourceInfo.get(ClusterModeConstants.NUM_EXECUTORS_KEY);
    List<String> args;
    File hostingDir = null;
    if (executionMode == ExecutionMode.CLUSTER_BATCH) {
        LOG.info("Submitting MapReduce Job");
        environment.put(CLUSTER_TYPE, CLUSTER_TYPE_MAPREDUCE);
        args = generateMRArgs(clusterManager.getAbsolutePath(), String.valueOf(config.clusterSlaveMemory), config.clusterSlaveJavaOpts, libsTarGz.getAbsolutePath(), etcTarGz.getAbsolutePath(), resourcesTarGz.getAbsolutePath(), log4jProperties.getAbsolutePath(), bootstrapJar.getAbsolutePath(), sdcPropertiesFile.getAbsolutePath(), clusterBootstrapJar.getAbsolutePath(), jarsToShip);
    } else if (executionMode == ExecutionMode.CLUSTER_YARN_STREAMING) {
        LOG.info("Submitting Spark Job on Yarn");
        environment.put(CLUSTER_TYPE, CLUSTER_TYPE_YARN);
        args = generateSparkArgs(clusterManager.getAbsolutePath(), String.valueOf(config.clusterSlaveMemory), config.clusterSlaveJavaOpts, numExecutors, libsTarGz.getAbsolutePath(), etcTarGz.getAbsolutePath(), resourcesTarGz.getAbsolutePath(), log4jProperties.getAbsolutePath(), bootstrapJar.getAbsolutePath(), jarsToShip, clusterBootstrapJar.getAbsolutePath());
    } else if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
        LOG.info("Submitting Spark Job on Mesos");
        environment.put(CLUSTER_TYPE, CLUSTER_TYPE_MESOS);
        environment.put(STAGING_DIR, stagingDir.getAbsolutePath());
        environment.put(MESOS_UBER_JAR_PATH, clusterBootstrapJar.getAbsolutePath());
        environment.put(MESOS_UBER_JAR, clusterBootstrapJar.getName());
        environment.put(ETC_TAR_ARCHIVE, "etc.tar.gz");
        environment.put(LIBS_TAR_ARCHIVE, "libs.tar.gz");
        environment.put(RESOURCES_TAR_ARCHIVE, "resources.tar.gz");
        hostingDir = new File(runtimeInfo.getDataDir(), Utils.checkNotNull(mesosHostingJarDir, "mesos jar dir cannot be null"));
        if (!hostingDir.mkdirs()) {
            throw new RuntimeException("Couldn't create hosting dir: " + hostingDir.toString());
        }
        environment.put(MESOS_HOSTING_JAR_DIR, hostingDir.getAbsolutePath());
        args = generateMesosArgs(clusterManager.getAbsolutePath(), config.mesosDispatcherURL, Utils.checkNotNull(mesosURL, "mesos jar url cannot be null"));
    } else {
        throw new IllegalStateException(Utils.format("Incorrect execution mode: {}", executionMode));
    }
    SystemProcess process = systemProcessFactory.create(ClusterProviderImpl.class.getSimpleName(), outputDir, args);
    LOG.info("Starting: " + process);
    try {
        process.start(environment);
        long start = System.currentTimeMillis();
        Set<String> applicationIds = new HashSet<>();
        while (true) {
            long elapsedSeconds = TimeUnit.SECONDS.convert(System.currentTimeMillis() - start, TimeUnit.MILLISECONDS);
            LOG.debug("Waiting for application id, elapsed seconds: " + elapsedSeconds);
            if (applicationIds.size() > 1) {
                logOutput("unknown", process);
                throw new IllegalStateException(errorString("Found more than one application id: {}", applicationIds));
            } else if (!applicationIds.isEmpty()) {
                String appId = applicationIds.iterator().next();
                logOutput(appId, process);
                ApplicationState applicationState = new ApplicationState();
                applicationState.setId(appId);
                applicationState.setSdcToken(clusterToken);
                if (mesosHostingJarDir != null) {
                    applicationState.setDirId(mesosHostingJarDir);
                }
                return applicationState;
            }
            if (!ThreadUtil.sleep(1000)) {
                if (hostingDir != null) {
                    FileUtils.deleteQuietly(hostingDir);
                }
                throw new IllegalStateException("Interrupted while waiting for pipeline to start");
            }
            List<String> lines = new ArrayList<>();
            lines.addAll(process.getOutput());
            lines.addAll(process.getError());
            Matcher m;
            for (String line : lines) {
                if (executionMode == ExecutionMode.CLUSTER_MESOS_STREAMING) {
                    m = MESOS_DRIVER_ID_REGEX.matcher(line);
                } else {
                    m = YARN_APPLICATION_ID_REGEX.matcher(line);
                }
                if (m.find()) {
                    LOG.info("Found application id " + m.group(1));
                    applicationIds.add(m.group(1));
                }
                m = NO_VALID_CREDENTIALS.matcher(line);
                if (m.find()) {
                    LOG.info("Kerberos Error found on line: " + line);
                    String msg = "Kerberos Error: " + m.group(1);
                    throw new IOException(msg);
                }
            }
            if (elapsedSeconds > timeToWaitForFailure) {
                logOutput("unknown", process);
                String msg = Utils.format("Timed out after waiting {} seconds for for cluster application to start. " + "Submit command {} alive.", elapsedSeconds, (process.isAlive() ? "is" : "is not"));
                if (hostingDir != null) {
                    FileUtils.deleteQuietly(hostingDir);
                }
                throw new IllegalStateException(msg);
            }
        }
    } finally {
        process.cleanup();
    }
}