/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.dstream.nifi;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Stream;
import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.logging.ProcessorLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Processor;
import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import io.dstream.DStream;
import io.dstream.DStreamConstants;
import io.dstream.utils.Assert;
/**
 * Base implementation of the {@link Processor} to support {@link DStream}
 * applications.
 *
 * It handles most common functionality required by the {@link DStream} and
 * sub-classes are only required to implement the {@link #getDStream(String)} method,
 * which returns an instance of an executable {@link DStream}.<br>
 * <p>
 * This {@link Processor} is an <a href="http://www.enterpriseintegrationpatterns.com/patterns/messaging/EventDrivenConsumer.html">Event Driven Consumer</a>
 * and it is triggered by the arrival of an execution configuration file (e.g., WordCount.cfg).
 * Once the configuration file has arrived, it is made visible to the current thread's context
 * class loader and an attempt is made to get an instance of a {@link DStream}
 * (see {@link #getDStream(String)}) for a specific execution name - the part of the configuration
 * file name that precedes the first '.' (e.g., 'WordCount.cfg' becomes 'WordCount'). <br>
 * <p>
 * <i>Basically, the {@link #getDStream(String)} method allows you to host multiple {@link DStream}
 * implementations within a single NAR bundle, essentially grouping them based on some criteria.</i>
 * <p>
 * Once the executable {@link DStream} is determined, it is executed and its output path
 * is written as an attribute to the downstream {@link FlowFile} to ensure downstream components
 * will have access to the results of the execution. For additional convenience there is also
 * a {@link #postProcessResults(Stream)} method that could be overridden by a sub-class
 * if there is a need to gain access to the results before they are sent downstream (e.g., testing).<br>
 * <p>
 * This {@link Processor} defines a single configuration property - "Execution completion timeout (milliseconds)"
 * with a default value of 0. This property indicates how long to wait for completion of {@link DStream} execution.
 * While it has a default value, it is <b>highly recommended</b> to set it to a more realistic value indicating
 * how long you are willing to wait for the execution to complete. With the default value it will wait indefinitely.
 */
@EventDriven
public abstract class AbstractDStreamProcessor extends AbstractProcessor {

    /** The only relationship of this processor; carries the output path of a completed execution. */
    public static final Relationship OUTPUT = new Relationship.Builder().name("success")
            .description("Upon successful completion of DStream execution, its output path is forwarded to success").build();

    /** Maximum time (in milliseconds) to wait for the execution to complete; 0 means wait indefinitely. */
    public static final PropertyDescriptor EXECUTION_COMPLETION_TIMEOUT = new PropertyDescriptor.Builder()
            .name("Execution completion timeout (milliseconds)")
            .description("Indicates how long to wait for completion of DStream execution. Defaults to 0 (wait indefinitely).")
            .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
            .defaultValue("0")
            .required(true)
            .build();

    private volatile Set<Relationship> relationships;

    private volatile List<PropertyDescriptor> properties;

    /**
     * Takes the next {@link FlowFile} (if any), treats its content as a DStream execution
     * configuration, installs that configuration, executes the {@link DStream} returned by
     * {@link #getDStream(String)} and transfers a new {@link FlowFile} whose 'filename'
     * attribute is the execution's output path to {@link #OUTPUT}.
     * <p>
     * The incoming {@link FlowFile} is always removed and the session committed, regardless
     * of the outcome.
     *
     * @throws IllegalStateException if anything fails while installing the configuration
     *         or executing the {@link DStream}
     */
    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        final ProcessorLog log = this.getLogger();
        final long executionCompletionTimeout = Long.parseLong(context.getProperty(EXECUTION_COMPLETION_TIMEOUT).getValue());
        final FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        // installConfiguration(..) swaps the context class loader of this thread.
        // Remember the current one so it can be put back once this trigger is done.
        final Thread currentThread = Thread.currentThread();
        final ClassLoader originalClassLoader = currentThread.getContextClassLoader();
        try {
            final String configurationName = flowFile.getAttribute(CoreAttributes.FILENAME.key());
            // The null check turns a missing 'filename' attribute into the descriptive
            // assertion failure below instead of a bare NullPointerException.
            Assert.isTrue(configurationName != null && configurationName.endsWith(".cfg"),
                    "Received invalid configuration file '" + configurationName
                    + "'. DStream configuration file must end with '.cfg'.");
            log.info("Received configuration '" + configurationName + "'");

            final AtomicReference<String> outputPathRef = new AtomicReference<String>();
            session.read(flowFile, new InputStreamCallback() {
                @Override
                public void process(InputStream confFileInputStream) throws IOException {
                    outputPathRef.set(installConfiguration(configurationName, confFileInputStream));
                }
            });

            final String executionName = toExecutionName(configurationName);
            final DStream<?> dstream = this.getDStream(executionName);
            if (dstream != null) {
                log.info("Executing DStream for '" + executionName + "'");
                this.postProcessResults(this.executeDStream(dstream, executionName, executionCompletionTimeout));
                FlowFile resultFlowFile = session.create();
                resultFlowFile = session.putAttribute(resultFlowFile, CoreAttributes.FILENAME.key(), outputPathRef.get());
                session.getProvenanceReporter().receive(resultFlowFile, outputPathRef.get());
                session.transfer(resultFlowFile, OUTPUT);
            }
            else {
                log.warn("Failed to locate DStream for execution '" + executionName + "'"
                        + ". Nothing was executed. Possible reasons: " + this.getClass().getSimpleName()
                        + " may not have provided a DStream for '" + executionName + "'");
            }
        }
        catch (Exception e) {
            throw new IllegalStateException("Failed DStream execution with unexpected exception ", e);
        }
        finally {
            // Restore first: without this every trigger would leave one more URLClassLoader
            // chained onto the context class loader of the (possibly pooled) calling thread.
            currentThread.setContextClassLoader(originalClassLoader);
            session.remove(flowFile);
            session.commit();
        }
    }

    /**
     * Returns the relationships assembled in {@link #init(ProcessorInitializationContext)}.
     */
    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    /**
     * Builds the immutable relationship set ({@link #OUTPUT}) and the immutable property
     * list ({@link #EXECUTION_COMPLETION_TIMEOUT}) exposed by this processor.
     */
    @Override
    protected void init(final ProcessorInitializationContext context) {
        final Set<Relationship> relationships = new HashSet<>();
        relationships.add(OUTPUT);
        this.relationships = Collections.unmodifiableSet(relationships);

        final List<PropertyDescriptor> properties = new ArrayList<>();
        properties.add(EXECUTION_COMPLETION_TIMEOUT);
        this.properties = Collections.unmodifiableList(properties);
    }

    /**
     * Returns the property descriptors assembled in {@link #init(ProcessorInitializationContext)}.
     */
    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return properties;
    }

    /**
     * Returns an instance of the {@link DStream} for a given execution name.
     * May return 'null', in which case nothing is executed and a warning is logged.
     *
     * @param executionName the execution name used when invoking {@link DStream#executeAs(String)}
     *        operation.
     */
    protected abstract <T> DStream<T> getDStream(String executionName);

    /**
     * Gives you a handle to execution result partitions (see {@link DStream#executeAs(String)} )<br>
     * Typically used for testing/debugging. The default implementation does nothing.
     *
     * @param resultPartitions {@link Stream} of {@link Stream}s where each represents an individual partition.
     */
    protected <T> void postProcessResults(Stream<Stream<T>> resultPartitions) {
        // default NOOP
    }

    /**
     * Starts the execution of the given {@link DStream} and blocks until its result is available.
     *
     * @param dstream the stream to execute
     * @param executionName the name passed to {@link DStream#executeAs(String)}
     * @param executionCompletionTimeout maximum wait in milliseconds; a value of 0 (or less)
     *        means wait indefinitely
     * @return the result of the execution
     * @throws IllegalStateException if the wait is interrupted, times out or the execution fails
     */
    @SuppressWarnings("unchecked")
    private <T> Stream<Stream<T>> executeDStream(DStream<?> dstream, String executionName, long executionCompletionTimeout) {
        final Future<?> resultFuture = dstream.executeAs(executionName);
        try {
            if (executionCompletionTimeout > 0) {
                return (Stream<Stream<T>>) resultFuture.get(executionCompletionTimeout, TimeUnit.MILLISECONDS);
            }
            this.getLogger().warn("Waiting for completion of '" + executionName + "' indefinitely. "
                    + "Consider setting 'Execution completion timeout' property of your processor "
                    + "when configured via UI.");
            return (Stream<Stream<T>>) resultFuture.get();
        }
        catch (InterruptedException e) {
            // Preserve the interrupt status for code further up the call stack.
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Failed while waiting for execution to complete", e);
        }
        catch (ExecutionException | TimeoutException e) {
            throw new IllegalStateException("Failed while waiting for execution to complete", e);
        }
    }

    /**
     * Writes the received configuration into a fresh directory under 'java.io.tmpdir'
     * and makes that directory visible to the context class loader of the current thread.
     *
     * @param configurationName name of the configuration file (e.g., WordCount.cfg)
     * @param confFileInputStream content of the configuration file in {@link Properties} format
     * @return the value of the {@link DStreamConstants#OUTPUT} property if the configuration
     *         defines it, otherwise '&lt;execution name&gt;/out'
     * @throws IllegalStateException if the configuration can not be read, written or installed
     */
    private String installConfiguration(String configurationName, InputStream confFileInputStream) {
        try {
            final File confDir = new File(System.getProperty("java.io.tmpdir"), "dstream_" + UUID.randomUUID());
            if (!confDir.mkdirs() && !confDir.isDirectory()) {
                throw new IOException("Failed to create configuration directory '" + confDir + "'");
            }
            // Registered before the file it contains: deleteOnExit processes entries in reverse
            // registration order, so the file is removed first and the directory is empty by then.
            confDir.deleteOnExit();
            final File executionConfig = new File(confDir, configurationName);
            executionConfig.deleteOnExit();

            final Properties configurationProperties = new Properties();
            configurationProperties.load(confFileInputStream);
            // try-with-resources guarantees the file is flushed and its handle released.
            try (FileOutputStream confFileOs = new FileOutputStream(executionConfig)) {
                configurationProperties.store(confFileOs, configurationName + " configuration");
            }

            this.addToClassPath(confDir);

            return configurationProperties.getProperty(DStreamConstants.OUTPUT,
                    toExecutionName(configurationName) + "/out");
        }
        catch (Exception e) {
            throw new IllegalStateException("Failed to generate execution config", e);
        }
    }

    /**
     * Replaces the context class loader of the current thread with a {@link URLClassLoader}
     * that serves the given directory and delegates to the previous context class loader.
     *
     * @param configurationDir directory to expose through the new class loader
     * @throws IllegalStateException if the class loader can not be created or installed
     */
    private void addToClassPath(File configurationDir) {
        try {
            final Thread currentThread = Thread.currentThread();
            final URL[] urls = new URL[] { configurationDir.toURI().toURL() };
            final URLClassLoader dstreamCl = URLClassLoader.newInstance(urls, currentThread.getContextClassLoader());
            currentThread.setContextClassLoader(dstreamCl);
        }
        catch (Exception e) {
            throw new IllegalStateException("Failed to update classpath with path '" + configurationDir + "'.", e);
        }
    }

    /**
     * Derives the execution name from the configuration file name: everything
     * that precedes the first '.' (e.g., 'WordCount.cfg' yields 'WordCount').
     */
    private static String toExecutionName(String configurationName) {
        return configurationName.split("\\.")[0];
    }
}