/*
# Licensed Materials - Property of IBM
# Copyright IBM Corp. 2015
*/
package parallel;
import static com.ibm.streamsx.topology.file.FileStreams.directoryWatcher;
import static com.ibm.streamsx.topology.file.FileStreams.textFileReader;
import java.io.ObjectStreamException;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.ibm.streamsx.topology.TStream;
import com.ibm.streamsx.topology.Topology;
import com.ibm.streamsx.topology.context.StreamsContextFactory;
import com.ibm.streamsx.topology.function.Function;
import com.ibm.streamsx.topology.function.Predicate;
/**
 * Sample topology that greps the lines of files appearing in a directory
 * against a regular expression, with the filtering step executed on several
 * parallel channels.
 *
 * <p>
 * Three command-line arguments are required:
 * <ol>
 * <li>{@code contextType} - name of the context the topology is submitted
 * to, passed to {@code StreamsContextFactory.getStreamsContext}</li>
 * <li>{@code directory} - directory handed to {@code directoryWatcher}</li>
 * <li>{@code regex} - regular expression each line must match in its
 * entirety ({@link Matcher#matches()}) to be printed</li>
 * </ol>
 */
public class ParallelRegexGrep {
    static final Logger trace = Logger.getLogger("samples");

    /** Number of parallel channels the stream of lines is split across. */
    private static final int PARALLEL_WIDTH = 5;

    /**
     * Builds and submits the parallel grep topology.
     *
     * @param args {@code contextType}, {@code directory} and {@code regex},
     *             in that order
     * @throws IllegalArgumentException if fewer than three arguments are given
     * @throws Exception if the pattern cannot be compiled or the topology
     *                   cannot be submitted
     */
    @SuppressWarnings("serial")
    public static void main(String[] args) throws Exception {
        // Fail with a usage hint instead of a bare
        // ArrayIndexOutOfBoundsException when arguments are missing.
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: ParallelRegexGrep <contextType> <directory> <regex>");
        }

        final String contextType = args[0];
        final String directory = args[1];
        final Pattern pattern = Pattern.compile(args[2]);

        // Define the topology
        Topology topology = new Topology("ParallelRegexGrep");

        // All streams carry tuples that are Java String objects
        TStream<String> files = directoryWatcher(topology, directory);

        // Create a stream of lines from each file.
        TStream<String> lines = textFileReader(files);

        // Count the total number of lines before they are split between
        // different parallel channels.
        TStream<String> linesCounter = lines.transform(
                new Function<String, String>() {
                    private int numSentStrings = 0;

                    @Override
                    public String apply(String v1) {
                        trace.info("Have sent " + (++numSentStrings)
                                + " to be filtered.");
                        return v1;
                    }
                });

        // Parallelize the stream.
        // Since there are PARALLEL_WIDTH channels of the stream, the
        // approximate number of lines sent to each channel should be
        // numSentStrings / PARALLEL_WIDTH. This can be verified by comparing
        // the output of the linesCounter stream to that of the parallel
        // channels.
        TStream<String> linesParallel = linesCounter.parallel(PARALLEL_WIDTH);

        // Filter for the matched string, and log the number of strings that
        // have been tested. This is happening in parallel.
        TStream<String> filteredParallel = linesParallel
                .filter(new Predicate<String>() {
                    private int numReceivedStrings = 0;

                    // Matcher is not Serializable, so it is excluded from the
                    // serialized form and rebuilt from the captured pattern.
                    transient Matcher matcher;

                    @Override
                    public boolean test(String v1) {
                        trace.info("Have received " + (++numReceivedStrings)
                                + " strings on this parallel channel.");

                        // readResolve() only runs on deserialization; create
                        // the matcher here if this instance is invoked
                        // without having been deserialized, avoiding a
                        // NullPointerException.
                        if (matcher == null) {
                            matcher = pattern.matcher("");
                        }

                        // Pass the line through if it matches the
                        // regular expression pattern
                        return matcher.reset(v1).matches();
                    }

                    // Re-create the transient matcher after deserialization.
                    private Object readResolve() throws ObjectStreamException {
                        matcher = pattern.matcher("");
                        return this;
                    }
                });

        // Join the results of each parallel filter into one stream,
        // merging the parallel streams back into one stream.
        TStream<String> filteredCondensed = filteredParallel.endParallel();

        // Print the combined results
        filteredCondensed.print();

        // Execute the topology
        StreamsContextFactory.getStreamsContext(contextType).submit(topology);
    }
}