/*
# Licensed Materials - Property of IBM
# Copyright IBM Corp. 2015
*/
package simple;
import java.io.ObjectStreamException;
import java.util.concurrent.Future;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.ibm.streamsx.topology.TStream;
import com.ibm.streamsx.topology.Topology;
import com.ibm.streamsx.topology.context.StreamsContextFactory;
import com.ibm.streamsx.topology.file.FileStreams;
import com.ibm.streamsx.topology.function.Predicate;
/**
* Sample continuous (streaming) regular expression grep topology application.
* This is a variant of the {@link Grep} application that demonstrates
* filtering using Java functional programming.
* This Java application builds a
* simple topology that watches a directory for files, reads each file and
* outputs lines that match a regular expression.
* Thus as each file is added to the directory, the application will read
* it and output matching lines.
* <BR>
* The application implements the typical pattern of code that declares a
* topology followed by submission of the topology to a Streams context {@code
* com.ibm.streamsx.topology.context.StreamsContext}.
* <BR>
* This demonstrates Java functional programming using an anonymous class.
* <P>
* <BR>
* This may be executed from the {@code samples/java/functional} directory as:
* <UL>
* <LI>
* {@code java -cp functionalsamples.jar:../../../com.ibm.streamsx.topology/lib/com.ibm.streamsx.topology.jar:$STREAMS_INSTALL/lib/com.ibm.streams.operator.samples.jar
* simple.RegexGrep CONTEXT_TYPE $HOME/books ".*Queen.*England.*"
* } - Run directly from the command line.
* <i>CONTEXT_TYPE</i> is one of:
* <UL>
* <LI>{@code DISTRIBUTED} - Run as an IBM Streams distributed
* application.</LI>
* <LI>{@code STANDALONE} - Run as an IBM Streams standalone
* application.</LI>
* <LI>{@code EMBEDDED} - Run embedded within this JVM.</LI>
* <LI>{@code BUNDLE} - Create an IBM Streams application bundle.</LI>
* <LI>{@code TOOLKIT} - Create an IBM Streams application toolkit.</LI>
* </UL>
* </LI>
* <LI>
* An application execution within your IDE once you set the class path to include the correct jars.</LI>
* </UL>
* </P>
*/
public class RegexGrep {

    /**
     * Declares the regular expression grep topology and submits it to the
     * requested Streams context.
     * <P>
     * The topology watches a directory, reads each file as lines of text,
     * keeps only the lines that fully match the regular expression and
     * prints them. After submission the application is left running for
     * 30 seconds and the returned {@code Future} is then cancelled.
     * </P>
     *
     * @param args command line arguments, in order:
     *            <OL>
     *            <LI>{@code args[0]} - name of the context type passed to
     *            {@code StreamsContextFactory.getStreamsContext}</LI>
     *            <LI>{@code args[1]} - directory to watch for files</LI>
     *            <LI>{@code args[2]} - regular expression each line must
     *            match</LI>
     *            </OL>
     *            Any additional arguments are ignored.
     * @throws IllegalArgumentException if fewer than three arguments are
     *             supplied
     * @throws java.util.regex.PatternSyntaxException if {@code args[2]} is
     *             not a valid regular expression
     * @throws Exception if submission fails or the wait is interrupted
     */
    @SuppressWarnings("serial")
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage message rather than an
        // ArrayIndexOutOfBoundsException when arguments are missing.
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(
                    "Usage: simple.RegexGrep CONTEXT_TYPE DIRECTORY REGEX");
        }

        String contextType = args[0];
        String directory = args[1];
        // final so that it can be captured by the anonymous class below.
        final Pattern pattern = Pattern.compile(args[2]);

        // Define the topology
        Topology topology = new Topology("RegexGrep");

        // All streams with tuples that are Java String objects
        TStream<String> files = FileStreams.directoryWatcher(topology, directory);
        TStream<String> lines = FileStreams.textFileReader(files);

        /*
         * Functional filter using an anonymous class to define the
         * filtering logic, in this case execution of a regular
         * expression against each input String tuple (each line
         * of the files in the directory).
         */
        TStream<String> filtered = lines.filter(new Predicate<String>() {

            @Override
            public boolean test(String v1) {
                // Pass the line through if the whole line matches the
                // regular expression pattern. The single matcher is
                // reused for every tuple by resetting its input.
                return matcher.reset(v1).matches();
            }

            // Recreate the matcher (which is not serializable)
            // when the object is deserialized using readResolve.
            transient Matcher matcher;

            /*
             * Since the constructor is not invoked when the object is
             * deserialized we use readResolve as a hook to execute
             * initialization code, in this case creating the matcher
             * from the pattern.
             * The alternative would be to create it on its first use,
             * which would require an if statement in the test method.
             */
            private Object readResolve() throws ObjectStreamException {
                matcher = pattern.matcher("");
                return this;
            }
        });

        // For debugging just print out the tuples
        filtered.print();

        // Execute the topology, just like Grep.
        Future<?> future = StreamsContextFactory.getStreamsContext(contextType)
                .submit(topology);

        // Let the submitted application run for 30 seconds, then cancel it.
        Thread.sleep(30000);
        future.cancel(true);
    }
}