Grep.java example

Explorer
streamsx.topology-master
/*
# Licensed Materials - Property of IBM
# Copyright IBM Corp. 2015  
 */
package simple;

import java.util.concurrent.Future;

import com.ibm.streamsx.topology.TStream;
import com.ibm.streamsx.topology.Topology;
import com.ibm.streamsx.topology.context.StreamsContextFactory;
import com.ibm.streamsx.topology.file.FileStreams;
import com.ibm.streamsx.topology.streams.StringStreams;

/**
 * Sample continuous (streaming) grep topology application. This Java application builds a
 * simple topology that watches a directory for files, reads each file and
 * outputs lines that contain the search term.
 * Thus as each file is added to the directory, the application will read
 * it and output matching lines.
 * <BR>
 * The application implements the typical pattern of code that declares a
 * topology followed by submission of the topology to a Streams context
 * {@code com.ibm.streamsx.topology.context.StreamsContext}.
 * <BR>
 * This demonstrates a continuous application and the use of
 * utility classes that produce streams.
 * <P>
 * <BR>
 * This may be executed from the {@code samples/java/functional} directory as:
 * <UL>
 * <LI>
 * {@code java -cp functionalsamples.jar:../../../com.ibm.streamsx.topology/lib/com.ibm.streamsx.topology.jar:$STREAMS_INSTALL/lib/com.ibm.streams.operator.samples.jar
 *    simple.Grep CONTEXT_TYPE $HOME/books Elizabeth
 * } - Run directly from the command line.
 * <BR>
 * <i>CONTEXT_TYPE</i> is one of:
 * <UL>
 * <LI>{@code DISTRIBUTED} - Run as an IBM Streams distributed
 * application.</LI>
 * <LI>{@code STANDALONE} - Run as an IBM Streams standalone
 * application.</LI>
 * <LI>{@code EMBEDDED} - Run embedded within this JVM.</LI>
 * <LI>{@code BUNDLE} - Create an IBM Streams application bundle.</LI>
 * <LI>{@code TOOLKIT} - Create an IBM Streams application toolkit.</LI>
 * </UL>
 * </LI>
 * <LI>
 * An application execution within your IDE once you set the class path to include the correct jars.</LI>
 * </UL>
 * </P>
 */
public class Grep {

    /** Usage text reported when the required command line arguments are missing. */
    private static final String USAGE =
            "Usage: java simple.Grep <contextType> <directory> <term>";

    /** How long the submitted topology is left running before it is cancelled, in milliseconds. */
    private static final long RUN_TIME_MS = 30 * 1000;

    /**
     * Sample continuous (streaming) grep topology application. This Java
     * application builds a simple topology that watches a directory for
     * files, reads each file and outputs lines that contain the search term.
     * Thus as each file is added to the directory, the application will read
     * it and output matching lines.
     * <BR>
     * The application implements the typical pattern of code that declares a
     * topology followed by submission of the topology to a Streams context
     * {@code com.ibm.streamsx.topology.context.StreamsContext}.
     * <P>
     * Three arguments are required:
     * <UL>
     * <LI>{@code contextType} - The type of the context to execute the topology in,
     * e.g. {@code EMBEDDED} or {@code STANDALONE}.</LI>
     * <LI>{@code directory} - Directory to watch for files.</LI>
     * <LI>{@code term} - Search term, if any line in a file contains {@code term}
     * then it will be printed.</LI>
     * </UL>
     * For example (classpath omitted for brevity):
     * <BR>
     * {@code java simple.Grep EMBEDDED $HOME/books Elizabeth}
     *
     * @param args {@code contextType}, {@code directory} and {@code term}, in that order;
     *             any additional arguments are ignored
     * @throws IllegalArgumentException if fewer than three arguments are supplied
     * @throws Exception if submitting the topology fails or the thread is
     *             interrupted while waiting for the run time to elapse
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage hint rather than an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException(USAGE);
        }

        String contextType = args[0];
        String directory = args[1];
        String term = args[2];

        Topology topology = new Topology("Grep");

        /*
         * Use the file stream utility class com.ibm.streamsx.topology.file.FileStreams
         * to declare a stream that will contain file names from the specified directory.
         * As each new file is created in directory its absolute file path will
         * appear on filePaths.
         */
        TStream<String> filePaths = FileStreams.directoryWatcher(topology, directory);

        /*
         * Use the file stream utility class com.ibm.streamsx.topology.file.FileStreams
         * to declare a stream that will contain the contents of the files.
         * FileStreams.textFileReader creates a function that, for each input
         * file path, opens the file and reads its contents as a text file,
         * producing a tuple for each line of the file. The tuple contains
         * the contents of the line, as a String.
         */
        TStream<String> lines = FileStreams.textFileReader(filePaths);

        /*
         * Use the string stream utility class com.ibm.streamsx.topology.streams.StringStreams
         * to filter out non-matching lines. StringStreams.contains creates a functional
         * Predicate that will be executed for each tuple on lines, that is each line
         * read from a file.
         */
        TStream<String> matching = StringStreams.contains(lines, term);

        /*
         * And print the matching lines to standard out.
         */
        matching.print();

        /*
         * Execute the topology. Since FileStreams.directoryWatcher declares
         * a stream that lasts forever, that is, it is always watching the directory,
         * the application is not complete when submit() returns.
         * In fact it will never complete on its own.
         */
        Future<?> future = StreamsContextFactory.getStreamsContext(contextType)
                .submit(topology);

        /*
         * Let the application run for thirty seconds (30,000ms)
         * and then cancel it. The cancel is in a finally block so that
         * the submitted topology is still cancelled if the sleep is interrupted.
         */
        try {
            Thread.sleep(RUN_TIME_MS);
        } finally {
            future.cancel(true);
        }
    }
}