/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.storm.wordcount; import org.apache.storm.generated.StormTopology; import org.apache.storm.topology.TopologyBuilder; import org.apache.storm.tuple.Fields; import org.apache.flink.examples.java.wordcount.util.WordCountData; import org.apache.flink.storm.util.BoltFileSink; import org.apache.flink.storm.util.BoltPrintSink; import org.apache.flink.storm.util.NullTerminatingSpout; import org.apache.flink.storm.util.OutputFormatter; import org.apache.flink.storm.util.TupleOutputFormatter; import org.apache.flink.storm.wordcount.operators.BoltCounter; import org.apache.flink.storm.wordcount.operators.BoltCounterByName; import org.apache.flink.storm.wordcount.operators.BoltTokenizer; import org.apache.flink.storm.wordcount.operators.BoltTokenizerByName; import org.apache.flink.storm.wordcount.operators.WordCountFileSpout; import org.apache.flink.storm.wordcount.operators.WordCountInMemorySpout; /** * Implements the "WordCount" program that computes a simple word occurrence histogram over text files in a streaming * fashion. The program is constructed as a regular {@link StormTopology}. * <p> * The input is a plain text file with lines separated by newline characters. * <p> * Usage: * <code>WordCount[Local|LocalByName|RemoteByClient|RemoteBySubmitter] <text path> <result path></code><br> * If no parameters are provided, the program is run with default data from {@link WordCountData}. * <p> * This example shows how to: * <ul> * <li>how to construct a regular Storm topology as Flink program</li> * </ul> */ public class WordCountTopology { public final static String spoutId = "source"; public final static String tokenierzerId = "tokenizer"; public final static String counterId = "counter"; public final static String sinkId = "sink"; private final static OutputFormatter formatter = new TupleOutputFormatter(); public static TopologyBuilder buildTopology() { return buildTopology(true); } public static TopologyBuilder buildTopology(boolean indexOrName) { final TopologyBuilder builder = new TopologyBuilder(); // get input data if (fileInputOutput) { // read the text file from given input path final String[] tokens = textPath.split(":"); final String inputFile = tokens[tokens.length - 1]; // inserting NullTerminatingSpout only required to stabilize integration test builder.setSpout(spoutId, new NullTerminatingSpout(new WordCountFileSpout(inputFile))); } else { builder.setSpout(spoutId, new WordCountInMemorySpout()); } if (indexOrName) { // split up the lines in pairs (2-tuples) containing: (word,1) builder.setBolt(tokenierzerId, new BoltTokenizer(), 4).shuffleGrouping(spoutId); // group by the tuple field "0" and sum up tuple field "1" builder.setBolt(counterId, new BoltCounter(), 4).fieldsGrouping(tokenierzerId, new Fields(BoltTokenizer.ATTRIBUTE_WORD)); } else { // split up the lines in pairs (2-tuples) containing: (word,1) builder.setBolt(tokenierzerId, new BoltTokenizerByName(), 4).shuffleGrouping( spoutId); // group by the tuple field "0" and sum up tuple field "1" builder.setBolt(counterId, new BoltCounterByName(), 4).fieldsGrouping( tokenierzerId, new Fields(BoltTokenizerByName.ATTRIBUTE_WORD)); } // emit result if (fileInputOutput) { // read the text file from given input path final String[] tokens = outputPath.split(":"); final String outputFile = tokens[tokens.length - 1]; builder.setBolt(sinkId, new BoltFileSink(outputFile, formatter)).shuffleGrouping(counterId); } else { builder.setBolt(sinkId, new BoltPrintSink(formatter), 4).shuffleGrouping(counterId); } return builder; } // ************************************************************************* // UTIL METHODS // ************************************************************************* private static boolean fileInputOutput = false; private static String textPath; private static String outputPath; static boolean parseParameters(final String[] args) { if (args.length > 0) { // parse input arguments fileInputOutput = true; if (args.length == 2) { textPath = args[0]; outputPath = args[1]; } else { System.err.println("Usage: WordCount* <text path> <result path>"); return false; } } else { System.out.println("Executing WordCount example with built-in default data"); System.out.println(" Provide parameters to read input data from a file"); System.out.println(" Usage: WordCount* <text path> <result path>"); } return true; } }