/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.storm.starter.trident;

import java.util.Properties;

import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.LocalDRPC;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.kafka.bolt.KafkaBolt;
import org.apache.storm.kafka.bolt.mapper.FieldNameBasedTupleToKafkaMapper;
import org.apache.storm.kafka.bolt.selector.DefaultTopicSelector;
import org.apache.storm.kafka.trident.TransactionalTridentKafkaSpout;
import org.apache.storm.kafka.trident.TridentKafkaConfig;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.starter.spout.RandomSentenceSpout;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentState;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.builtin.Count;
import org.apache.storm.trident.operation.builtin.FilterNull;
import org.apache.storm.trident.operation.builtin.MapGet;
import org.apache.storm.trident.testing.MemoryMapState;
import org.apache.storm.trident.testing.Split;
import org.apache.storm.tuple.Fields;

/**
 * A sample word count trident topology using a transactional Kafka spout. It has
 * the following components:
 * <ol>
 * <li>{@link KafkaBolt}
 * that receives random sentences from {@link RandomSentenceSpout} and
 * publishes them to the Kafka "test" topic.
 * </li>
 * <li>{@link TransactionalTridentKafkaSpout}
 * that consumes sentences from the "test" topic, splits them into words,
 * aggregates the words,
 * and stores the word counts in a {@link MemoryMapState}.
 * </li>
 * <li>A DRPC query
 * that returns the word counts by querying the trident state ({@link MemoryMapState}).
 * </li>
 * </ol>
* <p>
* For more background read the
* <a href="https://storm.apache.org/documentation/Trident-tutorial.html">
* trident tutorial</a>,
* <a href="https://storm.apache.org/documentation/Trident-state">trident
* state</a> and
* <a href="https://github.com/apache/storm/tree/master/external/storm-kafka">
* Storm Kafka </a>.
* </p>
*/
public class TridentKafkaWordCount {
    private final String zkUrl;
    private final String brokerUrl;

    TridentKafkaWordCount(String zkUrl, String brokerUrl) {
        this.zkUrl = zkUrl;
        this.brokerUrl = brokerUrl;
    }

    /**
     * Creates a transactional kafka spout that consumes any new data published to the "test" topic.
     * <p/>
     * For more info on transactional spouts
     * see the "Transactional spouts" section in the
     * <a href="https://storm.apache.org/documentation/Trident-state">Trident state</a> doc.
     *
     * @return a transactional trident kafka spout.
     */
    private TransactionalTridentKafkaSpout createKafkaSpout() {
        ZkHosts hosts = new ZkHosts(zkUrl);
        TridentKafkaConfig config = new TridentKafkaConfig(hosts, "test");
        config.scheme = new SchemeAsMultiScheme(new StringScheme());

        // consume only new data published to the topic
        config.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
        return new TransactionalTridentKafkaSpout(config);
    }

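    /**
     * Adds a DRPC stream to the topology so the word counts can be queried on demand. The DRPC
     * arguments are split into words, each word is looked up in the given trident state, and the
     * resulting (word, count) pairs are returned after filtering out words with no counts.
     */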
    private Stream addDRPCStream(TridentTopology tridentTopology, TridentState state, LocalDRPC drpc) {
        return tridentTopology.newDRPCStream("words", drpc)
                .each(new Fields("args"), new Split(), new Fields("word"))
                .groupBy(new Fields("word"))
                .stateQuery(state, new Fields("word"), new MapGet(), new Fields("count"))
                .each(new Fields("count"), new FilterNull())
                .project(new Fields("word", "count"));
    }

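    /**
     * Builds the word count state: sentences consumed from the kafka spout (field "str") are split
     * into words, grouped by word, and persistently aggregated with a {@link Count} into a
     * {@link MemoryMapState}.
     */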
    private TridentState addTridentState(TridentTopology tridentTopology) {
        return tridentTopology.newStream("spout1", createKafkaSpout()).parallelismHint(1)
                .each(new Fields("str"), new Split(), new Fields("word"))
                .groupBy(new Fields("word"))
                .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
                .parallelismHint(1);
    }

    /**
     * Creates a trident topology that consumes sentences from the kafka "test" topic using a
     * {@link TransactionalTridentKafkaSpout}, computes the word count and stores it in a {@link MemoryMapState}.
     * A DRPC stream is then added to query the word counts.
     *
     * @param drpc the local DRPC instance that serves the "words" query
     * @return the consumer (word count) topology
     */
    public StormTopology buildConsumerTopology(LocalDRPC drpc) {
        TridentTopology tridentTopology = new TridentTopology();
        addDRPCStream(tridentTopology, addTridentState(tridentTopology), drpc);
        return tridentTopology.build();
    }

    /**
     * Returns the storm config for the consumer (word count) topology.
     *
     * @return the topology config
     */
    public Config getConsumerConfig() {
        Config conf = new Config();
        conf.setMaxSpoutPending(20);
        // conf.setDebug(true);
        return conf;
    }

    /**
     * Builds a topology that produces random sentences using {@link RandomSentenceSpout} and
     * publishes the sentences to the kafka "test" topic using a {@link KafkaBolt}.
     *
     * @param prop the kafka producer properties for the {@link KafkaBolt}
     * @return the storm topology
     */
    public StormTopology buildProducerTopology(Properties prop) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new RandomSentenceSpout(), 2);
        /*
         * The output field of the RandomSentenceSpout ("word") is provided as the boltMessageField
         * so that this gets written out as the message in the kafka topic.
         */
        KafkaBolt bolt = new KafkaBolt().withProducerProperties(prop)
                .withTopicSelector(new DefaultTopicSelector("test"))
                .withTupleToKafkaMapper(new FieldNameBasedTupleToKafkaMapper("key", "word"));
        builder.setBolt("forwardToKafka", bolt, 1).shuffleGrouping("spout");
        return builder.createTopology();
    }

    /**
     * Returns the kafka producer properties used by the {@link KafkaBolt} to publish sentences
     * to the kafka "test" topic.
     *
     * @return the producer properties
     */
    public Properties getProducerConfig() {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerUrl);
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        props.put(ProducerConfig.CLIENT_ID_CONFIG, "storm-kafka-producer");
        return props;
    }

    /**
     * <p>
     * To run this topology ensure you have a kafka broker running.
     * </p>
     * Create the "test" topic from the command line, e.g.
     * <pre>
     * kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test
     * </pre>
     */
    public static void main(String[] args) throws Exception {

        String zkUrl = "localhost:2181";        // the defaults.
        String brokerUrl = "localhost:9092";

        if (args.length > 2 || (args.length == 1 && args[0].matches("^-h|--help$"))) {
            System.out.println("Usage: TridentKafkaWordCount [kafka zookeeper url] [kafka broker url]");
            System.out.println("   E.g. TridentKafkaWordCount [" + zkUrl + "]" + " [" + brokerUrl + "]");
            System.exit(1);
        } else if (args.length == 1) {
            zkUrl = args[0];
        } else if (args.length == 2) {
            zkUrl = args[0];
            brokerUrl = args[1];
        }

        System.out.println("Using Kafka zookeeper url: " + zkUrl + " broker url: " + brokerUrl);

        TridentKafkaWordCount wordCount = new TridentKafkaWordCount(zkUrl, brokerUrl);

        LocalDRPC drpc = new LocalDRPC();
        LocalCluster cluster = new LocalCluster();

        // submit the consumer (word count) topology.
        cluster.submitTopology("wordCounter", wordCount.getConsumerConfig(), wordCount.buildConsumerTopology(drpc));

        Config conf = new Config();
        conf.setMaxSpoutPending(20);
        // submit the producer topology.
        cluster.submitTopology("kafkaBolt", conf, wordCount.buildProducerTopology(wordCount.getProducerConfig()));

        // keep querying the word counts for a minute.
        for (int i = 0; i < 60; i++) {
            System.out.println("DRPC RESULT: " + drpc.execute("words", "the and apple snow jumped"));
            Thread.sleep(1000);
        }

        cluster.killTopology("kafkaBolt");
        cluster.killTopology("wordCounter");
        cluster.shutdown();
    }
}