/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.storm.starter.trident;

import java.util.Properties;

import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.LocalDRPC;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.kafka.bolt.KafkaBolt;
import org.apache.storm.kafka.bolt.mapper.FieldNameBasedTupleToKafkaMapper;
import org.apache.storm.kafka.bolt.selector.DefaultTopicSelector;
import org.apache.storm.kafka.trident.TransactionalTridentKafkaSpout;
import org.apache.storm.kafka.trident.TridentKafkaConfig;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.starter.spout.RandomSentenceSpout;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentState;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.builtin.Count;
import org.apache.storm.trident.operation.builtin.FilterNull;
import org.apache.storm.trident.operation.builtin.MapGet;
import org.apache.storm.trident.testing.MemoryMapState;
import org.apache.storm.trident.testing.Split;
import org.apache.storm.tuple.Fields;

/**
 * A sample word count trident topology using a transactional Kafka spout. It has
 * the following components:
 * <ol>
 * <li>{@link KafkaBolt}
 * that receives random sentences from {@link RandomSentenceSpout} and
 * publishes them to the Kafka "test" topic.
 * </li>
 * <li>{@link TransactionalTridentKafkaSpout}
 * that consumes sentences from the "test" topic, splits them into words,
 * aggregates the words,
 * and stores the word counts in a {@link MemoryMapState}.
 * </li>
 * <li>A DRPC query
 * that returns the word counts by querying the trident state ({@link MemoryMapState}).
 * </li>
 * </ol>
* <p>
* For more background read the
* <a href="https://storm.apache.org/documentation/Trident-tutorial.html">
* trident tutorial</a>,
* <a href="https://storm.apache.org/documentation/Trident-state">trident
* state</a> and
* <a href="https://github.com/apache/storm/tree/master/external/storm-kafka">
* Storm Kafka </a>.
* </p>
*/
public class TridentKafkaWordCount {
    private final String zkUrl;
    private final String brokerUrl;

    TridentKafkaWordCount(String zkUrl, String brokerUrl) {
        this.zkUrl = zkUrl;
        this.brokerUrl = brokerUrl;
    }

    /**
     * Creates a transactional kafka spout that consumes any new data published to the "test" topic.
     * <p/>
     * For more info on transactional spouts
     * see the "Transactional spouts" section in the
     * <a href="https://storm.apache.org/documentation/Trident-state">Trident state</a> doc.
     *
     * @return a transactional trident kafka spout.
     */
    private TransactionalTridentKafkaSpout createKafkaSpout() {
        ZkHosts hosts = new ZkHosts(zkUrl);
        TridentKafkaConfig config = new TridentKafkaConfig(hosts, "test");
        config.scheme = new SchemeAsMultiScheme(new StringScheme());

        // consume only new data published to the topic
        config.startOffsetTime = kafka.api.OffsetRequest.LatestTime();
        return new TransactionalTridentKafkaSpout(config);
    }

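    /**
     * Adds a DRPC stream to the topology so the word counts can be queried on demand. The DRPC
     * arguments are split into words, each word is looked up in the given trident state, and the
     * resulting (word, count) pairs are returned after filtering out words with no counts.
     */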
    private Stream addDRPCStream(TridentTopology tridentTopology, TridentState state, LocalDRPC drpc) {
        return tridentTopology.newDRPCStream("words", drpc)
                .each(new Fields("args"), new Split(), new Fields("word"))
                .groupBy(new Fields("word"))
                .stateQuery(state, new Fields("word"), new MapGet(), new Fields("count"))
                .each(new Fields("count"), new FilterNull())
                .project(new Fields("word", "count"));
    }

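    /**
     * Builds the word count state: sentences consumed from the kafka spout (field "str") are split
     * into words, grouped by word, and persistently aggregated with a {@link Count} into a
     * {@link MemoryMapState}.
     */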
    private TridentState addTridentState(TridentTopology tridentTopology) {
        return tridentTopology.newStream("spout1", createKafkaSpout()).parallelismHint(1)
                .each(new Fields("str"), new Split(), new Fields("word"))
                .groupBy(new Fields("word"))
                .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
                .parallelismHint(1);
    }

    /**
     * Creates a trident topology that consumes sentences from the kafka "test" topic using a
     * {@link TransactionalTridentKafkaSpout}, computes the word count and stores it in a {@link MemoryMapState}.
     * A DRPC stream is then added to query the word counts.
     *
     * @param drpc the local DRPC instance that serves the "words" query
     * @return the consumer (word count) topology
     */
    public StormTopology buildConsumerTopology(LocalDRPC drpc) {
        TridentTopology tridentTopology = new TridentTopology();
        addDRPCStream(tridentTopology, addTridentState(tridentTopology), drpc);
        return tridentTopology.build();
    }

    /**
     * Returns the storm config for the consumer (word count) topology.
     *
     * @return the topology config
     */
    public Config getConsumerConfig() {
        Config conf = new Config();
        conf.setMaxSpoutPending(20);
        // conf.setDebug(true);
        return conf;
    }

    /**
     * Builds a topology that produces random sentences using {@link RandomSentenceSpout} and
     * publishes the sentences to the kafka "test" topic using a {@link KafkaBolt}.
     *
     * @param prop the kafka producer properties for the {@link KafkaBolt}
     * @return the storm topology
     */
    public StormTopology buildProducerTopology(Properties prop) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("spout", new RandomSentenceSpout(), 2);
        /*
         * The output field of the RandomSentenceSpout ("word") is provided as the boltMessageField
         * so that this gets written out as the message in the kafka topic.
         */
        KafkaBolt bolt = new KafkaBolt().withProducerProperties(prop)
                .withTopicSelector(new DefaultTopicSelector("test"))
                .withTupleToKafkaMapper(new FieldNameBasedTupleToKafkaMapper("key", "word"));
        builder.setBolt("forwardToKafka", bolt, 1).shuffleGrouping("spout");
        return builder.createTopology();
    }

    /**
     * Returns the kafka producer properties used by the {@link KafkaBolt} to publish sentences
     * to the kafka "test" topic.
     *
     * @return the producer properties
     */
    public Properties getProducerConfig() {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerUrl);
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        props.put(ProducerConfig.CLIENT_ID_CONFIG, "storm-kafka-producer");
        return props;
    }

    /**
     * <p>
     * To run this topology ensure you have a kafka broker running.
     * </p>
     * Create the "test" topic from the command line, e.g.
     * <pre>
     * kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic test
     * </pre>
     */
    public static void main(String[] args) throws Exception {

        String zkUrl = "localhost:2181";        // the defaults.
        String brokerUrl = "localhost:9092";

        if (args.length > 2 || (args.length == 1 && args[0].matches("^-h|--help$"))) {
            System.out.println("Usage: TridentKafkaWordCount [kafka zookeeper url] [kafka broker url]");
            System.out.println("   E.g. TridentKafkaWordCount [" + zkUrl + "]" + " [" + brokerUrl + "]");
            System.exit(1);
        } else if (args.length == 1) {
            zkUrl = args[0];
        } else if (args.length == 2) {
            zkUrl = args[0];
            brokerUrl = args[1];
        }

        System.out.println("Using Kafka zookeeper url: " + zkUrl + " broker url: " + brokerUrl);

        TridentKafkaWordCount wordCount = new TridentKafkaWordCount(zkUrl, brokerUrl);

        LocalDRPC drpc = new LocalDRPC();
        LocalCluster cluster = new LocalCluster();

        // submit the consumer (word count) topology.
        cluster.submitTopology("wordCounter", wordCount.getConsumerConfig(), wordCount.buildConsumerTopology(drpc));

        Config conf = new Config();
        conf.setMaxSpoutPending(20);
        // submit the producer topology.
        cluster.submitTopology("kafkaBolt", conf, wordCount.buildProducerTopology(wordCount.getProducerConfig()));

        // keep querying the word counts for a minute.
        for (int i = 0; i < 60; i++) {
            System.out.println("DRPC RESULT: " + drpc.execute("words", "the and apple snow jumped"));
            Thread.sleep(1000);
        }

        cluster.killTopology("kafkaBolt");
        cluster.killTopology("wordCounter");
        cluster.shutdown();
    }
}