package backtype.storm.contrib.hbase.examples; import java.util.List; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import storm.trident.TridentTopology; import storm.trident.operation.BaseFunction; import storm.trident.operation.TridentCollector; import storm.trident.operation.builtin.Count; import storm.trident.state.StateFactory; import storm.trident.testing.FixedBatchSpout; import storm.trident.tuple.TridentTuple; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.contrib.hbase.trident.HBaseAggregateState; import backtype.storm.contrib.hbase.utils.TridentConfig; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Values; import backtype.storm.utils.Utils; /** * An example Storm Trident topology that uses {@link HBaseAggregateState} for * stateful stream processing. * <p> * This example persists idempotent counts in HBase for the number of times a * shortened URL has been seen in the stream for each day, week, and month. * <p> * Assumes the HBase table has been created.<br> * <tt>create 'shorturl', {NAME => 'data', VERSIONS => 3}, * {NAME => 'daily', VERSION => 1, TTL => 604800}, * {NAME => 'weekly', VERSION => 1, TTL => 2678400}, * {NAME => 'monthly', VERSION => 1, TTL => 31536000}</tt> */ public class HBaseTridentAggregateTopology { /** * Partitions a tuple into three to represent daily, weekly, and monthly stats * <p> * For example, when passed the following tuple:<br> * {shortid:http://bit.ly/ZK6t, date:20120816} * <p> * The function will output the following three tuples:<br> * {shortid:http://bit.ly/ZK6t, date:20120816, cf:daily, cq:20120816}<br> * {shortid:http://bit.ly/ZK6t, date:20120816, cf:weekly, cq:201233}<br> * {shortid:http://bit.ly/ZK6t, date:20120816, cf:monthly, cq:201208} */ @SuppressWarnings("serial") static class DatePartitionFunction extends BaseFunction { final String cfStatsDaily = "daily"; final String cfStatsWeekly = "weekly"; final String cfStatsMonthly = "monthly"; final static transient DateTimeFormatter dtf = DateTimeFormat .forPattern("YYYYMMdd"); @Override public void execute(TridentTuple tuple, TridentCollector collector) { String monthly = tuple.getString(1).substring(0, 6); Integer week = dtf.parseDateTime(tuple.getString(1)).getWeekOfWeekyear(); String weekly = tuple.getString(1).substring(0, 4) .concat(week.toString()); collector.emit(new Values(cfStatsDaily, tuple.getString(1))); collector.emit(new Values(cfStatsMonthly, monthly)); collector.emit(new Values(cfStatsWeekly, weekly)); } } /** * @param args * @throws InterruptedException */ @SuppressWarnings({ "unchecked", "rawtypes" }) public static void main(String[] args) throws InterruptedException { List<Object> v0 = HBaseCountersBatchTopology.values.get(0).get(0); List<Object> v1 = HBaseCountersBatchTopology.values.get(0).get(1); List<Object> v2 = HBaseCountersBatchTopology.values.get(0).get(2); List<Object> v3 = HBaseCountersBatchTopology.values.get(0).get(3); List<Object> v4 = HBaseCountersBatchTopology.values.get(0).get(4); List<Object> v5 = HBaseCountersBatchTopology.values.get(1).get(0); List<Object> v6 = HBaseCountersBatchTopology.values.get(1).get(1); List<Object> v7 = HBaseCountersBatchTopology.values.get(1).get(2); List<Object> v8 = HBaseCountersBatchTopology.values.get(2).get(0); List<Object> v9 = HBaseCountersBatchTopology.values.get(2).get(1); List<Object> v10 = HBaseCountersBatchTopology.values.get(2).get(2); HBaseCountersBatchTopology.values.values(); FixedBatchSpout spout = new FixedBatchSpout(new Fields("shortid", "url", "user", "date"), 3, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10); spout.setCycle(false); TridentConfig config = new TridentConfig("shorturl", "shortid"); config.setBatch(false); StateFactory state = HBaseAggregateState.transactional(config); TridentTopology topology = new TridentTopology(); topology .newStream("spout", spout) .each(new Fields("shortid", "date"), new DatePartitionFunction(), new Fields("cf", "cq")).project(new Fields("shortid", "cf", "cq")) .groupBy(new Fields("shortid", "cf", "cq")) .persistentAggregate(state, new Count(), new Fields("count")); Config conf = new Config(); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("hbase-trident-aggregate", conf, topology.build()); Utils.sleep(5000); cluster.shutdown(); } }