/* * visitante: Web analytic using Hadoop Map Reduce and Storm * Author: Pranab Ghosh * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, softwarSessionSummarizere * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.visitante.realtime; import java.util.List; import java.util.Map; import org.apache.commons.lang.StringUtils; import org.chombo.storm.GenericBolt; import org.chombo.storm.MessageHolder; import org.chombo.storm.MessageQueue; import org.chombo.storm.PubSub; import org.chombo.util.ConfigUtility; import org.hoidla.stream.HyperLogLog; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import backtype.storm.Config; import backtype.storm.task.TopologyContext; import backtype.storm.tuple.Tuple; import backtype.storm.tuple.Values; /** * @author pranab * */ public class UniqueVisitorCounterBolt extends GenericBolt { private int tickFrequencyInSeconds; private HyperLogLog uniqueCounter; private PubSub pubSub; private long minTotalCount; private MessageHolder msg; private static final String COMM_RESET = "reset"; private static final Logger LOG = LoggerFactory.getLogger(UniqueVisitorCounterBolt.class); public UniqueVisitorCounterBolt(int tickFrequencyInSeconds) { super(); this.tickFrequencyInSeconds = tickFrequencyInSeconds; } @Override public Map<String, Object> getComponentConfiguration() { Config conf = new Config(); conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, tickFrequencyInSeconds); return conf; } @Override public void intialize(Map stormConf, TopologyContext context) { int bucketBitCount = ConfigUtility.getInt(stormConf, "bucket.bit.count"); uniqueCounter = new HyperLogLog(bucketBitCount); minTotalCount = ConfigUtility.getLong(stormConf, "min.total.count"); //pub sub for command String commStore = ConfigUtility.getString(stormConf, "command.store", "none"); if (!commStore.equals("none")) { int numUniqueCounterBolt = ConfigUtility.getInt(stormConf, "unique.count.bolt.threads", 1); pubSub = PubSub.createPubSub(stormConf, commStore, numUniqueCounterBolt); } } @Override public boolean process(Tuple input) { boolean status = true; outputMessages.clear(); if (isTickTuple(input)) { LOG.info("got tick tuple "); long totalCount = uniqueCounter.getCount(); if (totalCount > minTotalCount) { //only after some minimum number of items have been processed long count = uniqueCounter.getUnqueCount(); msg = new MessageHolder(); msg.setMessage(new Values(getID(), count)); outputMessages.add(msg); LOG.info("emitted count:" + count); } //check if there is request to reset if (null != pubSub) { String command = pubSub.subscribe(getID()); if (null != command) { if (command.equals(COMM_RESET)) { uniqueCounter.clear(); } } } } else { String userID = input.getStringByField(UniqueVisitorTopology.USER_ID); uniqueCounter.add(userID); LOG.info("added to counter"); } return status; } @Override public List<MessageHolder> getOutput() { return outputMessages; } }