/*
 * Sifarish: Recommendation Engine
 * Author: Pranab Ghosh
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.sifarish.realtime;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.chombo.storm.GenericBolt;
import org.chombo.storm.MessageHolder;
import org.chombo.storm.MessageQueue;
import org.chombo.util.ConfigUtility;
import org.hoidla.util.BoundedSortedObjects;
import org.hoidla.util.Utility;
import org.hoidla.util.BoundedSortedObjects.SortableObject;

import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Tuple;

/**
 * @author pranab
 * Aggregates frequent item counts from multiple bolts.
 *
 * Each incoming tuple carries the ID of the reporting sketches bolt and a
 * colon separated list of alternating item IDs and counts. Counts are summed
 * per item until the number of distinct reporting bolts equals the configured
 * bolt count; the merged counts are then ranked, truncated, serialized and
 * sent to the top hitters message queue, and the accumulated state is reset.
 */
public class TrendingAggregateBolt extends GenericBolt {
	/** IDs of the sketches bolts that have reported since the last reset. */
	private Set<String> sketchedBolts = new HashSet<String>();

	/** Item ID to summed count, accumulated since the last reset. */
	private Map<String, Integer> frequentItems = new HashMap<String, Integer>();

	/** Number of distinct bolt IDs to wait for, read from "sketches.bolt.threads". */
	private int numSkethesBolt;

	/** Bounded ranking structure, sized from "sketches.most.freq.count". */
	private BoundedSortedObjects sortedObjects;

	/** Queue that receives the serialized top hitters. */
	private MessageQueue topHittersMsgQueue;

	/** Queue name read from "redis.top.hitters.queue". */
	private String topHittersQueue;

	private static final Logger LOG = Logger.getLogger(TrendingAggregateBolt.class);
	private static final long serialVersionUID = 8275719621097842135L;

	/**
	 * Declares no component specific configuration.
	 *
	 * @return always null
	 */
	@Override
	public Map<String, Object> getComponentConfiguration() {
		return null;
	}

	/**
	 * Reads the bolt settings from the topology configuration and creates
	 * the ranking structure and the output message queue.
	 *
	 * @param stormConf topology configuration; the keys read are
	 *        "sketches.bolt.threads" (default 1), "sketches.most.freq.count"
	 *        (default 3), "debug.on" (default false) and
	 *        "redis.top.hitters.queue"
	 * @param context topology context, not used here
	 */
	@Override
	public void intialize(Map stormConf, TopologyContext context) {
		numSkethesBolt = ConfigUtility.getInt(stormConf, "sketches.bolt.threads", 1);
		int mostFrequentCount = ConfigUtility.getInt(stormConf, "sketches.most.freq.count", 3);
		sortedObjects = new BoundedSortedObjects(mostFrequentCount);
		debugOn = ConfigUtility.getBoolean(stormConf,"debug.on", false);

		topHittersQueue = ConfigUtility.getString(stormConf, "redis.top.hitters.queue");
		topHittersMsgQueue = MessageQueue.createMessageQueue(stormConf, topHittersQueue);
		if (debugOn) {
			LOG.setLevel(Level.INFO);
			LOG.info("TrendingAggregateBolt intialized " );
		}
	}

	/**
	 * Merges the frequent item counts reported by one sketches bolt and,
	 * once every expected bolt has reported, publishes the top hitters.
	 *
	 * A second report from a bolt that has already reported means the
	 * previous round never completed, so the partial state is discarded
	 * before the new report is merged.
	 *
	 * @param input tuple carrying the TrendingSketchesBolt.BOLT_ID and
	 *        TrendingSketchesBolt.FREQ_COUNTS string fields
	 * @return always true
	 */
	@Override
	public boolean process(Tuple input) {
		boolean status = true;
		String sketchesBoltID = input.getStringByField(TrendingSketchesBolt.BOLT_ID);
		if (sketchedBolts.contains(sketchesBoltID)) {
			//all bolts have not reported for current epoch
			LOG.info("bolts didn't synchronize");
			sketchedBolts.clear();
			frequentItems.clear();
		}

		mergeFrequentItems(input.getStringByField(TrendingSketchesBolt.FREQ_COUNTS));

		//a bolt with nothing to report has still reported for this round
		sketchedBolts.add(sketchesBoltID);

		//all bolts have joined
		if (sketchedBolts.size() == numSkethesBolt) {
			publishTopHitters();
			frequentItems.clear();
			sketchedBolts.clear();
		}
		return status;
	}

	/**
	 * Adds the counts in a colon separated "item:count:item:count" string
	 * to the running per item totals.
	 *
	 * A null or empty string contributes nothing. An unpaired trailing token
	 * is logged and ignored instead of causing an index out of bounds error.
	 *
	 * @param freqItems serialized item and count pairs, may be null or empty
	 * @throws NumberFormatException if a count token is not an integer
	 */
	private void mergeFrequentItems(String freqItems) {
		if (null == freqItems || freqItems.isEmpty()) {
			//splitting an empty string yields one empty token, not zero tokens
			return;
		}

		String[] parts = freqItems.split(":");
		if (parts.length % 2 != 0) {
			LOG.warn("ignoring unpaired trailing token in frequent item counts: " + freqItems);
		}

		//stop before a dangling item ID that has no count after it
		for (int i = 0; i + 1 < parts.length; i += 2) {
			String itemID = parts[i];
			int count = Integer.parseInt(parts[i + 1]);
			Integer curCount = frequentItems.get(itemID);
			if (null == curCount) {
				frequentItems.put(itemID, count);
			} else {
				frequentItems.put(itemID, curCount + count);
			}
		}
	}

	/**
	 * Ranks the merged counts, keeps the bounded top entries, logs them and
	 * sends them to the top hitters queue joined with ":".
	 */
	private void publishTopHitters() {
		LOG.info("**Frequent item stats***");
		sortedObjects.clear();
		for (Map.Entry<String, Integer> entry : frequentItems.entrySet()) {
			LOG.info("item:" + entry.getKey() + " count:" + entry.getValue());
			sortedObjects.add(entry.getKey(), entry.getValue());
		}

		LOG.info("after merge");
		sortedObjects.truncate();
		List<SortableObject> topHitters = sortedObjects.get();
		for (SortableObject topHitter : topHitters) {
			LOG.info("item:" + topHitter.getItem() + " count:" + topHitter.getRank());
		}

		//write to Redis
		String serFreqCounts = Utility.join(topHitters, ":");
		topHittersMsgQueue.send(serFreqCounts);
	}

	/**
	 * Returns no output messages; results are sent to the message queue
	 * from within process.
	 *
	 * @return always null
	 */
	@Override
	public List<MessageHolder> getOutput() {
		return null;
	}
}