/* * Copyright (C) 2012 Michael Koppen * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package de.fhb.twitalyse; import java.io.IOException; import java.util.Collection; import java.util.Enumeration; import java.util.HashSet; import java.util.Properties; import org.mortbay.log.Log; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.StormSubmitter; import backtype.storm.generated.AlreadyAliveException; import backtype.storm.generated.InvalidTopologyException; import backtype.storm.topology.TopologyBuilder; import com.google.common.collect.Sets; import de.fhb.twitalyse.bolt.redis.CountHashTagsBolt; import de.fhb.twitalyse.bolt.redis.CountLanguageBolt; import de.fhb.twitalyse.bolt.redis.CountSourceBolt; import de.fhb.twitalyse.bolt.redis.CountWordsBolt; import de.fhb.twitalyse.bolt.redis.CountWordsInCircleBolt; import de.fhb.twitalyse.bolt.status.coords.FilterCoordsBolt; import de.fhb.twitalyse.bolt.status.coords.GetCoordsBolt; import de.fhb.twitalyse.bolt.status.hashtag.GetHashTagsBolt; import de.fhb.twitalyse.bolt.status.source.GetStatusSourceBolt; import de.fhb.twitalyse.bolt.status.text.GetStatusTextBolt; import de.fhb.twitalyse.bolt.status.text.SplitStatusTextBolt; import de.fhb.twitalyse.bolt.status.user.GetLanguageBolt; import de.fhb.twitalyse.spout.TwitterStreamSpout; import de.fhb.twitalyse.utils.Point; import de.fhb.twitalyse.utils.PropertyLoader; import java.io.File; import java.text.SimpleDateFormat; import java.util.Date; import java.util.logging.ConsoleHandler; import java.util.logging.FileHandler; import java.util.logging.Handler; import java.util.logging.Level; import java.util.logging.Logger; import java.util.logging.SimpleFormatter; /** * This Topology analyses Twitter Stati posted on the Twitter Public Channel. * * @author Christoph Ott <ott@fh-brandenburg.de> */ public class TwitalyseTopology { private final static Logger LOGGER = Logger.getLogger(TwitalyseTopology.class.getName()); private static final String TWITTERSPOUT = "twitterSpout"; private TopologyBuilder builder; private String consumerKey; private String consumerKeySecure; private final int DEFAULT_NUMBEROFWORKERS = 4; private Collection<String> stopWords; private String redisHost; private int redisPort; private String token; private String tokenSecret; public TwitalyseTopology() throws IOException { initProperties(); initBuilder(); } private void initBuilder() { builder = new TopologyBuilder(); initTwitterSpout(); initSourceCount(); initWordCount(); initLanguageCount(); initGetCoordsInCircle(); initGetHashTags(); } private void initGetHashTags() { GetHashTagsBolt getHashTag = new GetHashTagsBolt(); CountHashTagsBolt countHashTags = new CountHashTagsBolt(redisHost, redisPort); builder.setBolt("5_1 getHashTags", getHashTag, 4).shuffleGrouping( TWITTERSPOUT); builder.setBolt("5_2 countHashTags", countHashTags, 4).shuffleGrouping( "5_1 getHashTags"); } private void initGetCoordsInCircle() { // New York Point centerPoint = new Point(40.712134, -74.004988); // Mitte EU // Point centerPoint = new Point(49.124219, 5.882080); double radius = 3000; GetCoordsBolt coords = new GetCoordsBolt(); FilterCoordsBolt filterCoords = new FilterCoordsBolt(centerPoint, radius, redisHost, redisPort); SplitStatusTextBolt splitText = new SplitStatusTextBolt(stopWords, redisHost, redisPort); CountWordsInCircleBolt count = new CountWordsInCircleBolt(redisHost, redisPort); builder.setBolt("1_1 getCoords", coords, 8).shuffleGrouping( TWITTERSPOUT); builder.setBolt("1_2 coordsInCircle", filterCoords, 8).shuffleGrouping( "1_1 getCoords"); builder.setBolt("1_3 splitText", splitText, 8).shuffleGrouping( "1_2 coordsInCircle"); builder.setBolt("1_4 countWordsInCircle", count, 8).shuffleGrouping( "1_3 splitText"); } private void initLogger() { Level consoleHandlerLevel = Level.SEVERE; Level fileHandlerLevel = Level.INFO; Date today = new Date(); SimpleDateFormat sdf = new SimpleDateFormat("dd_MM_yyyy"); //setting up ConsoleHandler Logger rootLogger = Logger.getLogger(""); Handler[] handlers = rootLogger.getHandlers(); ConsoleHandler chandler = null; for (int i = 0; i < handlers.length; i++) { if (handlers[i] instanceof ConsoleHandler) { chandler = (ConsoleHandler) handlers[i]; } } if (chandler != null) { chandler.setLevel(consoleHandlerLevel); } else { LOGGER.log(Level.SEVERE,"No ConsoleHandler there."); } //setting up FileHandler FileHandler fh = null; try { fh = new FileHandler("log/log_" + sdf.format(today) + ".log"); fh.setFormatter(new SimpleFormatter()); fh.setLevel(fileHandlerLevel); } catch (IOException ex) { new File("log").mkdir(); try { fh = new FileHandler("log/log_" + sdf.format(today) + ".log"); fh.setFormatter(new SimpleFormatter()); fh.setLevel(fileHandlerLevel); } catch (IOException ex1) { LOGGER.log(Level.SEVERE,"Input-output-error while creating the initial log."); LOGGER.log(Level.SEVERE, null, ex1); } catch (SecurityException ex1) { LOGGER.log(Level.SEVERE, null, ex1); } LOGGER.log(Level.SEVERE, null, ex); } catch (SecurityException ex) { LOGGER.log(Level.SEVERE,"Cannot open/access Log-Folder so I will not log anything."); LOGGER.log(Level.SEVERE, null, ex); } if (fh != null) { rootLogger.addHandler(fh); } } private void initLanguageCount() { GetLanguageBolt getLanguageBolt = new GetLanguageBolt(); CountLanguageBolt countLanguageBolt = new CountLanguageBolt(redisHost, redisPort); builder.setBolt("2_1 getLanguage", getLanguageBolt, 4).shuffleGrouping( TWITTERSPOUT); builder.setBolt("2_2 countLanguage", countLanguageBolt, 4) .shuffleGrouping("2_1 getLanguage"); } private void initProperties() throws IOException { PropertyLoader propLoader = new PropertyLoader(); Properties twitterProps = propLoader .loadSystemProperty("twitterProps.properties"); consumerKey = twitterProps.getProperty("consumerKey"); consumerKeySecure = twitterProps.getProperty("consumerKeySecure"); token = twitterProps.getProperty("token"); tokenSecret = twitterProps.getProperty("tokenSecret"); Enumeration<Object> enumOfStopWords = propLoader.loadSystemProperty( "stopWords.properties").elements(); stopWords = new HashSet<String>(); while (enumOfStopWords.hasMoreElements()) { String stopWordsLang = (String) enumOfStopWords.nextElement(); stopWords.addAll(Sets.newHashSet(stopWordsLang.split(";"))); } Properties redisProps = propLoader .loadSystemProperty("redisProps.properties"); redisHost = redisProps.getProperty("host"); redisPort = Integer.valueOf(redisProps.getProperty("port")); } private void initSourceCount() { GetStatusSourceBolt getStatusSourceBolt = new GetStatusSourceBolt(); CountSourceBolt countSourceBolt = new CountSourceBolt(redisHost, redisPort); builder.setBolt("3_1 getSource", getStatusSourceBolt, 10) .shuffleGrouping(TWITTERSPOUT); builder.setBolt("3_2 countSource", countSourceBolt, 10) .shuffleGrouping("3_1 getSource"); } private void initTwitterSpout() { TwitterStreamSpout twitterStreamSpout = new TwitterStreamSpout( consumerKey, consumerKeySecure, token, tokenSecret, redisHost, redisPort); builder.setSpout(TWITTERSPOUT, twitterStreamSpout, 1); } private void initWordCount() { GetStatusTextBolt getTextBolt = new GetStatusTextBolt(); SplitStatusTextBolt splitStatusTextBolt = new SplitStatusTextBolt( stopWords, redisHost, redisPort); CountWordsBolt countWordsBolt = new CountWordsBolt(redisHost, redisPort); builder.setBolt("4_1 getStatusText", getTextBolt, 8).shuffleGrouping( TWITTERSPOUT); builder.setBolt("4_2 splitStatusText", splitStatusTextBolt, 8) .shuffleGrouping("4_1 getStatusText"); builder.setBolt("4_3 countWords", countWordsBolt, 8).shuffleGrouping( "4_2 splitStatusText"); } /** * (args.length == 0) LocalCluster <br> * args[0] - Name of Topology for Storm ui (String)<br> * args[1] - Number of workers (int) * * @param args * @throws AlreadyAliveException * @throws InvalidTopologyException * @throws InterruptedException */ public void startTopology(String[] args) throws AlreadyAliveException, InvalidTopologyException, InterruptedException { Config conf = new Config(); conf.setDebug(false); conf.setMaxTaskParallelism(8); if (args != null && args.length > 0) { if (args.length > 1) { conf.setNumWorkers(Integer.parseInt(args[1])); } else { conf.setNumWorkers(DEFAULT_NUMBEROFWORKERS); } LOGGER.log(Level.SEVERE,"Starting Cluster......"); StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); } else { conf.setMaxTaskParallelism(8); conf.setNumWorkers(2); LocalCluster cluster = new LocalCluster(); LOGGER.log(Level.SEVERE,"Starting Cluster......"); cluster.submitTopology("twitalyse", conf, builder.createTopology()); Thread.sleep(20000); cluster.shutdown(); } } public static void main(String[] args) throws IOException { TwitalyseTopology topology = new TwitalyseTopology(); try { topology.startTopology(args); } catch (AlreadyAliveException e) { LOGGER.log(Level.SEVERE,"{0}\n{1}", new Object[] { e, e.getMessage() }); } catch (InvalidTopologyException e) { LOGGER.log(Level.SEVERE,"{0}\n{1}", new Object[] { e, e.getMessage() }); } catch (InterruptedException e) { LOGGER.log(Level.SEVERE,"{0}\n{1}", new Object[] { e, e.getMessage() }); } } }