/*
* Copyright 2013 Produban
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.produban.openbus.examples;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.produban.openbus.analysis.SimpleFileStringSpout;
import com.produban.openbus.analysis.WebServerLog2TSDB;
import com.produban.openbus.analysis.WebServerLogFilter;
import com.produban.openbus.analysis.HDFSPersistence;
import com.produban.openbus.util.Conf;
import com.produban.openbus.util.Constant;
import com.produban.openbus.util.DatePartition;
import com.produban.openbus.util.LogFilter;
import com.produban.openbus.util.WebServerLog2Json;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.operation.builtin.Count;
import storm.trident.testing.MemoryMapState;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
/**
* Test topology openbus-realtime with Spout file and no persistence in HBase
*/
public class OpenbusProcessorFileTopology {
private static Logger LOG = LoggerFactory.getLogger(OpenbusProcessorFileTopology.class);
public static StormTopology buildTopology(Config conf) {
TridentTopology topology = new TridentTopology();
Stream stream = null;
List<String> fieldsWebLog = new ArrayList<String>();
fieldsWebLog.add("host");
fieldsWebLog.add("log");
fieldsWebLog.add("user");
fieldsWebLog.add("datetime");
fieldsWebLog.add("request");
fieldsWebLog.add("status");
fieldsWebLog.add("size");
fieldsWebLog.add("referer");
fieldsWebLog.add("userAgent");
fieldsWebLog.add("session");
fieldsWebLog.add("responseTime");
fieldsWebLog.add("timestamp");
fieldsWebLog.add("json");
SimpleFileStringSpout spout = new SimpleFileStringSpout("data/webserverlogs.json", "rawLogs");
spout.setCycle(true);
stream = topology.newStream("spout", spout);
stream = stream.each(new Fields("rawLogs"), new WebServerLog2Json(), new Fields(fieldsWebLog));
stream = stream.each(new Fields(fieldsWebLog), new WebServerLogFilter());
stream.each(new Fields("request", "datetime"), new DatePartition(), new Fields("cq", "cf"))
.groupBy(new Fields("request", "cq", "cf"))
.persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
.newValuesStream()
.each(new Fields("request", "cq", "cf", "count"), new LogFilter());
stream.each(new Fields("user", "datetime"), new DatePartition(), new Fields("cq", "cf"))
.groupBy(new Fields("user", "cq", "cf"))
.persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
.newValuesStream()
.each(new Fields("user", "cq", "cf", "count"), new LogFilter());
if (Constant.YES.equals(conf.get(Conf.PROP_OPENTSDB_USE))) {
LOG.info("OpenTSDB: " + conf.get(Conf.PROP_OPENTSDB_USE));
stream.groupBy(new Fields(fieldsWebLog)).aggregate(new Fields(fieldsWebLog), new WebServerLog2TSDB(), new Fields("count"))
.each(new Fields("request", "count"), new LogFilter());
}
if (Constant.YES.equals(conf.get(Conf.PROP_HDFS_USE))) {
LOG.info("HDFS: " + conf.get(Conf.PROP_HDFS_USE));
stream.each(new Fields(fieldsWebLog), new HDFSPersistence(), new Fields("result"))
.each(new Fields("result"), new LogFilter());
}
return topology.build();
}
public static void main(String[] args) throws Exception {
Config conf = new Config();
//conf.setDebug(true);
conf.put(Conf.PROP_BROKER_TOPIC, Conf.KAFKA_TOPIC);
conf.put(Conf.PROP_KAFKA_IDCLIENT, Conf.KAFKA_IDCLIENT);
conf.put(Conf.PROP_ZOOKEEPER_HOST, Conf.ZOOKEEPER_HOST + ":" + Conf.ZOOKEEPER_PORT);
conf.put(Conf.PROP_ZOOKEEPER_BROKER, Conf.ZOOKEEPER_BROKER);
conf.put(Conf.PROP_HBASE_TABLE_REQUEST, Conf.HBASE_TABLE_REQUEST);
conf.put(Conf.PROP_HBASE_ROWID_USER, Conf.HBASE_ROWID_USER);
conf.put(Conf.PROP_HBASE_TABLE_USER, Conf.HBASE_TABLE_USER);
conf.put(Conf.PROP_HBASE_ROWID_REQUEST, Conf.HBASE_ROWID_REQUEST);
conf.put(Conf.PROP_OPENTSDB_USE, Conf.OPENTSDB_USE);
conf.put(Conf.PROP_HDFS_USE, Conf.HDFS_USE);
if (args.length == 0) {
LOG.info("Storm mode local");
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("openbus", conf, buildTopology(conf));
Thread.sleep(2000);
//cluster.shutdown();
} else {
LOG.info("Storm mode cluster");
StormSubmitter.submitTopology("openbus", conf, buildTopology(conf));
Thread.sleep(2000);
}
}
}