/*
* visitante: Web analytic using Hadoop Map Reduce
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.visitante.realtime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.chombo.storm.GenericBolt;
import org.chombo.storm.MessageHolder;
import org.chombo.util.ConfigUtility;
import org.chombo.util.Pair;
import backtype.storm.Config;
import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
/**
 * Storm bolt that tracks the pages visited within each session.
 *
 * <p>For every session ID the bolt keeps the list of visit timestamps seen so far and
 * the most recent page ID that could be extracted from a visited URL with the
 * configured {@code page.id.pattern} (capture group 1). A session is closed, and a
 * {@code (pageId, number of recorded visits)} message is queued for output, when
 * either
 * <ul>
 *   <li>a visited URL contains the configured {@code logOut.pattern} string, or</li>
 *   <li>a tick tuple arrives and the last recorded visit of the session is older than
 *       {@code session.timeOut} (multiplied by 1000 and compared against
 *       {@link System#currentTimeMillis()}, so visit times are presumably epoch
 *       milliseconds and the timeout seconds; confirm against the spout).</li>
 * </ul>
 * Sessions for which no page ID was ever extracted are dropped without output.
 *
 * @author pranab
 */
public class VisitSessionBolt extends GenericBolt {
    private static final long serialVersionUID = -4001182742881831041L;
    private static final Logger LOG = LoggerFactory.getLogger(VisitSessionBolt.class);

    private final int tickFrequencyInSeconds;
    /** Open sessions keyed by session ID. */
    private final Map<String, SessionDetail> sessions = new HashMap<String, SessionDetail>();
    private String logOutPattern;
    private String pageIdPatternStr;
    private Pattern pageIdPattern;
    private long sessionTimeout;

    /**
     * @param tickFrequencyInSeconds value registered as the tick tuple frequency for
     *        this component; tick tuples trigger the session timeout scan
     */
    public VisitSessionBolt(int tickFrequencyInSeconds) {
        super();
        this.tickFrequencyInSeconds = tickFrequencyInSeconds;
    }

    /**
     * Supplies the component level configuration carrying the tick tuple frequency.
     *
     * @return configuration containing {@code TOPOLOGY_TICK_TUPLE_FREQ_SECS}
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        Config conf = new Config();
        conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, tickFrequencyInSeconds);
        return conf;
    }

    /**
     * Reads the bolt settings from the topology configuration: {@code debug.on},
     * {@code page.id.pattern}, {@code logOut.pattern} and {@code session.timeOut}.
     * The page ID pattern is compiled once here and reused for every tuple.
     *
     * @param stormConf topology configuration
     * @param context topology context (not used)
     */
    @Override
    public void intialize(Map stormConf, TopologyContext context) {
        debugOn = ConfigUtility.getBoolean(stormConf, "debug.on", false);
        pageIdPatternStr = ConfigUtility.getString(stormConf, "page.id.pattern");
        pageIdPattern = Pattern.compile(pageIdPatternStr);
        logOutPattern = ConfigUtility.getString(stormConf, "logOut.pattern");
        sessionTimeout = ConfigUtility.getLong(stormConf, "session.timeOut");
    }

    /**
     * Handles one incoming tuple. Tick tuples trigger the timeout scan; any other
     * tuple is treated as a page visit. Output messages from the previous call are
     * discarded first.
     *
     * @param input tick tuple or visit tuple
     * @return always {@code true}
     */
    @Override
    public boolean process(Tuple input) {
        outputMessages.clear();
        if (isTickTuple(input)) {
            expireTimedOutSessions();
        } else {
            recordVisit(input);
        }
        return true;
    }

    /**
     * @return messages queued by the most recent {@link #process(Tuple)} call
     */
    @Override
    public List<MessageHolder> getOutput() {
        return outputMessages;
    }

    /** Closes every session whose last recorded visit is older than the timeout. */
    private void expireTimedOutSessions() {
        LOG.info("got tick tuple ");
        long expiryTime = System.currentTimeMillis() - sessionTimeout * 1000;

        // Collect first and remove afterwards: the map must not be structurally
        // modified while its entry set is being iterated.
        List<String> expiredSessions = new ArrayList<String>();
        for (Map.Entry<String, SessionDetail> entry : sessions.entrySet()) {
            SessionDetail sessDetail = entry.getValue();
            List<Long> timeStamps = sessDetail.getLeft();
            // A session always holds at least one timestamp, added when it is created.
            long lastVisit = timeStamps.get(timeStamps.size() - 1);
            if (lastVisit < expiryTime) {
                LOG.debug("found expired session");
                emitSessionDepth(sessDetail);
                expiredSessions.add(entry.getKey());
            }
        }
        for (String sessionID : expiredSessions) {
            sessions.remove(sessionID);
        }
    }

    /** Records a page visit and closes the session if the URL is a logout page. */
    private void recordVisit(Tuple input) {
        String sessionID = input.getStringByField(VisitTopology.SESSION_ID);
        long visitTime = input.getLongByField(VisitTopology.VISIT_TIME);
        String url = input.getStringByField(VisitTopology.VISIT_URL);

        Matcher matcher = pageIdPattern.matcher(url);
        String pageId = matcher.find() ? matcher.group(1) : null;
        LOG.debug("pageId:{}", pageId);

        SessionDetail sessDetail = sessions.get(sessionID);
        if (null == sessDetail) {
            // new session
            sessDetail = new SessionDetail(new ArrayList<Long>(), pageId);
            sessions.put(sessionID, sessDetail);
            LOG.debug("new session pageId:{}", pageId);
        } else if (null != pageId) {
            // Keep the previously seen page ID when this URL carries none.
            sessDetail.setRight(pageId);
            LOG.debug("existing session pageId:{}", pageId);
        }
        sessDetail.getLeft().add(visitTime);

        if (url.contains(logOutPattern)) {
            LOG.debug("got logout page");
            emitSessionDepth(sessDetail);
            sessions.remove(sessionID);
        }
    }

    /**
     * Queues a {@code (pageId, visit count)} message for the session, unless no page
     * ID was ever extracted for it.
     */
    private void emitSessionDepth(SessionDetail sessDetail) {
        String pageId = sessDetail.getRight();
        if (null == pageId) {
            return;
        }
        int depth = sessDetail.getLeft().size();
        MessageHolder msg = new MessageHolder();
        msg.setMessage(new Values(pageId, depth));
        outputMessages.add(msg);
        LOG.debug("sent pageID:{} session depth: {}", pageId, depth);
    }

    /**
     * Per session state: the visit timestamps (left) and the most recently extracted
     * page ID, possibly {@code null} (right).
     */
    public static class SessionDetail extends Pair<List<Long>, String> {
        public SessionDetail(List<Long> timeStamps, String pageId) {
            super(timeStamps, pageId);
        }
    }
}