/*
* visitante: Web analytic using Hadoop Map Reduce
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.visitante.realtime;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.chombo.storm.GenericSpout;
import org.chombo.storm.MessageHolder;
import org.chombo.storm.MessageQueue;
import org.chombo.util.ConfigUtility;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Values;
/**
 * Spout that pulls raw log records from a message queue and converts each usable
 * record into a tuple of (session id, epoch time in milliseconds, url).
 *
 * <p>A record is split on whitespace; the date, time and url fields are picked by
 * the configured ordinals. The date and time fields are joined with a single space
 * and parsed as {@code yyyy-MM-dd HH:mm:ss}. The session id is capture group 1 of
 * the configured session regex, which must match the whole url field.
 *
 * <p>Records that are too short, have an unparsable timestamp, or whose url does not
 * match the session regex are logged and skipped.
 */
public class VisitDepthSpout extends GenericSpout {
    private static final Logger LOG = LoggerFactory.getLogger(VisitDepthSpout.class);

    private String logQueue;
    private String sessionRegex;
    // SimpleDateFormat is not synchronized, so it is kept per instance rather than shared statically
    private DateFormat dF = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    private int dateOrd;
    private int timeOrd;
    private int urlOrd;
    private Pattern pattern;
    private MessageQueue msgQueue;

    /** No resources are released here. */
    @Override
    public void close() {
    }

    /** No action is taken on activation. */
    @Override
    public void activate() {
    }

    /** No action is taken on deactivation. */
    @Override
    public void deactivate() {
    }

    /**
     * @return always {@code null}; this spout supplies no component specific configuration
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    /**
     * Reads the spout settings and creates the message queue.
     *
     * <p>Configuration keys read: {@code redis.log.queue}, {@code date.ordinal},
     * {@code time.ordinal}, {@code url.ordinal}, {@code session.regex} and
     * {@code debug.on} (defaults to {@code false}).
     *
     * @param stormConf topology configuration
     * @param context topology context (not used here)
     */
    @Override
    public void intialize(Map stormConf, TopologyContext context) {
        logQueue = ConfigUtility.getString(stormConf, "redis.log.queue");
        msgQueue = MessageQueue.createMessageQueue(stormConf, logQueue);
        dateOrd = ConfigUtility.getInt(stormConf, "date.ordinal");
        timeOrd = ConfigUtility.getInt(stormConf, "time.ordinal");
        urlOrd = ConfigUtility.getInt(stormConf, "url.ordinal");

        // group(1) of this pattern is read as the session id, so the regex needs a capture group
        sessionRegex = ConfigUtility.getString(stormConf, "session.regex");
        pattern = Pattern.compile(sessionRegex);

        debugOn = ConfigUtility.getBoolean(stormConf, "debug.on", false);
        if (debugOn) {
            LOG.info("queue=" + logQueue + " dateOrd=" + dateOrd + " timeOrd=" + timeOrd
                    + " urlOrd=" + urlOrd + " sessionRegex=" + sessionRegex);
        }
    }

    /**
     * Receives one record from the queue and converts it to a tuple.
     *
     * @return a holder wrapping (session id, epoch time, url), or {@code null} when the
     *         queue returned nothing or the record could not be used
     */
    @Override
    public MessageHolder nextSpoutMessage() {
        String message = msgQueue.receive();
        if (null == message) {
            return null;
        }

        String[] items = message.split("\\s+");
        if (!hasRequiredFields(items)) {
            // a short record would otherwise escape as an ArrayIndexOutOfBoundsException
            LOG.error("log record has too few fields, field count: {}", items.length);
            return null;
        }

        MessageHolder msgHolder = null;
        try {
            Date date = dF.parse(items[dateOrd] + " " + items[timeOrd]);
            long epochTime = date.getTime();
            String url = items[urlOrd];

            // matches() requires the whole url field to match the session regex
            Matcher matcher = pattern.matcher(url);
            if (matcher.matches()) {
                String session = matcher.group(1);
                if (null != session) {
                    msgHolder = new MessageHolder(new Values(session, epochTime, url));
                }
            }
            if (null == msgHolder) {
                LOG.error("could not extract session id");
            }
        } catch (ParseException e) {
            LOG.error("invalid date format", e);
        }
        return msgHolder;
    }

    /** No handling is done for failed tuples. */
    @Override
    public void handleFailedMessage(Values tuple) {
    }

    /** Tells whether the date, time and url ordinals all index into the split record. */
    private boolean hasRequiredFields(String[] items) {
        return isValidOrdinal(dateOrd, items.length)
                && isValidOrdinal(timeOrd, items.length)
                && isValidOrdinal(urlOrd, items.length);
    }

    /** Tells whether an ordinal is a legal index for an array of the given length. */
    private static boolean isValidOrdinal(int ordinal, int length) {
        return ordinal >= 0 && ordinal < length;
    }
}