package com.produban.openbus.topologies;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.tuple.Values;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
/**
 * Trident function that parses one proxy access-log line with {@link #pattern}
 * and emits the captured fields as a single tuple.
 *
 * <p>The first value of the incoming tuple is read as the log line. It may be
 * either a {@code String} or a {@code byte[]}; byte arrays are decoded as UTF-8.
 * When the line matches, 25 values are emitted in the order listed in
 * {@code OUTPUT_GROUPS}. Lines that do not match, empty tuples, and values of
 * any other type produce no output.
 */
public class ProxyParser extends BaseFunction {

    private static final long serialVersionUID = 1L;

    private static final Logger log = LoggerFactory.getLogger(ProxyParser.class);

    /** SOH (0x01) character. */
    public static final char SEPARADOR = '\001';

    /**
     * Compiled once and shared; {@link Pattern} instances are immutable and safe
     * to use from several threads.
     *
     * <p>NOTE(review): the dots inside the IP sub-patterns (clientIP, serverIP,
     * destinationIP) are unescaped, so they match any character rather than a
     * literal '.'. This looks unintended, but the expression is left untouched
     * here because tightening it could change which lines are accepted - confirm
     * against real data before escaping them.
     */
    public static final Pattern pattern = Pattern
            .compile("((?<eventTimeStamp>(.{19})) (?<timeTaken>(\\d+)) (?<clientIP>(\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3})) (?<User>(.*?)) (?<Group>(.*?)) (?<Exception>(.*?)) (?<filterResult>(\\S+)) (?<category>\"?([^\"]*)\"?) (?<referer>(\\S+))\\s+(?<responseCode>(\\d+)) (?<action>(\\S+)) (?<method>(\\S+)) (?<contentType>(\\S+)) (?<protocol>(\\S+)) (?<requestDomain>(\\S+)) (?<requestPort>(\\d+)) (?<requestPath>(\\S+)) (?<requestQuery>(.*?)) (?<requestURIExtension>(\\S+)) (?<userAgent>\"?([^\"]*)\"?) (?<serverIP>(\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3})) (?<scBytes>(\\d+)) (?<csBytes>(\\d+)) (?<virusID>(\\S+)) (\\S+) (\\S+) (?<destinationIP>(\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3}))?(.*))");

    /** Named groups of {@link #pattern}, in the exact order they are emitted. */
    private static final String[] OUTPUT_GROUPS = {
        "eventTimeStamp", "timeTaken", "clientIP", "User", "Group",
        "Exception", "filterResult", "category", "referer", "responseCode",
        "action", "method", "contentType", "protocol", "requestDomain",
        "requestPort", "requestPath", "requestQuery", "requestURIExtension", "userAgent",
        "serverIP", "scBytes", "csBytes", "virusID", "destinationIP"
    };

    /** Origin label supplied by the caller; stored but not read by the code in this class. */
    private final String origen;

    /**
     * @param origen origin label for this parser instance
     */
    public ProxyParser(String origen) {
        this.origen = origen;
    }

    /**
     * Parses the first value of {@code tupla} and emits the captured fields.
     *
     * @param tupla    incoming tuple; its first value is expected to be the log
     *                 line as a {@code String} or a {@code byte[]}
     * @param colector collector that receives the parsed fields when the line matches
     */
    @Override
    public void execute(TridentTuple tupla, TridentCollector colector) {
        List<?> values = tupla.getValues();
        if (values == null || values.isEmpty()) {
            // Nothing to parse; previously this raised IndexOutOfBoundsException.
            log.debug("Empty tuple received; nothing to parse");
            return;
        }

        String line = toLine(values.get(0));
        if (line == null) {
            log.debug("Unsupported or null payload in tuple; tuple dropped");
            return;
        }

        Matcher matcher = pattern.matcher(line);
        if (!matcher.find()) {
            log.debug("Proxy log line did not match the expected format; tuple dropped");
            return;
        }

        Object[] fields = new Object[OUTPUT_GROUPS.length];
        for (int i = 0; i < OUTPUT_GROUPS.length; i++) {
            // destinationIP belongs to an optional group, so its value may be null.
            fields[i] = matcher.group(OUTPUT_GROUPS[i]);
        }
        colector.emit(new Values(fields));
    }

    /**
     * Converts a raw tuple value into the log line text.
     *
     * @param raw first value of the tuple
     * @return the line, or {@code null} when {@code raw} is neither a
     *         {@code String} nor a {@code byte[]}
     */
    private static String toLine(Object raw) {
        if (raw instanceof String) {
            return (String) raw;
        }
        if (raw instanceof byte[]) {
            // Explicit charset: the no-arg decoder depends on the JVM's platform default.
            return new String((byte[]) raw, StandardCharsets.UTF_8);
        }
        return null;
    }
}