package com.produban.openbus.trident;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.binary.Hex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.tuple.Values;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
public class ParseProxy extends BaseFunction {
private static final Logger log = LoggerFactory.getLogger(ParseProxy.class);
public static final char SEPARADOR='\001'; //caracter SOH
public static Pattern pattern = Pattern.compile("(?<cabecera>(.{15}) (.*) ([^:]*)): \\s+((?<eventTimeStamp>(.{19})) (?<timeTaken>(\\d+)) (?<clientIP>(\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3})) (?<User>(.*?)) (?<Group>(.*?)) (?<Exception>(.*?)) (?<filterResult>(\\S+)) (?<category>\"?([^\"]*)\"?) (?<referer>(\\S+))\\s+(?<responseCode>(\\d+)) (?<action>(\\S+)) (?<method>(\\S+)) (?<contentType>(\\S+)) (?<protocol>(\\S+)) (?<requestDomain>(\\S+)) (?<requestPort>(\\d+)) (?<requestPath>(\\S+)) (?<requestQuery>(.*?)) (?<requestURIExtension>(\\S+)) (?<userAgent>\"?([^\"]*)\"?) (?<serverIP>(\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3})) (?<scBytes>(\\d+)) (?<csBytes>(\\d+)) (?<virusID>(\\S+)) (\\S+) (\\S+) (?<destinationIP>(\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3}))?(.*))");
@Override
public void execute(TridentTuple tupla, TridentCollector colector) {
Matcher matcher = pattern.matcher(tupla.getString(0));
String resultado;
if (matcher.find()) {
//colector.emit(new Values(resultado));
colector.emit(new Values(matcher.group("eventTimeStamp"),
matcher.group("timeTaken"),
matcher.group("clientIP"),
matcher.group("User"),
matcher.group("Group"),
matcher.group("Exception"),
matcher.group("filterResult"),
matcher.group("category"),
matcher.group("referer"),
matcher.group("responseCode"),
matcher.group("action"),
matcher.group("method"),
matcher.group("contentType"),
matcher.group("protocol"),
matcher.group("requestDomain"),
matcher.group("requestPort"),
matcher.group("requestPath"),
matcher.group("requestQuery"),
matcher.group("requestURIExtension"),
matcher.group("userAgent"),
matcher.group("serverIP"),
matcher.group("scBytes"),
matcher.group("csBytes"),
matcher.group("virusID"),
matcher.group("destinationIP")));
}
}
}