package com.produban.openbus.topologies; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import backtype.storm.tuple.Values; import storm.trident.operation.BaseFunction; import storm.trident.operation.TridentCollector; import storm.trident.tuple.TridentTuple; public class ProxyParser extends BaseFunction{ /** * */ private static final long serialVersionUID = 1L; //private static final Logger log = LoggerFactory.getLogger(PostfixParser.class); private static final Logger log = LoggerFactory.getLogger(ProxyParser.class); public static final char SEPARADOR='\001'; //caracter SOH public static Pattern pattern = Pattern.compile("((?<eventTimeStamp>(.{19})) (?<timeTaken>(\\d+)) (?<clientIP>(\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3})) (?<User>(.*?)) (?<Group>(.*?)) (?<Exception>(.*?)) (?<filterResult>(\\S+)) (?<category>\"?([^\"]*)\"?) (?<referer>(\\S+))\\s+(?<responseCode>(\\d+)) (?<action>(\\S+)) (?<method>(\\S+)) (?<contentType>(\\S+)) (?<protocol>(\\S+)) (?<requestDomain>(\\S+)) (?<requestPort>(\\d+)) (?<requestPath>(\\S+)) (?<requestQuery>(.*?)) (?<requestURIExtension>(\\S+)) (?<userAgent>\"?([^\"]*)\"?) (?<serverIP>(\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3})) (?<scBytes>(\\d+)) (?<csBytes>(\\d+)) (?<virusID>(\\S+)) (\\S+) (\\S+) (?<destinationIP>(\\d{1,3}.\\d{1,3}.\\d{1,3}.\\d{1,3}))?(.*))"); private String origen; public ProxyParser(String origen){ this.origen=origen; } @Override public void execute(TridentTuple tupla, TridentCollector colector) { // TODO Auto-generated method stub Matcher matcher=pattern.matcher(""); /* if(origen.equals("disco")){ matcher = pattern.matcher(tupla.getString(0)); }else{ matcher = pattern.matcher(new String((byte[]) tupla.toArray()[0])); }*/ List objetos =tupla.getValues(); if (objetos.get(0) instanceof String){ matcher = pattern.matcher(tupla.getString(0)); }else if(objetos.get(0) instanceof byte[]){ matcher = pattern.matcher(new String((byte[]) tupla.toArray()[0])); } if(matcher.find()){ colector.emit(new Values( matcher.group("eventTimeStamp") ,matcher.group("timeTaken") ,matcher.group("clientIP") ,matcher.group("User") ,matcher.group("Group") ,matcher.group("Exception") ,matcher.group("filterResult") ,matcher.group("category") ,matcher.group("referer") ,matcher.group("responseCode") ,matcher.group("action") ,matcher.group("method") ,matcher.group("contentType") ,matcher.group("protocol") ,matcher.group("requestDomain") ,matcher.group("requestPort") ,matcher.group("requestPath") ,matcher.group("requestQuery") ,matcher.group("requestURIExtension") ,matcher.group("userAgent") ,matcher.group("serverIP") ,matcher.group("scBytes") ,matcher.group("csBytes") ,matcher.group("virusID") ,matcher.group("destinationIP"))); } } }