/*
* Copyright 2013 Produban
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.produban.openbus.analysis;
import backtype.storm.tuple.Values;
import storm.trident.operation.TridentCollector;
import storm.trident.operation.TridentOperationContext;
import storm.trident.tuple.TridentTuple;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.produban.openbus.util.Common.join;
/**
 * A parser for proxy logs. This class translates proxy log records into hashmaps with relevant fields.
 *
 * <p>Each log line is matched against {@link #pattern}; the named capture groups of that
 * expression become the keys of the resulting map. The parser keeps no state between
 * calls: every call to {@link #parse(String)} produces a new, independent map.
 */
public class ProxyLogParser implements LogParser {

    //Fields:
    public static final String BATCHDATE = "BATCHDATE";
    public static final String PROXYCLASS = "PROXYCLASS";
    public static final String PROXYIP = "PROXYIP";
    public static final String USER = "USER";
    public static final String REQUESTDATE = "REQUESTDATE";
    public static final String HTTPMETHOD = "HTTPMETHOD";
    public static final String URL = "URL";
    public static final String HTTPSTATUS = "HTTPSTATUS";
    public static final String PORT = "PORT";
    public static final String SQUIDRESULTCODE = "SQUIDRESULTCODE";
    public static final String SQUIDHIERARCHYCODE = "SQUIDHIERARCHYCODE";
    public static final String POLICY = "POLICY";
    public static final String EXTRAFIELDS = "EXTRAFIELDS";
    public static final String CLIENTIP = "CLIENTIP";

    /**
     * All field names, in the order in which they are reported by {@link #fieldNames()}
     * and emitted by {@link #execute(TridentTuple, TridentCollector)}. Keeping a single
     * ordered list guarantees that names and emitted values always line up.
     */
    private static final String[] FIELD_ORDER = {
        BATCHDATE,
        PROXYCLASS,
        PROXYIP,
        USER,
        REQUESTDATE,
        HTTPMETHOD,
        URL,
        HTTPSTATUS,
        PORT,
        SQUIDRESULTCODE,
        SQUIDHIERARCHYCODE,
        POLICY,
        EXTRAFIELDS,
        CLIENTIP
    };

    //Regex:
    // One named capture group per field; the pieces below are concatenated into a single expression.
    public static Pattern pattern = Pattern.compile(
            "(?<"+BATCHDATE+">\\w+\\s\\d\\d\\s\\d\\d:\\d\\d:\\d\\d)\\s+\\w+\\s\\w+\\s+"
            + "(?<"+PROXYCLASS+">(\\w+\\.?)+):?\\s+"
            + "(?<"+PROXYIP+">(\\d+\\.?)+)\\s+"
            + "\"?(?<"+USER+">(\\-|[^\"]+))\"?\\s+\\-\\s+"
            + "\\[(?<"+REQUESTDATE+">[^\\]]*)\\]\\s+"
            + "\"(?<"+HTTPMETHOD+">\\w+)\\s+"
            + "(?<"+URL+">[^\"]+)\"\\s+"
            + "(?<"+HTTPSTATUS+">\\d+)\\s+"
            + "(?<"+PORT+">\\d+)\\s+"
            + "(?<"+SQUIDRESULTCODE+">\\w+):(?<"+SQUIDHIERARCHYCODE+">\\w+)\\s+\\d+\\s+"
            + "(?<"+POLICY+">[\\w|\\-]+)\\s+"
            + "(?<"+EXTRAFIELDS+">\\<[^\\>]+\\>)[\\s|\\-]+client\\-ip\\s+"
            + "\"(?<"+CLIENTIP+">[\\d|\\.]+)\"");

    /**
     * Translate a log line from a proxy log into a HashMap with relevant fields.
     *
     * <p>A new map is created on every call, so a line that does not match never
     * returns values left over from a previously parsed line, and maps handed out
     * earlier are never modified by later calls.
     *
     * @param logLine the raw log line to be processed
     * @return a HashMap containing the extracted fields, keyed by the field-name
     *         constants of this class; empty when the line does not match {@link #pattern}
     */
    public HashMap<String,String> parse(String logLine){
        HashMap<String, String> record = new HashMap<String, String>();
        Matcher matcher = pattern.matcher(logLine);
        if (matcher.find()) {
            for (String field : FIELD_ORDER) {
                record.put(field, matcher.group(field));
            }
            // The user group may span line breaks ([^"]+ matches them); each newline
            // character is replaced by the two characters backslash + 'n'.
            record.put(USER, record.get(USER).replaceAll("\\n", "\\\\n"));
        }
        return record;
    }

    /**
     * Lists the names of the fields produced by this parser.
     *
     * @return a new, modifiable list of field names, in the same order in which
     *         {@link #execute(TridentTuple, TridentCollector)} emits the values
     */
    @Override
    public List<String> fieldNames() {
        List<String> fieldNames = new ArrayList<>(FIELD_ORDER.length);
        for (String field : FIELD_ORDER) {
            fieldNames.add(field);
        }
        return fieldNames;
    }

    /**
     * Parses the log line carried as binary data in position 0 of the incoming tuple
     * and emits one tuple with the extracted values, ordered as in {@link #fieldNames()}.
     *
     * <p>When the line does not match {@link #pattern}, every emitted value is {@code null}.
     *
     * @param objects          incoming tuple; its first element is read as a byte array
     * @param tridentCollector collector that receives the emitted values
     */
    @Override
    public void execute(TridentTuple objects, TridentCollector tridentCollector) {
        // NOTE(review): decodes with the platform default charset; confirm the encoding
        // of the incoming log data and consider specifying it explicitly.
        String logLine = new String(objects.getBinary(0));
        HashMap<String, String> parsedFields = this.parse(logLine);
        Values tridentValues = new Values();
        for (String field : FIELD_ORDER) {
            tridentValues.add(parsedFields.get(field));
        }
        tridentCollector.emit(tridentValues);
    }

    /** No initialization is required. */
    @Override
    public void prepare(Map map, TridentOperationContext tridentOperationContext) {
    }

    /** No resources are held, so there is nothing to release. */
    @Override
    public void cleanup() {
    }
}