/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * * This class is taken from on the DataBricks "Log Analysis with Spark" reference application. * The original file may be found here: * http://databricks.gitbooks.io/databricks-spark-reference-applications/content/logs_analyzer/ * chapter1/java8/src/main/java/com/databricks/apps/logs/ApacheAccessLog.java * * Attribution-NonCommercial 3.0 Unported: * http://databricks.gitbooks.io/databricks-spark-reference-applications/content/LICENSE * */ package co.cask.cdap.examples.loganalysis; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.Serializable; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * This class represents an Apache access log line. * See http://httpd.apache.org/docs/2.2/logs.html for more details. */ public class ApacheAccessLog implements Serializable { private static final Logger LOG = LoggerFactory.getLogger(ApacheAccessLog.class); private String ipAddress; private String clientIdentd; private String userID; private String dateTimeString; private String method; private String endpoint; private String protocol; private int responseCode; private long contentSize; private ApacheAccessLog(String ipAddress, String clientIdentd, String userID, String dateTime, String method, String endpoint, String protocol, String responseCode, String contentSize) { this.ipAddress = ipAddress; this.clientIdentd = clientIdentd; this.userID = userID; this.dateTimeString = dateTime; this.method = method; this.endpoint = endpoint; this.protocol = protocol; this.responseCode = Integer.parseInt(responseCode); this.contentSize = Long.parseLong(contentSize); } public String getIpAddress() { return ipAddress; } public String getClientIdentd() { return clientIdentd; } public String getUserID() { return userID; } public String getDateTimeString() { return dateTimeString; } public String getMethod() { return method; } public String getEndpoint() { return endpoint; } public String getProtocol() { return protocol; } public int getResponseCode() { return responseCode; } public long getContentSize() { return contentSize; } public void setIpAddress(String ipAddress) { this.ipAddress = ipAddress; } public void setClientIdentd(String clientIdentd) { this.clientIdentd = clientIdentd; } public void setUserID(String userID) { this.userID = userID; } public void setDateTimeString(String dateTimeString) { this.dateTimeString = dateTimeString; } public void setMethod(String method) { this.method = method; } public void setEndpoint(String endpoint) { this.endpoint = endpoint; } public void setProtocol(String protocol) { this.protocol = protocol; } public void setResponseCode(int responseCode) { this.responseCode = responseCode; } public void setContentSize(long contentSize) { this.contentSize = contentSize; } // Example Apache log line: // 127.0.0.1 - - [21/Jul/2014:9:55:27 -0800] "GET /home.html HTTP/1.1" 200 2048 private static final String LOG_ENTRY_PATTERN = // 1:IP 2:client 3:user 4:date time 5:method 6:req 7:proto 8:respcode 9:size "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)"; private static final Pattern PATTERN = Pattern.compile(LOG_ENTRY_PATTERN); public static ApacheAccessLog parseFromLogLine(String logline) { Matcher m = PATTERN.matcher(logline); if (!m.find()) { LOG.error("Cannot parse logline" + logline); throw new RuntimeException("Error parsing logline"); } return new ApacheAccessLog(m.group(1), m.group(2), m.group(3), m.group(4), m.group(5), m.group(6), m.group(7), m.group(8), m.group(9)); } @Override public String toString() { return String.format("%s %s %s [%s] \"%s %s %s\" %s %s", ipAddress, clientIdentd, userID, dateTimeString, method, endpoint, protocol, responseCode, contentSize); } }