package com.lucidworks.storm.io.parsers;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.TimeZone;
import oi.thekraken.grok.api.Grok;
import oi.thekraken.grok.api.Match;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class GrokLogLineParser implements LogLineParser {
public static Logger log = LoggerFactory.getLogger(GrokLogLineParser.class);
public static final String ISO_8601_TIMESTAMP_FIELD_PROP = "iso8601TimestampFieldName";
public static final String LOG_DATE_FIELD_PROP = "dateFieldName";
public static final String LOG_DATE_FORMAT_PROP = "dateFieldFormat";
protected Grok grok;
protected String grokPatternFile;
protected String grokPattern;
protected String dateFieldName;
protected String dateFieldFormat;
protected String timestampFieldName;
protected ThreadLocal<SimpleDateFormat> df = null;
protected ThreadLocal<SimpleDateFormat> iso8601 = null;
public GrokLogLineParser(String grokPatternFile,
String grokPattern,
String timestampFieldName,
String dateFieldName,
String dateFieldFormat)
throws Exception
{
if (grokPatternFile == null || grokPatternFile.isEmpty())
throw new IllegalArgumentException("Must specify a Grok pattern file!");
if (grokPattern == null || grokPattern.isEmpty())
throw new IllegalArgumentException("Must specify a Grok pattern!");
this.grokPatternFile = grokPatternFile;
this.grokPattern = grokPattern;
if (grokPatternFile.startsWith("patterns/")) {
// load built-in from classpath
grok = new Grok();
InputStreamReader isr = null;
try {
InputStream in = getClass().getClassLoader().getResourceAsStream(grokPatternFile);
if (in == null)
throw new FileNotFoundException(grokPatternFile+" not found on classpath!");
isr = new InputStreamReader(in, StandardCharsets.UTF_8);
grok.addPatternFromReader(isr);
} finally {
if (isr != null) {
try {
isr.close();
} catch (Exception ignore){}
}
}
} else {
// initialize from an external file
grok = Grok.create(grokPatternFile);
}
grok.compile(grokPattern);
// optionally, we can set the iso 8601 timestamp field on each log message by parsing a custom date in the log
this.timestampFieldName = timestampFieldName;
this.dateFieldName = dateFieldName;
this.dateFieldFormat = dateFieldFormat;
if (this.timestampFieldName != null) {
if (this.dateFieldFormat != null) {
df = new ThreadLocal<SimpleDateFormat>() {
@Override
protected SimpleDateFormat initialValue() {
SimpleDateFormat sdf = new SimpleDateFormat(GrokLogLineParser.this.dateFieldFormat);
sdf.setTimeZone(TimeZone.getTimeZone("UTC"));
return sdf;
}
};
iso8601 = new ThreadLocal<SimpleDateFormat>() {
@Override
protected SimpleDateFormat initialValue() {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
sdf.setTimeZone(TimeZone.getTimeZone("UTC"));
return sdf;
}
};
}
log.info("Configured "+getClass().getSimpleName()+" to set the "+ timestampFieldName+
" field to an ISO-8601 timestamp by parsing "+dateFieldName+" using format: "+dateFieldFormat);
}
}
public Map<String, Object> parseLine(String fileName, int lineNum, String line) throws Exception {
if (line == null || line.isEmpty())
return null;
Match gm = grok.match(line);
gm.captures();
if (gm.isNull())
return null;
Map<String,Object> grokMap = gm.toMap();
// add the ISO-8601 timestamp field if was requested in the config
if (timestampFieldName != null) {
Date timestamp = getLogDate(grokMap);
if (timestamp != null) {
grokMap.put(timestampFieldName, iso8601.get().format(timestamp));
}
}
return grokMap;
}
protected Date getLogDate(Map<String,Object> grokMap) throws ParseException {
Date timestamp = null;
if (dateFieldName != null) {
Object dateFieldValue = grokMap.get(dateFieldName);
if (dateFieldValue != null) {
timestamp = df.get().parse((String)dateFieldValue);
}
}
return timestamp;
}
@Override
public String toString() {
return getClass().getSimpleName()+": "+grokPattern;
}
}