/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.common;
import org.elasticsearch.test.ESTestCase;
import org.junit.Before;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
public class GrokTests extends ESTestCase {
private Map<String, String> basePatterns;
@Before
public void setup() throws IOException {
basePatterns = IngestCommonPlugin.loadBuiltinPatterns();
}
public void testMatchWithoutCaptures() {
String line = "value";
Grok grok = new Grok(basePatterns, "value");
Map<String, Object> matches = grok.captures(line);
assertEquals(0, matches.size());
}
public void testSimpleSyslogLine() {
String line = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]";
Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}");
Map<String, Object> matches = grok.captures(line);
assertEquals("evita", matches.get("logsource"));
assertEquals("Mar 16 00:01:25", matches.get("timestamp"));
assertEquals("connect from camomile.cloud9.net[168.100.1.3]", matches.get("message"));
assertEquals("postfix/smtpd", matches.get("program"));
assertEquals("1713", matches.get("pid"));
}
public void testSyslog5424Line() {
String line = "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\\\"bar\\\"][id2 baz=\\\"something\\\"] " +
"Hello, syslog.";
Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}");
Map<String, Object> matches = grok.captures(line);
assertEquals("191", matches.get("syslog5424_pri"));
assertEquals("1", matches.get("syslog5424_ver"));
assertEquals("2009-06-30T18:30:00+02:00", matches.get("syslog5424_ts"));
assertEquals("paxton.local", matches.get("syslog5424_host"));
assertEquals("grokdebug", matches.get("syslog5424_app"));
assertEquals("4123", matches.get("syslog5424_proc"));
assertEquals(null, matches.get("syslog5424_msgid"));
assertEquals("[id1 foo=\\\"bar\\\"][id2 baz=\\\"something\\\"]", matches.get("syslog5424_sd"));
assertEquals("Hello, syslog.", matches.get("syslog5424_msg"));
}
public void testDatePattern() {
String line = "fancy 12-12-12 12:12:12";
Grok grok = new Grok(basePatterns, "(?<timestamp>%{DATE_EU} %{TIME})");
Map<String, Object> matches = grok.captures(line);
assertEquals("12-12-12 12:12:12", matches.get("timestamp"));
}
public void testNilCoercedValues() {
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)");
Map<String, Object> matches = grok.captures("test 28.4ms");
assertEquals(28.4f, matches.get("duration"));
matches = grok.captures("test N/A");
assertEquals(null, matches.get("duration"));
}
public void testNilWithNoCoercion() {
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)");
Map<String, Object> matches = grok.captures("test 28.4ms");
assertEquals("28.4", matches.get("duration"));
matches = grok.captures("test N/A");
assertEquals(null, matches.get("duration"));
}
public void testUnicodeSyslog() {
Grok grok = new Grok(basePatterns, "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} " +
"%{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) " +
"%{GREEDYDATA:syslog_message}");
Map<String, Object> matches = grok.captures("<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : " +
"SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 �all'): Envelope-from: " +
"email@domain.no");
assertThat(matches.get("syslog_pri"), equalTo("22"));
assertThat(matches.get("syslog_program"), equalTo("postfix/policy-spf"));
assertThat(matches.get("tags"), nullValue());
}
public void testNamedFieldsWithWholeTextMatch() {
Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}");
Map<String, Object> matches = grok.captures("11/01/01");
assertThat(matches.get("stimestamp"), equalTo("11/01/01"));
}
public void testWithOniguramaNamedCaptures() {
Grok grok = new Grok(basePatterns, "(?<foo>\\w+)");
Map<String, Object> matches = grok.captures("hello world");
assertThat(matches.get("foo"), equalTo("hello"));
}
public void testISO8601() {
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$");
List<String> timeMessages = Arrays.asList(
"2001-01-01T00:00:00",
"1974-03-02T04:09:09",
"2010-05-03T08:18:18+00:00",
"2004-07-04T12:27:27-00:00",
"2001-09-05T16:36:36+0000",
"2001-11-06T20:45:45-0000",
"2001-12-07T23:54:54Z",
"2001-01-01T00:00:00.123456",
"1974-03-02T04:09:09.123456",
"2010-05-03T08:18:18.123456+00:00",
"2004-07-04T12:27:27.123456-00:00",
"2001-09-05T16:36:36.123456+0000",
"2001-11-06T20:45:45.123456-0000",
"2001-12-07T23:54:54.123456Z",
"2001-12-07T23:54:60.123456Z" // '60' second is a leap second.
);
for (String msg : timeMessages) {
assertThat(grok.match(msg), is(true));
}
}
public void testNotISO8601() {
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$");
List<String> timeMessages = Arrays.asList(
"2001-13-01T00:00:00", // invalid month
"2001-00-01T00:00:00", // invalid month
"2001-01-00T00:00:00", // invalid day
"2001-01-32T00:00:00", // invalid day
"2001-01-aT00:00:00", // invalid day
"2001-01-1aT00:00:00", // invalid day
"2001-01-01Ta0:00:00", // invalid hour
"2001-01-01T0:00:00", // invalid hour
"2001-01-01T25:00:00", // invalid hour
"2001-01-01T01:60:00", // invalid minute
"2001-01-01T00:aa:00", // invalid minute
"2001-01-01T00:00:aa", // invalid second
"2001-01-01T00:00:-1", // invalid second
"2001-01-01T00:00:61", // invalid second
"2001-01-01T00:00:00A", // invalid timezone
"2001-01-01T00:00:00+", // invalid timezone
"2001-01-01T00:00:00+25", // invalid timezone
"2001-01-01T00:00:00+2500", // invalid timezone
"2001-01-01T00:00:00+25:00", // invalid timezone
"2001-01-01T00:00:00-25", // invalid timezone
"2001-01-01T00:00:00-2500", // invalid timezone
"2001-01-01T00:00:00-00:61" // invalid timezone
);
for (String msg : timeMessages) {
assertThat(grok.match(msg), is(false));
}
}
public void testNoNamedCaptures() {
Map<String, String> bank = new HashMap<>();
bank.put("NAME", "Tal");
bank.put("EXCITED_NAME", "!!!%{NAME:name}!!!");
bank.put("TEST", "hello world");
String text = "wowza !!!Tal!!! - Tal";
String pattern = "%{EXCITED_NAME} - %{NAME}";
Grok g = new Grok(bank, pattern, false);
assertEquals("(?<EXCITED_NAME_0>!!!(?<NAME_21>Tal)!!!) - (?<NAME_22>Tal)", g.toRegex(pattern));
assertEquals(true, g.match(text));
Object actual = g.captures(text);
Map<String, Object> expected = new HashMap<>();
expected.put("EXCITED_NAME_0", "!!!Tal!!!");
expected.put("NAME_21", "Tal");
expected.put("NAME_22", "Tal");
assertEquals(expected, actual);
}
public void testNumericCapturesCoercion() {
Map<String, String> bank = new HashMap<>();
bank.put("BASE10NUM", "(?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))");
bank.put("NUMBER", "(?:%{BASE10NUM})");
String pattern = "%{NUMBER:bytes:float} %{NUMBER:status} %{NUMBER}";
Grok g = new Grok(bank, pattern);
String text = "12009.34 200 9032";
Map<String, Object> expected = new HashMap<>();
expected.put("bytes", 12009.34f);
expected.put("status", "200");
Map<String, Object> actual = g.captures(text);
assertEquals(expected, actual);
}
public void testApacheLog() {
String logLine = "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " +
"\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " +
"Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"";
Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}");
Map<String, Object> matches = grok.captures(logLine);
assertEquals("31.184.238.164", matches.get("clientip"));
assertEquals("-", matches.get("ident"));
assertEquals("-", matches.get("auth"));
assertEquals("24/Jul/2014:05:35:37 +0530", matches.get("timestamp"));
assertEquals("GET", matches.get("verb"));
assertEquals("/logs/access.log", matches.get("request"));
assertEquals("1.0", matches.get("httpversion"));
assertEquals("200", matches.get("response"));
assertEquals("69849", matches.get("bytes"));
assertEquals("\"http://8rursodiol.enjin.com\"", matches.get("referrer"));
assertEquals(null, matches.get("port"));
assertEquals("\"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.12785 " +
"YaBrowser/13.12.1599.12785 Safari/537.36\"", matches.get("agent"));
}
public void testComplete() {
Map<String, String> bank = new HashMap<>();
bank.put("MONTHDAY", "(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])");
bank.put("MONTH", "\\b(?:Jan(?:uary|uar)?|Feb(?:ruary|ruar)?|M(?:a|ä)?r(?:ch|z)?|Apr(?:il)?|Ma(?:y|i)?|Jun(?:e|i)" +
"?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|O(?:c|k)?t(?:ober)?|Nov(?:ember)?|De(?:c|z)(?:ember)?)\\b");
bank.put("MINUTE", "(?:[0-5][0-9])");
bank.put("YEAR", "(?>\\d\\d){1,2}");
bank.put("HOUR", "(?:2[0123]|[01]?[0-9])");
bank.put("SECOND", "(?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)");
bank.put("TIME", "(?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])");
bank.put("INT", "(?:[+-]?(?:[0-9]+))");
bank.put("HTTPDATE", "%{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}");
bank.put("WORD", "\\b\\w+\\b");
bank.put("BASE10NUM", "(?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\\.[0-9]+)?)|(?:\\.[0-9]+)))");
bank.put("NUMBER", "(?:%{BASE10NUM})");
bank.put("IPV6", "((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]" +
"\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4})" +
"{1,2})|:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:)" +
"{4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\" +
"d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]" +
"\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4})" +
"{1,5})" +
"|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))" +
"|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)" +
"(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}" +
":((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:)))(%.+)?");
bank.put("IPV4", "(?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.]" +
"(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])");
bank.put("IP", "(?:%{IPV6}|%{IPV4})");
bank.put("HOSTNAME", "\\b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\\.?|\\b)");
bank.put("IPORHOST", "(?:%{IP}|%{HOSTNAME})");
bank.put("USER", "[a-zA-Z0-9._-]+");
bank.put("DATA", ".*?");
bank.put("QS", "(?>(?<!\\\\)(?>\"(?>\\\\.|[^\\\\\"]+)+\"|\"\"|(?>'(?>\\\\.|[^\\\\']+)+')|''|(?>`(?>\\\\.|[^\\\\`]+)+`)|``))");
String text = "83.149.9.216 - - [19/Jul/2015:08:13:42 +0000] \"GET /presentations/logstash-monitorama-2013/images/" +
"kibana-dashboard3.png HTTP/1.1\" 200 171717 \"http://semicomplete.com/presentations/logstash-monitorama-2013/\" " +
"\"Mozilla" +
"/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36\"";
String pattern = "%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \\[%{HTTPDATE:timestamp}\\] \"%{WORD:verb} %{DATA:request} " +
"HTTP/%{NUMBER:httpversion}\" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}";
Grok grok = new Grok(bank, pattern);
Map<String, Object> expected = new HashMap<>();
expected.put("clientip", "83.149.9.216");
expected.put("ident", "-");
expected.put("auth", "-");
expected.put("timestamp", "19/Jul/2015:08:13:42 +0000");
expected.put("verb", "GET");
expected.put("request", "/presentations/logstash-monitorama-2013/images/kibana-dashboard3.png");
expected.put("httpversion", "1.1");
expected.put("response", 200);
expected.put("bytes", 171717);
expected.put("referrer", "\"http://semicomplete.com/presentations/logstash-monitorama-2013/\"");
expected.put("agent", "\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) AppleWebKit/537.36 (KHTML, like Gecko) " +
"Chrome/32.0.1700.77 Safari/537.36\"");
Map<String, Object> actual = grok.captures(text);
assertEquals(expected, actual);
}
public void testNoMatch() {
Map<String, String> bank = new HashMap<>();
bank.put("MONTHDAY", "(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])");
Grok grok = new Grok(bank, "%{MONTHDAY:greatday}");
assertThat(grok.captures("nomatch"), nullValue());
}
public void testMultipleNamedCapturesWithSameName() {
Map<String, String> bank = new HashMap<>();
bank.put("SINGLEDIGIT", "[0-9]");
Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}");
Map<String, Object> expected = new HashMap<>();
expected.put("num", "1");
assertThat(grok.captures("12"), equalTo(expected));
}
}