/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nifi.grok;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.RecordReader;
import org.apache.nifi.serialization.record.DataType;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.util.DataTypeUtils;

import io.thekraken.grok.api.Grok;
import io.thekraken.grok.api.Match;

/**
 * A {@link RecordReader} that reads text line by line and turns each line that matches a
 * {@link Grok} expression into a {@link Record}.
 *
 * <p>
 * Lines that follow a matching line but do not match the expression themselves are handled as follows:
 * </p>
 * <ul>
 * <li>If the line looks like the first line of a Java stack trace, it and all subsequent stack trace
 * lines are stored in the {@value #STACK_TRACE_COLUMN_NAME} field of the record.</li>
 * <li>Otherwise, if {@code append} is {@code true}, the line is appended (preceded by a newline) to the
 * last field of the schema that is not the stack trace field.</li>
 * <li>Otherwise the line is dropped.</li>
 * </ul>
 *
 * <p>
 * The input stream is decoded by an {@link InputStreamReader} created without an explicit charset.
 * </p>
 */
public class GrokRecordReader implements RecordReader {
    private final BufferedReader reader;
    private final Grok grok;
    private final boolean append;
    private RecordSchema schema;

    // A line that has already been read from the reader but belongs to the NEXT record.
    private String nextLine;

    static final String STACK_TRACE_COLUMN_NAME = "stackTrace";

    // Matches lines that continue a stack trace: "at ..." frames, circular reference markers,
    // "Caused by: ", "Suppressed: " and "... N more" / "... N common frames omitted" lines.
    private static final Pattern STACK_TRACE_PATTERN = Pattern.compile(
        "^\\s*(?:(?: |\\t)+at )|"
            + "(?:(?: |\\t)+\\[CIRCULAR REFERENCE\\:)|"
            + "(?:Caused by\\: )|"
            + "(?:Suppressed\\: )|"
            + "(?:\\s+... \\d+ (?:more|common frames? omitted)$)");

    /**
     * Creates a reader over the given stream.
     *
     * @param in the stream to read log lines from
     * @param grok the compiled Grok expression used to match each line
     * @param schema the schema of the records that are produced
     * @param append whether non-matching lines should be appended to the last field of the previous record
     */
    public GrokRecordReader(final InputStream in, final Grok grok, final RecordSchema schema, final boolean append) {
        this.reader = new BufferedReader(new InputStreamReader(in));
        this.grok = grok;
        this.schema = schema;
        this.append = append;
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if the reader cannot be closed
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }

    /**
     * Reads the next record.
     *
     * @return the next record, a record with no values if the line does not match the Grok expression,
     *         or {@code null} if there are no more lines to read
     * @throws IOException if the underlying reader fails
     * @throws MalformedRecordException if the matched values cannot be turned into a record
     */
    @Override
    public Record nextRecord() throws IOException, MalformedRecordException {
        final String line = nextLine == null ? reader.readLine() : nextLine;
        nextLine = null; // ensure that we don't process nextLine again
        if (line == null) {
            return null;
        }

        final RecordSchema schema = getSchema();

        final Match match = grok.match(line);
        match.captures();
        final Map<String, Object> valueMap = match.toMap();
        if (valueMap.isEmpty()) {
            // We were unable to match the pattern so return a record without any values.
            return new MapRecord(schema, Collections.emptyMap());
        }

        // Read the next line to see if it matches the pattern (in which case we will simply leave it for
        // the next call to nextRecord()) or we will attach it to the previously read record.
        String stackTrace = null;
        final StringBuilder toAppend = new StringBuilder();
        while ((nextLine = reader.readLine()) != null) {
            final Match nextLineMatch = grok.match(nextLine);
            nextLineMatch.captures();
            final Map<String, Object> nextValueMap = nextLineMatch.toMap();
            if (nextValueMap.isEmpty()) {
                // next line did not match. Check if it indicates a Stack Trace. If so, read until
                // the stack trace ends. Otherwise, append the next line to the last field in the record.
                if (isStartOfStackTrace(nextLine)) {
                    // readStackTrace leaves nextLine pointing at the first line after the trace,
                    // or null if the stream ended inside the trace.
                    stackTrace = readStackTrace(nextLine);
                    break;
                } else if (append) {
                    toAppend.append("\n").append(nextLine);
                }
            } else {
                // The next line matched our pattern.
                break;
            }
        }

        try {
            final List<DataType> fieldTypes = schema.getDataTypes();
            final Map<String, Object> values = new HashMap<>(fieldTypes.size());

            for (final RecordField field : schema.getFields()) {
                // Look the value up by field name first, then fall back to the field's aliases.
                Object value = valueMap.get(field.getFieldName());
                if (value == null) {
                    for (final String alias : field.getAliases()) {
                        value = valueMap.get(alias);
                        if (value != null) {
                            break;
                        }
                    }
                }

                final String fieldName = field.getFieldName();
                if (value == null) {
                    values.put(fieldName, null);
                    continue;
                }

                final DataType fieldType = field.getDataType();
                final Object converted = convert(fieldType, value.toString(), fieldName);
                values.put(fieldName, converted);
            }

            if (append && toAppend.length() > 0) {
                // Appended lines go into the last field of the schema, skipping the stack trace
                // field if that happens to be the last one.
                final String lastFieldName = schema.getField(schema.getFieldCount() - 1).getFieldName();

                final int fieldIndex = STACK_TRACE_COLUMN_NAME.equals(lastFieldName) ? schema.getFieldCount() - 2 : schema.getFieldCount() - 1;
                final String lastFieldBeforeStackTrace = schema.getFieldNames().get(fieldIndex);

                final Object existingValue = values.get(lastFieldBeforeStackTrace);
                final String updatedValue = existingValue == null ? toAppend.toString() : existingValue + toAppend.toString();
                values.put(lastFieldBeforeStackTrace, updatedValue);
            }

            values.put(STACK_TRACE_COLUMN_NAME, stackTrace);

            return new MapRecord(schema, values);
        } catch (final Exception e) {
            throw new MalformedRecordException("Found invalid log record and will skip it. Record: " + line, e);
        }
    }

    /**
     * Determines whether the given line looks like the first line of a stack trace, i.e. it contains
     * {@code "Exception: "} or {@code "Error: "} and has no space before that marker.
     *
     * @param line the line to inspect, may be {@code null}
     * @return {@code true} if the line looks like the start of a stack trace
     */
    private boolean isStartOfStackTrace(final String line) {
        if (line == null) {
            return false;
        }

        // Stack Traces are generally of the form:
        // java.lang.IllegalArgumentException: My message
        //   at org.apache.nifi.MyClass.myMethod(MyClass.java:48)
        //   at java.lang.Thread.run(Thread.java:745) [na:1.8.0_60]
        // Caused by: java.net.SocketTimeoutException: null
        //   ... 13 common frames omitted

        int index = line.indexOf("Exception: ");
        if (index < 0) {
            index = line.indexOf("Error: ");
        }

        if (index < 0) {
            return false;
        }

        // A space before the marker means the text in front of it is not a bare class name.
        return line.indexOf(" ") >= index;
    }

    /**
     * Reads the remainder of a stack trace from the reader.
     *
     * <p>
     * On return, {@code nextLine} holds the first line that is not part of the stack trace, or
     * {@code null} if the end of the stream was reached while reading the trace.
     * </p>
     *
     * @param firstLine the first line of the stack trace, which has already been read
     * @return the full stack trace, with lines separated by {@code "\n"}
     * @throws IOException if the underlying reader fails
     */
    private String readStackTrace(final String firstLine) throws IOException {
        final StringBuilder sb = new StringBuilder(firstLine);

        // The caller holds firstLine in nextLine. It is consumed here, so clear it; otherwise, if the
        // stream ends inside the stack trace, the next call to nextRecord() would process it again
        // and emit a spurious empty record.
        nextLine = null;

        String line;
        while ((line = reader.readLine()) != null) {
            if (isLineInStackTrace(line)) {
                sb.append("\n").append(line);
            } else {
                // Not part of the trace: keep it for the next call to nextRecord().
                nextLine = line;
                break;
            }
        }

        return sb.toString();
    }

    /**
     * Determines whether the given line is a continuation of a stack trace.
     *
     * @param line the line to inspect
     * @return {@code true} if {@link #STACK_TRACE_PATTERN} is found in the line
     */
    private boolean isLineInStackTrace(final String line) {
        return STACK_TRACE_PATTERN.matcher(line).find();
    }

    /**
     * Converts the given String value into the given data type.
     *
     * @param fieldType the desired type; if {@code null}, the String is returned unchanged
     * @param string the value to convert, may be {@code null}
     * @param fieldName the name of the field that the value belongs to
     * @return the converted value, or {@code null} if the value is {@code null} or is an empty String
     *         that is to be converted to anything other than a STRING
     */
    protected Object convert(final DataType fieldType, final String string, final String fieldName) {
        if (fieldType == null) {
            return string;
        }

        if (string == null) {
            return null;
        }

        // If string is empty then return an empty string if field type is STRING. If field type is
        // anything else, we can't really convert it so return null
        if (string.isEmpty() && fieldType.getFieldType() != RecordFieldType.STRING) {
            return null;
        }

        return DataTypeUtils.convertType(string, fieldType, fieldName);
    }

    /**
     * @return the schema of the records produced by this reader
     */
    @Override
    public RecordSchema getSchema() {
        return schema;
    }
}