/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.tools.rumen;

import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A single log line split into a record type (the first space-delimited
 * token) and a set of {@code KEY="VALUE"} properties parsed from the
 * remainder of the line.
 */
class ParsedLine {
  /** Key/value pairs extracted from the line; empty when none were found. */
  Properties content;

  /**
   * Record type obtained from the first token of the line; remains
   * {@code null} when the line is empty or starts with a space.
   */
  LogRecordType type;

  /** REGEX fragment capturing a key: one or more word characters. */
  static final String KEY = "(\\w+)";

  /**
   * The value string is enclosed in double quotation marks ('"') and
   * occurrences of '"' and '\' are escaped with a '\'. So the escaped value
   * string is essentially a string of escaped sequences ('\' followed by any
   * character) or any character other than '"' and '\'.
   *
   * The straightforward REGEX to capture the above is "((?:[^\"\\\\]|\\\\.)*)".
   * Unfortunately Java's REGEX implementation is "broken" in that it does not
   * perform the NFA-to-DFA conversion, so such expressions lead to
   * backtracking and stack overflow when matching long strings. The
   * following is a manual "unfolding" of the REGEX to get rid of backtracking.
   */
  static final String VALUE = "([^\"\\\\]*+(?:\\\\.[^\"\\\\]*+)*+)";

  /**
   * REGEX to match the Key-Value pairs in an input line. Capture group 1
   * matches the key and capture group 2 matches the value (without quotation
   * marks).
   */
  static final Pattern keyValPair = Pattern.compile(KEY + "=" + "\"" + VALUE + "\"");

  /**
   * Parses one log line.
   *
   * <p>The text before the first space is interned as the record type via
   * {@link LogRecordType#intern}; every {@code KEY="VALUE"} match in the
   * rest of the line is stored in {@link #content}. Values are stored
   * exactly as captured, i.e. escape sequences are not unescaped here. If a
   * key occurs more than once, the last occurrence wins.
   *
   * <p>An empty line, or a line that begins with a space, is treated as junk:
   * {@link #type} stays {@code null} and {@link #content} stays empty. A line
   * that contains no space at all is a record type with no properties.
   *
   * @param fullLine the raw log line; must not be {@code null}
   * @param version  unused by this constructor
   */
  @SuppressWarnings("unused")
  ParsedLine(String fullLine, int version) {
    super();

    content = new Properties();

    int firstSpace = fullLine.indexOf(" ");

    if (firstSpace < 0) {
      // No separator: the whole line is the record-type token.
      firstSpace = fullLine.length();
    }

    if (firstSpace == 0) {
      return; // This is a junk line of some sort
    }

    type = LogRecordType.intern(fullLine.substring(0, firstSpace));

    // When the line has no space, firstSpace == fullLine.length() and
    // substring(firstSpace + 1) would throw StringIndexOutOfBoundsException;
    // such a line simply has no key/value section.
    String propValPairs =
        firstSpace < fullLine.length() ? fullLine.substring(firstSpace + 1) : "";

    Matcher matcher = keyValPair.matcher(propValPairs);

    while (matcher.find()) {
      String key = matcher.group(1);
      String value = matcher.group(2);
      content.setProperty(key, value);
    }
  }

  /**
   * Returns the record type parsed from the line.
   *
   * @return the record type, or {@code null} for a junk line
   */
  protected LogRecordType getType() {
    return type;
  }

  /**
   * Looks up the raw value recorded for a key.
   *
   * @param key the property name
   * @return the captured value, or {@code null} if the key was not present
   */
  protected String get(String key) {
    return content.getProperty(key);
  }

  /**
   * Looks up a key and parses its value as a decimal {@code long}.
   *
   * @param key the property name
   * @return the parsed value
   * @throws NumberFormatException if the key is absent or its value is not
   *         a parsable {@code long}
   */
  protected long getLong(String key) {
    String val = get(key);

    return Long.parseLong(val);
  }
}