//
// Copyright 2010 Cinch Logic Pty Ltd.
//
// http://www.chililog.com
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package org.chililog.server.engine.parsers;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.regex.Pattern;
import org.apache.commons.lang.NullArgumentException;
import org.apache.commons.lang.StringUtils;
import org.chililog.server.common.ChiliLogException;
import org.chililog.server.common.TextTokenizer;
import org.chililog.server.data.MongoJsonParser;
import org.chililog.server.data.RepositoryEntryBO;
import org.chililog.server.data.RepositoryConfigBO;
import org.chililog.server.data.RepositoryParserConfigBO;
import org.chililog.server.data.RepositoryEntryBO.Severity;
import org.chililog.server.data.RepositoryParserConfigBO.AppliesTo;
import org.chililog.server.engine.RepositoryEntryMqMessage;
import com.mongodb.BasicDBObject;
/**
 * <p>
 * Parses incoming entries to extract fields and keywords.
 * </p>
 * <p>
 * This code is NOT designed for multi-threaded use. It should only be used in 1 thread. Each instance holds its own
 * {@link SimpleDateFormat}, which is not safe to share between threads.
 * </p>
 */
public abstract class EntryParser {

    private final String _repoName;
    private long _maxKeywords = 0;
    private final RepositoryParserConfigBO _repoParserInfo;
    private Exception _lastParseError = null;

    // Filters are only populated for the matching AppliesTo mode; otherwise they stay null.
    private Pattern _sourcePattern = null;
    private String[] _sourceCSV = null;
    private Pattern _hostPattern = null;
    private String[] _hostCSV = null;

    private final SimpleDateFormat _dateFormat;
    private final TextTokenizer _tokenizer;

    // Patterns handed to every MongoJsonParser. Fetched lazily on the first non-blank pre-parsed fields string.
    private Pattern _timestampPattern = null;
    private Pattern _longNumberPattern = null;

    /**
     * <p>
     * Basic constructor. Captures the repository name, resolves the maximum number of keywords and prepares the
     * source/host filters according to the parser's <code>AppliesTo</code> setting.
     * </p>
     *
     * @param repoInfo
     *            Repository (for reporting errors)
     * @param repoParserInfo
     *            Parser information that we need
     * @throws NullArgumentException
     *             if <code>repoInfo</code> or <code>repoParserInfo</code> is null
     * @throws java.util.regex.PatternSyntaxException
     *             if a regular expression filter cannot be compiled
     */
    public EntryParser(RepositoryConfigBO repoInfo, RepositoryParserConfigBO repoParserInfo) {
        // NullArgumentException expects the argument NAME and appends " must not be null." itself.
        if (repoInfo == null) {
            throw new NullArgumentException("repoInfo");
        }
        if (repoParserInfo == null) {
            throw new NullArgumentException("repoParserInfo");
        }

        _repoName = repoInfo.getName();
        _repoParserInfo = repoParserInfo;

        // Repository wide setting applies unless the parser overrides it
        _maxKeywords = repoInfo.getStorageMaxKeywords();
        if (repoParserInfo.getMaxKeywords() != RepositoryParserConfigBO.MAX_KEYWORDS_INHERITED) {
            _maxKeywords = repoParserInfo.getMaxKeywords();
        }

        // Get our filters ready for matching source and host
        String sourceFilter = _repoParserInfo.getAppliesToSourceFilter();
        String hostFilter = _repoParserInfo.getAppliesToHostFilter();
        if (_repoParserInfo.getAppliesTo() == AppliesTo.AllowFilteredCSV) {
            if (!StringUtils.isBlank(sourceFilter)) {
                _sourceCSV = splitAndTrim(sourceFilter);
            }
            if (!StringUtils.isBlank(hostFilter)) {
                _hostCSV = splitAndTrim(hostFilter);
            }
        } else if (_repoParserInfo.getAppliesTo() == AppliesTo.AllowFilteredRegularExpression) {
            if (!StringUtils.isBlank(sourceFilter)) {
                _sourcePattern = Pattern.compile(sourceFilter);
            }
            if (!StringUtils.isBlank(hostFilter)) {
                _hostPattern = Pattern.compile(hostFilter);
            }
        }

        // Dates for parsing timestamp
        _dateFormat = RepositoryEntryMqMessage.getDateFormatter();

        // Tokenizer for keyword extraction
        _tokenizer = TextTokenizer.getInstance();
    }

    /**
     * Splits a comma separated filter into its items and trims the white space around each one.
     *
     * @param csv
     *            Comma separated values; must not be null
     * @return Trimmed items in their original order
     */
    private static String[] splitAndTrim(String csv) {
        String[] items = csv.split(",");
        for (int i = 0; i < items.length; i++) {
            items[i] = items[i].trim();
        }
        return items;
    }

    /**
     * Checks if a value equals, ignoring case, any of the candidates.
     *
     * @param candidates
     *            Allowed values; null means no filter was configured
     * @param value
     *            Value to look for
     * @return True if <code>value</code> is not blank and matches one of the candidates, False otherwise
     */
    private static boolean matchesAnyIgnoreCase(String[] candidates, String value) {
        if (candidates == null || StringUtils.isBlank(value)) {
            return false;
        }
        for (String candidate : candidates) {
            if (candidate.equalsIgnoreCase(value)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the name of the repository to which this parser is attached
     */
    public String getRepoName() {
        return _repoName;
    }

    /**
     * Returns the parser meta data
     */
    public RepositoryParserConfigBO getRepoParserInfo() {
        return _repoParserInfo;
    }

    /**
     * Returns the last error that happened during parsing
     */
    public Exception getLastParseError() {
        return _lastParseError;
    }

    /**
     * Sets the last error
     *
     * @param lastParseError
     *            Exception thrown during parsing
     */
    protected void setLastParseError(Exception lastParseError) {
        _lastParseError = lastParseError;
    }

    /**
     * Checks if this parser is applicable to the specified source and host
     *
     * @param source
     *            Application or service that created the log entry
     * @param host
     *            Computer name or IP address
     * @return True if this parser is to be used, False if not
     */
    public boolean isApplicable(String source, String host) {
        AppliesTo appliesTo = _repoParserInfo.getAppliesTo();

        if (appliesTo == AppliesTo.All) {
            return true;
        }

        if (appliesTo == AppliesTo.AllowFilteredCSV) {
            // Either a source match or a host match is enough
            return matchesAnyIgnoreCase(_sourceCSV, source) || matchesAnyIgnoreCase(_hostCSV, host);
        }

        if (appliesTo == AppliesTo.AllowFilteredRegularExpression) {
            // NOTE(review): unlike the CSV mode above, the host pattern is only consulted when the source pattern
            // could not be evaluated (no source pattern or blank source). A source that fails to match is final.
            // Confirm whether this asymmetry is intended before changing it.
            if (!StringUtils.isBlank(source) && _sourcePattern != null) {
                return _sourcePattern.matcher(source).matches();
            }
            if (!StringUtils.isBlank(host) && _hostPattern != null) {
                return _hostPattern.matcher(host).matches();
            }
        }

        return false;
    }

    /**
     * Checks the validity of our arguments before parsing. Every argument must be non-blank.
     *
     * @param timestamp
     *            Entry timestamp
     * @param source
     *            Entry source
     * @param host
     *            Entry host
     * @param serverity
     *            Entry severity
     * @param message
     *            Entry message
     * @throws IllegalArgumentException
     *             if any of the arguments is blank
     */
    protected void checkParseArguments(String timestamp, String source, String host, String serverity, String message) {
        if (StringUtils.isBlank(timestamp)) {
            throw new IllegalArgumentException("Entry timestamp is blank");
        }
        if (StringUtils.isBlank(source)) {
            throw new IllegalArgumentException("Entry source is blank");
        }
        if (StringUtils.isBlank(host)) {
            throw new IllegalArgumentException("Entry host is blank");
        }
        if (StringUtils.isBlank(serverity)) {
            throw new IllegalArgumentException("Entry serverity is blank");
        }
        if (StringUtils.isBlank(message)) {
            throw new IllegalArgumentException("Entry message is blank");
        }
    }

    /**
     * Parses the timestamp using the formatter supplied by <code>RepositoryEntryMqMessage</code>. Assumes the format
     * is '2011-12-31T23:01:01.123Z'.
     *
     * @param timestamp
     *            Timestamp text to parse
     * @return Date
     * @throws ParseException
     *             if the text cannot be parsed by the formatter
     */
    protected Date parseTimestamp(String timestamp) throws ParseException {
        return _dateFormat.parse(timestamp);
    }

    /**
     * Parses our message to look for keywords. The source, host and severity code are appended as the pseudo
     * keywords <code>s=...</code>, <code>h=...</code> and <code>v=...</code>.
     *
     * @param source
     *            Entry source; added as the <code>s=</code> keyword
     * @param host
     *            Entry host; added as the <code>h=</code> keyword
     * @param severity
     *            Entry severity; its code is added as the <code>v=</code> keyword
     * @param message
     *            Message to parse
     * @return List of keywords
     * @throws IOException
     */
    protected ArrayList<String> parseKeywords(String source, String host, Severity severity, String message)
            throws IOException {
        ArrayList<String> keywords = _tokenizer.tokenize(message, _maxKeywords);

        // Add source, host and severity to keywords so that it is indexed
        // Our index consists of keywords and timestamp.
        // See RepositoryEntryListCriteria for search parameters that use the s, h and v parameters.
        keywords.add(new StringBuilder("s=").append(source).toString());
        keywords.add(new StringBuilder("h=").append(host).toString());
        keywords.add(new StringBuilder("v=").append(severity.toCode()).toString());

        return keywords;
    }

    /**
     * Read the pre-parsed input fields. Convert the JSON format into DBObject that can be stored by mongo.
     *
     * @param fields
     *            Fields pre-parsed by publishers in JSON format.
     * @return Fields as mongo DBObject. If fields is null or empty, then an empty DBObject is returned.
     */
    protected BasicDBObject readPreparsedFields(String fields) {
        if (StringUtils.isBlank(fields)) {
            return new BasicDBObject();
        }

        // Only the patterns are reused between calls.
        if (_timestampPattern == null) {
            _timestampPattern = RepositoryEntryMqMessage.getTimestampPattern();
        }
        if (_longNumberPattern == null) {
            _longNumberPattern = RepositoryEntryMqMessage.getLongNumberPattern();
        }

        // The parser is bound to the text passed to its constructor, so a new one is needed for every entry.
        // Reusing a parser would keep returning the fields of the first entry it was created for.
        MongoJsonParser parser = new MongoJsonParser(fields, _timestampPattern,
                RepositoryEntryMqMessage.TIMESTAMP_FORMAT, _longNumberPattern);
        return (BasicDBObject) parser.parse();
    }

    /**
     * Parse a string for fields. All exceptions are caught and logged. If <code>null</code> is returned, this
     * indicates that the entry should be skipped.
     *
     * @param timetstamp
     *            Time when this log entry was created at the source on the host.
     * @param source
     *            Name of the application or service that created this log entry
     * @param host
     *            Identifies the device on which the source application or service is running. Should be full
     *            qualified domain name, static IP address, host name or dynamic IP address.
     * @param severity
     *            Classifies the importance of the entry. Can be the severity code (0-7) or text.
     * @param preparsedFields
     *            Pre-parsed fields in JSON format
     * @param message
     *            Free-form message that provides information about the event
     * @return <code>RepositoryEntryBO</code> ready for saving to mongoDB. If the entry is to be skipped and not
     *         written to mongoDB, then null is returned
     */
    public abstract RepositoryEntryBO parse(String timetstamp,
                                            String source,
                                            String host,
                                            String severity,
                                            String preparsedFields,
                                            String message);
}