/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.handlers.syslog;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.nio.charset.Charset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.handlers.text.EventExtractException;
import com.cloudera.flume.handlers.text.Extractor;
import com.google.common.base.Preconditions;
/**
* This uses the Extractor interface to take a DataInputStream and extract Events
* out of it. This, in combination with removing syscalls to unixtime when
* instantiating EventImpls, significantly improved performance when compared to
* the previous regex based approach.
*/
public class SyslogWireExtractor implements Extractor, SyslogConsts {
  static final Logger LOG =
      LoggerFactory.getLogger(SyslogWireExtractor.class);

  /**
   * Charset used for the decimal priority digits on the wire. Fixed to
   * US-ASCII so the output does not depend on the platform default charset.
   */
  private static final Charset PRIO_CHARSET = Charset.forName("US-ASCII");

  /** Shared instance backing the static convenience methods. */
  static SyslogWireExtractor format = new SyslogWireExtractor();

  /**
   * Computes the syslog priority value ({@code facility * 8 + severity}) for
   * an event.
   *
   * The facility is read from the event's {@code SYSLOG_FACILITY} attribute
   * and the severity from its {@code SYSLOG_SEVERITY} attribute. Each is only
   * used when it is present and exactly one byte long.
   *
   * @param e event to compute the priority for
   * @return the combined syslog priority value
   */
  static int calcSyslogPrio(Event e) {
    int slPrio;
    byte[] fac = e.get(SYSLOG_FACILITY);
    if (fac == null || fac.length != 1) {
      // No usable facility attribute: fall back to facility code 1
      // (user-level messages in RFC 3164 numbering).
      slPrio = 1 * 8;
    } else {
      slPrio = fac[0] * 8;
    }

    byte[] sev = e.get(SYSLOG_SEVERITY);
    if (sev == null || sev.length != 1) {
      // No usable severity attribute: derive it from the event's priority
      // via the PRIO2SEVERITY lookup table.
      slPrio += PRIO2SEVERITY[e.getPriority().ordinal()];
    } else {
      slPrio += sev[0];
    }
    return slPrio;
  }

  /**
   * Serializes an event as a syslog wire entry:
   * {@code '<' priority '>' body '\n'}.
   *
   * This is a version that removes unneeded character encoding and decoding
   * steps: the body bytes are copied verbatim and only the priority number is
   * encoded (as US-ASCII decimal digits).
   *
   * @param e event to serialize
   * @return the wire representation of the event
   */
  public byte[] toBytes(Event e) {
    int slPrio = calcSyslogPrio(e);
    byte[] prioDigits = Integer.toString(slPrio).getBytes(PRIO_CHARSET);
    byte[] body = e.getBody();

    // The (byte[], int, int) overload of ByteArrayOutputStream.write does not
    // declare IOException, so no exception handling is needed here.
    ByteArrayOutputStream out =
        new ByteArrayOutputStream(prioDigits.length + body.length + 3);
    out.write('<');
    out.write(prioDigits, 0, prioDigits.length);
    out.write('>');
    out.write(body, 0, body.length);
    out.write('\n');
    return out.toByteArray();
  }

  /**
   * Static convenience wrapper around {@link #extract(DataInputStream)} using
   * the shared {@code format} instance.
   *
   * @param in stream to read one syslog wire entry from
   * @return the extracted event, or null if the stream ended before any
   *         entry data was read
   * @throws EventExtractException if the entry is malformed or reading fails
   */
  public static Event extractEvent(DataInputStream in)
      throws EventExtractException {
    return format.extract(in);
  }

  /** Parser states used by {@link #extract(DataInputStream)}. */
  enum Mode {
    START, PRIO, DATA, ERR
  }

  /**
   * Builds an event from a parsed priority and body, storing the decoded
   * facility ({@code pri / 8}) and severity ({@code pri % 8}) as single-byte
   * attributes.
   *
   * @param prio decimal digits of the syslog priority
   * @param baos accumulated body bytes
   * @return the new event
   * @throws NumberFormatException if {@code prio} is empty or does not fit in
   *         an int
   */
  static Event buildEvent(StringBuilder prio, ByteArrayOutputStream baos) {
    int pri = Integer.parseInt(prio.toString());
    byte[] facility = { (byte) (pri / 8) };
    byte[] sev = { (byte) (pri % 8) };

    // Benchmark notes kept from the original author: multi-argument EventImpl
    // constructor variants ran in roughly 15s, while this single-argument
    // form took roughly 25-27s (attributed to system calls). The
    // single-argument form was kept deliberately, picking correctness over
    // efficiency.
    Event e = new EventImpl(baos.toByteArray());
    e.set(SYSLOG_FACILITY, facility);
    e.set(SYSLOG_SEVERITY, sev);
    return e;
  }

  /**
   * Wraps {@link #buildEvent} so that an unparseable priority (empty, or too
   * large for an int) is reported as an extraction failure instead of
   * escaping as an unchecked NumberFormatException.
   */
  private static Event buildEventOrFail(StringBuilder prio,
      ByteArrayOutputStream baos) throws EventExtractException {
    try {
      return buildEvent(prio, baos);
    } catch (NumberFormatException nfe) {
      throw new EventExtractException(
          "Failed to extract syslog wire entry: invalid priority '" + prio
              + "'");
    }
  }

  /**
   * This is basically a state machine implementation of the extract function.
   * It uses a DataInputStream instead of a string to avoid the cost of string
   * and character encoding.
   *
   * One entry of the form {@code '<' digits '>' body '\n'} is consumed per
   * call. On a malformed entry the rest of the line (up to and including the
   * newline, or end of stream) is consumed before the failure is reported.
   *
   * @param in stream to read from; must not be null
   * @return the extracted event; an event built from the partial body if the
   *         stream ends while reading the body; or null if the stream ends
   *         before the body is reached without a format error
   * @throws EventExtractException if the entry is malformed, the priority
   *         cannot be parsed, or an I/O error occurs
   */
  public Event extract(DataInputStream in) throws EventExtractException {
    Preconditions.checkNotNull(in);
    Mode m = Mode.START;
    StringBuilder prio = new StringBuilder();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try {
      while (true) {
        byte b = in.readByte();
        switch (m) {
        case START:
          // An entry must open with '<'.
          m = (b == '<') ? Mode.PRIO : Mode.ERR;
          break;
        case PRIO:
          if (b == '>') {
            m = Mode.DATA;
          } else if (b >= '0' && b <= '9') {
            prio.append((char) b); // stay in PRIO mode
          } else {
            m = Mode.ERR;
          }
          break;
        case DATA:
          if (b == '\n') {
            return buildEventOrFail(prio, baos);
          }
          baos.write(b);
          break;
        case ERR:
          // read until we get to a \n so the bad line is fully consumed
          if (b == '\n') {
            throw new EventExtractException(
                "Failed to extract syslog wire entry");
          }
          // stay in Mode.ERR;
          break;
        }
      }
    } catch (EOFException e) {
      switch (m) {
      case ERR:
        // end of stream but was in error state? Throw extraction exception
        throw new EventExtractException("Failed to extract syslog wire entry");
      case DATA:
        // end of stream but had data, return it.
        return buildEventOrFail(prio, baos);
      default:
        // if not in error state just return done;
        return null;
      }
    } catch (IOException e) {
      throw new EventExtractException("Failed to extract syslog wire entry: "
          + e.getMessage());
    }
  }

  /**
   * Static convenience wrapper around {@link #toBytes(Event)} using the
   * shared {@code format} instance.
   *
   * @param e event to serialize
   * @return the wire representation of the event
   */
  public static byte[] formatEventToBytes(Event e) {
    return format.toBytes(e);
  }
}