/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flume.serialization; import com.google.common.collect.Lists; import org.apache.flume.Context; import org.apache.flume.Event; import org.apache.flume.annotations.InterfaceAudience; import org.apache.flume.annotations.InterfaceStability; import org.apache.flume.event.EventBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.charset.Charset; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; /** * A deserializer that parses text lines from a file. */ @InterfaceAudience.Private @InterfaceStability.Evolving public class LineDeserializer implements EventDeserializer { private static final Logger logger = LoggerFactory.getLogger (LineDeserializer.class); private final ResettableInputStream in; private final Charset outputCharset; private final int maxLineLength; private volatile boolean isOpen; public static final String OUT_CHARSET_KEY = "outputCharset"; public static final String CHARSET_DFLT = "UTF-8"; public static final String MAXLINE_KEY = "maxLineLength"; public static final int MAXLINE_DFLT = 2048; LineDeserializer(Context context, ResettableInputStream in) { this.in = in; this.outputCharset = Charset.forName( context.getString(OUT_CHARSET_KEY, CHARSET_DFLT)); this.maxLineLength = context.getInteger(MAXLINE_KEY, MAXLINE_DFLT); this.isOpen = true; } /** * Reads a line from a file and returns an event * @return Event containing parsed line * @throws IOException */ @Override public Event readEvent() throws IOException { ensureOpen(); String line = readLine(); if (line == null) { return null; } else { return EventBuilder.withBody(line, outputCharset); } } /** * Batch line read * @param numEvents Maximum number of events to return. * @return List of events containing read lines * @throws IOException */ @Override public List<Event> readEvents(int numEvents) throws IOException { ensureOpen(); List<Event> events = Lists.newLinkedList(); for (int i = 0; i < numEvents; i++) { Event event = readEvent(); if (event != null) { events.add(event); } else { break; } } return events; } @Override public void mark() throws IOException { ensureOpen(); in.mark(); } @Override public void reset() throws IOException { ensureOpen(); in.reset(); } @Override public void close() throws IOException { if (isOpen) { reset(); in.close(); isOpen = false; } } private void ensureOpen() { if (!isOpen) { throw new IllegalStateException("Serializer has been closed"); } } // TODO: consider not returning a final character that is a high surrogate // when truncating private String readLine() throws IOException { StringBuilder sb = new StringBuilder(); int c; int readChars = 0; while ((c = in.readChar()) != -1) { readChars++; // FIXME: support \r\n if (c == '\n') { break; } sb.append((char)c); if (readChars >= maxLineLength) { logger.warn("Line length exceeds max ({}), truncating line!", maxLineLength); break; } } if (readChars > 0) { return sb.toString(); } else { return null; } } public static class Builder implements EventDeserializer.Builder { @Override public EventDeserializer build(Context context, ResettableInputStream in) { return new LineDeserializer(context, in); } } }