/** * Licensed to Cloudera, Inc. under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. Cloudera, Inc. licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.flume.handlers.exec; import java.io.IOException; import java.nio.BufferOverflowException; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; import java.util.LinkedList; import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.cloudera.flume.conf.FlumeConfiguration; import com.cloudera.flume.conf.SourceFactory.SourceBuilder; import com.cloudera.flume.core.Attributes; import com.cloudera.flume.core.Event; import com.cloudera.flume.core.EventImpl; import com.cloudera.flume.core.EventSource; import com.cloudera.util.Clock; import com.cloudera.util.InputStreamPipe; import com.google.common.base.Preconditions; /** * Simple process output source. Uses threads to asynchronously read stdout and * stderr in order to ensure that system buffers are drained. * * Events are either returned line-by-line or aggregated into a single event * containing an entire process' output. * * TODO(henry) - expose more of 'exec' parameters to callers, like ENV and CWD * setting. * * This deprecated and slated for removal after more testing of its replacement, * ExecNioSource. */ @Deprecated public class ExecEventSource extends EventSource.Base { // Input sources ReadableByteChannel stdout = null; ReadableByteChannel stderr = null; // Two threads to read from each source ReaderThread readOut = null, readErr = null; // What command to run String command = null; Process proc = null; // Should we restart the script when it finishes? boolean restart = false; // Time to wait if restart is true int period = 0; // Return line by line, or aggregate into a single pair of events? boolean aggregate = false; // Flags used to signal the end of an input stream final AtomicBoolean errFinished = new AtomicBoolean(false); final AtomicBoolean outFinished = new AtomicBoolean(false); final BlockingQueue<EventImpl> eventQueue = new LinkedBlockingQueue<EventImpl>(); static final Logger LOG = LoggerFactory.getLogger(ExecEventSource.class); public static final String A_PROC_SOURCE = "procsource"; public static final String A_EXEC_CMD = "execcmd"; InputStreamPipe stdinISP = null, stderrISP = null; // Used to signal that both reader and err threads have exited CountDownLatch latch = new CountDownLatch(2); ExecEventSource(String command, boolean aggregate, boolean restart, int period) { this.command = command; this.aggregate = aggregate; this.restart = restart; this.period = period; } /** * Polls an input and formats lines read as events, places them on the event * queue. */ class ReaderThread extends Thread { ReadableByteChannel input = null; volatile boolean shutdown = false; String tag; AtomicBoolean signalDone; List<ByteBuffer> buffers = new LinkedList<ByteBuffer>(); ReaderThread(ReadableByteChannel input, String tag, AtomicBoolean signal) { super("ReaderThread (" + command + "-" + tag + ")"); Preconditions.checkArgument(input != null); Preconditions.checkArgument(signal != null); this.input = input; this.tag = tag; this.signalDone = signal; } /** * Blocks on a line of input to be available from an input stream; formats * as an event and then places it on a queue. */ public void run() { int maxEventSize = (int) FlumeConfiguration.get().getEventMaxSizeBytes(); // Aggregate events are copied twice, // individual events are copied three times (because they are split // from the original buffer) try { ByteBuffer in = ByteBuffer.allocate(32); ByteBuffer buf = ByteBuffer.allocate(maxEventSize); while (!shutdown) { in.clear(); // If interrupted, this throws an IOException int read = input.read(in); if (read == 0) { // don't burn cpu if nothing is read. Clock.sleep(100); continue; } if (read != -1) { if (!aggregate) { // Search for a '\n' in.rewind(); int lastFound = -1; for (int i = 0; i < read; ++i) { if (in.array()[i] == (byte) '\n') { // Take a shallow copy of the buffer ByteBuffer prefix = in.slice(); // Contract the copy to a single line of input prefix.limit(i); prefix.position(lastFound + 1); // Copy to the output buf.put(prefix); // Reset the position of the buffer to 0 and the limit to the // the end of the last write buf.flip(); // Offer as an event ByteBuffer b = ByteBuffer.allocate(buf.limit()); b.put(buf); EventImpl e = new EventImpl(b.array()); Attributes.setString(e, A_PROC_SOURCE, tag); Attributes.setString(e, A_EXEC_CMD, command); Attributes.setString(e, Event.A_SERVICE, "exec"); eventQueue.put(e); // Empty out the event buffer buf.clear(); lastFound = i; } } // After we have added all the '\n', we must fill the outgoing // buffer with what's remaining if (read != 0) { in.position(lastFound + 1); buf.put(in); } } else { if (read != 0) { buffers.add(in); in = ByteBuffer.allocate(32); } } } else { shutdown = true; } } } catch (InterruptedException e) { if (!shutdown) { LOG.warn(tag + " ReaderThread received " + "unexpected InterruptedException", e); } } catch (BufferOverflowException b) { // TODO: offer one full buffer? LOG.warn("Event was too large for buffer", b); } catch (IOException e) { if (!shutdown) { LOG.warn(tag + " ReaderThread received unexpected IOException", e); } } finally { // Make sure we offer as much as we can of the aggregate event - even // if there was an exception if (aggregate && buffers.size() > 0) { int total = 0; for (ByteBuffer b : buffers) { total += b.position(); } ByteBuffer eventBuf = ByteBuffer.allocate(total); for (ByteBuffer b : buffers) { b.flip(); eventBuf.put(b); } buffers.clear(); EventImpl e = new EventImpl(eventBuf.array()); Attributes.setString(e, A_PROC_SOURCE, tag); Attributes.setString(e, A_EXEC_CMD, command); Attributes.setString(e, Event.A_SERVICE, "exec"); try { eventQueue.put(e); } catch (InterruptedException i) { LOG.warn("Unable to append exec event to queue due " + "to InterruptedException", i); } } try { input.close(); } catch (IOException i) { LOG.warn("Failed to close input stream in ExecEventSource", i); } signalDone.set(true); latch.countDown(); } } void shutdown() { this.shutdown = true; if (this.input != null) { this.interrupt(); } } } public void close() throws IOException { // Note that this does not guarantee that any further next() calls will // return the EOF null that signals the process shut down. readOut.shutdown(); readErr.shutdown(); boolean latched = false; // Want to make sure that both threads have exited before we kill the // process try { latched = latch.await(5000, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { LOG.debug("Waiting for exec thread exit was interrupted", e); } stdinISP.shutdown(); stderrISP.shutdown(); if (proc != null) { proc.destroy(); proc = null; } if (!latched) { throw new IOException("Timeout waiting for exec threads to exit"); } } /** * Blocks on either output from stdout / stderr or process exit (at which * point it throws an exception) * * @return an Event with two tags: the stream which produced the line */ public Event next() throws IOException { EventImpl line = null; while (true) { try { line = eventQueue.poll(1000, TimeUnit.MILLISECONDS); if (line == null) { if (errFinished.get() && outFinished.get()) { // We may have missed events between waking up and testing line = eventQueue.poll(); if (line != null) { updateEventProcessingStats(line); return line; } if (restart) { close(); Thread.sleep(period); open(); } else { return null; } } } else { updateEventProcessingStats(line); return line; } } catch (InterruptedException e) { throw new RuntimeException("ExecEventSource was interrupted - " + e); } } } /** * Starts a Process and two threads to read from stdout / stderr */ public void open() throws IOException { if (proc != null) { throw new IllegalStateException("Tried to open exec process twice"); } latch = new CountDownLatch(2); outFinished.set(false); errFinished.set(false); proc = Runtime.getRuntime().exec(command); try { stdinISP = new InputStreamPipe(proc.getInputStream()); stderrISP = new InputStreamPipe(proc.getErrorStream()); stdout = (ReadableByteChannel) stdinISP.getChannel(); stderr = (ReadableByteChannel) stderrISP.getChannel(); } catch (IOException e) { proc.getInputStream().close(); proc.getErrorStream().close(); proc.destroy(); proc = null; throw e; } readOut = new ReaderThread(stdout, "STDOUT", outFinished); readErr = new ReaderThread(stderr, "STDERR", errFinished); stdinISP.start(); stderrISP.start(); readOut.start(); readErr.start(); } protected static class Builder extends SourceBuilder { /** * Takes 1-4 arguments - the command to run, whether to aggregate each * output as a single event, whether to restart after one execution is * finished, and how often if so to restart. */ @Override public EventSource build(String... argv) { Preconditions.checkArgument(argv.length >= 1 && argv.length <= 4, "exec(\"cmdline \"[,aggregate [,restart [,period]]]], )"); String command = argv[0]; boolean aggregate = false; boolean restart = false; int period = 0; if (argv.length >= 2) { aggregate = Boolean.parseBoolean(argv[1]); } if (argv.length >= 3) { restart = Boolean.parseBoolean(argv[2]); } if (argv.length >= 4) { period = Integer.parseInt(argv[3]); } return new ExecEventSource(command, aggregate, restart, period); } } /** * This builder creates a source that periodically execs a program and takes * the entire output as the body of a event. It takes two arguments - the * command to run, and a time period to sleep in millis before executing * again. */ public static SourceBuilder buildPeriodic() { return new SourceBuilder() { @Override public EventSource build(String... argv) { Preconditions.checkArgument(argv.length == 2, "execPeriodic(\"cmdline \",period)"); String command = argv[0]; boolean aggregate = true; boolean restart = true; int period = Integer.parseInt(argv[1]); return new ExecEventSource(command, aggregate, restart, period); } }; } /** * This builder creates a source that execs a long running program and takes * each line of input as the body of an event. It takes one arguemnt, the * command to run. If the command exits, the exec source returns null signally * end of records. */ public static SourceBuilder buildStream() { return new SourceBuilder() { @Override public EventSource build(String... argv) { Preconditions.checkArgument(argv.length == 1, "execStream(\"cmdline \")"); String command = argv[0]; boolean aggregate = false; boolean restart = false; int period = 0; return new ExecEventSource(command, aggregate, restart, period); } }; } public static SourceBuilder builder() { return new Builder(); } }