/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.flume.agent.durability;
import java.io.IOException;
import com.cloudera.flume.conf.Context;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.core.EventSource;
import com.cloudera.flume.handlers.endtoend.AckListener;
import com.cloudera.flume.handlers.rolling.RollSink;
import com.cloudera.flume.handlers.rolling.RollTrigger;
import com.cloudera.flume.handlers.rolling.Tagger;
import com.cloudera.flume.reporter.Reportable;
/**
* This is the interface for providing durability of events until the reach the
* permanent store. This is intended fo use as a write ahead log option that
* requires an ack before data can be eliminated. Different implementations can
* be encapsulated by this interface.
*
* Implementations of this interface must be thread safe -- it may be called
* from different threads.
*/
public interface WALManager extends Reportable, WALCompletionNotifier {
/**
* These are the states a batch of events can be in.
*
* IMPORT means external to the main flow of data.
*
* WRITING means that there is some entity writing to the set of data.
*
* LOGGED means that the data is durable on the node.
*
* SENDING means that data is durable and is being sent somewhere downstream
*
* SENT means that data has been received by the next downstream node.
*
* E2EACKED means that data has been received and processed by the end point
* node.
*
* ERROR means that the set of data contains an error.
*/
enum State {
IMPORT, WRITING, LOGGED, SENDING, SENT, E2EACKED, ERROR
};
final public static String A_IMPORTED = "imported";
final public static String A_WRITING = "writing";
final public static String A_LOGGED = "logged";
final public static String A_SENDING = "sending";
final public static String A_SENT = "sent";
final public static String A_ACKED = "acked";
final public static String A_RETRY = "retry";
final public static String A_ERROR = "error";
final public static String A_RECOVERED = "recovered";
final public static String A_IN_LOGGED = "loggedQ";
final public static String A_IN_SENT = "sentQ";
/**
* Open the WAL manager to enable reads and writes.
*/
public void open() throws IOException;
/**
* Closes the WAL manager for reads and writes
*/
public void stopDrains() throws IOException;
/**
* Recover data that is not in E2EACKED or RROR state assuming that data
* transmission has failed. This is called to recover durable data and retry
* sends after a crash.
*/
public void recover() throws IOException;
/**
* Get a new sink to write events to. Events are durably written before being
* transmitted. The tagger creates a unique name for the batch of events.
*/
public EventSink newWritingSink(Tagger t) throws IOException;
/**
* Gets an unacked batch. Read from the WAL by getting event sources from the
* WAL manager.
*/
public EventSource getUnackedSource() throws IOException;
/**
* Get a logical sink that breaks stream of data into mini batches
* transparently to the user. When the RollTrigger's predicate condition is
* met, the rolling sink closes the previous sink and calls newWritingSink to
* roll to a new writer.
*
* An ackQueue listener reference is passed to provide callback hooks that are
* called on different rolling sink transitions. (open, error, close).
*/
public RollSink getAckingSink(Context ctx, RollTrigger t,
final AckListener ackQueue, long checkMs) throws IOException;
/**
* Get a source that logically provides a single stream. This should call
* getUnackedSource() underneath to get another event batch.
*/
public EventSource getEventSource() throws IOException;
/**
* This imports "external data" into the WAL. Currently this is used for
* testing.
*
* TODO (jon) This interface is not quite right -- it should take a file and a
* format as an arg. This will be revisited when we revist the Log4J, Log4Net,
* and avro serialization integration.
*/
public void importData() throws IOException;
/**
* Returns true if the WAL has no logged entries. This is useful for
* completeness checking. (wait for after this is true before closing).
*/
boolean isEmpty();
}