/** * Licensed to Cloudera, Inc. under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. Cloudera, Inc. licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.flume.agent; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.List; import java.util.Map; import java.util.Set; import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.cloudera.flume.core.Attributes; import com.cloudera.flume.handlers.endtoend.AckListener; import com.cloudera.flume.reporter.ReportEvent; import com.cloudera.flume.reporter.Reportable; import com.cloudera.util.Clock; import com.google.common.base.Preconditions; /** * This class handles ack checking against the master. * * Here's how acks work. Agents attach extra checksuming/hash data to messages. * when these are received by the collector, and the values match, an ack for * the group's label is sent to the master. Meanwhile, the agent has a list of * acks it is expecting and periodically checks the master to see which have * arrived. * * Potential problems: the master will end up maintaining alot of acks. possible * solutions: 0 ) punt. (which is what this first cut will do) 1) Force an * order. This ends up working alot like sequence numbers then but bounds state * 2) Time out values acks. Worst case the acks get missed and the data gets * sent again. 3) Send notification that ack is received. This allows memory to * be reclaimed proactively. 4) Stale acks get consolidated into larger ack * groups. (latency is less important if we have alot of stale stuff, throughput * more important) * * TODO (jon) Rename to SenderAckManager * * TODO (jon) decouple acks from the WAL */ public class WALAckManager implements Reportable { private static final String A_RETRANSMIT_TIMEOUT = "retransmitTimeout"; private static final String A_PENDING_ACK_INFO = "pendingAckInfo"; static final Logger LOG = LoggerFactory.getLogger(WALAckManager.class); // a pending set of acks final ConcurrentHashMap<String, Long> pending = new ConcurrentHashMap<String, Long>(); MasterRPC client; final AckListener queuer = new PendingAckQueuer(); final AckListener listener; final long retransmitTime; WALAckManager(MasterRPC c, AckListener listener, long ackRetransmit) { Preconditions.checkNotNull(c); Preconditions.checkNotNull(listener); this.client = c; this.listener = listener; this.retransmitTime = ackRetransmit; } /** * This is a handler for the ChecksumAckInjector to put tags and checksums it * expects. before it can delete things. */ class PendingAckQueuer extends AckListener.Empty { @Override public void end(String group) throws IOException { long now = Clock.unixTime(); LOG.info("Ack for " + group + " is queued to be checked"); synchronized (pending) { pending.put(group, now); } } }; public AckListener getAgentAckQueuer() { return queuer; } /** * This contacts the master to find if any of the pending acks are completed, */ synchronized public void checkAcks() { LOG.debug("agent acks waiting for master: " + pending); // TODO (make this a batch operation with only one RPC call) List<String> done = new ArrayList<String>(); for (String k : pending.keySet()) { try { boolean acked = client.checkAck(k); if (acked) { done.add(k); } } catch (IOException e) { // TODO (jon) there is a potential inconsistency here if master comms // fail (but this is recovered when retry happens). LOG.error("Master connection exception", e); } } for (String k : done) { try { listener.end(k); pending.remove(k); LOG.debug("removed ack tag from agent's ack queue: " + k); } catch (IOException e) { LOG.error("problem notifying agent pending ack queue", e); } } } /** * This checks the pending table to see if any acks have been idle for too * long and need to be retried. */ synchronized void checkRetry() { long now = Clock.unixTime(); List<String> retried = new ArrayList<String>(); for (Entry<String, Long> ack : pending.entrySet()) { if (now - ack.getValue() > retransmitTime) { // retransmit.. enqueue to retransimt.... move it back to agent dir.. // (lame but good enough for now) try { LOG.info("Retransmitting " + ack.getKey()); listener.expired(ack.getKey()); retried.add(ack.getKey()); } catch (IOException e) { LOG.error("problem notifying agent pending ack queue", e); } } } // update the time of entries to retry for (String key : retried) { pending.put(key, now); } } synchronized void forceRetry() { long now = Clock.unixTime(); List<String> retried = new ArrayList<String>(); for (Entry<String, Long> ack : pending.entrySet()) { // retransmit.. enqueue to retransimt.... move it back to agent dir.. // (lame but good enough for now) try { LOG.info("Retransmitting " + ack.getKey()); listener.expired(ack.getKey()); retried.add(ack.getKey()); } catch (IOException e) { LOG.error("problem notifying agent pending ack queue", e); } } // update the time of entries to retry for (String key : retried) { pending.put(key, now); } } @Override public String getName() { return "AgentWALAckManager"; } @Override synchronized public ReportEvent getReport() { ReportEvent rpt = new ReportEvent(getName()); Attributes.setLong(rpt, A_RETRANSMIT_TIMEOUT, retransmitTime); StringBuilder pendingAcks = new StringBuilder(); for (Map.Entry<String, Long> e : pending.entrySet()) { pendingAcks.append(e.getKey()); pendingAcks.append(":"); pendingAcks.append(new Date(e.getValue()).toString()); pendingAcks.append(", "); } Attributes.setString(rpt, A_PENDING_ACK_INFO, pendingAcks.toString()); return rpt; } public Set<String> getPendingAckTags() { return Collections.unmodifiableSet(pending.keySet()); } }