/** * Licensed to Cloudera, Inc. under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. Cloudera, Inc. licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.flume.handlers.endtoend; import java.io.IOException; import java.nio.ByteBuffer; import java.util.zip.CRC32; import com.cloudera.flume.agent.FlumeNode; import com.cloudera.flume.conf.Context; import com.cloudera.flume.conf.SinkFactory.SinkDecoBuilder; import com.cloudera.flume.core.Event; import com.cloudera.flume.core.EventImpl; import com.cloudera.flume.core.EventSink; import com.cloudera.flume.core.EventSinkDecorator; import com.cloudera.util.CharEncUtils; import com.cloudera.util.Clock; import com.cloudera.util.NetUtils; import com.google.common.base.Preconditions; /** * This first sends a tag/checksum start message with the initial time stamp as * the checksum. Then it tags each message that passes through with a hash of * the message body. When closed, it sends a end message that has a summarized * hash. (XOR of checksums of the messages). * * Rationale: XORing checksums is commutative and thus tolerant of messages that * come in the wrong order. This is assumed to be at a source and that all * messages pass through this. */ public class AckChecksumInjector<S extends EventSink> extends EventSinkDecorator<S> { public final static String ATTR_ACK_HASH = "AckChecksum"; public final static String ATTR_ACK_TYPE = "AckType"; public final static String ATTR_ACK_TAG = "AckTag"; public final static byte[] CHECKSUM_MSG = "msg".getBytes(CharEncUtils.RAW); public final static byte[] CHECKSUM_START = "beg".getBytes(CharEncUtils.RAW); public final static byte[] CHECKSUM_STOP = "end".getBytes(CharEncUtils.RAW); // TODO (jon) consult with someone to make sure this is reasonable. // Another idea is to use a bloom filter and use its bitmap as a signature. CRC32 chk = new CRC32(); // TODO (jon) switch to a different hash, like Paul Hsieh's SuperFastHash. // (no relation). This crc32 checksum actually only is 32 bits, so I'm // wasting space with 64 bits. long checksum; final byte[] tag; AckListener listener; // send notification to external objects public AckChecksumInjector(S s, byte[] tag, AckListener an) { super(s); // Although always currently called with tag == someString.getBytes(), // cloning is better practice. this.tag = tag.clone(); this.listener = an; checksum = 0; } /** * This is only for testing. */ public AckChecksumInjector(S s) { this(s, (NetUtils.localhost() + Clock.nanos()).getBytes(), new AckListener.Empty()); } /** * Open event starts with a random value that the checksum will be based off * of. * * Use the host and the nanos as a tag at the collector side. */ private Event openEvent() { Event e = new EventImpl(new byte[0]); e.set(ATTR_ACK_TYPE, CHECKSUM_START); checksum = e.getTimestamp(); e.set(ATTR_ACK_HASH, ByteBuffer.allocate(8).putLong(checksum).array()); e.set(ATTR_ACK_TAG, tag); return e; } /** * Close events has the cumulative checksum value */ private Event closeEvent() { Event e = new EventImpl(new byte[0]); e.set(ATTR_ACK_TYPE, CHECKSUM_STOP); e.set(ATTR_ACK_HASH, ByteBuffer.allocate(8).putLong(checksum).array()); e.set(ATTR_ACK_TAG, tag); return e; } /** * Send open event after open */ public void open() throws IOException { super.open(); super.append(openEvent()); // purposely using old append listener.start(new String(tag)); } /** * Send close event before close */ public void close() throws IOException { super.append(closeEvent()); // purposely using old append super.close(); listener.end(new String(tag)); } /** * Calculate the crc based on the body of the message and xor it into the * checksum. */ public void append(Event e) throws IOException { chk.reset(); chk.update(e.getBody()); long curchk = chk.getValue(); checksum ^= curchk; // update but do not send. e.set(ATTR_ACK_TYPE, CHECKSUM_MSG); e.set(ATTR_ACK_TAG, tag); e.set(ATTR_ACK_HASH, ByteBuffer.allocate(8).putLong(curchk).array()); super.append(e); } public static SinkDecoBuilder builder() { return new SinkDecoBuilder() { @Override public EventSinkDecorator<EventSink> build(Context context, String... argv) { Preconditions.checkArgument(argv.length == 0, "usage: ackInjector"); AckListener queuer = FlumeNode.getInstance().getAckChecker() .getAgentAckQueuer(); return new AckChecksumInjector<EventSink>(null, (NetUtils.localhost() + Clock.nanos()).getBytes(), queuer); } }; } }