/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
 */
package org.voltdb.importclient.kinesis;

import java.math.BigInteger;
import java.net.URI;
import java.util.Arrays;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.lang.math.NumberUtils;

import com.amazonaws.AmazonClientException;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.internal.StaticCredentialsProvider;
import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException;
import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer;
import com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessor;
import com.amazonaws.services.kinesis.clientlibrary.interfaces.v2.IRecordProcessorFactory;
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream;
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration;
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker;
import com.amazonaws.services.kinesis.clientlibrary.types.InitializationInput;
import com.amazonaws.services.kinesis.clientlibrary.types.ProcessRecordsInput;
import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownInput;
import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason;
import com.amazonaws.services.kinesis.model.Record;

import org.voltcore.logging.Level;
import org.voltdb.client.ClientResponse;
import org.voltdb.client.ProcedureCallback;
import org.voltdb.importer.AbstractImporter;
import org.voltdb.importer.Invocation;
import org.voltdb.importer.formatter.FormatException;
import org.voltdb.importer.formatter.Formatter;
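/*
 * Illustrative only: this importer is normally enabled from the VoltDB
 * deployment file, with its properties mapped onto KinesisStreamImporterConfig
 * (not shown in this file). The property names in the sketch below are
 * assumptions for illustration, inferred from the config getters used in this
 * class; they are not verified against the real config parser:
 *
 *   <import>
 *     <configuration type="kinesis" format="csv" enabled="true">
 *       <property name="app.name">MyKinesisApp</property>
 *       <property name="stream.name">mystream</property>
 *       <property name="region">us-east-1</property>
 *       <property name="access.key">...</property>
 *       <property name="secret.key">...</property>
 *       <property name="procedure">MYTABLE.insert</property>
 *     </configuration>
 *   </import>
 */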
/**
 * Importer implementation for Kinesis streams. One instance of this importer
 * exists per stream, per shard, per application.
 */
public class KinesisStreamImporter extends AbstractImporter {

    private KinesisStreamImporterConfig m_config;
    private AtomicLong m_submitCount = new AtomicLong(0);
    private AtomicLong m_cbcnt = new AtomicLong(0);
    private Worker m_worker;

    public KinesisStreamImporter(KinesisStreamImporterConfig config) {
        m_config = config;
    }

    @Override
    public URI getResourceID() {
        return m_config.getResourceID();
    }

    @Override
    public void accept() {
        info(null, "Starting data stream fetcher for " + m_config.getResourceID().toString());

        try {
            KinesisClientLibConfiguration kclConfig = new KinesisClientLibConfiguration(m_config.getAppName(),
                    m_config.getStreamName(), credentials(), UUID.randomUUID().toString());
            kclConfig.withRegionName(m_config.getRegion()).withMaxRecords((int) m_config.getMaxReadBatchSize())
                    .withInitialPositionInStream(InitialPositionInStream.TRIM_HORIZON)
                    .withIdleTimeBetweenReadsInMillis(m_config.getIdleTimeBetweenReads())
                    .withTaskBackoffTimeMillis(m_config.getTaskBackoffTimeMillis()).withKinesisClientConfig(
                            KinesisStreamImporterConfig.getClientConfigWithUserAgent(m_config.getAppName()));

            m_worker = new Worker.Builder().recordProcessorFactory(new RecordProcessorFactory()).config(kclConfig)
                    .build();

            m_worker.run();
        } catch (RuntimeException e) {
            // The AWS client library silences all exceptions except IllegalArgumentException,
            // which surfaces as a RuntimeException, so that is all we can catch here.
            rateLimitedLog(Level.ERROR, e, "Error in Kinesis stream importer %s", m_config.getResourceID());
            if (null != m_worker) {
                m_worker.shutdown();
            }
        }

        info(null, "Data stream fetcher stopped for %s. Callback Rcvd: %d. Submitted: %d",
                m_config.getResourceID().toString(), m_cbcnt.get(), m_submitCount.get());
    }

    @Override
    public void stop() {
        if (null != m_worker) {
            m_worker.shutdown();
        }
    }

    @Override
    public String getName() {
        return KinesisStreamImporterConfig.APP_NAME;
    }
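    /*
     * A sketch only: this importer always signs requests with the static key
     * pair from its configuration (see credentials() below). If key management
     * through the environment were preferred, the AWS SDK also ships a
     * standard lookup chain, e.g.
     *
     *     return new DefaultAWSCredentialsProviderChain();
     *
     * (com.amazonaws.auth.DefaultAWSCredentialsProviderChain), which consults
     * environment variables, Java system properties, the shared credentials
     * file, and EC2 instance profile credentials in turn. That alternative is
     * offered for illustration; it is not what this class does.
     */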
    /**
     * Create an AWSCredentialsProvider with the access key id and secret key for
     * the user. The user must have read/write permissions for the Kinesis stream
     * and DynamoDB.
     *
     * @return AWSCredentialsProvider Provides credentials used to sign AWS requests
     * @throws AmazonClientException
     */
    public AWSCredentialsProvider credentials() throws AmazonClientException {
        return new StaticCredentialsProvider(new BasicAWSCredentials(m_config.getAccessKey(), m_config.getSecretKey()));
    }

    private class RecordProcessorFactory implements IRecordProcessorFactory {
        @Override
        public IRecordProcessor createProcessor() {
            return new StreamConsumer();
        }
    }

    private class StreamConsumer implements IRecordProcessor {
        private String m_shardId;
        private Formatter m_formatter;
        Gap m_gapTracker = new Gap(Integer.getInteger("KINESIS_IMPORT_GAP_LEAD", 32768));
        private BigInteger m_lastFetchCommittedSequenceNumber = BigInteger.ZERO;

        public StreamConsumer() {
        }

        @SuppressWarnings("unchecked")
        @Override
        public void initialize(InitializationInput initInput) {
            m_shardId = initInput.getShardId();
            m_formatter = m_config.getFormatterBuilder().create();
            String seq = initInput.getExtendedSequenceNumber().getSequenceNumber();
            if (NumberUtils.isDigits(seq)) {
                m_lastFetchCommittedSequenceNumber = new BigInteger(seq);
            }
            info(null, "Initializing Kinesis stream processing for shard %s, last committed on: %s", m_shardId, seq);
        }
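        /*
         * Each fetched record flows through the pipeline implemented below (a
         * sketch of the existing flow, not additional behavior): the raw
         * payload from Record.getData() is decoded by the configured Formatter
         * into a parameter array, wrapped in an Invocation for the configured
         * stored procedure, and submitted asynchronously; the Gap tracker
         * records the in-flight offset and its Kinesis sequence number so a
         * safe checkpoint can be computed once callbacks arrive. For example
         * (assuming the CSV formatter and a single-column insert procedure,
         * both assumptions for the sketch), a payload of "1,foo" would become
         * the invocation procedure("1", "foo").
         */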
        @Override
        public void processRecords(ProcessRecordsInput records) {
            if (records.getRecords().isEmpty()) {
                return;
            }

            BigInteger seq = BigInteger.ZERO;
            m_gapTracker.resetTo();
            int offset = 0;
            for (Record record : records.getRecords()) {
                m_submitCount.incrementAndGet();
                BigInteger seqNum = new BigInteger(record.getSequenceNumber());

                // Skip records already covered by the last committed checkpoint.
                if (seqNum.compareTo(m_lastFetchCommittedSequenceNumber) < 0) {
                    continue;
                }

                if (isDebugEnabled()) {
                    debug(null, "last committed seq: %s, current seq: %s shard %s",
                            m_lastFetchCommittedSequenceNumber.toString(), record.getSequenceNumber(), m_shardId);
                    if (seqNum.compareTo(seq) < 0) {
                        debug(null, "Record %d is out of sequence on shard %s", seqNum, m_shardId);
                    } else {
                        seq = seqNum;
                    }
                }

                Object[] params = null;
                try {
                    params = m_formatter.transform(record.getData());
                    Invocation invocation = new Invocation(m_config.getProcedure(), params);
                    StreamProcedureCallback cb = new StreamProcedureCallback(m_gapTracker, offset, seqNum, m_cbcnt);
                    if (!callProcedure(invocation, cb)) {
                        rateLimitedLog(Level.ERROR, null, "Call procedure error on shard %s", m_shardId);
                        // Mark the record as handled so the checkpoint is not stalled by the failure.
                        m_gapTracker.commit(offset, seqNum);
                    }
                } catch (FormatException e) {
                    rateLimitedLog(Level.ERROR, e, "Data error on shard %s, data: %s", m_shardId,
                            Arrays.toString(params));
                    m_gapTracker.commit(offset, seqNum);
                }

                if (!shouldRun()) {
                    break;
                }
                offset++;
            }

            commitCheckPoint(records.getCheckpointer());
        }

        @Override
        public void shutdown(ShutdownInput shutDownInput) {
            if (ShutdownReason.TERMINATE.equals(shutDownInput.getShutdownReason())) {
                // The shard may have been split or merged; checkpoint one last time.
                commitCheckPoint(shutDownInput.getCheckpointer());
            }
        }

        /**
         * Record a checkpoint in DynamoDB so that records prior to this point are
         * known to have been processed by this application.
         *
         * @param checkpointer The checkpoint processor
         */
        private void commitCheckPoint(IRecordProcessorCheckpointer checkpointer) {
            int retries = 1;
            while (retries < 4 && shouldRun()) {
                final BigInteger safe = m_gapTracker.getSafeCommitPoint();
                if (safe == null) {
                    break;
                }
                if (isDebugEnabled()) {
                    debug(null, "New checkpoint %s, last checkpoint %s on shard %s", safe.toString(),
                            m_lastFetchCommittedSequenceNumber.toString(), m_shardId);
                }
                if (safe.compareTo(m_lastFetchCommittedSequenceNumber) > 0) {
                    if (isDebugEnabled()) {
                        debug(null, "Trying to checkpoint %s on shard %s", safe.toString(), m_shardId);
                    }
                    try {
                        checkpointer.checkpoint(safe.toString());
                        m_lastFetchCommittedSequenceNumber = safe;
                        break;
                    } catch (ThrottlingException e) {
                        rateLimitedLog(Level.INFO, null, "Checkpoint attempt %d on shard %s", retries, m_shardId);
                    } catch (Exception e) {
                        // The checkpoint was committed on another node.
                        rateLimitedLog(Level.WARN, e, "Skipping checkpoint %s on shard %s. Reason: %s",
                                safe.toString(), m_shardId, e.getMessage());
                        break;
                    }
                }
                // Back off before the next attempt: 200 ms, then 400 ms, then 600 ms.
                backoffSleep(retries++);
            }
        }
    }

    private void backoffSleep(int failedCount) {
        try {
            Thread.sleep(200 * failedCount);
        } catch (InterruptedException e) {
            // Do not propagate the exception, since the AWS client library would swallow it.
            rateLimitedLog(Level.WARN, e, "Interrupted sleep when checkpointing.");
        }
    }

    private static final class StreamProcedureCallback implements ProcedureCallback {
        private final Gap m_tracker;
        private final int m_offset;
        private final BigInteger m_seq;
        private final AtomicLong m_cbcnt;

        public StreamProcedureCallback(final Gap tracker, final int offset, BigInteger seq, AtomicLong cbcnt) {
            m_tracker = tracker;
            m_offset = offset;
            m_seq = seq;
            m_cbcnt = cbcnt;
            // Register the in-flight record with the tracker at construction time.
            m_tracker.submit(m_offset, m_seq);
        }

        @Override
        public void clientCallback(ClientResponse response) throws Exception {
            m_tracker.commit(m_offset, m_seq);
            m_cbcnt.incrementAndGet();
        }
    }
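    /*
     * An illustrative trace of the Gap tracker below, assuming a leeway of 4
     * and Kinesis sequence numbers s0..s3 (values chosen for the sketch):
     *
     *   submit(0,s0)..submit(3,s3) -> base offset c = 0, highest submitted s = 3
     *   commit(0, s0) -> no-op: commit requires offset > c, so the base offset
     *                    itself is never re-committed
     *   commit(2, s2) -> c stays 0, offset 1 is still in flight
     *   commit(1, s1) -> offsets 0..2 are now contiguous, c advances to 2
     *
     * getSafeCommitPoint() then returns checkpoints[2], i.e. s2: the sequence
     * number at the highest offset up to which the batch has been contiguously
     * acknowledged, which is what commitCheckPoint() hands to the KCL.
     */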
    /**
     * This class takes an array of checkpoint objects and uses their indices within
     * the array as offsets to keep track of the safe commit point. The safe commit
     * offset is used to look up the commit point for the target system.
     */
    final class Gap {
        long c = 0;
        long s = -1L;
        long[] lag;
        final int lagLen;
        BigInteger[] checkpoints;
        long offer = -1L;
        private final long gapTrackerCheckMaxTimeMs = 2_000;

        Gap(int leeway) {
            if (leeway <= 0) {
                throw new IllegalArgumentException("leeway is zero or negative");
            }
            lagLen = leeway;
            checkpoints = new BigInteger[(int) m_config.getMaxReadBatchSize()];
        }

        synchronized void resetTo() {
            // Reset to take new checkpoints; offsets from before the checkpoint commit
            // are no longer relevant. An offset is the index of a record within the
            // fetched batch.
            Arrays.fill(checkpoints, null);
            c = 0;
            s = -1L;
            lag = new long[lagLen];
        }

        synchronized void submit(long offset, BigInteger v) {
            if (!validateOffset((int) offset) || v == null || checkpoints[(int) offset] != null) {
                return;
            }
            if (s == -1L && offset >= 0) {
                lag[idx(offset)] = c = s = offset;
            }
            // If the submitter has run too far ahead of the commit point, wait for
            // commits to catch up (bounded by gapTrackerCheckMaxTimeMs).
            if ((offset - c) >= lag.length) {
                offer = offset;
                try {
                    wait(gapTrackerCheckMaxTimeMs);
                } catch (InterruptedException e) {
                    rateLimitedLog(Level.WARN, e,
                            "Gap tracker wait was interrupted." + m_config.getResourceID().toString());
                }
            }
            if (offset > s) {
                s = offset;
            }
            checkpoints[(int) offset] = v;
        }

        private final int idx(long offset) {
            return (int) (offset % lagLen);
        }

        synchronized void commit(long offset, BigInteger v) {
            if (!validateOffset((int) offset) || v == null || checkpoints[(int) offset] == null) {
                return;
            }
            if (offset <= s && offset > c && v.equals(checkpoints[(int) offset])) {
                int ggap = (int) Math.min(lagLen, offset - c);
                if (ggap == lagLen) {
                    c = offset - lagLen + 1;
                    lag[idx(c)] = c;
                }
                lag[idx(offset)] = offset;
                // Advance the base offset while the committed offsets are contiguous.
                while (ggap > 0 && lag[idx(c)] + 1 == lag[idx(c + 1)]) {
                    ++c;
                }
                // Wake up a submitter that is waiting for the commit point to catch up.
                if (offer >= 0 && (offer - c) < lag.length) {
                    offer = -1L;
                    notify();
                }
            }
        }

        synchronized BigInteger getSafeCommitPoint() {
            if (checkpoints != null && validateOffset((int) c)) {
                return checkpoints[(int) c];
            }
            return null;
        }

        private boolean validateOffset(int offset) {
            return (offset >= 0 && offset < checkpoints.length);
        }
    }
}