/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package gobblin.writer; import java.util.LinkedList; import java.util.ArrayList; import java.util.Deque; import java.util.HashSet; import java.util.Properties; import java.util.Set; import java.util.concurrent.Future; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.util.concurrent.Futures; import com.typesafe.config.Config; import gobblin.util.ConfigUtils; /** * Sequential and TTL based accumulator. * A producer can add a record to this accumulator. The accumulator creates a batch when the first record arrives, and all subsequent records * are added to the same batch until the batch size limit is reached. {@link BufferedAsyncDataWriter} keeps iterating over the available * batches from this accumulator: all completed (full-sized) batches are popped out one by one, but an incomplete batch * stays in the deque until its TTL expires. 
*/ public abstract class SequentialBasedBatchAccumulator<D> extends BatchAccumulator<D> { private Deque<BytesBoundedBatch<D>> dq = new LinkedList<>(); private IncompleteRecordBatches incomplete = new IncompleteRecordBatches(); private final long batchSizeLimit; private final long memSizeLimit; private final double tolerance = 0.95; private final long expireInMilliSecond; private static final Logger LOG = LoggerFactory.getLogger(SequentialBasedBatchAccumulator.class); private final ReentrantLock dqLock = new ReentrantLock(); private final Condition notEmpty = dqLock.newCondition(); private final Condition notFull = dqLock.newCondition(); private final long capacity; public SequentialBasedBatchAccumulator() { this (1024 * 256, 1000, 100); } public SequentialBasedBatchAccumulator(Properties properties) { Config config = ConfigUtils.propertiesToConfig(properties); this.batchSizeLimit = ConfigUtils.getLong(config, Batch.BATCH_SIZE, Batch.BATCH_SIZE_DEFAULT); this.expireInMilliSecond = ConfigUtils.getLong(config, Batch.BATCH_TTL, Batch.BATCH_TTL_DEFAULT); this.capacity = ConfigUtils.getLong(config, Batch.BATCH_QUEUE_CAPACITY, Batch.BATCH_QUEUE_CAPACITY_DEFAULT); this.memSizeLimit = (long) (this.tolerance * this.batchSizeLimit); } public SequentialBasedBatchAccumulator(long batchSizeLimit, long expireInMilliSecond, long capacity) { this.batchSizeLimit = batchSizeLimit; this.expireInMilliSecond = expireInMilliSecond; this.capacity = capacity; this.memSizeLimit = (long) (this.tolerance * this.batchSizeLimit); } public long getNumOfBatches () { this.dqLock.lock(); try { return this.dq.size(); } finally { this.dqLock.unlock(); } } /** * Add a data to internal deque data structure */ public final Future<RecordMetadata> enqueue (D record, WriteCallback callback) throws InterruptedException { final ReentrantLock lock = this.dqLock; lock.lock(); try { BytesBoundedBatch last = dq.peekLast(); if (last != null) { Future<RecordMetadata> future = last.tryAppend(record, callback); if 
(future != null) { return future; } } // Create a new batch because previous one has no space BytesBoundedBatch batch = new BytesBoundedBatch(this.memSizeLimit, this.expireInMilliSecond); LOG.debug("Batch " + batch.getId() + " is generated"); Future<RecordMetadata> future = batch.tryAppend(record, callback); // Even single record can exceed the batch size limit // Ignore the record because Eventhub can only accept payload less than 256KB if (future == null) { LOG.error("Batch " + batch.getId() + " is marked as complete because it contains a huge record: " + record); future = Futures.immediateFuture(new RecordMetadata(0)); callback.onSuccess(WriteResponse.EMPTY); return future; } // if queue is full, we should not add more while (dq.size() >= this.capacity) { this.notFull.await(); } dq.addLast(batch); incomplete.add(batch); this.notEmpty.signal(); return future; } finally { lock.unlock(); } } /** * A threadsafe helper class to hold RecordBatches that haven't been ack'd yet * This is mainly used for flush operation so that all the batches waiting in * the incomplete set will be blocked */ private final static class IncompleteRecordBatches { private final Set<Batch> incomplete; public IncompleteRecordBatches() { this.incomplete = new HashSet<>(); } public void add(Batch batch) { synchronized (incomplete) { this.incomplete.add(batch); } } public void remove(Batch batch) { synchronized (incomplete) { boolean removed = this.incomplete.remove(batch); if (!removed) throw new IllegalStateException("Remove from the incomplete set failed. This should be impossible."); } } public ArrayList<Batch> all() { synchronized (incomplete) { return new ArrayList (this.incomplete); } } } /** * If accumulator has been closed, below actions are performed: * 1) remove and return the first batch if available. * 2) return null if queue is empty. 
* If accumulator has not been closed, below actions are performed: * 1) if queue.size == 0, block current thread until more batches are available or accumulator is closed. * 2) if queue size == 1, remove and return the first batch if TTL has expired, else return null. * 3) if queue size > 1, remove and return the first batch element. */ public Batch<D> getNextAvailableBatch () { final ReentrantLock lock = SequentialBasedBatchAccumulator.this.dqLock; try { lock.lock(); if (SequentialBasedBatchAccumulator.this.isClosed()) { return dq.poll(); } else { while (dq.size() == 0) { LOG.info ("ready to sleep because of queue is empty"); SequentialBasedBatchAccumulator.this.notEmpty.await(); if (SequentialBasedBatchAccumulator.this.isClosed()) { return dq.poll(); } } if (dq.size() > 1) { BytesBoundedBatch candidate = dq.poll(); SequentialBasedBatchAccumulator.this.notFull.signal(); LOG.debug ("retrieve batch " + candidate.getId()); return candidate; } if (dq.size() == 1) { if (dq.peekFirst().isTTLExpire()) { LOG.info ("Batch " + dq.peekFirst().getId() + " is expired"); BytesBoundedBatch candidate = dq.poll(); SequentialBasedBatchAccumulator.this.notFull.signal(); return candidate; } else { return null; } } else { throw new RuntimeException("Should never get to here"); } } } catch (InterruptedException e) { LOG.error("Wait for next batch is interrupted. 
" + e.toString()); } finally { lock.unlock(); } return null; } public void close() { super.close(); this.dqLock.lock(); try { this.notEmpty.signal(); } finally { this.dqLock.unlock(); } } /** * This will block until all the incomplete batches are acknowledged */ public void flush() { try { ArrayList<Batch> batches = this.incomplete.all(); LOG.info ("flush on {} batches", batches.size()); for (Batch batch: batches) { batch.await(); } } catch (Exception e) { LOG.info ("Error happens when flushing"); } } /** * Once batch is acknowledged, remove it from incomplete list */ public void deallocate (Batch<D> batch) { this.incomplete.remove(batch); } }