StreamTask.java example

Explorer
kafka-master
- kafka-trunk
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.processor.internals;

import org.apache.kafka.clients.consumer.CommitFailedException;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.ProducerFencedException;
import org.apache.kafka.common.metrics.Sensor;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.StreamsMetrics;
import org.apache.kafka.streams.errors.StreamsException;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.TaskId;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.state.internals.ThreadCache;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import static java.lang.String.format;
import static java.util.Collections.singleton;

/**
 * A StreamTask is associated with a {@link PartitionGroup}, and is assigned to a StreamThread for processing.
 */
public class StreamTask extends AbstractTask implements Punctuator {

    // Class-wide logger; every log call in this class passes logPrefix as the first argument.
    private static final Logger log = LoggerFactory.getLogger(StreamTask.class);

    // Placeholder record (non-existent topic, partition -1, offset -1, null key/value).
    // punctuate() wraps it in a StampedRecord so a record context exists while punctuating.
    private static final ConsumerRecord<Object, Object> DUMMY_RECORD = new ConsumerRecord<>(ProcessorContextImpl.NONEXIST_TOPIC, -1, -1L, null, null);

    // Group of per-partition record queues, built in the constructor (one RecordQueue per assigned partition).
    private final PartitionGroup partitionGroup;
    // Reusable holder handed to partitionGroup.nextRecord(); process() reads node, partition and queue from it.
    private final PartitionGroup.RecordInfo recordInfo = new PartitionGroup.RecordInfo();
    // Punctuation schedules registered through schedule() and evaluated in maybePunctuate().
    private final PunctuationQueue punctuationQueue;

    // Offset of the most recently processed record per partition; commitOffsets() commits these values + 1.
    private final Map<TopicPartition, Long> consumedOffsets;
    // Created by createRecordCollector(); flushed in flushState() and closed in close() when EOS is enabled.
    private final RecordCollector recordCollector;
    // Producer used for the transactional calls (init/begin/commit/abort) when eosEnabled is true.
    private final Producer<byte[], byte[]> producer;
    // Value of BUFFERED_RECORDS_PER_PARTITION_CONFIG; threshold used to pause/resume consumption of a partition.
    private final int maxBufferedSize;

    // Set by needCommit(), reported by commitNeeded(), cleared at the end of commitOffsets().
    private boolean commitRequested = false;
    // Set once a record has been processed; cleared after the offsets have been committed.
    private boolean commitOffsetNeeded = false;
    // Clock handed to measureLatencyNs() when timing a commit.
    private final Time time;
    // Per-task sensors; see TaskMetrics.
    private final TaskMetrics metrics;

    /**
     * Holds the sensors owned by this task. Currently this is a single DEBUG-level
     * latency-and-throughput sensor for the "commit" operation, tagged with the task id.
     */
    protected class TaskMetrics  {
        final StreamsMetricsImpl metrics;
        final Sensor taskCommitTimeSensor;

        /**
         * @param metrics the metrics registry; it is cast to {@link StreamsMetricsImpl}
         */
        TaskMetrics(final StreamsMetrics metrics) {
            // the task id doubles as the sensor entity name and as the tag value
            final String taskName = id().toString();
            this.metrics = (StreamsMetricsImpl) metrics;
            this.taskCommitTimeSensor = metrics.addLatencyAndThroughputSensor(
                "task",
                taskName,
                "commit",
                Sensor.RecordingLevel.DEBUG,
                "streams-task-id",
                taskName);
        }

        /**
         * Removes the commit sensor from the registry it was registered with.
         */
        void removeAllSensors() {
            this.metrics.removeSensor(this.taskCommitTimeSensor);
        }
    }

    /**
     * Create {@link StreamTask} with its assigned partitions.
     * <p>
     * Besides wiring up its fields, the constructor initializes the state stores, registers the
     * global state stores, starts the first transaction when EOS is enabled, and initializes
     * every processor node of the topology.
     *
     * @param id                    the ID of this task
     * @param applicationId         the ID of the stream processing application
     * @param partitions            the collection of assigned {@link TopicPartition}
     * @param topology              the instance of {@link ProcessorTopology}
     * @param consumer              the instance of {@link Consumer}
     * @param changelogReader       the instance of {@link ChangelogReader} used for restoring state
     * @param config                the {@link StreamsConfig} specified by the user
     * @param metrics               the {@link StreamsMetrics} created by the thread
     * @param stateDirectory        the {@link StateDirectory} created by the thread
     * @param cache                 the {@link ThreadCache} handed to the parent task and to the processor context
     * @param time                  the {@link Time} used when measuring commit latency
     * @param producer              the instance of {@link Producer} used to produce records
     */
    public StreamTask(final TaskId id,
                      final String applicationId,
                      final Collection<TopicPartition> partitions,
                      final ProcessorTopology topology,
                      final Consumer<byte[], byte[]> consumer,
                      final ChangelogReader changelogReader,
                      final StreamsConfig config,
                      final StreamsMetrics metrics,
                      final StateDirectory stateDirectory,
                      final ThreadCache cache,
                      final Time time,
                      final Producer<byte[], byte[]> producer) {
        super(id, applicationId, partitions, topology, consumer, changelogReader, false, stateDirectory, cache, config);
        punctuationQueue = new PunctuationQueue();
        maxBufferedSize = config.getInt(StreamsConfig.BUFFERED_RECORDS_PER_PARTITION_CONFIG);
        this.metrics = new TaskMetrics(metrics);

        // create queues for each assigned partition and associate them
        // to corresponding source nodes in the processor topology
        final Map<TopicPartition, RecordQueue> partitionQueues = new HashMap<>();

        // a source node's own timestamp extractor takes precedence over the configured default
        final TimestampExtractor defaultTimestampExtractor  = config.defaultTimestampExtractor();
        for (final TopicPartition partition : partitions) {
            final SourceNode source = topology.source(partition.topic());
            final TimestampExtractor sourceTimestampExtractor = source.getTimestampExtractor() != null ? source.getTimestampExtractor() : defaultTimestampExtractor;
            final RecordQueue queue = new RecordQueue(partition, source, sourceTimestampExtractor);
            partitionQueues.put(partition, queue);
        }

        partitionGroup = new PartitionGroup(partitionQueues);

        // initialize the consumed offset cache
        consumedOffsets = new HashMap<>();

        // the producer must be assigned before createRecordCollector(), which reads this field
        this.producer = producer;
        recordCollector = createRecordCollector();

        // initialize the topology with its own context
        processorContext = new ProcessorContextImpl(id, this, config, recordCollector, stateMgr, metrics, cache);
        this.time = time;
        log.debug("{} Initializing", logPrefix);
        initializeStateStores();
        stateMgr.registerGlobalStateStores(topology.globalStateStores());
        if (eosEnabled) {
            // with EOS, the first transaction is opened before the processor nodes are initialized
            producer.initTransactions();
            producer.beginTransaction();
        }
        initTopology();
        processorContext.initialized();
    }

    /**
     * Resumes this task:
     * <pre>
     * - if (eos) begin new transaction
     * - re-initialize all processor nodes of the topology
     * </pre>
     */
    @Override
    public void resume() {
        log.debug("{} Resuming", logPrefix);

        final boolean transactional = eosEnabled;
        if (transactional) {
            this.producer.beginTransaction();
        }

        initTopology();
    }

    /**
     * Takes the next buffered record from the partition group and pushes it through the topology,
     * starting at the node reported for that record.
     *
     * @return {@code true} if a record was processed, {@code false} if no record was available
     * @throws StreamsException wrapping any {@link KafkaException} raised while processing
     */
    @SuppressWarnings("unchecked")
    public boolean process() {
        final StampedRecord next = partitionGroup.nextRecord(recordInfo);
        if (next == null) {
            // nothing buffered
            return false;
        }

        try {
            final ProcessorNode entryNode = recordInfo.node();
            final TopicPartition sourcePartition = recordInfo.partition();

            log.trace("{} Start processing one record [{}]", logPrefix, next);
            updateProcessorContext(next, entryNode);
            entryNode.process(next.key(), next.value());

            log.trace("{} Completed processing one record [{}]", logPrefix, next);

            // remember how far this partition has been consumed; committed later by commitOffsets()
            consumedOffsets.put(sourcePartition, next.offset());
            commitOffsetNeeded = true;

            // once the partition's queue has drained back down to the threshold,
            // consumption of that partition is resumed
            final boolean drainedToThreshold = recordInfo.queue().size() == maxBufferedSize;
            if (drainedToThreshold) {
                consumer.resume(singleton(sourcePartition));
            }
        } catch (final KafkaException fatal) {
            final String message = format(
                "Exception caught in process. taskId=%s, processor=%s, topic=%s, partition=%d, offset=%d",
                id(),
                processorContext.currentNode().name(),
                next.topic(),
                next.partition(),
                next.offset());
            throw new StreamsException(message, fatal);
        } finally {
            // always leave the context without a current node, whether processing succeeded or failed
            processorContext.setCurrentNode(null);
        }

        return true;
    }

    /**
     * Punctuates the given node with the given timestamp, using a placeholder record
     * as the record context for the duration of the call.
     *
     * @param node      the processor node to punctuate
     * @param timestamp the timestamp passed on to the node
     * @throws IllegalStateException if the current node is not null
     * @throws StreamsException wrapping any {@link KafkaException} raised by the node
     */
    @Override
    public void punctuate(final ProcessorNode node, final long timestamp) {
        final boolean anotherNodeActive = processorContext.currentNode() != null;
        if (anotherNodeActive) {
            throw new IllegalStateException(String.format("%s Current node is not null", logPrefix));
        }

        final StampedRecord placeholder = new StampedRecord(DUMMY_RECORD, timestamp);
        updateProcessorContext(placeholder, node);

        log.trace("{} Punctuating processor {} with timestamp {}", logPrefix, node.name(), timestamp);

        try {
            node.punctuate(timestamp);
        } catch (final KafkaException cause) {
            final String message =
                String.format("%s Exception caught while punctuating processor '%s'", logPrefix, node.name());
            throw new StreamsException(message, cause);
        } finally {
            // reset so the next process()/punctuate() call starts without a current node
            processorContext.setCurrentNode(null);
        }
    }

    /**
     * Points the processor context at the given record (timestamp, offset, partition, topic)
     * and marks {@code currNode} as the node currently being executed.
     */
    private void updateProcessorContext(final StampedRecord record, final ProcessorNode currNode) {
        final ProcessorRecordContext recordContext = new ProcessorRecordContext(
            record.timestamp,
            record.offset(),
            record.partition(),
            record.topic());

        processorContext.setRecordContext(recordContext);
        processorContext.setCurrentNode(currNode);
    }

    /**
     * Commits the task and, when EOS is enabled, starts a new transaction afterwards.
     * <pre>
     * - flush state and producer
     * - if (!eos) write checkpoint
     * - commit offsets and start new transaction
     * </pre>
     */
    @Override
    public void commit() {
        final boolean startNewTransaction = true;
        commitImpl(startNewTransaction);
    }

    /**
     * Runs the commit sequence (flush, optional checkpoint, offset commit) and records its
     * latency on the task's commit sensor. Visible for testing.
     *
     * @param startNewTransaction forwarded to {@code commitOffsets}; with EOS it decides whether a
     *                            new transaction is begun after the current one is committed
     */
    void commitImpl(final boolean startNewTransaction) {
        log.trace("{} Committing", logPrefix);

        final Runnable commitSteps = new Runnable() {
            @Override
            public void run() {
                flushState();
                // the checkpoint is only written here when EOS is disabled
                if (!eosEnabled) {
                    stateMgr.checkpoint(recordCollectorOffsets());
                }
                commitOffsets(startNewTransaction);
            }
        };

        metrics.metrics.measureLatencyNs(time, commitSteps, metrics.taskCommitTimeSensor);
    }

    /**
     * @return the offsets reported by this task's record collector
     */
    @Override
    protected Map<TopicPartition, Long> recordCollectorOffsets() {
        final Map<TopicPartition, Long> collectorOffsets = recordCollector.offsets();
        return collectorOffsets;
    }

    /**
     * Flushes the state via the parent implementation first, then flushes the record collector.
     */
    @Override
    protected void flushState() {
        log.trace("{} Flushing state and producer", logPrefix);

        super.flushState();
        this.recordCollector.flush();
    }

    /**
     * Commits the consumed offsets (last processed offset + 1 per partition) if any record has
     * been processed since the last commit, then clears the pending commit request.
     * <p>
     * With EOS the offsets are sent as part of the producer transaction, which is then committed;
     * otherwise they are committed synchronously through the consumer.
     *
     * @param startNewTransaction with EOS, whether to begin a new transaction after committing
     */
    private void commitOffsets(final boolean startNewTransaction) {
        if (!commitOffsetNeeded) {
            // nothing processed since the last commit: only the request flag is reset
            commitRequested = false;
            return;
        }

        log.debug("{} Committing offsets", logPrefix);

        final Map<TopicPartition, OffsetAndMetadata> toCommit = new HashMap<>(consumedOffsets.size());
        for (final Map.Entry<TopicPartition, Long> consumed : consumedOffsets.entrySet()) {
            final TopicPartition tp = consumed.getKey();
            // the committed position is the offset following the last processed record
            final long nextOffset = consumed.getValue() + 1;
            toCommit.put(tp, new OffsetAndMetadata(nextOffset));
            stateMgr.putOffsetLimit(tp, nextOffset);
        }

        if (!eosEnabled) {
            try {
                consumer.commitSync(toCommit);
            } catch (final CommitFailedException e) {
                log.warn("{} Failed offset commits {} due to {}", logPrefix, toCommit, e.getMessage());
                throw e;
            }
        } else {
            producer.sendOffsetsToTransaction(toCommit, applicationId);
            producer.commitTransaction();
            if (startNewTransaction) {
                producer.beginTransaction();
            }
        }

        commitOffsetNeeded = false;
        commitRequested = false;
    }

    /**
     * Initializes every processor node of the topology with this task's processor context.
     * Each node is set as the context's current node while its {@code init} runs.
     */
    private void initTopology() {
        log.debug("{} Initializing processor nodes of the topology", logPrefix);

        for (final ProcessorNode processor : topology.processors()) {
            processorContext.setCurrentNode(processor);
            try {
                processor.init(processorContext);
            } finally {
                // cleared even if init throws
                processorContext.setCurrentNode(null);
            }
        }
    }

    /**
     * Cleanly suspends this task:
     * <pre>
     * - close topology
     * - commit, without starting a new transaction
     *   - flush state and producer
     *   - if (!eos) write checkpoint
     *   - commit offsets
     * </pre>
     */
    @Override
    public void suspend() {
        final boolean clean = true;
        suspend(clean);
    }

    /**
     * Suspends this task:
     * <pre>
     * - close topology
     * - if (clean) commit, without starting a new transaction
     *   - flush state and producer
     *   - if (!eos) write checkpoint
     *   - commit offsets
     * </pre>
     *
     * @param clean whether to commit after the topology has been closed
     */
    private void suspend(final boolean clean) {
        log.debug("{} Suspending", logPrefix);

        closeTopology(); // should we call this only on clean suspend?

        if (!clean) {
            return;
        }
        commitImpl(false);
    }

    /**
     * Clears the partition group and closes every processor node of the topology.
     * <p>
     * {@code close()} is attempted on every node even if an earlier node fails. The first
     * {@link RuntimeException} encountered is rethrown after all nodes have been visited;
     * failures from later nodes are attached to it as suppressed exceptions so that no
     * failure is silently lost.
     *
     * @throws RuntimeException the first exception thrown by a node's {@code close()}
     */
    private void closeTopology() {
        log.debug("{} Closing processor topology", logPrefix);

        partitionGroup.clear();

        // close the processors
        // make sure close() is called for each node even when there is a RuntimeException
        RuntimeException firstException = null;
        for (final ProcessorNode node : topology.processors()) {
            processorContext.setCurrentNode(node);
            try {
                node.close();
            } catch (final RuntimeException e) {
                if (firstException == null) {
                    // keep the earliest failure, matching the firstException handling in close()
                    firstException = e;
                } else if (e != firstException) {
                    // addSuppressed rejects self-suppression, hence the identity check
                    firstException.addSuppressed(e);
                }
            } finally {
                processorContext.setCurrentNode(null);
            }
        }

        if (firstException != null) {
            throw firstException;
        }
    }

    /**
     * Closes this task. Every step is attempted even if an earlier one fails; the first
     * {@link RuntimeException} raised by suspend or by closing the state manager is rethrown
     * at the end.
     * <pre>
     * - {@link #suspend(boolean) suspend(clean)}
     *   - close topology
     *   - if (clean) {@link #commit()}
     *     - flush state and producer
     *     - commit offsets
     * - close state
     *   - if (clean) write checkpoint
     * - close partition group and remove sensors
     * - if (eos)
     *   - if (!clean) abort transaction
     *   - close record collector
     * </pre>
     * @param clean shut down cleanly (ie, incl. flush and commit) if {@code true} --
     *              otherwise, just close open resources
     * @throws RuntimeException the first exception raised while suspending or closing the state manager
     */
    @Override
    public void close(boolean clean) {
        log.debug("{} Closing", logPrefix);

        RuntimeException firstException = null;
        try {
            suspend(clean);
        } catch (final RuntimeException e) {
            // a failed suspend downgrades the rest of the shutdown to the unclean path
            clean = false;
            firstException = e;
            // NOTE(review): the exception is the last argument, so the logging backend may treat it
            // as the throwable and leave the second "{}" unfilled -- confirm against the SLF4J version in use
            log.error("{} Could not close task due to {}", logPrefix, e);
        }

        try {
            closeStateManager(clean);
        } catch (final RuntimeException e) {
            clean = false;
            // only remember this failure if suspend did not already fail
            if (firstException == null) {
                firstException = e;
            }
            log.error("{} Could not close state manager due to {}", logPrefix, e);
        }

        try {
            partitionGroup.close();
            metrics.removeAllSensors();
        } finally {
            // producer-side cleanup runs even if closing the partition group or removing sensors throws
            if (eosEnabled) {
                if (!clean) {
                    // unclean shutdown: the open transaction is aborted rather than committed
                    try {
                        producer.abortTransaction();
                    } catch (final ProducerFencedException e) {
                        // can be ignored: transaction got already aborted by brokers/transactional-coordinator if this happens
                    }
                }
                try {
                    recordCollector.close();
                } catch (final Throwable e) {
                    // best effort: a failure to close is logged, not propagated
                    log.error("{} Failed to close producer: ", logPrefix, e);
                }
            }
        }

        if (firstException != null) {
            throw firstException;
        }
    }

    /**
     * Adds records to the buffered queue of the given partition and pauses consumption of that
     * partition once its queue has grown beyond the configured threshold.
     * <p>
     * Per the contract of this method, a record with an invalid (i.e., negative) timestamp is
     * skipped and not added to the queue for processing; that filtering happens inside the
     * partition group, not in this method.
     *
     * @param partition the partition
     * @param records  the records
     * @return the number of added records (queue size after minus queue size before)
     */
    @SuppressWarnings("unchecked")
    public int addRecords(final TopicPartition partition, final Iterable<ConsumerRecord<byte[], byte[]>> records) {
        final int bufferedBefore = partitionGroup.numBuffered(partition);
        final int bufferedAfter = partitionGroup.addRawRecords(partition, records);

        log.trace("{} Added records into the buffered queue of partition {}, new queue size is {}", logPrefix, partition, bufferedAfter);

        // stop fetching from this partition while its queue is over the threshold;
        // process() resumes it once the queue has drained back down
        final boolean overThreshold = bufferedAfter > maxBufferedSize;
        if (overThreshold) {
            consumer.pause(singleton(partition));
        }

        return bufferedAfter - bufferedBefore;
    }

    /**
     * Schedules a punctuation for the processor that is currently set as the
     * processor context's current node.
     *
     * @param interval  the interval in milliseconds
     * @throws IllegalStateException if the current node is null, i.e. there is no
     *                               processor to schedule the punctuation for
     */
    public void schedule(final long interval) {
        // the schedule is bound to the current node, so one must be set
        if (processorContext.currentNode() == null) {
            throw new IllegalStateException(String.format("%s Current node is null", logPrefix));
        }

        punctuationQueue.schedule(new PunctuationSchedule(processorContext.currentNode(), interval));
    }

    /**
     * @return the total number of records currently buffered in this task's partition group
     */
    int numBuffered() {
        final int buffered = partitionGroup.numBuffered();
        return buffered;
    }

    /**
     * Possibly triggers registered punctuation functions, based on the current timestamp of the
     * partition group.
     *
     * @return {@code false} if the partition group's timestamp is not known yet; otherwise the
     *         result reported by the punctuation queue
     */
    boolean maybePunctuate() {
        final long streamTime = partitionGroup.timestamp();

        // an unknown timestamp means there is not enough data accumulated yet
        // to reason about stream partition time, so nothing is punctuated
        if (streamTime != TimestampTracker.NOT_KNOWN) {
            return punctuationQueue.mayPunctuate(streamTime, this);
        }
        return false;
    }

    /**
     * Flags that a commit of this task's current state has been requested.
     * The flag is cleared again when offsets are committed.
     */
    void needCommit() {
        this.commitRequested = true;
    }

    /**
     * @return {@code true} if a commit has been requested via {@link #needCommit()} and not yet
     *         carried out
     */
    boolean commitNeeded() {
        return this.commitRequested;
    }

    /**
     * Visible for testing only.
     *
     * @return the processor context of this task
     */
    ProcessorContext processorContext() {
        return this.processorContext;
    }

    /**
     * Visible for testing only.
     *
     * @return the record collector of this task
     */
    RecordCollector recordCollector() {
        return this.recordCollector;
    }

    /**
     * Builds the record collector on top of this task's producer, named after the task id.
     * Called from the constructor after the producer field has been assigned.
     * Visible for testing only.
     *
     * @return a new {@link RecordCollectorImpl}
     */
    RecordCollector createRecordCollector() {
        final String taskName = id.toString();
        return new RecordCollectorImpl(producer, taskName);
    }

}