package com.github.projectflink.streaming.utils;

/**
 * A partitioner ensuring that each internal Flink partition ends up in exactly one Kafka partition.
 *
 * <p>Note that one Kafka partition can contain the output of multiple Flink partitions.
 *
 * <p>Cases:
 *
 * <p>More Flink partitions than Kafka partitions:
 * <pre>
 *     Flink Sinks:            Kafka Partitions
 *         1   ----------------&gt;   1
 *         2   --------------/
 *         3   -------------/
 *         4   ------------/
 * </pre>
 * Some (or all) Kafka partitions contain the output of more than one Flink partition.
 *
 * <p>Fewer Flink partitions than Kafka partitions:
 * <pre>
 *     Flink Sinks:            Kafka Partitions
 *         1   ----------------&gt;   1
 *         2   ----------------&gt;   2
 *                                    3
 *                                    4
 *                                    5
 * </pre>
 * Not all Kafka partitions contain data. To avoid such an unbalanced partitioning, use a
 * round-robin Kafka partitioner (note that this will cause a lot of network connections between
 * all the Flink instances and all the Kafka brokers).
 *
 * @param <T> type of the elements passed to {@link #partition(Object)}
 */
public class FixedPartitioning<T> extends KafkaPartitioner<T> {

	/** Sentinel stored in {@link #targetPartition} until {@code prepare} has selected a partition. */
	private static final int UNINITIALIZED = -1;

	/** Kafka partition selected by {@code prepare}; {@link #UNINITIALIZED} until then. */
	int targetPartition = UNINITIALIZED;

	/**
	 * Selects the single Kafka partition this instance writes to.
	 *
	 * <p>Instances are mapped onto the given partitions round-robin: instance {@code i} is assigned
	 * {@code partitions[i % partitions.length]}.
	 *
	 * @param parallelInstanceId zero-based index of this instance; must satisfy
	 *                           {@code 0 <= parallelInstanceId < parallelInstances}
	 * @param parallelInstances  total number of parallel instances
	 * @param partitions         candidate Kafka partition ids; must be non-null and non-empty
	 * @throws IllegalArgumentException if {@code partitions} is null or empty, or if
	 *                                  {@code parallelInstanceId} is outside the valid range
	 */
	@Override
	public void prepare(int parallelInstanceId, int parallelInstances, int[] partitions) {
		if (partitions == null || partitions.length == 0) {
			throw new IllegalArgumentException("The partitions array must not be null or empty");
		}
		// Fail fast: previously an out-of-range id was silently ignored, leaving the partitioner
		// uninitialized (or holding a stale target from an earlier prepare() call).
		if (parallelInstanceId < 0 || parallelInstanceId >= parallelInstances) {
			throw new IllegalArgumentException("Invalid parallel instance id " + parallelInstanceId
					+ " for " + parallelInstances + " parallel instances");
		}
		// Direct equivalent of walking the partitions cyclically up to parallelInstanceId.
		targetPartition = partitions[parallelInstanceId % partitions.length];
	}

	/**
	 * Returns the partition chosen in {@code prepare}; the element itself does not influence
	 * the result.
	 *
	 * @param element the element to be written (ignored)
	 * @return the fixed target partition of this instance
	 * @throws IllegalStateException if no partition has been selected yet
	 */
	@Override
	public int partition(T element) {
		if (targetPartition == UNINITIALIZED) {
			throw new IllegalStateException("The partitioner has not been initialized properly");
		}
		return targetPartition;
	}
}