/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.streaming.examples.windowing; import org.apache.flink.api.common.functions.ReduceFunction; import org.apache.flink.api.java.functions.KeySelector; import org.apache.flink.api.java.tuple.Tuple; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.functions.sink.SinkFunction; import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; import org.apache.flink.streaming.api.functions.windowing.WindowFunction; import org.apache.flink.streaming.api.windowing.time.Time; import org.apache.flink.streaming.api.windowing.windows.Window; import org.apache.flink.util.Collector; import static java.util.concurrent.TimeUnit.MILLISECONDS; @SuppressWarnings("serial") public class GroupedProcessingTimeWindowExample { public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); DataStream<Tuple2<Long, Long>> stream = env .addSource(new RichParallelSourceFunction<Tuple2<Long, Long>>() { private volatile boolean running = true; @Override public void run(SourceContext<Tuple2<Long, Long>> ctx) throws Exception { final long startTime = System.currentTimeMillis(); final long numElements = 20000000; final long numKeys = 10000; long val = 1L; long count = 0L; while (running && count < numElements) { count++; ctx.collect(new Tuple2<>(val++, 1L)); if (val > numKeys) { val = 1L; } } final long endTime = System.currentTimeMillis(); System.out.println("Took " + (endTime-startTime) + " msecs for " + numElements + " values"); } @Override public void cancel() { running = false; } }); stream .keyBy(0) .timeWindow(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS)) .reduce(new SummingReducer()) // alternative: use a apply function which does not pre-aggregate // .keyBy(new FirstFieldKeyExtractor<Tuple2<Long, Long>, Long>()) // .window(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS)) // .apply(new SummingWindowFunction()) .addSink(new SinkFunction<Tuple2<Long, Long>>() { @Override public void invoke(Tuple2<Long, Long> value) { } }); env.execute(); } public static class FirstFieldKeyExtractor<Type extends Tuple, Key> implements KeySelector<Type, Key> { @Override @SuppressWarnings("unchecked") public Key getKey(Type value) { return (Key) value.getField(0); } } public static class SummingWindowFunction implements WindowFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Long, Window> { @Override public void apply(Long key, Window window, Iterable<Tuple2<Long, Long>> values, Collector<Tuple2<Long, Long>> out) { long sum = 0L; for (Tuple2<Long, Long> value : values) { sum += value.f1; } out.collect(new Tuple2<>(key, sum)); } } public static class SummingReducer implements ReduceFunction<Tuple2<Long, Long>> { @Override public Tuple2<Long, Long> reduce(Tuple2<Long, Long> value1, Tuple2<Long, Long> value2) { return new Tuple2<>(value1.f0, value1.f1 + value2.f1); } } }