/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.api.common.functions; import org.apache.flink.annotation.Public; import org.apache.flink.annotation.PublicEvolving; import org.apache.flink.api.common.ExecutionConfig; import org.apache.flink.api.common.accumulators.Accumulator; import org.apache.flink.api.common.accumulators.DoubleCounter; import org.apache.flink.api.common.accumulators.Histogram; import org.apache.flink.api.common.accumulators.IntCounter; import org.apache.flink.api.common.accumulators.LongCounter; import org.apache.flink.api.common.cache.DistributedCache; import org.apache.flink.api.common.state.FoldingState; import org.apache.flink.api.common.state.FoldingStateDescriptor; import org.apache.flink.api.common.state.ListState; import org.apache.flink.api.common.state.ListStateDescriptor; import org.apache.flink.api.common.state.MapState; import org.apache.flink.api.common.state.MapStateDescriptor; import org.apache.flink.api.common.state.ReducingState; import org.apache.flink.api.common.state.ReducingStateDescriptor; import org.apache.flink.api.common.state.ValueState; import org.apache.flink.api.common.state.ValueStateDescriptor; import org.apache.flink.metrics.MetricGroup; import java.io.Serializable; 
import java.util.List;
import java.util.Map;

/**
 * A RuntimeContext contains information about the context in which functions are executed. Each
 * parallel instance of the function will have a context through which it can access static
 * contextual information (such as the current parallelism) and other constructs like accumulators
 * and broadcast variables.
 *
 * <p>A function can, during runtime, obtain the RuntimeContext via a call to
 * {@link AbstractRichFunction#getRuntimeContext()}.
 */
@Public
public interface RuntimeContext {

	/**
	 * Returns the name of the task in which the UDF runs, as assigned during plan construction.
	 *
	 * @return The name of the task in which the UDF runs.
	 */
	String getTaskName();

	/**
	 * Returns the metric group for this parallel subtask.
	 *
	 * @return The metric group for this parallel subtask.
	 */
	@PublicEvolving
	MetricGroup getMetricGroup();

	/**
	 * Gets the parallelism with which the parallel task runs.
	 *
	 * @return The parallelism with which the parallel task runs.
	 */
	int getNumberOfParallelSubtasks();

	/**
	 * Gets the max-parallelism with which the parallel task runs.
	 *
	 * @return The max-parallelism with which the parallel task runs.
	 */
	@PublicEvolving
	int getMaxNumberOfParallelSubtasks();

	/**
	 * Gets the number of this parallel subtask. The numbering starts from 0 and goes up to
	 * parallelism-1 (parallelism as returned by {@link #getNumberOfParallelSubtasks()}).
	 *
	 * @return The index of the parallel subtask.
	 */
	int getIndexOfThisSubtask();

	/**
	 * Gets the attempt number of this parallel subtask. The first attempt is numbered 0.
	 *
	 * @return Attempt number of the subtask.
	 */
	int getAttemptNumber();

	/**
	 * Returns the name of the task, appended with the subtask indicator, such as "MyTask (3/6)",
	 * where 3 would be ({@link #getIndexOfThisSubtask()} + 1), and 6 would be
	 * {@link #getNumberOfParallelSubtasks()}.
	 *
	 * @return The name of the task, with subtask indicator.
	 */
	String getTaskNameWithSubtasks();

	/**
	 * Returns the {@link org.apache.flink.api.common.ExecutionConfig} for the currently executing
	 * job.
	 *
	 * @return The execution config of the currently executing job.
	 */
	ExecutionConfig getExecutionConfig();

	/**
	 * Gets the ClassLoader to load classes that are not in the system's classpath, but are part of
	 * the jar file of a user job.
	 *
	 * @return The ClassLoader for user code classes.
	 */
	ClassLoader getUserCodeClassLoader();

	// --------------------------------------------------------------------------------------------

	/**
	 * Adds this accumulator. Throws an exception if the accumulator already exists in the same
	 * Task.
	 *
	 * <p>Note that the accumulator must have a unique name across the Flink job. Otherwise you
	 * will get an error when incompatible accumulators from different Tasks are combined at the
	 * JobManager upon job completion.
	 *
	 * @param name The name under which the accumulator is registered.
	 * @param accumulator The accumulator to add.
	 */
	<V, A extends Serializable> void addAccumulator(String name, Accumulator<V, A> accumulator);

	/**
	 * Gets an existing accumulator object. The accumulator must have been added
	 * previously in this local runtime context.
	 *
	 * <p>Throws an exception if the accumulator does not exist or if the
	 * accumulator exists, but with a different type.
	 *
	 * @param name The name under which the accumulator was added.
	 * @return The accumulator registered under the given name.
	 */
	<V, A extends Serializable> Accumulator<V, A> getAccumulator(String name);

	/**
	 * Returns a map of all registered accumulators for this task.
	 * The returned map must not be modified.
	 *
	 * @return A map of all registered accumulators for this task.
	 *
	 * @deprecated Use {@link #getAccumulator(String)} to obtain the value of an accumulator.
	 */
	@Deprecated
	@PublicEvolving
	Map<String, Accumulator<?, ?>> getAllAccumulators();

	/**
	 * Convenience function to create a counter object for integers.
	 */
	@PublicEvolving
	IntCounter getIntCounter(String name);

	/**
	 * Convenience function to create a counter object for longs.
	 */
	@PublicEvolving
	LongCounter getLongCounter(String name);

	/**
	 * Convenience function to create a counter object for doubles.
	 */
	@PublicEvolving
	DoubleCounter getDoubleCounter(String name);

	/**
	 * Convenience function to create a counter object for histograms.
	 */
	@PublicEvolving
	Histogram getHistogram(String name);

	// --------------------------------------------------------------------------------------------

	/**
	 * Tests for the existence of the broadcast variable identified by the
	 * given {@code name}.
	 *
	 * @param name The name under which the broadcast variable is registered.
	 * @return Whether a broadcast variable exists for the given name.
	 */
	@PublicEvolving
	boolean hasBroadcastVariable(String name);

	/**
	 * Returns the result bound to the broadcast variable identified by the
	 * given {@code name}.
	 *
	 * <p>IMPORTANT: The broadcast variable data structure is shared between the parallel
	 * tasks on one machine. Any access that modifies its internal state needs to
	 * be manually synchronized by the caller.
	 *
	 * @param name The name under which the broadcast variable is registered.
	 * @return The broadcast variable, materialized as a list of elements.
	 */
	<RT> List<RT> getBroadcastVariable(String name);

	/**
	 * Returns the result bound to the broadcast variable identified by the
	 * given {@code name}. The broadcast variable is returned as a shared data structure
	 * that is initialized with the given {@link BroadcastVariableInitializer}.
	 *
	 * <p>IMPORTANT: The broadcast variable data structure is shared between the parallel
	 * tasks on one machine. Any access that modifies its internal state needs to
	 * be manually synchronized by the caller.
	 *
	 * @param name The name under which the broadcast variable is registered.
	 * @param initializer The initializer that creates the shared data structure of the broadcast
	 *                    variable from the sequence of elements.
	 * @return The broadcast variable, as the shared data structure created by the initializer.
	 */
	<T, C> C getBroadcastVariableWithInitializer(String name, BroadcastVariableInitializer<T, C> initializer);

	/**
	 * Returns the {@link DistributedCache} to get the local temporary file copies of files
	 * otherwise not locally accessible.
	 *
	 * @return The distributed cache of the worker executing this instance.
	 */
	DistributedCache getDistributedCache();

	// ------------------------------------------------------------------------
	//  Methods for accessing state
	// ------------------------------------------------------------------------

	/**
	 * Gets a handle to the system's key/value state. The key/value state is only accessible
	 * if the function is executed on a KeyedStream. On each access, the state exposes the value
	 * for the key of the element currently processed by the function.
	 * Each function may have multiple partitioned states, addressed with different names.
	 *
	 * <p>Because the scope of each value is the key of the currently processed element,
	 * and the elements are distributed by the Flink runtime, the system can transparently
	 * scale out and redistribute the state and KeyedStream.
	 *
	 * <p>The following code example shows how to implement a continuous counter that counts
	 * how many times elements of a certain key occur, and emits an updated count for that
	 * element on each occurrence.
	 *
	 * <pre>{@code
	 * DataStream<MyType> stream = ...;
	 * KeyedStream<MyType> keyedStream = stream.keyBy("id");
	 *
	 * keyedStream.map(new RichMapFunction<MyType, Tuple2<MyType, Long>>() {
	 *
	 *     private ValueState<Long> state;
	 *
	 *     public void open(Configuration cfg) {
	 *         state = getRuntimeContext().getState(
	 *                 new ValueStateDescriptor<Long>("count", LongSerializer.INSTANCE, 0L));
	 *     }
	 *
	 *     public Tuple2<MyType, Long> map(MyType value) {
	 *         long count = state.value() + 1;
	 *         state.update(count);
	 *         return new Tuple2<>(value, count);
	 *     }
	 * });
	 * }</pre>
	 *
	 * @param stateProperties The descriptor defining the properties of the state.
	 *
	 * @param <T> The type of value stored in the state.
	 *
	 * @return The partitioned state object.
	 *
	 * @throws UnsupportedOperationException Thrown, if no partitioned state is available for the
	 *                                       function (function is not part of a KeyedStream).
	 */
	@PublicEvolving
	<T> ValueState<T> getState(ValueStateDescriptor<T> stateProperties);

	/**
	 * Gets a handle to the system's key/value list state. This state is similar to the state
	 * accessed via {@link #getState(ValueStateDescriptor)}, but is optimized for state that
	 * holds lists. One can add elements to the list, or retrieve the list as a whole.
	 *
	 * <p>This state is only accessible if the function is executed on a KeyedStream.
	 *
	 * <pre>{@code
	 * DataStream<MyType> stream = ...;
	 * KeyedStream<MyType> keyedStream = stream.keyBy("id");
	 *
	 * keyedStream.flatMap(new RichFlatMapFunction<MyType, MyType>() {
	 *
	 *     private ListState<MyType> state;
	 *
	 *     public void open(Configuration cfg) {
	 *         state = getRuntimeContext().getListState(
	 *                 new ListStateDescriptor<>("myState", MyType.class));
	 *     }
	 *
	 *     public void flatMap(MyType value, Collector<MyType> out) {
	 *         if (value.isDivider()) {
	 *             for (MyType t : state.get()) {
	 *                 out.collect(t);
	 *             }
	 *         } else {
	 *             state.add(value);
	 *         }
	 *     }
	 * });
	 * }</pre>
	 *
	 * @param stateProperties The descriptor defining the properties of the state.
	 *
	 * @param <T> The type of value stored in the state.
	 *
	 * @return The partitioned state object.
	 *
	 * @throws UnsupportedOperationException Thrown, if no partitioned state is available for the
	 *                                       function (function is not part of a KeyedStream).
	 */
	@PublicEvolving
	<T> ListState<T> getListState(ListStateDescriptor<T> stateProperties);

	/**
	 * Gets a handle to the system's key/value reducing state. This state is similar to the state
	 * accessed via {@link #getState(ValueStateDescriptor)}, but is optimized for state that
	 * aggregates values.
	 *
	 * <p>This state is only accessible if the function is executed on a KeyedStream.
	 *
	 * <pre>{@code
	 * DataStream<MyType> stream = ...;
	 * KeyedStream<MyType> keyedStream = stream.keyBy("id");
	 *
	 * keyedStream.map(new RichMapFunction<MyType, Tuple2<MyType, Long>>() {
	 *
	 *     private ReducingState<Long> state;
	 *
	 *     public void open(Configuration cfg) {
	 *         state = getRuntimeContext().getReducingState(
	 *                 new ReducingStateDescriptor<>("sum", (a, b) -> a + b, Long.class));
	 *     }
	 *
	 *     public Tuple2<MyType, Long> map(MyType value) {
	 *         state.add(value.count());
	 *         return new Tuple2<>(value, state.get());
	 *     }
	 * });
	 *
	 * }</pre>
	 *
	 * @param stateProperties The descriptor defining the properties of the state.
	 *
	 * @param <T> The type of value stored in the state.
	 *
	 * @return The partitioned state object.
	 *
	 * @throws UnsupportedOperationException Thrown, if no partitioned state is available for the
	 *                                       function (function is not part of a KeyedStream).
	 */
	@PublicEvolving
	<T> ReducingState<T> getReducingState(ReducingStateDescriptor<T> stateProperties);

	/**
	 * Gets a handle to the system's key/value folding state. This state is similar to the state
	 * accessed via {@link #getState(ValueStateDescriptor)}, but is optimized for state that
	 * aggregates values with different types.
	 *
	 * <p>This state is only accessible if the function is executed on a KeyedStream.
	 *
	 * <pre>{@code
	 * DataStream<MyType> stream = ...;
	 * KeyedStream<MyType> keyedStream = stream.keyBy("id");
	 *
	 * keyedStream.map(new RichMapFunction<MyType, Tuple2<MyType, Long>>() {
	 *
	 *     private FoldingState<MyType, Long> state;
	 *
	 *     public void open(Configuration cfg) {
	 *         state = getRuntimeContext().getFoldingState(
	 *                 new FoldingStateDescriptor<>("sum", 0L, (a, b) -> a.count() + b, Long.class));
	 *     }
	 *
	 *     public Tuple2<MyType, Long> map(MyType value) {
	 *         state.add(value);
	 *         return new Tuple2<>(value, state.get());
	 *     }
	 * });
	 *
	 * }</pre>
	 *
	 * @param stateProperties The descriptor defining the properties of the state.
	 *
	 * @param <T> The type of the values that are added to the state ({@code MyType} in the
	 *            example above).
	 * @param <ACC> The type of the aggregated value held by the state ({@code Long} in the
	 *              example above).
	 *
	 * @return The partitioned state object.
	 *
	 * @throws UnsupportedOperationException Thrown, if no partitioned state is available for the
	 *                                       function (function is not part of a KeyedStream).
	 *
	 * @deprecated will be removed in a future version
	 */
	@PublicEvolving
	@Deprecated
	<T, ACC> FoldingState<T, ACC> getFoldingState(FoldingStateDescriptor<T, ACC> stateProperties);

	/**
	 * Gets a handle to the system's key/value map state. This state is similar to the state
	 * accessed via {@link #getState(ValueStateDescriptor)}, but is optimized for state that
	 * is composed of user-defined key-value pairs.
	 *
	 * <p>This state is only accessible if the function is executed on a KeyedStream.
	 *
	 * <pre>{@code
	 * DataStream<MyType> stream = ...;
	 * KeyedStream<MyType> keyedStream = stream.keyBy("id");
	 *
	 * keyedStream.map(new RichMapFunction<MyType, Tuple2<MyType, Long>>() {
	 *
	 *     private MapState<MyType, Long> state;
	 *
	 *     public void open(Configuration cfg) {
	 *         state = getRuntimeContext().getMapState(
	 *                 new MapStateDescriptor<>("sum", MyType.class, Long.class));
	 *     }
	 *
	 *     public Tuple2<MyType, Long> map(MyType value) {
	 *         return new Tuple2<>(value, state.get(value));
	 *     }
	 * });
	 *
	 * }</pre>
	 *
	 * @param stateProperties The descriptor defining the properties of the state.
	 *
	 * @param <UK> The type of the user keys stored in the state.
	 * @param <UV> The type of the user values stored in the state.
	 *
	 * @return The partitioned state object.
	 *
	 * @throws UnsupportedOperationException Thrown, if no partitioned state is available for the
	 *                                       function (function is not part of a KeyedStream).
	 */
	@PublicEvolving
	<UK, UV> MapState<UK, UV> getMapState(MapStateDescriptor<UK, UV> stateProperties);
}