/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.transforms;
import java.io.Serializable;
import java.util.Comparator;
import org.apache.beam.sdk.transforms.Combine.BinaryCombineFn;
import org.apache.beam.sdk.transforms.display.DisplayData;
/**
* {@code PTransform}s for computing the minimum of the elements in a {@code PCollection}, or the
* minimum of the values associated with each key in a {@code PCollection} of {@code KV}s.
*
* <p>Example 1: get the minimum of a {@code PCollection} of {@code Double}s.
* <pre> {@code
* PCollection<Double> input = ...;
* PCollection<Double> min = input.apply(Min.doublesGlobally());
* } </pre>
*
* <p>Example 2: calculate the minimum of the {@code Integer}s
* associated with each unique key (which is of type {@code String}).
* <pre> {@code
* PCollection<KV<String, Integer>> input = ...;
* PCollection<KV<String, Integer>> minPerKey = input
* .apply(Min.<String>integersPerKey());
* } </pre>
*/
public class Min {

  private Min() {
    // do not instantiate
  }

  /**
   * Returns a {@code PTransform} that takes an input {@code PCollection<Integer>} and returns a
   * {@code PCollection<Integer>} whose contents is a single value that is the minimum of the input
   * {@code PCollection}'s elements, or {@code Integer.MAX_VALUE} if there are no elements.
   */
  public static Combine.Globally<Integer, Integer> integersGlobally() {
    return Combine.globally(new MinIntegerFn());
  }

  /**
   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Integer>>} and
   * returns a {@code PCollection<KV<K, Integer>>} that contains an output element mapping each
   * distinct key in the input {@code PCollection} to the minimum of the values associated with that
   * key in the input {@code PCollection}.
   *
   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
   */
  public static <K> Combine.PerKey<K, Integer, Integer> integersPerKey() {
    return Combine.<K, Integer, Integer>perKey(new MinIntegerFn());
  }

  /**
   * Returns a {@code PTransform} that takes an input {@code PCollection<Long>} and returns a {@code
   * PCollection<Long>} whose contents is the minimum of the input {@code PCollection}'s elements,
   * or {@code Long.MAX_VALUE} if there are no elements.
   */
  public static Combine.Globally<Long, Long> longsGlobally() {
    return Combine.globally(new MinLongFn());
  }

  /**
   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Long>>} and returns a
   * {@code PCollection<KV<K, Long>>} that contains an output element mapping each distinct key in
   * the input {@code PCollection} to the minimum of the values associated with that key in the
   * input {@code PCollection}.
   *
   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
   */
  public static <K> Combine.PerKey<K, Long, Long> longsPerKey() {
    return Combine.<K, Long, Long>perKey(new MinLongFn());
  }

  /**
   * Returns a {@code PTransform} that takes an input {@code PCollection<Double>} and returns a
   * {@code PCollection<Double>} whose contents is the minimum of the input {@code PCollection}'s
   * elements, or {@code Double.POSITIVE_INFINITY} if there are no elements.
   *
   * <p>Values are compared with {@link Math#min(double, double)}: a {@code NaN} element makes the
   * result {@code NaN}, and {@code -0.0} is considered smaller than {@code 0.0}.
   */
  public static Combine.Globally<Double, Double> doublesGlobally() {
    return Combine.globally(new MinDoubleFn());
  }

  /**
   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Double>>} and returns
   * a {@code PCollection<KV<K, Double>>} that contains an output element mapping each distinct key
   * in the input {@code PCollection} to the minimum of the values associated with that key in the
   * input {@code PCollection}.
   *
   * <p>Values are compared with {@link Math#min(double, double)}: a {@code NaN} value makes the
   * result for its key {@code NaN}, and {@code -0.0} is considered smaller than {@code 0.0}.
   *
   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
   */
  public static <K> Combine.PerKey<K, Double, Double> doublesPerKey() {
    return Combine.<K, Double, Double>perKey(new MinDoubleFn());
  }

  /**
   * A {@code CombineFn} that computes the minimum of a collection of {@code Integer}s, useful as an
   * argument to {@link Combine#globally} or {@link Combine#perKey}.
   */
  public static Combine.BinaryCombineIntegerFn ofIntegers() {
    return new Min.MinIntegerFn();
  }

  /**
   * A {@code CombineFn} that computes the minimum of a collection of {@code Long}s, useful as an
   * argument to {@link Combine#globally} or {@link Combine#perKey}.
   */
  public static Combine.BinaryCombineLongFn ofLongs() {
    return new Min.MinLongFn();
  }

  /**
   * A {@code CombineFn} that computes the minimum of a collection of {@code Double}s, useful as an
   * argument to {@link Combine#globally} or {@link Combine#perKey}.
   *
   * <p>Values are compared with {@link Math#min(double, double)}: {@code NaN} propagates and
   * {@code -0.0} is considered smaller than {@code 0.0}.
   */
  public static Combine.BinaryCombineDoubleFn ofDoubles() {
    return new Min.MinDoubleFn();
  }

  /**
   * A {@code CombineFn} that computes the minimum of a collection of elements of type {@code T}
   * using an arbitrary {@link Comparator} and an {@code identity},
   * useful as an argument to {@link Combine#globally} or {@link Combine#perKey}.
   *
   * @param <T> the type of the values being compared
   * @param <ComparatorT> the comparator type, which must also be {@link Serializable}
   * @param identity the value the returned function reports from {@code identity()}
   * @param comparator the ordering used to decide which of two values is smaller
   * @throws NullPointerException if {@code comparator} is {@code null}
   */
  public static <T, ComparatorT extends Comparator<? super T> & Serializable>
      BinaryCombineFn<T> of(T identity, ComparatorT comparator) {
    return new MinFn<T>(identity, comparator);
  }

  /**
   * A {@code CombineFn} that computes the minimum of a collection of elements of type {@code T}
   * using an arbitrary {@link Comparator}, useful as an argument to {@link Combine#globally} or
   * {@link Combine#perKey}.
   *
   * <p>The returned function reports {@code null} from {@code identity()}.
   *
   * @param <T> the type of the values being compared
   * @param <ComparatorT> the comparator type, which must also be {@link Serializable}
   * @param comparator the ordering used to decide which of two values is smaller
   * @throws NullPointerException if {@code comparator} is {@code null}
   */
  public static <T, ComparatorT extends Comparator<? super T> & Serializable>
      BinaryCombineFn<T> of(ComparatorT comparator) {
    return new MinFn<T>(null, comparator);
  }

  /**
   * A {@code CombineFn} that computes the minimum of a collection of elements of type {@code T}
   * according to their natural ordering, with an explicit {@code identity}; useful as an argument
   * to {@link Combine#globally} or {@link Combine#perKey}.
   *
   * @param <T> the type of the values being compared
   * @param identity the value the returned function reports from {@code identity()}
   */
  public static <T extends Comparable<? super T>> BinaryCombineFn<T> naturalOrder(T identity) {
    return new MinFn<T>(identity, new Top.Natural<T>());
  }

  /**
   * A {@code CombineFn} that computes the minimum of a collection of elements of type {@code T}
   * according to their natural ordering; useful as an argument to {@link Combine#globally} or
   * {@link Combine#perKey}.
   *
   * <p>The returned function reports {@code null} from {@code identity()}.
   *
   * @param <T> the type of the values being compared
   */
  public static <T extends Comparable<? super T>> BinaryCombineFn<T> naturalOrder() {
    return new MinFn<T>(null, new Top.Natural<T>());
  }

  /**
   * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
   * PCollection<T>} whose contents is the minimum according to the natural ordering of {@code T}
   * of the input {@code PCollection}'s elements, or {@code null} if there are no elements.
   */
  public static <T extends Comparable<? super T>>
      Combine.Globally<T, T> globally() {
    return Combine.<T, T>globally(Min.<T>naturalOrder());
  }

  /**
   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, T>>} and returns a
   * {@code PCollection<KV<K, T>>} that contains an output element mapping each distinct key in the
   * input {@code PCollection} to the minimum according to the natural ordering of {@code T} of the
   * values associated with that key in the input {@code PCollection}.
   *
   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
   */
  public static <K, T extends Comparable<? super T>>
      Combine.PerKey<K, T, T> perKey() {
    return Combine.<K, T, T>perKey(Min.<T>naturalOrder());
  }

  /**
   * Returns a {@code PTransform} that takes an input {@code PCollection<T>} and returns a {@code
   * PCollection<T>} whose contents is the minimum of the input {@code PCollection}'s elements, or
   * {@code null} if there are no elements.
   *
   * @param comparator the ordering used to decide which of two values is smaller
   * @throws NullPointerException if {@code comparator} is {@code null}
   */
  public static <T, ComparatorT extends Comparator<? super T> & Serializable>
      Combine.Globally<T, T> globally(ComparatorT comparator) {
    return Combine.<T, T>globally(Min.<T, ComparatorT>of(comparator));
  }

  /**
   * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, T>>} and returns a
   * {@code PCollection<KV<K, T>>} that contains one output element per key mapping each
   * to the minimum of the values associated with that key in the input {@code PCollection}.
   *
   * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
   *
   * @param comparator the ordering used to decide which of two values is smaller
   * @throws NullPointerException if {@code comparator} is {@code null}
   */
  public static <K, T, ComparatorT extends Comparator<? super T> & Serializable>
      Combine.PerKey<K, T, T> perKey(ComparatorT comparator) {
    return Combine.<K, T, T>perKey(Min.<T, ComparatorT>of(comparator));
  }

  /////////////////////////////////////////////////////////////////////////////

  /**
   * Binary combine function that keeps the smaller of two values according to a caller-supplied
   * comparator and reports a caller-supplied identity.
   */
  private static class MinFn<T> extends BinaryCombineFn<T> {
    private final T identity;
    private final Comparator<? super T> comparator;

    /**
     * @param identity value returned by {@link #identity()}; may be {@code null}
     * @param comparator ordering used by {@link #apply}; must not be {@code null}
     */
    private <ComparatorT extends Comparator<? super T> & Serializable> MinFn(
        T identity, ComparatorT comparator) {
      // Fail at construction time rather than later inside apply() or populateDisplayData().
      if (comparator == null) {
        throw new NullPointerException("comparator must not be null");
      }
      this.identity = identity;
      this.comparator = comparator;
    }

    @Override
    public T identity() {
      return identity;
    }

    /** Returns the smaller of the two arguments; when they compare as equal, {@code left} wins. */
    @Override
    public T apply(T left, T right) {
      return comparator.compare(left, right) <= 0 ? left : right;
    }

    /** Adds the comparator's class to the display data under the {@code "comparer"} key. */
    @Override
    public void populateDisplayData(DisplayData.Builder builder) {
      super.populateDisplayData(builder);
      builder.add(DisplayData.item("comparer", comparator.getClass())
          .withLabel("Record Comparer"));
    }
  }

  /** Minimum of {@code int}s, with {@code Integer.MAX_VALUE} as the identity. */
  private static class MinIntegerFn extends Combine.BinaryCombineIntegerFn {
    @Override
    public int apply(int left, int right) {
      return Math.min(left, right);
    }

    @Override
    public int identity() {
      return Integer.MAX_VALUE;
    }
  }

  /** Minimum of {@code long}s, with {@code Long.MAX_VALUE} as the identity. */
  private static class MinLongFn extends Combine.BinaryCombineLongFn {
    @Override
    public long apply(long left, long right) {
      return Math.min(left, right);
    }

    @Override
    public long identity() {
      return Long.MAX_VALUE;
    }
  }

  /** Minimum of {@code double}s, with {@code Double.POSITIVE_INFINITY} as the identity. */
  private static class MinDoubleFn extends Combine.BinaryCombineDoubleFn {
    @Override
    public double apply(double left, double right) {
      // Math.min is symmetric in its arguments: NaN always propagates and -0.0 is ordered below
      // 0.0. A plain "left <= right" comparison gives different answers depending on which side
      // the NaN or signed zero is on, so the combined result would depend on merge order.
      return Math.min(left, right);
    }

    @Override
    public double identity() {
      return Double.POSITIVE_INFINITY;
    }
  }
}