/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.spark.aggregators;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Map;
import java.util.TreeMap;
import org.apache.beam.runners.spark.translation.SparkRuntimeContext;
import org.apache.beam.sdk.coders.CannotProvideCoderException;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.transforms.Combine;
/**
* This class wraps a map of named aggregators. Spark expects that all accumulators be declared
* before a job is launched. Beam allows aggregators to be used and incremented on the fly.
* We create a map of named aggregators and instantiate it in the Spark context before the job
* is launched. We can then add aggregators on the fly in Spark.
*/
public class NamedAggregators implements Serializable {
  /**
   * Map from aggregator name to current state. A {@link TreeMap} is used, so iteration
   * (e.g. in {@link #toString()} and {@link #renderAll()}) is ordered by aggregator name.
   */
  private final Map<String, State<?, ?, ?>> mNamedAggregators = new TreeMap<>();

  /**
   * Constructs a new, empty NamedAggregators instance.
   */
  public NamedAggregators() {
  }

  /**
   * Constructs a new named aggregators instance that contains a mapping from the specified
   * {@code name} to the associated initial state.
   *
   * @param name  Name of aggregator.
   * @param state Associated State.
   */
  public NamedAggregators(String name, State<?, ?, ?> state) {
    this.mNamedAggregators.put(name, state);
  }

  /**
   * Looks up an aggregator by name and returns its rendered (output) value.
   *
   * @param name      Name of aggregator to retrieve.
   * @param typeClass Type class to cast the value to.
   * @param <T>       Type to be returned.
   * @return the value of the aggregator associated with the specified name,
   *     or <code>null</code> if the specified aggregator could not be found.
   * @throws ClassCastException if the rendered value is not an instance of {@code typeClass}.
   */
  public <T> T getValue(String name, Class<T> typeClass) {
    final State<?, ?, ?> state = mNamedAggregators.get(name);
    return state != null ? typeClass.cast(state.render()) : null;
  }

  /**
   * Renders every registered aggregator.
   *
   * @return an immutable copy mapping all the aggregator names to their <b>rendered</b> values
   */
  public Map<String, ?> renderAll() {
    return ImmutableMap.copyOf(
        Maps.transformValues(
            mNamedAggregators,
            new Function<State<?, ?, ?>, Object>() {
              @Override
              public Object apply(State<?, ?, ?> state) {
                return state.render();
              }
            }));
  }

  /**
   * Merges another NamedAggregators instance with this instance.
   *
   * <p>Aggregators present only in {@code other} are adopted as-is (the same {@link State}
   * object is stored, not a copy); aggregators present in both are combined through
   * {@link State#merge(State)}.
   *
   * @param other The other instance of named aggregators to merge.
   * @return This instance of Named aggregators with associated states updated to reflect the
   *     other instance's aggregators.
   */
  public NamedAggregators merge(NamedAggregators other) {
    for (Map.Entry<String, State<?, ?, ?>> e : other.mNamedAggregators.entrySet()) {
      String key = e.getKey();
      State<?, ?, ?> otherValue = e.getValue();
      State<?, ?, ?> value = mNamedAggregators.get(key);
      if (value == null) {
        mNamedAggregators.put(key, otherValue);
      } else {
        mNamedAggregators.put(key, merge(value, otherValue));
      }
    }
    return this;
  }

  /**
   * Helper method to merge States whose generic types aren't provably the same,
   * so require some casting.
   *
   * @param s1 the state that receives the merge.
   * @param s2 the state merged into {@code s1}.
   * @return the result of {@code s1.merge(s2)}.
   */
  @SuppressWarnings("unchecked")
  private static <InputT, InterT, OutputT> State<InputT, InterT, OutputT> merge(
      State<?, ?, ?> s1,
      State<?, ?, ?> s2) {
    return ((State<InputT, InterT, OutputT>) s1).merge((State<InputT, InterT, OutputT>) s2);
  }

  /**
   * Returns the aggregators formatted as {@code "name: renderedValue "} pairs, concatenated in
   * name order (each pair, including the last, is followed by a single space).
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    for (Map.Entry<String, State<?, ?, ?>> e : mNamedAggregators.entrySet()) {
      sb.append(e.getKey()).append(": ").append(e.getValue().render()).append(" ");
    }
    return sb.toString();
  }

  /**
   * The state of a single named aggregator.
   *
   * @param <InputT>  Input data type
   * @param <InterT>  Intermediate data type (useful for averages)
   * @param <OutputT> Output data type
   */
  public interface State<InputT, InterT, OutputT> extends Serializable {
    /**
     * Folds a new element into this state.
     *
     * @param element new element to update state
     */
    void update(InputT element);

    /**
     * Combines another state with this one.
     *
     * @param other the state to merge with this state.
     * @return the state resulting from the merge.
     */
    State<InputT, InterT, OutputT> merge(State<InputT, InterT, OutputT> other);

    /**
     * @return the current intermediate (accumulator) value.
     */
    InterT current();

    /**
     * @return the output value derived from the current intermediate value.
     */
    OutputT render();

    /**
     * @return the combine function backing this state.
     */
    Combine.CombineFn<InputT, InterT, OutputT> getCombineFn();
  }

  /**
   * A {@link State} backed by a {@link Combine.CombineFn}: the intermediate value is the
   * function's accumulator.
   *
   * @param <InputT>  Input data type
   * @param <InterT>  Intermediate data type (useful for averages)
   * @param <OutputT> Output data type
   */
  public static class CombineFunctionState<InputT, InterT, OutputT>
      implements State<InputT, InterT, OutputT> {

    // These fields cannot be final: they are re-populated by readObject on deserialization.
    private Combine.CombineFn<InputT, InterT, OutputT> combineFn;
    private Coder<InputT> inCoder;
    private SparkRuntimeContext ctxt;
    // Written and read manually through the accumulator coder in writeObject / readObject.
    private transient InterT state;

    /**
     * Creates a state holding a fresh accumulator obtained from {@code combineFn}.
     *
     * @param combineFn the combine function that creates, updates, merges and renders the
     *                  accumulator.
     * @param inCoder   coder for input elements; used to look up the accumulator coder during
     *                  serialization.
     * @param ctxt      runtime context supplying the coder registry used during serialization.
     */
    public CombineFunctionState(
        Combine.CombineFn<InputT, InterT, OutputT> combineFn,
        Coder<InputT> inCoder,
        SparkRuntimeContext ctxt) {
      this.combineFn = combineFn;
      this.inCoder = inCoder;
      this.ctxt = ctxt;
      this.state = combineFn.createAccumulator();
    }

    @Override
    public void update(InputT element) {
      // CombineFn#addInput returns the new accumulator; it is allowed, but not required, to
      // mutate the one passed in. Keep the returned value so updates are not lost when the
      // accumulator type is immutable.
      state = combineFn.addInput(state, element);
    }

    @Override
    public State<InputT, InterT, OutputT> merge(State<InputT, InterT, OutputT> other) {
      this.state = combineFn.mergeAccumulators(ImmutableList.of(current(), other.current()));
      return this;
    }

    @Override
    public InterT current() {
      return state;
    }

    @Override
    public OutputT render() {
      return combineFn.extractOutput(state);
    }

    @Override
    public Combine.CombineFn<InputT, InterT, OutputT> getCombineFn() {
      return combineFn;
    }

    /**
     * Looks up the coder for the accumulator type from the combine function.
     *
     * @return the accumulator coder.
     * @throws IllegalStateException if no coder can be provided for the accumulator.
     */
    private Coder<InterT> accumulatorCoder() {
      try {
        return combineFn.getAccumulatorCoder(ctxt.getCoderRegistry(), inCoder);
      } catch (CannotProvideCoderException e) {
        throw new IllegalStateException("Could not determine coder for accumulator", e);
      }
    }

    /**
     * Custom serialization: writes the context, combine function and input coder as objects,
     * then the accumulator encoded with its coder. {@link #readObject} reads in the same order.
     */
    private void writeObject(ObjectOutputStream oos) throws IOException {
      oos.writeObject(ctxt);
      oos.writeObject(combineFn);
      oos.writeObject(inCoder);
      accumulatorCoder().encode(state, oos);
    }

    /**
     * Custom deserialization mirroring {@link #writeObject}: the context, combine function and
     * input coder must be restored first because they are needed to obtain the coder that
     * decodes the accumulator.
     */
    @SuppressWarnings("unchecked")
    private void readObject(ObjectInputStream ois) throws IOException, ClassNotFoundException {
      ctxt = (SparkRuntimeContext) ois.readObject();
      combineFn = (Combine.CombineFn<InputT, InterT, OutputT>) ois.readObject();
      inCoder = (Coder<InputT>) ois.readObject();
      state = accumulatorCoder().decode(ois);
    }
  }
}