/**
* Copyright 2011-2017 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.runtime.stage.output;
import java.io.IOException;
import java.lang.reflect.Method;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.util.ReflectionUtils;
import com.asakusafw.runtime.core.Result;
import com.asakusafw.runtime.flow.ResultOutput;
import com.asakusafw.runtime.stage.StageOutput;
/**
 * A driver for configuring stage outputs.
 * <p>
 * On the client side, {@link #set(Job, String, Collection)} records each output declaration
 * (name, format, key type and value type) into the job configuration.
 * On the task side, an instance of this class reads those declarations back and creates the
 * corresponding result sinks lazily, on the first {@link #getResultSink(String)} call for each name.
 * </p>
 */
public class StageOutputDriver {

    static final Log LOG = LogFactory.getLog(StageOutputDriver.class);

    // configuration key which holds the list of declared output names
    private static final String K_NAMES = "com.asakusafw.stage.output.names"; //$NON-NLS-1$

    // configuration key prefixes; the output name is appended to each of them
    private static final String K_FORMAT_PREFIX = "com.asakusafw.stage.output.format."; //$NON-NLS-1$

    private static final String K_KEY_PREFIX = "com.asakusafw.stage.output.key."; //$NON-NLS-1$

    private static final String K_VALUE_PREFIX = "com.asakusafw.stage.output.value."; //$NON-NLS-1$

    // counter group for the per-output record counters (the counter name is the output name)
    private static final String COUNTER_GROUP = "com.asakusafw.stage.output.RecordCounters"; //$NON-NLS-1$

    // name of the FileOutputFormat method which is invoked reflectively
    private static final String METHOD_SET_OUTPUT_NAME = "setOutputName"; //$NON-NLS-1$

    // declared output name -> sink; the value stays null until the sink is first requested
    private final Map<String, ResultOutput<?>> resultSinks;

    private final TaskInputOutputContext<?, ?, ?, ?> context;

    /**
     * Creates a new instance.
     * The declared output names are read from the configuration of the context,
     * but no sinks are created until they are requested.
     * @param context the current context
     * @throws IOException if failed to initialize this driver
     * @throws InterruptedException if interrupted while initializing this driver
     * @throws IllegalArgumentException if the parameter is {@code null}
     */
    public StageOutputDriver(
            TaskInputOutputContext<?, ?, ?, ?> context) throws IOException, InterruptedException {
        if (context == null) {
            throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
        }
        this.context = context;
        this.resultSinks = prepareSinks(context);
    }

    /**
     * Builds the initial sink table: one entry per declared output name, each mapped to {@code null}.
     * @param context the current context
     * @return the sink table whose values are not yet initialized
     */
    private static Map<String, ResultOutput<?>> prepareSinks(TaskInputOutputContext<?, ?, ?, ?> context) {
        assert context != null;
        Map<String, ResultOutput<?>> results = new HashMap<>();
        Configuration conf = context.getConfiguration();
        for (String name : conf.getStringCollection(K_NAMES)) {
            // null marks "declared, but not yet built"
            results.put(name, null);
        }
        return results;
    }

    /**
     * Sets the output name into the context by reflectively invoking
     * {@code FileOutputFormat.setOutputName(JobContext, String)}.
     * @param localContext the context to be configured
     * @param name the output name
     * @throws IOException if the method is not available or its invocation was failed
     */
    private void setOutputFilePrefix(JobContext localContext, String name) throws IOException {
        assert localContext != null;
        assert name != null;
        try {
            Method method = FileOutputFormat.class.getDeclaredMethod(
                    METHOD_SET_OUTPUT_NAME, JobContext.class, String.class);
            // the method is looked up as a declared method and forced accessible before the call
            method.setAccessible(true);
            method.invoke(null, localContext, name);
        } catch (Exception e) {
            throw new IOException(MessageFormat.format(
                    "Failed to configure output name of \"{0}\" ([MAPREDUCE-370] may be not applied)",
                    name), e);
        }
    }

    /**
     * Returns the result sink object with the specified name.
     * Clients must register the result sink before launching the job by using {@link #set(Job, String, Collection)}.
     * The sink is created on the first request and the same object is returned for later requests
     * until this driver is closed.
     * @param <T> the output data type
     * @param name the sink name
     * @return the corresponding result sink
     * @throws IOException if failed to initialize the target sink
     * @throws InterruptedException if interrupted while initializing the target sink
     * @throws IllegalArgumentException if the parameter is {@code null}, or the output is not declared
     * @throws IllegalStateException if the declaration of the output is incomplete
     */
    @SuppressWarnings("unchecked")
    public synchronized <T extends Writable> Result<T> getResultSink(
            String name) throws IOException, InterruptedException {
        if (name == null) {
            throw new IllegalArgumentException("name must not be null"); //$NON-NLS-1$
        }
        if (resultSinks.containsKey(name) == false) {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Output \"{0}\" is not declared",
                    name));
        }
        ResultOutput<?> sink = resultSinks.get(name);
        if (sink == null) {
            sink = buildSink(name);
            resultSinks.put(name, sink);
        }
        return (Result<T>) sink;
    }

    /**
     * Creates the sink for the output, using its format, key type and value type in the configuration.
     * @param name the output name
     * @return the created sink
     * @throws IOException if failed to initialize the sink
     * @throws InterruptedException if interrupted while initializing the sink
     * @throws IllegalStateException if the format, key type, or value type is not declared
     */
    private ResultOutput<?> buildSink(String name) throws IOException, InterruptedException {
        assert name != null;
        Configuration conf = context.getConfiguration();
        @SuppressWarnings("rawtypes")
        Class<? extends OutputFormat> formatClass = conf.getClass(
                getPropertyName(K_FORMAT_PREFIX, name),
                null,
                OutputFormat.class);
        Class<?> keyClass = conf.getClass(getPropertyName(K_KEY_PREFIX, name), null);
        Class<?> valueClass = conf.getClass(getPropertyName(K_VALUE_PREFIX, name), null);
        if (formatClass == null) {
            throw new IllegalStateException(MessageFormat.format(
                    "OutputFormat is not declared for output \"{0}\"",
                    name));
        }
        if (keyClass == null) {
            throw new IllegalStateException(MessageFormat.format(
                    "Output key type is not declared for output \"{0}\"",
                    name));
        }
        if (valueClass == null) {
            throw new IllegalStateException(MessageFormat.format(
                    "Output value type is not declared for output \"{0}\"",
                    name));
        }
        List<Counter> counters = getCounters(name);
        if (TemporaryOutputFormat.class.isAssignableFrom(formatClass)) {
            return buildTemporarySink(name, valueClass, counters);
        } else {
            return buildNormalSink(name, formatClass, keyClass, valueClass, counters);
        }
    }

    /**
     * Returns the counters for the output: the output record counter of the current task type
     * (map or reduce), and the per-output counter in the group {@link #COUNTER_GROUP}.
     * Counters are treated as best-effort: if obtaining them raises a runtime exception,
     * this only logs a warning and returns an empty list.
     * @param name the output name
     * @return the counters, or an empty list if they are not available
     */
    private List<Counter> getCounters(String name) {
        assert name != null;
        try {
            List<Counter> results = new ArrayList<>();
            if (context.getTaskAttemptID().getTaskType() == TaskType.MAP) {
                results.add(context.getCounter(TaskCounter.MAP_OUTPUT_RECORDS));
            } else {
                results.add(context.getCounter(TaskCounter.REDUCE_OUTPUT_RECORDS));
            }
            results.add(context.getCounter(COUNTER_GROUP, name));
            return results;
        } catch (RuntimeException e) {
            LOG.warn("Failed to create counters", e);
            return Collections.emptyList();
        }
    }

    /**
     * Creates a sink which writes into {@link TemporaryOutputFormat}.
     * @param name the output name
     * @param valueClass the output value type
     * @param counters the counters for the output
     * @return the created sink
     * @throws IOException if failed to create the record writer
     * @throws InterruptedException if interrupted while creating the record writer
     */
    private ResultOutput<?> buildTemporarySink(
            String name,
            Class<?> valueClass,
            List<Counter> counters) throws IOException, InterruptedException {
        assert context != null;
        assert name != null;
        assert valueClass != null;
        assert counters != null;
        TemporaryOutputFormat<?> format = new TemporaryOutputFormat<>();
        RecordWriter<?, ?> writer = format.createRecordWriter(context, name, valueClass);
        return new ResultOutput<Writable>(context, writer, counters);
    }

    /**
     * Creates a sink which writes into a general {@link OutputFormat}.
     * The format is instantiated with a job-local configuration that carries the output format,
     * key type and value type of this output.
     * @param name the output name
     * @param formatClass the output format class
     * @param keyClass the output key type
     * @param valueClass the output value type
     * @param counters the counters for the output
     * @return the created sink
     * @throws IOException if failed to create the record writer
     * @throws InterruptedException if interrupted while creating the record writer
     */
    private ResultOutput<?> buildNormalSink(
            String name,
            @SuppressWarnings("rawtypes") Class<? extends OutputFormat> formatClass,
            Class<?> keyClass,
            Class<?> valueClass,
            List<Counter> counters) throws IOException, InterruptedException {
        assert context != null;
        assert name != null;
        assert formatClass != null;
        assert keyClass != null;
        assert valueClass != null;
        assert counters != null;
        Job job = Job.getInstance(context.getConfiguration());
        job.setOutputFormatClass(formatClass);
        job.setOutputKeyClass(keyClass);
        job.setOutputValueClass(valueClass);
        TaskAttemptContext localContext = new TaskAttemptContextImpl(
                job.getConfiguration(),
                context.getTaskAttemptID());
        if (FileOutputFormat.class.isAssignableFrom(formatClass)) {
            setOutputFilePrefix(localContext, name);
        }
        OutputFormat<?, ?> format = ReflectionUtils.newInstance(
                formatClass,
                localContext.getConfiguration());
        RecordWriter<?, ?> writer = format.getRecordWriter(localContext);
        // the counters must be handed over here too, as the temporary sink does;
        // otherwise records written through this sink are never counted
        return new ResultOutput<Writable>(localContext, writer, counters);
    }

    /**
     * Closes this driver and finalizes all result sinks.
     * Every opened sink is attempted to be closed even if closing another sink raised
     * {@link IOException}; the first such exception is rethrown after all sinks were processed,
     * and the later ones are attached to it as suppressed exceptions.
     * A sink which was failed to close is kept, so that a later invocation retries it.
     * @throws IOException if failed to finalize some result sinks
     * @throws InterruptedException if interrupted while disposing the driver
     */
    public synchronized void close() throws IOException, InterruptedException {
        IOException failure = null;
        for (Map.Entry<String, ResultOutput<?>> entry : resultSinks.entrySet()) {
            ResultOutput<?> output = entry.getValue();
            if (output == null) {
                continue;
            }
            try {
                output.close();
                // forget only the successfully closed sinks
                entry.setValue(null);
            } catch (IOException e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Sets the output specification for this job.
     * Outputs whose format is a {@link BridgeOutputFormat} are passed to
     * {@link BridgeOutputFormat} as a whole; each of the other outputs is recorded into the job
     * configuration. The base output path is set for {@link FileOutputFormat} and/or
     * {@link TemporaryOutputFormat} only if at least one of the latter outputs uses it.
     * @param job current job
     * @param outputPath base output path
     * @param outputList each output information
     * @throws IOException if failed to configure the output specification
     * @throws IllegalArgumentException if some parameters were {@code null},
     *     or some output names are not valid or duplicated
     * @since 0.2.5
     */
    public static void set(Job job, String outputPath, Collection<StageOutput> outputList) throws IOException {
        if (job == null) {
            throw new IllegalArgumentException("job must not be null"); //$NON-NLS-1$
        }
        if (outputPath == null) {
            throw new IllegalArgumentException("outputPath must not be null"); //$NON-NLS-1$
        }
        if (outputList == null) {
            throw new IllegalArgumentException("outputList must not be null"); //$NON-NLS-1$
        }
        List<StageOutput> bridgeOutputs = new ArrayList<>();
        List<StageOutput> normalOutputs = new ArrayList<>();
        for (StageOutput output : outputList) {
            Class<? extends OutputFormat<?, ?>> formatClass = output.getFormatClass();
            if (BridgeOutputFormat.class.isAssignableFrom(formatClass)) {
                bridgeOutputs.add(output);
            } else {
                normalOutputs.add(output);
            }
        }
        if (bridgeOutputs.isEmpty() == false) {
            BridgeOutputFormat.set(job, bridgeOutputs);
        }
        boolean sawFileOutput = false;
        boolean sawTemporaryOutput = false;
        for (StageOutput output : normalOutputs) {
            String name = output.getName();
            Class<?> keyClass = output.getKeyClass();
            Class<?> valueClass = output.getValueClass();
            Class<? extends OutputFormat<?, ?>> formatClass = output.getFormatClass();
            sawFileOutput |= FileOutputFormat.class.isAssignableFrom(formatClass);
            sawTemporaryOutput |= TemporaryOutputFormat.class.isAssignableFrom(formatClass);
            addOutput(job, name, formatClass, keyClass, valueClass);
        }
        if (sawFileOutput) {
            FileOutputFormat.setOutputPath(job, new Path(outputPath));
        }
        if (sawTemporaryOutput) {
            TemporaryOutputFormat.setOutputPath(job, new Path(outputPath));
        }
    }

    /**
     * Records a single output declaration into the job configuration.
     * @param job the target job
     * @param name the output name
     * @param formatClass the output format class
     * @param keyClass the output key type
     * @param valueClass the output value type
     * @throws IllegalArgumentException if the name is not valid, or is already declared
     */
    private static void addOutput(
            Job job,
            String name,
            Class<?> formatClass,
            Class<?> keyClass,
            Class<?> valueClass) {
        assert job != null;
        assert name != null;
        assert formatClass != null;
        assert keyClass != null;
        assert valueClass != null;
        if (isValidName(name) == false) {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Output name \"{0}\" is not valid",
                    name));
        }
        Configuration conf = job.getConfiguration();
        // sorted set: the names are stored in their natural order
        Set<String> names = new TreeSet<>(conf.getStringCollection(K_NAMES));
        if (names.contains(name)) {
            throw new IllegalArgumentException(MessageFormat.format(
                    "Output name \"{0}\" is already declared",
                    name));
        }
        names.add(name);
        conf.setStrings(K_NAMES, names.toArray(new String[names.size()]));
        conf.setClass(getPropertyName(K_FORMAT_PREFIX, name), formatClass, OutputFormat.class);
        conf.setClass(getPropertyName(K_KEY_PREFIX, name), keyClass, Object.class);
        conf.setClass(getPropertyName(K_VALUE_PREFIX, name), valueClass, Object.class);
    }

    /**
     * Returns the configuration key for the output.
     * @param prefix the key prefix
     * @param name the output name
     * @return the prefix followed by the output name
     */
    private static String getPropertyName(String prefix, String name) {
        assert prefix != null;
        assert name != null;
        return prefix + name;
    }

    /**
     * Returns whether the string can be used as an output name.
     * A valid name is not empty and consists only of ASCII alphabets and digits.
     * @param name the output name
     * @return {@code true} if the name is valid, otherwise {@code false}
     */
    private static boolean isValidName(String name) {
        assert name != null;
        if (name.isEmpty()) {
            // an empty name would yield a configuration key which is just the bare prefix
            return false;
        }
        for (char c : name.toCharArray()) {
            if (isValidNameChar(c) == false) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns whether the character is one of {@code 0-9}, {@code A-Z}, or {@code a-z}.
     * @param c the character
     * @return {@code true} if the character can appear in output names, otherwise {@code false}
     */
    private static boolean isValidNameChar(char c) {
        return ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
    }
}