/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.batch.dataset.output;

import co.cask.cdap.common.lang.InstantiatorFactory;
import com.google.common.reflect.TypeToken;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * OutputFormat that wraps a root OutputFormat and provides an {@link OutputCommitter} that delegates to the
 * OutputCommitters of all named outputs registered through {@link MultipleOutputs}.
 *
 * <p>Records are written through the root OutputFormat only; output-spec checks and commits are performed by the
 * named outputs.
 *
 * @param <K> Type of key
 * @param <V> Type of value
 */
public class MultipleOutputsMainOutputWrapper<K, V> extends OutputFormat<K, V> {

  /** Configuration key under which the class name of the root OutputFormat is stored. */
  private static final String ROOT_OUTPUT_FORMAT =
    "co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputsMainOutputWrapper.rootOutputFormat";

  // Lazily created by getRootOutputFormat(JobContext).
  private OutputFormat<K, V> innerFormat;
  // Lazily created by getOutputCommitter(TaskAttemptContext).
  private OutputCommitter committer;

  /**
   * Returns the RecordWriter of the root OutputFormat.
   *
   * @param job the task attempt context whose configuration names the root OutputFormat
   * @return the RecordWriter created by the root OutputFormat
   */
  @Override
  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException {
    return getRootOutputFormat(job).getRecordWriter(job);
  }

  /**
   * Checks the output specs of every named output, each against its own named job context.
   *
   * @param context the job context listing the named outputs
   */
  @Override
  public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    // One factory per call is enough; it does not depend on the named output being processed.
    InstantiatorFactory instantiatorFactory = new InstantiatorFactory(false);
    for (String name : MultipleOutputs.getNamedOutputsList(context)) {
      Class<? extends OutputFormat> namedOutputFormatClass =
        MultipleOutputs.getNamedOutputFormatClass(context, name);
      JobContext namedContext = MultipleOutputs.getNamedJobContext(context, name);
      OutputFormat<?, ?> outputFormat =
        newNamedOutputFormat(instantiatorFactory, namedOutputFormatClass, namedContext.getConfiguration());
      outputFormat.checkOutputSpecs(namedContext);
    }
  }

  /**
   * Sets an OutputFormat class as the root OutputFormat for the Hadoop job.
   *
   * @param job the job on which to set the OutputFormat class
   * @param outputFormatClass the class to set as the root OutputFormat for the job
   * @param outputConfig the configuration to set for the specified OutputFormat; every entry is copied into the
   *                     job's configuration
   */
  public static void setRootOutputFormat(Job job, String outputFormatClass, Map<String, String> outputConfig) {
    Configuration jobConf = job.getConfiguration();
    jobConf.set(ROOT_OUTPUT_FORMAT, outputFormatClass);
    for (Map.Entry<String, String> confEntry : outputConfig.entrySet()) {
      jobConf.set(confEntry.getKey(), confEntry.getValue());
    }
  }

  /**
   * Returns the root OutputFormat, instantiating it on first use from the class stored under
   * {@link #ROOT_OUTPUT_FORMAT}.
   *
   * <p>The root OutputFormat is used only for writing, not for checking output specs or committing of the output,
   * because the root is also in the delegates, which check the output spec and commit the output.
   *
   * <p>NOTE(review): the fallback class {@link FileOutputFormat} is abstract, so instantiation fails with a
   * {@link RuntimeException} unless {@link #setRootOutputFormat(Job, String, Map)} was called on the job.
   *
   * @param context the context whose configuration names the root OutputFormat class
   * @return the cached root OutputFormat instance
   */
  private OutputFormat<K, V> getRootOutputFormat(JobContext context) {
    if (innerFormat == null) {
      Configuration conf = context.getConfiguration();
      @SuppressWarnings("unchecked")
      Class<OutputFormat<K, V>> rootClass =
        (Class<OutputFormat<K, V>>) conf.getClass(ROOT_OUTPUT_FORMAT, FileOutputFormat.class);
      // ReflectionUtils also passes the Configuration to formats implementing Configurable, which a bare
      // Class.newInstance() never did. Instantiation failures still surface as RuntimeException.
      innerFormat = ReflectionUtils.newInstance(rootClass, conf);
    }
    return innerFormat;
  }

  /**
   * Returns a {@link MultipleOutputsCommitter} that commits for the root output format as well as all delegate
   * output formats. The committer is created on the first call and reused afterwards.
   *
   * @param context the task attempt context listing the named outputs
   * @return the committer delegating to the OutputCommitter of each named output
   */
  @Override
  public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
    if (committer == null) {
      InstantiatorFactory instantiatorFactory = new InstantiatorFactory(false);
      Map<String, OutputCommitter> committers = new HashMap<>();
      for (String name : MultipleOutputs.getNamedOutputsList(context)) {
        Class<? extends OutputFormat> namedOutputFormatClass =
          MultipleOutputs.getNamedOutputFormatClass(context, name);
        TaskAttemptContext namedContext = MultipleOutputs.getNamedTaskContext(context, name);
        OutputFormat<?, ?> outputFormat =
          newNamedOutputFormat(instantiatorFactory, namedOutputFormatClass, namedContext.getConfiguration());
        committers.put(name, outputFormat.getOutputCommitter(namedContext));
      }
      committer = new MultipleOutputsCommitter(committers);
    }
    return committer;
  }

  /**
   * Instantiates the OutputFormat of a named output and hands it the named output's configuration.
   *
   * @param instantiatorFactory the factory used to create the instance
   * @param formatClass the OutputFormat class of the named output
   * @param conf the configuration of the named output
   * @return the new OutputFormat instance
   */
  private static OutputFormat<?, ?> newNamedOutputFormat(InstantiatorFactory instantiatorFactory,
                                                         Class<? extends OutputFormat> formatClass,
                                                         Configuration conf) {
    OutputFormat<?, ?> outputFormat = instantiatorFactory.get(TypeToken.of(formatClass)).create();
    // The factory is given no Configuration, so pass it on explicitly; this is a no-op for formats that are
    // not Configurable.
    ReflectionUtils.setConf(outputFormat, conf);
    return outputFormat;
  }
}