/** * Copyright 2011-2017 Asakusa Framework Team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.asakusafw.runtime.stage.output; import java.io.IOException; import java.text.MessageFormat; import java.util.Map; import java.util.WeakHashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.DefaultCodec; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.util.ReflectionUtils; import com.asakusafw.runtime.io.ModelOutput; import com.asakusafw.runtime.stage.temporary.TemporaryStorage; /** * A temporary output format. * @param <T> target type * @since 0.2.5 * @version 0.8.0 */ public final class TemporaryOutputFormat<T> extends OutputFormat<NullWritable, T> { static final Log LOG = LogFactory.getLog(TemporaryOutputFormat.class); /** * The Hadoop property key of output name prefix. * @since 0.8.0 */ public static final String KEY_FILE_NAME = "com.asakusafw.temporary.output.name"; //$NON-NLS-1$ /** * The default output name prefix. */ public static final String DEFAULT_FILE_NAME = "part"; //$NON-NLS-1$ private static final String KEY_OUTPUT_PATH = "com.asakusafw.temporary.output"; //$NON-NLS-1$ private final Map<TaskAttemptID, FileOutputCommitter> commiterCache = new WeakHashMap<>(); @Override public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { if (context == null) { throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$ } Path path = getOutputPath(context); if (TemporaryOutputFormat.getOutputPath(context) == null) { throw new IOException("Temporary output path is not set"); } TokenCache.obtainTokensForNamenodes( context.getCredentials(), new Path[] { path }, context.getConfiguration()); if (path.getFileSystem(context.getConfiguration()).exists(path)) { throw new IOException(MessageFormat.format( "Output directory {0} already exists", path)); } } @Override public RecordWriter<NullWritable, T> getRecordWriter( TaskAttemptContext context) throws IOException, InterruptedException { @SuppressWarnings("unchecked") Class<T> valueClass = (Class<T>) context.getOutputValueClass(); String name = context.getConfiguration().get(KEY_FILE_NAME, DEFAULT_FILE_NAME); return createRecordWriter(context, name, valueClass); } /** * Creates a new {@link RecordWriter} to output temporary data. * @param <V> value type * @param context current context * @param name output name * @param dataType value type * @return the created writer * @throws IOException if failed to create a new {@link RecordWriter} * @throws InterruptedException if interrupted * @throws IllegalArgumentException if some parameters were {@code null} */ public <V> RecordWriter<NullWritable, V> createRecordWriter( TaskAttemptContext context, String name, Class<V> dataType) throws IOException, InterruptedException { if (context == null) { throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$ } if (name == null) { throw new IllegalArgumentException("name must not be null"); //$NON-NLS-1$ } if (dataType == null) { throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$ } CompressionCodec codec = null; Configuration conf = context.getConfiguration(); if (FileOutputFormat.getCompressOutput(context)) { Class<?> codecClass = FileOutputFormat.getOutputCompressorClass(context, DefaultCodec.class); codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf); } FileOutputCommitter committer = getOutputCommitter(context); Path file = new Path( committer.getWorkPath(), FileOutputFormat.getUniqueFile(context, name, "")); //$NON-NLS-1$ ModelOutput<V> out = TemporaryStorage.openOutput(conf, dataType, file, codec); return new RecordWriter<NullWritable, V>() { @Override public void write(NullWritable key, V value) throws IOException { out.write(value); } @Override public void close(TaskAttemptContext ignored) throws IOException { out.close(); } @Override public String toString() { return String.format("TemporaryOutput(%s)", file); //$NON-NLS-1$ } }; } @Override public FileOutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException { synchronized (this) { TaskAttemptID id = context.getTaskAttemptID(); FileOutputCommitter committer = commiterCache.get(id); if (committer == null) { committer = createOutputCommitter(context); } commiterCache.put(id, committer); return committer; } } private FileOutputCommitter createOutputCommitter(TaskAttemptContext context) throws IOException { assert context != null; if (getOutputPath(context).equals(FileOutputFormat.getOutputPath(context))) { return (FileOutputCommitter) new EmptyFileOutputFormat().getOutputCommitter(context); } else { return new FileOutputCommitter(getOutputPath(context), context); } } /** * Returns the output path. * @param context current context * @return the path * @throws IllegalArgumentException if some parameters were {@code null} * @see #setOutputPath(JobContext, Path) */ public static Path getOutputPath(JobContext context) { if (context == null) { throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$ } String pathString = context.getConfiguration().get(KEY_OUTPUT_PATH); if (pathString == null) { return null; } return new Path(pathString); } /** * Configures output path. * @param context current context * @param path target output path * @throws IllegalArgumentException if some parameters were {@code null} */ public static void setOutputPath(JobContext context, Path path) { if (context == null) { throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$ } if (path == null) { throw new IllegalArgumentException("path must not be null"); //$NON-NLS-1$ } context.getConfiguration().set(KEY_OUTPUT_PATH, path.toString()); } }