/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.output;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.annotations.Inheritance;
import org.kiji.mapreduce.KijiTableContext;
import org.kiji.mapreduce.framework.KijiConfKeys;
import org.kiji.mapreduce.impl.DirectKijiTableWriterContext;
import org.kiji.schema.KijiURI;
/**
* The class DirectKijiTableMapReduceJobOutput is used to indicate the usage of a Kiji table
* as an output for a MapReduce job.
*
* <p>
* Use of this job output configuration is discouraged for many reasons:
* <ul>
* <li> It may induce a very high load on the target HBase cluster.
* <li> It may result in partial writes (e.g. if the job fails halfway through).
* </ul>
* The recommended way to write to HBase tables is through the {@link HFileMapReduceJobOutput}.
* </p>
*
* <h2>Configuring an output:</h2>
* <p>
* DirectKijiTableMapReduceJobOutput must be configured with the address of the Kiji table to
* write to and optionally the number of reduce tasks to use for the job:
* </p>
* <pre>
* <code>
* final MapReduceJobOutput kijiTableOutput =
* MapReduceJobOutputs.newDirectKijiTableMapReduceJobOutput(myURI);
* </code>
* </pre>
* @see HFileMapReduceJobOutput
*/
@ApiAudience.Public
@ApiStability.Stable
@Inheritance.Sealed
public class DirectKijiTableMapReduceJobOutput extends KijiTableMapReduceJobOutput {
  /** Configuration key controlling speculative execution of map tasks. */
  private static final String MAP_SPECULATIVE_EXECUTION_KEY =
      "mapred.map.tasks.speculative.execution";

  /** Configuration key controlling speculative execution of reduce tasks. */
  private static final String REDUCE_SPECULATIVE_EXECUTION_KEY =
      "mapred.reduce.tasks.speculative.execution";

  /** Default constructor. Accessible via {@link MapReduceJobOutputs}. */
  DirectKijiTableMapReduceJobOutput() {
  }

  /**
   * Creates a new <code>DirectKijiTableMapReduceJobOutput</code> instance
   * configured with zero reduce tasks.
   *
   * @param tableURI The Kiji table to write to.
   */
  DirectKijiTableMapReduceJobOutput(KijiURI tableURI) {
    this(tableURI, 0);
  }

  /**
   * Creates a new <code>DirectKijiTableMapReduceJobOutput</code> instance.
   *
   * @param tableURI The Kiji table to write to.
   * @param numReduceTasks The number of reduce tasks to use (use zero if using a producer).
   */
  DirectKijiTableMapReduceJobOutput(KijiURI tableURI, int numReduceTasks) {
    super(tableURI, numReduceTasks);
  }

  /**
   * {@inheritDoc}
   *
   * <p>
   *   After delegating to the parent configuration, this registers
   *   {@link DirectKijiTableWriterContext} as the Kiji table context class and turns off
   *   speculative execution for both map and reduce tasks.
   * </p>
   */
  @Override
  public void configure(Job job) throws IOException {
    // sets Hadoop output format, Kiji output table and # of reducers:
    super.configure(job);

    final Configuration conf = job.getConfiguration();

    // Kiji table context:
    conf.setClass(
        KijiConfKeys.KIJI_TABLE_CONTEXT_CLASS,
        DirectKijiTableWriterContext.class,
        KijiTableContext.class);

    // Since there's no "commit" operation for an entire task writing to a Kiji table,
    // do not use speculative execution when writing directly to a Kiji table.
    // This applies to reduce tasks as well as map tasks: when the job is configured with
    // reducers, they are the tasks emitting the direct writes, and a speculative duplicate
    // attempt would issue the same writes a second time.
    conf.setBoolean(MAP_SPECULATIVE_EXECUTION_KEY, false);
    conf.setBoolean(REDUCE_SPECULATIVE_EXECUTION_KEY, false);
  }

  /**
   * {@inheritDoc}
   *
   * @return {@link NullOutputFormat}, since this job output produces no Hadoop output.
   */
  @Override
  protected Class<? extends OutputFormat> getOutputFormatClass() {
    // No hadoop output:
    return NullOutputFormat.class;
  }
}