/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.bulkimport;
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.ReflectionUtils;
import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.mapreduce.JobConfigurationException;
import org.kiji.mapreduce.KijiMapReduceJob;
import org.kiji.mapreduce.KijiMapper;
import org.kiji.mapreduce.KijiReducer;
import org.kiji.mapreduce.MapReduceJobInput;
import org.kiji.mapreduce.MapReduceJobOutput;
import org.kiji.mapreduce.bulkimport.impl.BulkImportMapper;
import org.kiji.mapreduce.framework.KijiConfKeys;
import org.kiji.mapreduce.framework.MapReduceJobBuilder;
import org.kiji.mapreduce.kvstore.KeyValueStore;
import org.kiji.mapreduce.output.DirectKijiTableMapReduceJobOutput;
import org.kiji.mapreduce.output.HFileMapReduceJobOutput;
import org.kiji.mapreduce.output.KijiTableMapReduceJobOutput;
import org.kiji.mapreduce.reducer.IdentityReducer;
/** Builds a job that runs a KijiBulkImporter to import data into a Kiji table. */
@ApiAudience.Public
@ApiStability.Stable
public final class KijiBulkImportJobBuilder
extends MapReduceJobBuilder<KijiBulkImportJobBuilder> {
/** The class of the bulk importer to run. */
@SuppressWarnings("rawtypes")
private Class<? extends KijiBulkImporter> mBulkImporterClass;
/** The bulk importer instance. */
private KijiBulkImporter<?, ?> mBulkImporter;
/** The mapper instance to run (which runs the bulk importer inside it). */
private KijiMapper<?, ?, ?, ?> mMapper;
/** The reducer instance to run (may be null). */
private KijiReducer<?, ?, ?, ?> mReducer;
/** The job input. */
private MapReduceJobInput mJobInput;
/** Job output must be a Kiji table. */
private KijiTableMapReduceJobOutput mJobOutput;
/** Constructs a builder for jobs that run a KijiBulkImporter. */
private KijiBulkImportJobBuilder() {
mBulkImporterClass = null;
mBulkImporter = null;
mMapper = null;
mReducer = null;
mJobInput = null;
mJobOutput = null;
}
/**
* Creates a new builder for Kiji bulk import jobs.
*
* @return a new Kiji bulk import job builder.
*/
public static KijiBulkImportJobBuilder create() {
return new KijiBulkImportJobBuilder();
}
/**
* Configures the job with input.
*
* @param jobInput The input for the job.
* @return This builder instance so you may chain configuration method calls.
*/
public KijiBulkImportJobBuilder withInput(MapReduceJobInput jobInput) {
mJobInput = jobInput;
return this;
}
/**
* Configures the bulk-importer output Kiji table.
*
* @param jobOutput Bulk importer must output to a Kiji table.
* @return this builder.
*/
public KijiBulkImportJobBuilder withOutput(KijiTableMapReduceJobOutput jobOutput) {
mJobOutput = jobOutput;
super.withOutput(jobOutput);
return this;
}
/**
* Configures the job output.
*
* @param jobOutput The output for the job.
* Bulk importer must output to a Kiji table.
* @return This builder instance so you may chain configuration method calls.
*
* {@inheritDoc}
*/
@Override
public KijiBulkImportJobBuilder withOutput(MapReduceJobOutput jobOutput) {
if (!(jobOutput instanceof KijiTableMapReduceJobOutput)) {
throw new JobConfigurationException(String.format(
"Invalid job output %s: expecting %s or %s",
jobOutput.getClass().getName(),
DirectKijiTableMapReduceJobOutput.class.getName(),
HFileMapReduceJobOutput.class.getName()));
}
return withOutput((KijiTableMapReduceJobOutput) jobOutput);
}
/**
* Configures the job with a bulk importer to run in the map phase.
*
* @param bulkImporterClass The bulk importer class to use in the job.
* @return This builder instance so you may chain configuration method calls.
*/
@SuppressWarnings("rawtypes")
public KijiBulkImportJobBuilder withBulkImporter(
Class<? extends KijiBulkImporter> bulkImporterClass) {
mBulkImporterClass = bulkImporterClass;
return this;
}
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
final Configuration conf = job.getConfiguration();
// Store the name of the the importer to use in the job configuration so the mapper can
// create instances of it.
// Construct the bulk importer instance.
if (null == mBulkImporterClass) {
throw new JobConfigurationException("Must specify a bulk importer.");
}
conf.setClass(
KijiConfKeys.KIJI_BULK_IMPORTER_CLASS, mBulkImporterClass, KijiBulkImporter.class);
mJobOutput.configure(job);
// Configure the mapper and reducer. This part depends on whether we're going to write
// to HFiles or directly to the table.
configureJobForHFileOutput(job);
job.setJobName("Kiji bulk import: " + mBulkImporterClass.getSimpleName());
mBulkImporter = ReflectionUtils.newInstance(mBulkImporterClass, conf);
// Configure the MapReduce job (requires mBulkImporter to be set properly):
super.configureJob(job);
}
/**
* Configures the job settings specific to writing HFiles.
*
* @param job The job to configure.
*/
protected void configureJobForHFileOutput(Job job) {
// Construct the mapper instance that runs the importer.
mMapper = new BulkImportMapper<Object, Object>();
// Don't need to do anything during the Reducer, but we need to run the reduce phase
// so the KeyValue records output from the map phase get sorted.
mReducer = new IdentityReducer<Object, Object>();
}
/** {@inheritDoc} */
@Override
protected KijiMapReduceJob build(Job job) {
return KijiMapReduceJob.create(job);
}
/** {@inheritDoc} */
@Override
protected Map<String, KeyValueStore<?, ?>> getRequiredStores() throws IOException {
return mBulkImporter.getRequiredStores();
}
/** {@inheritDoc} */
@Override
protected MapReduceJobInput getJobInput() {
return mJobInput;
}
/** {@inheritDoc} */
@Override
protected KijiMapper<?, ?, ?, ?> getMapper() {
return mMapper;
}
/** {@inheritDoc} */
@Override
protected KijiReducer<?, ?, ?, ?> getCombiner() {
// Use no combiner.
return null;
}
/** {@inheritDoc} */
@Override
protected KijiReducer<?, ?, ?, ?> getReducer() {
return mReducer;
}
/** {@inheritDoc} */
@Override
protected Class<?> getJarClass() {
return mBulkImporterClass;
}
}