/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.framework;
import java.io.IOException;
import com.google.common.base.Preconditions;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.SerializationUtils;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.delegation.Lookups;
import org.kiji.schema.EntityId;
import org.kiji.schema.Kiji;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiURI;
import org.kiji.schema.filter.KijiRowFilter;
import org.kiji.schema.hbase.HBaseKijiURI;
/**
 * InputFormat for Hadoop MapReduce jobs reading from a Kiji table.
 *
 * <p>Besides the InputFormat contract left to concrete subclasses, this class offers two static
 * helpers: {@link Factory#get(KijiURI)} to find the input format factory matching a Kiji URI,
 * and {@link #configureJob} to record the input table, data request, row range and row filter
 * in a job's configuration.
 */
@ApiAudience.Framework
@ApiStability.Stable
public abstract class KijiTableInputFormat
    extends InputFormat<EntityId, KijiRowData>
    implements Configurable {

  /** Static factory class for getting instances of the appropriate KijiTableInputFormatFactory. */
  public static final class Factory {
    /**
     * Returns the KijiTableInputFormatFactory registered for the scheme of a Kiji URI.
     *
     * <p>A URI using the generic {@code KijiURI.KIJI_SCHEME} is looked up under
     * {@code HBaseKijiURI.HBASE_SCHEME}; any other scheme is looked up as-is.
     *
     * @param uri URI whose scheme selects the factory. Must not be null.
     * @return the KijiTableInputFormatFactory registered under the URI's scheme; never null.
     * @throws NullPointerException if {@code uri} is null, or if the lookup yields no factory
     *     for the URI's scheme.
     */
    public static KijiTableInputFormatFactory get(KijiURI uri) {
      Preconditions.checkNotNull(uri, "uri must not be null");

      String scheme = uri.getScheme();
      if (scheme.equals(KijiURI.KIJI_SCHEME)) {
        scheme = HBaseKijiURI.HBASE_SCHEME;
      }

      final KijiTableInputFormatFactory instance;
      // The lookup is guarded by the Kiji.Factory class monitor, not by a lock owned by this class.
      // NOTE(review): presumably this serializes provider lookups with Kiji.Factory -- confirm
      // before changing the lock object.
      synchronized (Kiji.Factory.class) {
        instance = Lookups
            .getNamed(KijiTableInputFormatFactory.class)
            .lookup(scheme);
      }

      // Checked unconditionally: a plain 'assert' is skipped unless the JVM runs with -ea, which
      // would let a null factory escape to the caller and fail later, far from the cause.
      return Preconditions.checkNotNull(
          instance, "No KijiTableInputFormatFactory found for scheme '%s'", scheme);
    }
  }

  /**
   * Configures a Hadoop M/R job to read from a given table.
   *
   * <p>The following entries are written to the job's configuration:
   * <ul>
   *   <li>{@code KijiConfKeys.KIJI_INPUT_DATA_REQUEST}: the data request, Java-serialized and
   *       Base64-encoded;</li>
   *   <li>{@code KijiConfKeys.KIJI_INPUT_TABLE_URI}: the string form of the table URI;</li>
   *   <li>{@code KijiConfKeys.KIJI_START_ROW_KEY}: the Base64-encoded HBase row key of
   *       {@code startRow}, only when {@code startRow} is not null;</li>
   *   <li>{@code KijiConfKeys.KIJI_LIMIT_ROW_KEY}: the Base64-encoded HBase row key of
   *       {@code endRow}, only when {@code endRow} is not null;</li>
   *   <li>{@code KijiConfKeys.KIJI_ROW_FILTER}: the JSON form of {@code filter}, only when
   *       {@code filter} is not null.</li>
   * </ul>
   *
   * @param job Job to configure. Must not be null.
   * @param tableURI URI of the table to read from. Must not be null.
   * @param dataRequest Data request. Must not be null.
   * @param startRow Minimum row key to process. May be left null to indicate
   *     that scanning should start at the beginning of the table.
   * @param endRow Maximum row key to process. May be left null to indicate that
   *     scanning should continue to the end of the table.
   * @param filter Filter to use for scanning. May be left null.
   * @throws IOException on I/O error.
   * @throws NullPointerException if {@code job}, {@code tableURI} or {@code dataRequest} is null.
   */
  public static void configureJob(
      Job job,
      KijiURI tableURI,
      KijiDataRequest dataRequest,
      EntityId startRow,
      EntityId endRow,
      KijiRowFilter filter
  ) throws IOException {
    Preconditions.checkNotNull(job, "job must not be null");
    Preconditions.checkNotNull(tableURI, "tableURI must not be null");
    Preconditions.checkNotNull(dataRequest, "dataRequest must not be null");

    final Configuration conf = job.getConfiguration();

    // TODO: Check for jars config:
    // GenericTableMapReduceUtil.initTableInput(hbaseTableName, scan, job);

    // Write all the required values to the job's configuration object.
    final String serializedRequest =
        Base64.encodeBase64String(SerializationUtils.serialize(dataRequest));
    conf.set(KijiConfKeys.KIJI_INPUT_DATA_REQUEST, serializedRequest);
    conf.set(KijiConfKeys.KIJI_INPUT_TABLE_URI, tableURI.toString());

    // The optional settings below are left unset when the corresponding argument is null.
    if (null != startRow) {
      conf.set(KijiConfKeys.KIJI_START_ROW_KEY,
          Base64.encodeBase64String(startRow.getHBaseRowKey()));
    }
    if (null != endRow) {
      conf.set(KijiConfKeys.KIJI_LIMIT_ROW_KEY,
          Base64.encodeBase64String(endRow.getHBaseRowKey()));
    }
    if (null != filter) {
      conf.set(KijiConfKeys.KIJI_ROW_FILTER, filter.toJson().toString());
    }
  }
}