/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.mapreduce.framework; import java.io.IOException; import com.google.common.base.Preconditions; import org.apache.hadoop.mapreduce.Job; import org.kiji.annotations.ApiAudience; import org.kiji.annotations.ApiStability; import org.kiji.annotations.Inheritance; import org.kiji.mapreduce.JobConfigurationException; import org.kiji.mapreduce.MapReduceJobInput; import org.kiji.mapreduce.input.KijiTableMapReduceJobInput; import org.kiji.mapreduce.input.MapReduceJobInputs; import org.kiji.schema.EntityId; import org.kiji.schema.Kiji; import org.kiji.schema.KijiDataRequest; import org.kiji.schema.KijiDataRequestException; import org.kiji.schema.KijiDataRequestValidator; import org.kiji.schema.KijiTable; import org.kiji.schema.KijiURI; import org.kiji.schema.filter.KijiRowFilter; import org.kiji.schema.util.ResourceUtils; /** * Base class for MapReduce jobs that use a Kiji table as input. * * @param <T> Type of the builder class. */ @ApiAudience.Framework @ApiStability.Stable @Inheritance.Sealed public abstract class KijiTableInputJobBuilder<T extends KijiTableInputJobBuilder<T>> extends MapReduceJobBuilder<T> { /** The table to use as input for the job. */ private KijiURI mInputTableURI; /** The entity id of the start row (inclusive). */ private EntityId mStartRow; /** The entity id of the limit row (exclusive). */ private EntityId mLimitRow; /** A row filter that specifies rows to exclude from the scan (optional, so may be null). */ private KijiRowFilter mRowFilter; /** Constructs a builder for jobs that use a Kiji table as input. */ protected KijiTableInputJobBuilder() { mInputTableURI = null; mStartRow = null; mLimitRow = null; mRowFilter = null; } /** * Configures the job input table. * * @param input The job input table. * @return This builder instance so you may chain configuration method calls. */ @SuppressWarnings("unchecked") public final T withJobInput(KijiTableMapReduceJobInput input) { mInputTableURI = input.getInputTableURI(); if (input.getRowOptions() != null) { mStartRow = input.getRowOptions().getStartRow(); mLimitRow = input.getRowOptions().getLimitRow(); mRowFilter = input.getRowOptions().getRowFilter(); } return (T) this; } /** * Configures the job with the Kiji table to use as input. * * @param inputTableURI The Kiji table to use as input for the job. * @return This builder instance so you may chain configuration method calls. */ @SuppressWarnings("unchecked") public final T withInputTable(KijiURI inputTableURI) { mInputTableURI = inputTableURI; return (T) this; } /** * Configures the job to process rows after and including an entity id. * * @param entityId The entity id of the first row input. * @return This builder instance so you may chain configuration method calls. */ @SuppressWarnings("unchecked") public final T withStartRow(EntityId entityId) { mStartRow = entityId; return (T) this; } /** * Configures the job to process rows before an entity id. * * @param entityId The entity id of the first row to exclude from the input. * @return This builder instance so you may chain configuration method calls. */ @SuppressWarnings("unchecked") public final T withLimitRow(EntityId entityId) { mLimitRow = entityId; return (T) this; } /** * Configures the job to exclude rows not accepted by a row filter. * * @param rowFilter A filter that specifies which rows to exclude from the input table. * @return This builder instance so you may chain configuration method calls. */ @SuppressWarnings("unchecked") public final T withFilter(KijiRowFilter rowFilter) { mRowFilter = rowFilter; return (T) this; } /** {@inheritDoc} */ @Override protected final MapReduceJobInput getJobInput() { final KijiTableMapReduceJobInput.RowOptions rowOptions = KijiTableMapReduceJobInput.RowOptions.create(mStartRow, mLimitRow, mRowFilter); return MapReduceJobInputs.newKijiTableMapReduceJobInput( mInputTableURI, getDataRequest(), rowOptions); } /** @return the URI of the input table. */ protected final KijiURI getInputTableURI() { return mInputTableURI; } /** * Subclasses must override this to provide a Kiji data request for the input table. * * @return the Kiji data request to configure the input table scanner with. */ protected abstract KijiDataRequest getDataRequest(); /** {@inheritDoc} */ @Override protected void configureJob(Job job) throws IOException { // Configure the input, mapper, combiner, and reducer, output. super.configureJob(job); // Validate the Kiji data request against the current table layout: Preconditions.checkNotNull(mInputTableURI, "Input Kiji table was never set."); final Kiji kiji = Kiji.Factory.open(mInputTableURI, getConf()); try { final KijiTable table = kiji.openTable(mInputTableURI.getTable()); try { validateInputTable(table); } finally { ResourceUtils.releaseOrLog(table); } } finally { ResourceUtils.releaseOrLog(kiji); } } /** * Validates the input table. * * Sub-classes may override this method to perform additional validation requiring an active * connection to the input table. * * @param table Input table. * @throws IOException on I/O error. */ protected void validateInputTable(KijiTable table) throws IOException { try { KijiDataRequestValidator.validatorForLayout(table.getLayout()).validate(getDataRequest()); } catch (KijiDataRequestException kdre) { throw new JobConfigurationException("Invalid data request: " + kdre.getMessage()); } } }