/**
* (c) Copyright 2013 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.pivot;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.annotations.Inheritance;
import org.kiji.mapreduce.KijiContext;
import org.kiji.mapreduce.KijiTableContext;
import org.kiji.mapreduce.kvstore.KeyValueStore;
import org.kiji.mapreduce.kvstore.KeyValueStoreClient;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiRowData;
/**
 * Base class for a map-only job that reads from a Kiji table and writes to a Kiji table.
 *
 * <p>
 *   A {@link KijiPivoter} scans the rows from an input KijiTable and writes cells into an
 *   output KijiTable. The input and the output Kiji tables may or may not be the same.
 *   Contrary to a {@link org.kiji.mapreduce.produce.KijiProducer}, a {@link KijiPivoter} is free
 *   to write cells to any column of any row in the output table.
 * </p>
 *
 * <h1>Lifecycle:</h1>
 *
 * <p>
 *   Instances are created using ReflectionUtils.
 *   The {@link org.apache.hadoop.conf.Configuration} object is set immediately after
 *   instantiation with a call to {@link #setConf(Configuration)}.
 *   In order to initialize internal state before any other methods are called,
 *   override the {@link #setConf(Configuration)} method.
 * </p>
 *
 * <p>
 *   As a {@link KeyValueStoreClient}, KijiPivoter has access to all
 *   stores defined by {@link KeyValueStoreClient#getRequiredStores()}. Readers for
 *   these stores are surfaced in the setup(), produce(), and cleanup() methods
 *   via the Context provided to each by calling {@link KijiContext#getStore(String)}.
 * </p>
 *
 * <p>
 *   Once the internal state is set, functions may be called in any order, except for
 *   restrictions on setup(), produce(), and cleanup().
 * </p>
 *
 * <p>
 *   setup() will get called once at the beginning of the map phase, followed by
 *   a call to produce() for each input row. Once all of these produce()
 *   calls have completed, cleanup() will be called exactly once. It is possible
 *   that this setup-produce-cleanup cycle may repeat any number of times.
 * </p>
 *
 * <p>
 *   A final guarantee is that setup(), produce(), and cleanup() will be called after
 *   getDataRequest() has been called at least once.
 * </p>
 *
 * <h1>Skeleton:</h1>
 * <p>
 *   Any concrete implementation of a KijiPivoter must implement
 *   {@link #getDataRequest()} and {@link #produce(KijiRowData, KijiTableContext)}.
 * </p>
 */
@ApiAudience.Public
@ApiStability.Experimental
@Inheritance.Extensible
public abstract class KijiPivoter
    implements Configurable, KeyValueStoreClient {

  /** Configuration of this pivoter. Null until {@link #setConf(Configuration)} is called. */
  private Configuration mConf;

  /**
   * All subclasses of KijiPivoter must have a default constructor.
   * Constructors should be lightweight, since the framework is free to create new instances
   * at any time.
   */
  public KijiPivoter() {
  }

  /**
   * Sets the Configuration for this KijiPivoter to use.
   * This function is guaranteed to be called immediately after instantiation.
   * Override this method to initialize internal state from a configuration.
   *
   * <p>
   *   If you override this method, you must call <code>super.setConf(conf);</code>
   *   or else the configuration will not be saved properly.
   * </p>
   *
   * @param conf Configuration to initialize this pivoter with.
   */
  @Override
  public void setConf(Configuration conf) {
    mConf = conf;
  }

  /**
   * Returns the Configuration previously set through {@link #setConf(Configuration)}.
   *
   * @return the Configuration of this pivoter, never null.
   * @throws NullPointerException if no (non-null) Configuration has been set yet.
   */
  @Override
  public final Configuration getConf() {
    // Fail with an explicit message rather than a bare NullPointerException, so that a
    // missing setConf() (or an override that forgot to call super.setConf(conf)) is obvious.
    return Preconditions.checkNotNull(mConf,
        "Configuration has not been set: setConf() must be called before getConf().");
  }

  /**
   * Returns a KijiDataRequest that describes which input columns need to be available to
   * the pivoter.
   *
   * <p> This method may be called multiple times, including before {@link #setup(KijiContext)}.
   * </p>
   *
   * @return a specification of the data requested by this pivoter.
   */
  public abstract KijiDataRequest getDataRequest();

  /**
   * {@inheritDoc}
   *
   * <p> The default implementation requires no stores and returns an empty map. </p>
   */
  @Override
  public Map<String, KeyValueStore<?, ?>> getRequiredStores() {
    return Collections.emptyMap();
  }

  /**
   * Called once to initialize this pivoter before any calls to
   * {@link KijiPivoter#produce(KijiRowData, KijiTableContext)}.
   *
   * @param context The KijiContext providing access to KeyValueStores, Counters, etc.
   * @throws IOException on I/O error.
   */
  public void setup(KijiContext context) throws IOException {
    // By default, do nothing. Nothing may be added here, because subclasses may implement setup
    // methods without super.setup().
  }

  /**
   * Called to compute derived data for a single entity. The input that is included is controlled
   * by the {@link org.kiji.schema.KijiDataRequest} returned in {@link #getDataRequest}.
   *
   * @param row Input row from the input Kiji table, populated with the requested columns.
   * @param context Context used to write to the output Kiji table.
   * @throws IOException on I/O error.
   */
  public abstract void produce(KijiRowData row, KijiTableContext context) throws IOException;

  /**
   * Called once to clean up this pivoter after all
   * {@link #produce(KijiRowData, KijiTableContext)} calls are made.
   *
   * @param context The KijiContext providing access to KeyValueStores, Counters, etc.
   * @throws IOException on I/O error.
   */
  public void cleanup(KijiContext context) throws IOException {
    // By default, do nothing. Nothing may be added here, because subclasses may implement cleanup
    // methods without super.cleanup().
  }
}