InputFormat.java example

Explorer
BC-BSP-master
- src
/**
 * CopyRight by Chinamobile
 * 
 * InputFormat.java
 */

package com.chinamobile.bcbsp.io;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;

import com.chinamobile.bcbsp.util.BSPJob;

/**
 * InputFormat
 * 
 * This is an abstract class. All user-defined InputFormat class must implement
 * two methods:getSplits() and createRecordReader();
 * 
 * @author
 * @version
 */
public abstract class InputFormat<K, V> {

    /**
     * This method is used for generating splits according to the input data.
     * The list of split will be used by JobInProgress, SimpleTaskScheduler and
     * Staff.
     * 
     * @param job
     * @return
     * @throws IOException
     * @throws InterruptedException
     */
    public abstract List<InputSplit> getSplits(BSPJob job) throws IOException,
            InterruptedException;

    /**
     * This method will return a user-defined RecordReader for reading data from
     * the original storage. It is used in Staff.
     * 
     * @param split
     * @param job
     * @return
     * @throws IOException
     * @throws InterruptedException
     */
    public abstract RecordReader<K, V> createRecordReader(InputSplit split,
            BSPJob job) throws IOException, InterruptedException;

    /**
     * This method is only used to read data from HBase. If the data is read
     * from the DFS you do not cover it. This method is primarily used to
     * initialize the HBase table and set Scan
     * 
     * @param configuration
     */
    public void initialize(Configuration configuration) {
    }

}