/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.mapreduce.input.impl; import java.io.IOException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat; import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader; import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; import org.kiji.annotations.ApiAudience; /** * An input format for reading multiple files in a single map task. * Exactly one (unsplit) file will be read by each record passed to the mapper. * Records are constructed from WholeFileRecordReaders. */ @ApiAudience.Private public final class WholeFileInputFormat extends CombineFileInputFormat<Text, Text> { /** * Guarantees that no file is split. * * @param context The JobContext. Unused. * @param filename The path to this file. Unused. * @return false so that no files are split. */ @Override protected boolean isSplitable(JobContext context, Path filename) { // This, combined with returning the entire file as a record // guarantees that you get exactly one file for each record. return false; } /** * Creates a CombineFileRecordReader to read each file assigned to this InputSplit. * Note, that unlike ordinary InputSplits, split must be a CombineFileSplit, and therefore * is expected to specify multiple files. * * @param split The InputSplit to read. Throws an IllegalArgumentException if this is * not a CombineFileSplit. * @param context The context for this task. * @return a CombineFileRecordReader to process each file in split. * It will read each file with a WholeFileRecordReader. * @throws IOException if there is an error. */ @Override public RecordReader<Text, Text> createRecordReader( InputSplit split, TaskAttemptContext context) throws IOException { if (!(split instanceof CombineFileSplit)) { throw new IllegalArgumentException("split must be a CombineFileSplit"); } return new CombineFileRecordReader<Text, Text>((CombineFileSplit) split, context, WholeFileRecordReader.class); } }