/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.knittingboar.io; import java.io.IOException; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.TextInputFormat; /** * Encapsulates functionality from: * * - FileInputFormat::getSplits(...) - this info should be calculated by the * main job controlling process [MOVE] * * - TextInputFormat::readSplit(...) * * * Notes - currently hard-coded to read CSV "record per line" non-compressed * records from disk * * @author jpatterson * */ public class InputRecordsSplit { TextInputFormat input_format = null; InputSplit split = null; JobConf jobConf = null; RecordReader<LongWritable,Text> reader = null; LongWritable key = null; final Reporter voidReporter = Reporter.NULL; public InputRecordsSplit(JobConf jobConf, InputSplit split) throws IOException { this.jobConf = jobConf; this.split = split; this.input_format = new TextInputFormat(); // RecordReader<LongWritable, Text> reader = // format.getRecordReader(splits[x], job, reporter); this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter); this.key = reader.createKey(); // Text value = reader.createValue(); } /** * * just a dead simple way to do this * * - functionality from TestTextInputFormat::readSplit() * * If returns true, then csv_line contains the next line If returns false, * then there is no next record * * Will terminate when it hits the end of the split based on the information * provided in the split class to the constructor and the TextInputFormat * * @param csv_line * @throws IOException */ public boolean next(Text csv_line) throws IOException { return reader.next(key, csv_line); } public void ResetToStartOfSplit() throws IOException { // I'mma cheatin here. sue me. this.reader = input_format.getRecordReader(this.split, this.jobConf, voidReporter); } }