/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.zebra.mapreduce;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.zebra.io.TableScanner;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.types.Projection;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.data.Tuple;
/**
* Adaptor class to implement RecordReader on top of Scanner.
*/
public class TableRecordReader extends RecordReader<BytesWritable, Tuple> {
  private final TableScanner scanner;
  // Number of rows handed out so far; exposed through getPos().
  private long count = 0;
  // Reusable key/value holders, lazily allocated on the first nextKeyValue()
  // and cleared once the scanner is exhausted.
  private BytesWritable key = null;
  private Tuple value = null;

  /**
   * Creates a record reader backed by a table scanner for the given split.
   *
   * @param expr
   *          Table expression
   * @param projection
   *          projection schema. Should never be null.
   * @param split
   *          the split to work on; a RowTableSplit, otherwise assumed to be a
   *          SortedTableSplit
   * @param jobContext
   *          JobContext object
   * @throws IOException
   */
  public TableRecordReader(TableExpr expr, String projection,
      InputSplit split, JobContext jobContext) throws IOException, ParseException {
    Configuration conf = jobContext.getConfiguration();
    // Row splits carry their own scan range; sorted splits supply begin/end keys.
    scanner = (split instanceof RowTableSplit)
        ? expr.getScanner((RowTableSplit) split, projection, conf)
        : expr.getScanner(((SortedTableSplit) split).getBegin(),
            ((SortedTableSplit) split).getEnd(), projection, conf);
  }

  @Override
  public void close() throws IOException {
    scanner.close();
  }

  /**
   * Returns the number of rows returned so far.
   */
  public long getPos() throws IOException {
    return count;
  }

  @Override
  public float getProgress() throws IOException {
    // Coarse-grained progress: 0 until the scanner is exhausted, then 1.
    return scanner.atEnd() ? 1.0f : 0.0f;
  }

  /**
   * Seek to the position at the first row which has the key
   * or just after the key; only applicable for sorted Zebra table
   *
   * @param key
   *          the key to seek on
   */
  public boolean seekTo(BytesWritable key) throws IOException {
    return scanner.seekTo(key);
  }

  /**
   * Check if the end of the input has been reached
   *
   * @return true if the end of the input is reached
   */
  public boolean atEnd() throws IOException {
    return scanner.atEnd();
  }

  @Override
  public BytesWritable getCurrentKey() throws IOException, InterruptedException {
    return key;
  }

  @Override
  public Tuple getCurrentValue() throws IOException, InterruptedException {
    return value;
  }

  @Override
  public void initialize(org.apache.hadoop.mapreduce.InputSplit arg0,
      TaskAttemptContext arg1) throws IOException, InterruptedException {
    // Nothing to do here: the scanner is fully set up in the constructor.
  }

  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {
    if (scanner.atEnd()) {
      // Exhausted: clear the holders so stale data is never handed back.
      key = null;
      value = null;
      return false;
    }
    // Lazily allocate the reusable holders on first use.
    if (key == null) {
      key = new BytesWritable();
    }
    if (value == null) {
      int numColumns = Projection.getNumColumns(scanner.getProjection());
      value = TypesUtils.createTuple(numColumns);
    }
    scanner.getKey(key);
    scanner.getValue(value);
    scanner.advance();
    count++;
    return true;
  }
}