/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.zebra.mapreduce;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.zebra.io.TableScanner;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.types.Projection;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.data.Tuple;
/**
* Adaptor class to implement RecordReader on top of Scanner.
*/
public class TableRecordReader extends RecordReader<BytesWritable, Tuple> {
  private final TableScanner scanner;
  // Number of rows handed out so far; exposed through getPos().
  private long count = 0;
  // Reusable key/value holders, lazily allocated on the first nextKeyValue()
  // and cleared once the scanner is exhausted.
  private BytesWritable key = null;
  private Tuple value = null;

  /**
   * Creates a record reader backed by a table scanner for the given split.
   *
   * @param expr
   *          Table expression
   * @param projection
   *          projection schema. Should never be null.
   * @param split
   *          the split to work on; a RowTableSplit, otherwise assumed to be a
   *          SortedTableSplit
   * @param jobContext
   *          JobContext object
   * @throws IOException
   */
  public TableRecordReader(TableExpr expr, String projection,
      InputSplit split, JobContext jobContext) throws IOException, ParseException {
    Configuration conf = jobContext.getConfiguration();
    // Row splits carry their own scan range; sorted splits supply begin/end keys.
    scanner = (split instanceof RowTableSplit)
        ? expr.getScanner((RowTableSplit) split, projection, conf)
        : expr.getScanner(((SortedTableSplit) split).getBegin(),
            ((SortedTableSplit) split).getEnd(), projection, conf);
  }

  @Override
  public void close() throws IOException {
    scanner.close();
  }

  /**
   * Returns the number of rows returned so far.
   */
  public long getPos() throws IOException {
    return count;
  }

  @Override
  public float getProgress() throws IOException {
    // Coarse-grained progress: 0 until the scanner is exhausted, then 1.
    return scanner.atEnd() ? 1.0f : 0.0f;
  }

  /**
   * Seek to the position at the first row which has the key
   * or just after the key; only applicable for sorted Zebra table
   *
   * @param key
   *          the key to seek on
   */
  public boolean seekTo(BytesWritable key) throws IOException {
    return scanner.seekTo(key);
  }

  /**
   * Check if the end of the input has been reached
   *
   * @return true if the end of the input is reached
   */
  public boolean atEnd() throws IOException {
    return scanner.atEnd();
  }

  @Override
  public BytesWritable getCurrentKey() throws IOException, InterruptedException {
    return key;
  }

  @Override
  public Tuple getCurrentValue() throws IOException, InterruptedException {
    return value;
  }

  @Override
  public void initialize(org.apache.hadoop.mapreduce.InputSplit arg0,
      TaskAttemptContext arg1) throws IOException, InterruptedException {
    // Nothing to do here: the scanner is fully set up in the constructor.
  }

  @Override
  public boolean nextKeyValue() throws IOException, InterruptedException {
    if (scanner.atEnd()) {
      // Exhausted: clear the holders so stale data is never handed back.
      key = null;
      value = null;
      return false;
    }
    // Lazily allocate the reusable holders on first use.
    if (key == null) {
      key = new BytesWritable();
    }
    if (value == null) {
      int numColumns = Projection.getNumColumns(scanner.getProjection());
      value = TypesUtils.createTuple(numColumns);
    }
    scanner.getKey(key);
    scanner.getValue(value);
    scanner.advance();
    count++;
    return true;
  }
}