/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.pig.backend.hadoop.hbase;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Scanner;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.util.StringUtils;
import org.apache.pig.Slice;
import org.apache.pig.backend.datastorage.DataStorage;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
/**
 * A {@link Slice} that reads one contiguous row range of an HBase table. The
 * key range is [start, end). Modeled on
 * org.apache.hadoop.hbase.mapred.TableSplit.
 *
 * <p>
 * The table name, row range, columns and region location are fixed at
 * construction. The table connection and scanner are transient and are
 * created by {@link #init(DataStorage)}.
 */
public class HBaseSlice implements Slice {

    /** A Generated Serial Version UID **/
    private static final long serialVersionUID = 9035916017187148965L;

    private static final Log LOG = LogFactory.getLog(HBaseSlice.class);

    // assigned during construction

    /** Table Name **/
    private final byte[] m_tableName;
    /** Table Start Row (inclusive) **/
    private final byte[] m_startRow;
    /** Table End Row (exclusive); null or empty means no upper bound **/
    private final byte[] m_endRow;
    /** Table Region Location **/
    private final String m_regionLocation;
    /** Input Columns **/
    private final byte[][] m_inputColumns;

    // created as part of init

    /** The connection to the table in HBase **/
    private transient HTable m_table;
    /** The scanner over the table; null until init() and after close() **/
    private transient Scanner m_scanner;
    /** Scratch list reused across rows while building tuples **/
    private transient ArrayList<Object> mProtoTuple;
    /**
     * Key of the last row handed to the caller, or null if no row has been
     * returned since the scanner was initialised. Used to resume scanning
     * after the scanner is lost.
     */
    private transient byte[] m_lastRow;

    /**
     * Constructor
     *
     * @param tableName
     *            table name
     * @param startRow
     *            start row, inclusive
     * @param endRow
     *            end row, exclusive
     * @param inputColumns
     *            input columns
     * @param location
     *            region location
     */
    public HBaseSlice(byte[] tableName, byte[] startRow, byte[] endRow,
            byte[][] inputColumns, final String location) {
        this.m_tableName = tableName;
        this.m_startRow = startRow;
        this.m_endRow = endRow;
        this.m_inputColumns = inputColumns;
        this.m_regionLocation = location;
    }

    /** @return table name (the internal array, not a copy) */
    public byte[] getTableName() {
        return this.m_tableName;
    }

    /** @return starting row key (the internal array, not a copy) */
    public byte[] getStartRow() {
        return this.m_startRow;
    }

    /** @return end row key (the internal array, not a copy) */
    public byte[] getEndRow() {
        return this.m_endRow;
    }

    /** @return input columns (the internal array, not a copy) */
    public byte[][] getInputColumns() {
        return this.m_inputColumns;
    }

    /** @return the region's hostname */
    public String getRegionLocation() {
        return this.m_regionLocation;
    }

    /** @return always 0; a byte offset is not available for a table range */
    @Override
    public long getStart() {
        // Not clear how to obtain this in a table...
        return 0;
    }

    /** @return always 0; the size of a table range is not known here */
    @Override
    public long getLength() {
        // Not clear how to obtain this in a table...
        // it seems to be used only for sorting splits
        return 0;
    }

    /** @return a single-element array holding the region location */
    @Override
    public String[] getLocations() {
        return new String[] { m_regionLocation };
    }

    /** @return always 0; the ordinal position is not tracked */
    @Override
    public long getPos() throws IOException {
        // This should be the ordinal tuple in the range;
        // not clear how to calculate...
        return 0;
    }

    /** @return always 0; progress is not tracked */
    @Override
    public float getProgress() throws IOException {
        // Depends on the total number of tuples and getPos
        return 0;
    }

    /**
     * Connects to the table and opens a scanner positioned at the start row.
     *
     * @param store
     *            unused by this implementation
     * @throws IOException
     *             if the table or the scanner cannot be opened
     */
    @Override
    public void init(DataStorage store) throws IOException {
        LOG.info("Init Hbase Slice " + this);
        HBaseConfiguration conf = new HBaseConfiguration();
        // connect to the given table
        m_table = new HTable(conf, m_tableName);
        // init the scanner
        init_scanner();
    }

    /**
     * Init the table scanner at the start row and forget any previously
     * recorded position.
     *
     * @throws IOException
     *             if the scanner cannot be opened
     */
    private void init_scanner() throws IOException {
        restart(m_startRow);
        m_lastRow = null;
    }

    /**
     * Restart scanning from survivable exceptions by creating a new scanner.
     * Any scanner currently held is closed first so it is not leaked.
     *
     * @param startRow
     *            the row to start scanning from
     * @throws IOException
     *             if the new scanner cannot be opened
     */
    private void restart(byte[] startRow) throws IOException {
        closeScannerQuietly();
        if ((m_endRow != null) && (m_endRow.length > 0)) {
            this.m_scanner = this.m_table.getScanner(m_inputColumns, startRow,
                    m_endRow);
        } else {
            // no upper bound: scan from startRow onwards
            this.m_scanner = this.m_table.getScanner(m_inputColumns, startRow);
        }
    }

    /**
     * Closes the current scanner, if any, without letting a failure escape.
     * Used only when the scanner is about to be replaced.
     */
    private void closeScannerQuietly() {
        if (m_scanner == null) {
            return;
        }
        try {
            m_scanner.close();
        } catch (Exception e) {
            // Best effort: the scanner being replaced has typically already
            // failed, so a failing close must not prevent the restart.
            if (LOG.isDebugEnabled()) {
                LOG.debug("ignoring failure closing replaced scanner: "
                        + StringUtils.stringifyException(e));
            }
        } finally {
            m_scanner = null;
        }
    }

    /**
     * Reads the next row of the range into the given tuple. If the scanner
     * has become unknown to the server, a new scanner is opened at the last
     * returned row and scanning resumes after it.
     *
     * @param value
     *            tuple that is made to reference the newly read row
     * @return true if a row was read, false when the range is exhausted
     * @throws IOException
     *             if the slice has no open scanner or the scan fails
     */
    @Override
    public boolean next(Tuple value) throws IOException {
        if (m_scanner == null) {
            throw new IOException("HBaseSlice " + this
                    + " has no open scanner; init() must be called before"
                    + " next() and next() must not be called after close()");
        }
        RowResult result;
        try {
            result = this.m_scanner.next();
        } catch (UnknownScannerException e) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("recovered from "
                        + StringUtils.stringifyException(e));
            }
            if (m_lastRow == null) {
                // nothing returned yet: start over from the beginning
                restart(m_startRow);
            } else {
                restart(m_lastRow);
                this.m_scanner.next(); // skip presumed already mapped row
            }
            result = this.m_scanner.next();
        }
        boolean hasMore = result != null && result.size() > 0;
        if (hasMore) {
            m_lastRow = result.getRow();
            convertResultToTuple(result, value);
        }
        return hasMore;
    }

    /**
     * Convert a row result to a tuple with one field per input column, in
     * input-column order. A column with no cell or no value yields a null
     * field.
     *
     * @param result
     *            row result
     * @param tuple
     *            tuple that is made to reference the converted row
     */
    private void convertResultToTuple(RowResult result, Tuple tuple) {
        if (mProtoTuple == null) {
            mProtoTuple = new ArrayList<Object>(m_inputColumns.length);
        }
        for (byte[] column : m_inputColumns) {
            Cell cell = result.get(column);
            byte[] value = (cell == null) ? null : cell.getValue();
            if (value == null) {
                mProtoTuple.add(null);
            } else {
                mProtoTuple.add(new DataByteArray(value));
            }
        }
        Tuple newT = TupleFactory.getInstance().newTuple(mProtoTuple);
        // NOTE(review): the scratch list is cleared right after the tuple is
        // built, which presumes newTuple copies the list - confirm.
        mProtoTuple.clear();
        tuple.reference(newT);
    }

    /**
     * Closes the scanner if one is open. Safe to call more than once.
     *
     * @throws IOException
     *             if closing the scanner fails
     */
    @Override
    public void close() throws IOException {
        if (m_scanner != null) {
            try {
                m_scanner.close();
            } finally {
                // drop the reference even if close() fails
                m_scanner = null;
            }
        }
    }

    /** @return "location:startRow,endRow" */
    @Override
    public String toString() {
        return m_regionLocation + ":" + Bytes.toString(m_startRow) + ","
                + Bytes.toString(m_endRow);
    }
}