/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.pig.backend.hadoop.hbase;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.pig.ExecType;
import org.apache.pig.LoadFunc;
import org.apache.pig.Slice;
import org.apache.pig.Slicer;
import org.apache.pig.backend.datastorage.DataStorage;
import org.apache.pig.builtin.Utf8StorageConverter;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.BufferedPositionedInputStream;
import org.apache.pig.impl.logicalLayer.schema.Schema;
/**
 * A <code>Slicer</code> that splits an HBase table into {@link HBaseSlice}s,
 * one slice per table region. The {@link LoadFunc} methods here are no-op
 * stubs: they exist only so this UDF can be cast to a LoadFunc during
 * parsing; the actual load operations are performed by {@link HBaseSlice}.
 */
public class HBaseStorage extends Utf8StorageConverter implements Slicer,
        LoadFunc {

    /** Column names to load, each encoded as HBase byte arrays. */
    private byte[][] m_cols;
    /** Handle to the HBase table; created lazily in {@link #ensureTable}. */
    private HTable m_table;
    /** HBase client configuration used when opening the table. */
    private HBaseConfiguration m_conf;

    private static final Log LOG = LogFactory.getLog(HBaseStorage.class);

    // HBase Slicer
    // Creates a slice per region of a specified table.

    /**
     * Constructor. Constructs an HBase table loader that loads the cells of
     * the provided columns.
     *
     * @param columnList
     *            whitespace-delimited list of column names; must contain at
     *            least one non-blank name
     * @throws IllegalArgumentException
     *             if {@code columnList} is empty or all whitespace
     */
    public HBaseStorage(String columnList) {
        // Split on runs of whitespace. A plain split(" ") would produce
        // empty column names for leading or repeated spaces, which would
        // only fail much later during the scan.
        String trimmed = columnList.trim();
        if (trimmed.isEmpty()) {
            throw new IllegalArgumentException(
                    "Expecting at least one column name");
        }
        String[] colNames = trimmed.split("\\s+");
        m_cols = new byte[colNames.length][];
        for (int i = 0; i < m_cols.length; i++) {
            m_cols[i] = Bytes.toBytes(colNames[i]);
        }
        m_conf = new HBaseConfiguration();
    }

    /**
     * Splits the table into one {@link Slice} per region.
     *
     * @param store
     *            data storage handle (unused beyond validation)
     * @param tablename
     *            table name or load path (see {@link #ensureTable})
     * @return one {@link HBaseSlice} per region of the table
     * @throws IOException
     *             if the table has no regions, no columns were configured,
     *             or the table cannot be opened
     */
    @Override
    public Slice[] slice(DataStorage store, String tablename)
            throws IOException {
        validate(store, tablename);
        byte[][] startKeys = m_table.getStartKeys();
        if (startKeys == null || startKeys.length == 0) {
            throw new IOException("Expecting at least one region");
        }
        if (m_cols == null || m_cols.length == 0) {
            throw new IOException("Expecting at least one column");
        }
        // One region, one slice: region i covers [startKeys[i],
        // startKeys[i + 1]); the last region runs to the end of the table,
        // signalled by an empty end row.
        Slice[] slices = new Slice[startKeys.length];
        for (int i = 0; i < startKeys.length; i++) {
            String regionLocation = m_table.getRegionLocation(startKeys[i])
                    .getServerAddress().getHostname();
            slices[i] = new HBaseSlice(m_table.getTableName(), startKeys[i],
                    ((i + 1) < startKeys.length) ? startKeys[i + 1]
                            : HConstants.EMPTY_START_ROW, m_cols,
                    regionLocation);
            LOG.info("slice: " + i + "->" + slices[i]);
        }
        return slices;
    }

    /**
     * Validates that the table named by {@code tablename} can be opened.
     *
     * @throws IOException
     *             if the table name cannot be resolved or opened
     */
    @Override
    public void validate(DataStorage store, String tablename)
            throws IOException {
        ensureTable(tablename);
    }

    /**
     * Opens the HBase table, resolving {@code tablename} first.
     *
     * Accepts either a bare table name or a scheme-prefixed load path such
     * as {@code hbase://<tablename>}. If the multiquery feature rewrote the
     * location to an absolute hdfs/file path, the last path component is
     * assumed to be the table name and a warning is logged.
     *
     * @throws IOException
     *             if the table name cannot be determined or the table
     *             cannot be opened
     */
    private void ensureTable(String tablename) throws IOException {
        LOG.info("tablename: " + tablename);
        // We're looking for the right scheme here (actually, we don't
        // care what the scheme is as long as it is one and it's
        // different from hdfs and file. If the user specified to use
        // the multiquery feature and did not specify a scheme we will
        // have transformed it to an absolute path. In that case we'll
        // take the last component and guess that's what was
        // meant. We'll print a warning in that case.
        int index;
        if (-1 != (index = tablename.indexOf("://"))) {
            if (tablename.startsWith("hdfs:")
                    || tablename.startsWith("file:")) {
                index = tablename.lastIndexOf("/");
                if (-1 == index) {
                    // No forward slash: fall back to Windows-style paths.
                    index = tablename.lastIndexOf("\\");
                }
                if (-1 == index) {
                    throw new IOException("Got tablename: " + tablename
                            + ". Either turn off multiquery (-no_multiquery)"
                            + " or specify load path as \"hbase://<tablename>\".");
                } else {
                    String in = tablename;
                    tablename = tablename.substring(index + 1);
                    LOG.warn("Got tablename: " + in + " Assuming you meant table: "
                            + tablename + ". Either turn off multiquery (-no_multiquery) "
                            + "or specify load path as \"hbase://<tablename>\" "
                            + "to avoid this warning.");
                }
            } else {
                // Strip the "<scheme>://" prefix, keeping the table name.
                tablename = tablename.substring(index + 3);
            }
        }
        if (m_table == null) {
            m_table = new HTable(m_conf, tablename);
        }
    }

    // HBase LoadFunc
    // It is just a mock class to let the UDF be casted to a LOADFUNC during
    // parsing.

    /** No-op; loading is done by {@link HBaseSlice}. */
    @Override
    public void bindTo(String fileName, BufferedPositionedInputStream is,
            long offset, long end) throws IOException {
        // do nothing
    }

    /** No-op; always returns {@code null} (schema is not determined here). */
    @Override
    public Schema determineSchema(String fileName, ExecType execType,
            DataStorage storage) throws IOException {
        // do nothing
        return null;
    }

    /** No-op; column projection is fixed by the constructor's column list. */
    @Override
    public void fieldsToRead(Schema schema) {
        // do nothing
    }

    /** No-op; always returns {@code null} (tuples are produced by the slice). */
    @Override
    public Tuple getNext() throws IOException {
        // do nothing
        return null;
    }
}