package net.hbase.secondaryindex.mapred;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.hbase.KeyValue;
//import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import net.hbase.secondaryindex.util.Const;
/**
 * Mapper that turns every scanned HBase row into secondary-index records.
 *
 * <p>
 * For each cell with a non-empty value it emits a single-column index record:
 *
 * <pre>
 * key   = family + FAMILY_COLUMN_SEPARATOR + qualifier
 *         + ROWKEY_DEFAULT_SEPARATOR + value + FIELD_COMMON_SEPARATOR + timestamp
 * value = COLUMN_FAMILY_CF1_STRING + FIELD_COMMON_SEPARATOR + COLUMN_RK_STRING
 *         + FIELD_COMMON_SEPARATOR + source row key
 * </pre>
 *
 * <p>
 * When {@code Const.HBASE_CONF_ISBUILDSINGLEINDEX_NAME} is {@code false} it
 * additionally emits combined index records for rows in which exactly two or
 * exactly three of the configured columns
 * ({@code Const.HBASE_CONF_COLUMN_NAME}, comma separated) carry a value.
 * Combined records use the timestamp of the last cell read from the row.
 */
public class IndexMapper extends TableMapper<Writable, Writable> {

    /** {@code true} when only single-column index records are produced. */
    private boolean isBuildSingleIndex;

    /** Raw, comma-separated column list read from the job configuration. */
    private String column;

    /**
     * Configured column names, split once in {@link #setup}; only populated
     * when combined indexes are requested.
     */
    private String[] indexedColumns = new String[0];

    /** Reused output key. */
    private Text k = new Text();

    /** Reused output value. */
    private Text v = new Text();

    /**
     * Reads the job configuration and, for combined-index jobs, parses the
     * column list once instead of once per row.
     *
     * @throws IllegalStateException
     *             if combined indexes are requested but no column list is
     *             configured
     */
    @Override
    protected void setup(Context context) throws IOException,
            InterruptedException {
        column = context.getConfiguration().get(Const.HBASE_CONF_COLUMN_NAME);
        isBuildSingleIndex = context.getConfiguration().getBoolean(
                Const.HBASE_CONF_ISBUILDSINGLEINDEX_NAME, true);
        if (!isBuildSingleIndex) {
            if (column == null) {
                // Fail fast with a readable message instead of a bare
                // NullPointerException on the first map() call.
                throw new IllegalStateException("Configuration property "
                        + Const.HBASE_CONF_COLUMN_NAME
                        + " must be set to build combined indexes");
            }
            // limit -1 keeps trailing empty names, as the job always did.
            indexedColumns = column.split(",", -1);
        }
    }

    /**
     * Emits the index records for one source row.
     *
     * <p>
     * Failures while processing a row are logged to stderr and the row is
     * skipped (best effort); an interrupt, however, is never swallowed.
     *
     * @param row
     *            key of the source row
     * @param columns
     *            cells of the source row
     * @param context
     *            task context the index records are written to
     * @throws IOException
     *             if the task is interrupted while writing output
     */
    @Override
    public void map(ImmutableBytesWritable row, Result columns, Context context)
            throws IOException {
        String value = null;
        // Decode as UTF-8 (not the platform charset) and honour the
        // writable's offset/length instead of the whole backing array.
        String rowkey = Bytes.toString(row.get(), row.getOffset(),
                row.getLength());
        String cf = Const.COLUMN_FAMILY_CF1_STRING;
        String qualifier = Const.COLUMN_RK_STRING;

        // Every record written for this row carries the same value.
        v.set(cf + Const.FIELD_COMMON_SEPARATOR + qualifier
                + Const.FIELD_COMMON_SEPARATOR + rowkey);

        // Per-row collection of the distinct values seen for each configured
        // column; null when only single-column indexes are built.
        Map<String, Set<String>> valuesByColumn = null;
        if (!isBuildSingleIndex) {
            valuesByColumn = new HashMap<String, Set<String>>(
                    indexedColumns.length);
            for (String name : indexedColumns) {
                valuesByColumn.put(name, new HashSet<String>());
            }
        }

        try {
            List<KeyValue> cells = columns.list();
            if (cells == null) {
                // NOTE(review): Result.list() appears to return null for an
                // empty Result in older HBase releases - nothing to index.
                return;
            }

            long ts = 0L;
            for (KeyValue kv : cells) {
                value = Bytes.toStringBinary(kv.getValue());
                ts = kv.getTimestamp();
                String columnName = Bytes.toString(kv.getFamily())
                        + Const.FAMILY_COLUMN_SEPARATOR
                        + Bytes.toString(kv.getQualifier());
                if (null == value || value.length() == 0) {
                    continue;
                }

                k.set(columnName + Const.ROWKEY_DEFAULT_SEPARATOR + value
                        + Const.FIELD_COMMON_SEPARATOR + ts);
                context.write(k, v);

                if (valuesByColumn != null) {
                    // Columns outside the configured list still get a
                    // single-column record but take no part in combined
                    // indexes; they must not abort the rest of the row.
                    Set<String> seen = valuesByColumn.get(columnName);
                    if (seen != null) {
                        seen.add(value);
                    }
                }
            }

            /* build combined index */
            if (valuesByColumn != null) {
                List<String> present = presentColumns(valuesByColumn);
                if (present.size() == 3) {
                    writeThreeColumnIndexes(present, valuesByColumn, ts,
                            context);
                } else if (present.size() == 2) {
                    writeTwoColumnIndexes(present, valuesByColumn, ts, context);
                }
            }
        } catch (InterruptedException e) {
            // Restore the flag and surface the interrupt; the declared
            // signature only allows IOException.
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while indexing row " + rowkey,
                    e);
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("Error: " + e.getMessage() + ", Row: "
                    + rowkey + ", Value: " + value);
        }
    }

    /**
     * Returns the configured columns that have at least one value in the
     * current row, in configuration order and without duplicates.
     */
    private List<String> presentColumns(Map<String, Set<String>> valuesByColumn) {
        List<String> present = new ArrayList<String>(indexedColumns.length);
        for (String name : indexedColumns) {
            Set<String> values = valuesByColumn.get(name);
            if (values != null && !values.isEmpty()
                    && !present.contains(name)) {
                present.add(name);
            }
        }
        return present;
    }

    /**
     * Writes the combined records for a row with exactly three populated
     * columns: for every value triple, one record per combination returned by
     * {@code Combination.getLowerLimitCombinations(source, 2)}.
     */
    @SuppressWarnings("rawtypes")
    private void writeThreeColumnIndexes(List<String> names,
            Map<String, Set<String>> valuesByColumn, long ts, Context context)
            throws IOException, InterruptedException {
        String name0 = names.get(0);
        String name1 = names.get(1);
        String name2 = names.get(2);
        for (String v0 : valuesByColumn.get(name0)) {
            for (String v1 : valuesByColumn.get(name1)) {
                for (String v2 : valuesByColumn.get(name2)) {
                    Vector<String> source = new Vector<String>();
                    source.add(name0 + Const.ROWKEY_DEFAULT_SEPARATOR + v0);
                    source.add(name1 + Const.ROWKEY_DEFAULT_SEPARATOR + v1);
                    source.add(name2 + Const.ROWKEY_DEFAULT_SEPARATOR + v2);
                    Vector<Vector> comb = Combination
                            .getLowerLimitCombinations(source, 2);
                    if (null == comb) {
                        continue;
                    }
                    for (Vector vect : comb) {
                        // NOTE(review): building the key from toString()
                        // rewrites any ", ", "[" or "]" inside a value; kept
                        // as is because readers of the index may build their
                        // lookup keys the same way - confirm before changing.
                        String indexRowkey = vect.toString()
                                .replaceAll(", ", "_")
                                .replaceAll("\\[", "")
                                .replaceAll("\\]", "");
                        k.set(indexRowkey + Const.FIELD_COMMON_SEPARATOR + ts);
                        context.write(k, v);
                    }
                }
            }
        }
    }

    /**
     * Writes the combined records for a row with exactly two populated
     * columns: one record per pair of values.
     */
    private void writeTwoColumnIndexes(List<String> names,
            Map<String, Set<String>> valuesByColumn, long ts, Context context)
            throws IOException, InterruptedException {
        String name0 = names.get(0);
        String name1 = names.get(1);
        for (String v0 : valuesByColumn.get(name0)) {
            for (String v1 : valuesByColumn.get(name1)) {
                String indexRowkey = name0 + Const.ROWKEY_DEFAULT_SEPARATOR
                        + v0 + Const.ROWKEY_DEFAULT_SEPARATOR + name1
                        + Const.ROWKEY_DEFAULT_SEPARATOR + v1;
                k.set(indexRowkey + Const.FIELD_COMMON_SEPARATOR + ts);
                context.write(k, v);
            }
        }
    }
}