package net.hbase.secondaryindex.mapred;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.hbase.KeyValue;
//import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import net.hbase.secondaryindex.util.Const;
import net.hbase.secondaryindex.util.JsonUtil;
/**
* Just build index for json column.
*
* @author mayanhui
*
*/
public class IndexJsonMapper extends TableMapper<Writable, Writable> {
private boolean isBuildSingleIndex;
private String column;
private String jsonFields;
private JsonUtil jsonUtil = new JsonUtil();
private Text k = new Text();
private Text v = new Text();
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
column = context.getConfiguration().get(Const.HBASE_CONF_COLUMN_NAME);
jsonFields = context.getConfiguration().get(Const.HBASE_CONF_JSON_NAME);
isBuildSingleIndex = context.getConfiguration().getBoolean(
Const.HBASE_CONF_ISBUILDSINGLEINDEX_NAME, true);
}
@SuppressWarnings("rawtypes")
@Override
public void map(ImmutableBytesWritable row, Result columns, Context context)
throws IOException {
String json = null;
String rowkey = new String(row.get());
String cf = Const.COLUMN_FAMILY_CF1_STRING;
String qualifier = Const.COLUMN_RK_STRING;
String[] arr = jsonFields.split(",", -1);
try {
for (KeyValue kv : columns.list()) {
json = Bytes.toString(kv.getValue()); // json column value
long ts = kv.getTimestamp();
/* build single column index */
for (String jf : arr) {
Set<String> jfValueSet = new HashSet<String>();
// json array
if (json.startsWith(Const.JSON_ARRAY_START)) {
jfValueSet = jsonUtil.evaluateDistinctArray(json, jf);
} else {
// single json object
String jfValue = jsonUtil.evaluate(json, "$." + jf)
.toString();
jfValueSet.add(jfValue);
}
for (String jfValue : jfValueSet) {
if (null != jfValue && jfValue.trim().length() > 0) {
k.set(column + Const.ROWKEY_DEFAULT_SEPARATOR + jf
+ Const.ROWKEY_DEFAULT_SEPARATOR + jfValue
+ Const.FIELD_COMMON_SEPARATOR + ts);
v.set(cf + Const.FIELD_COMMON_SEPARATOR + qualifier
+ Const.FIELD_COMMON_SEPARATOR + rowkey);
context.write(k, v);
}
}
}
/* build combined index */
if (!isBuildSingleIndex) {
List<String> jsonArr = jsonUtil.evaluateArray(json,
jsonFields);
for (String ja : jsonArr) {
String[] jarr = ja.split(",", -1);
Vector<String> source = new Vector<String>();
for (int i = 0; i < jarr.length; i++) {
source.add(arr[i] + Const.ROWKEY_DEFAULT_SEPARATOR
+ jarr[i]);
}
Vector<Vector> comb = Combination
.getLowerLimitCombinations(source, 2);
if (null != comb && comb.size() > 0) {
for (Vector vect : comb) {
String indexRowkey = column
+ Const.ROWKEY_DEFAULT_SEPARATOR
+ vect.toString().replaceAll(", ", "_")
.replaceAll("\\[", "")
.replaceAll("\\]", "");
k.set(indexRowkey
+ Const.FIELD_COMMON_SEPARATOR + ts);
v.set(cf + Const.FIELD_COMMON_SEPARATOR
+ qualifier
+ Const.FIELD_COMMON_SEPARATOR + rowkey);
context.write(k, v);
}
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
System.err.println("Error: " + e.getMessage() + ", Row: "
+ Bytes.toString(row.get()) + ", Value: " + json);
}
}
}