package net.hbase.secondaryindex.mapred; import java.io.IOException; import java.util.*; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.mapreduce.TableMapper; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import net.hbase.secondaryindex.util.Const; import net.hbase.secondaryindex.util.JsonUtil; /** * Just build index for json column. * * @author mayanhui * */ @Deprecated public class DeprecatedIndexJsonMapper extends TableMapper<Writable, Writable> { private boolean isBuildSingleIndex; private String column; private String jsonFields; private Map<String, Set<String>> colNameValSetrMap; private JsonUtil jsonUtil = new JsonUtil(); private long ts = System.currentTimeMillis(); private Text k = new Text(); private Text v = new Text(); @Override protected void setup(Context context) throws IOException, InterruptedException { column = context.getConfiguration().get(Const.HBASE_CONF_COLUMN_NAME); jsonFields = context.getConfiguration().get(Const.HBASE_CONF_JSON_NAME); isBuildSingleIndex = context.getConfiguration().getBoolean( Const.HBASE_CONF_ISBUILDSINGLEINDEX_NAME, true); } @SuppressWarnings("rawtypes") @Override public void map(ImmutableBytesWritable row, Result columns, Context context) throws IOException { String json = null; // byte[] rowkey = row.get(); // byte[] cf = Const.COLUMN_FAMILY_CF1_BYTE; // byte[] qualifier = Const.COLUMN_RK_BYTE; String rowkey = new String(row.get()); String cf = Const.COLUMN_FAMILY_CF1_STRING; String qualifier = Const.COLUMN_RK_STRING; String[] arr = jsonFields.split(",", -1); if (!isBuildSingleIndex) { // initial column name and values map colNameValSetrMap = new HashMap<String, Set<String>>(arr.length); for (int i = 0; i < arr.length; i++) { colNameValSetrMap.put(arr[i], new HashSet<String>()); } } try { for (KeyValue kv : columns.list()) { json = Bytes.toStringBinary(kv.getValue()); // json column value ts = kv.getTimestamp(); /* build single column index */ for (String jf : arr) { List<String> jfValueList = new ArrayList<String>(); // json array if (json.startsWith(Const.JSON_ARRAY_START)) { jfValueList = jsonUtil.evaluateArray(json, jf); } else {// single json object String jfValue = jsonUtil.evaluate(json, "$." + jf) .toString(); jfValueList.add(jfValue); } for (String jfValue : jfValueList) { if (null != jfValue && jfValue.trim().length() > 0) { // Put put = new Put( // Bytes.toBytes(column // + Const.ROWKEY_DEFAULT_SEPARATOR // + jf // + Const.ROWKEY_DEFAULT_SEPARATOR // + jfValue), ts); // put.add(cf, qualifier, rowkey); // context.write(row, put); k.set(column + Const.ROWKEY_DEFAULT_SEPARATOR + jf + Const.ROWKEY_DEFAULT_SEPARATOR + jfValue + Const.FIELD_COMMON_SEPARATOR + ts); v.set(cf + Const.FIELD_COMMON_SEPARATOR + qualifier + Const.FIELD_COMMON_SEPARATOR + rowkey); context.write(k, v); if (!isBuildSingleIndex) { Set<String> colValSet = colNameValSetrMap .get(jf); colValSet.add(jfValue); colNameValSetrMap.put(jf, colValSet); } } } } } /* build combined index */ if (!isBuildSingleIndex) { // remove empty columns Map<String, Set<String>> cleanedMap = removeEmptyEntry(colNameValSetrMap); // valid if (cleanedMap.size() > 1 && cleanedMap.size() < 4 && cleanedMap.size() <= arr.length) { // The existing columns of this rowkey is 3 and the input // 'column' is 3 too. if (cleanedMap.size() == 3) { Set<String> cn0 = cleanedMap.get(arr[0]); Set<String> cn1 = cleanedMap.get(arr[1]); Set<String> cn2 = cleanedMap.get(arr[2]); for (String v0 : cn0) { for (String v1 : cn1) { for (String v2 : cn2) { Vector<String> source = new Vector<String>(); source.add(arr[0] + Const.ROWKEY_DEFAULT_SEPARATOR + v0); source.add(arr[1] + Const.ROWKEY_DEFAULT_SEPARATOR + v1); source.add(arr[2] + Const.ROWKEY_DEFAULT_SEPARATOR + v2); Vector<Vector> comb = Combination .getLowerLimitCombinations(source, 2); if (null != comb && comb.size() > 0) { for (Vector vect : comb) { String indexRowkey = column + Const.ROWKEY_DEFAULT_SEPARATOR + vect.toString() .replaceAll(", ", "_") .replaceAll("\\[", "") .replaceAll("\\]", ""); // Put put = new Put( // Bytes.toBytes(indexRowkey)); // put.add(cf, qualifier, rowkey); // context.write(row, put); k.set(indexRowkey + Const.FIELD_COMMON_SEPARATOR + ts); v.set(cf + Const.FIELD_COMMON_SEPARATOR + qualifier + Const.FIELD_COMMON_SEPARATOR + rowkey); context.write(k, v); } } } } } // The input 'column' is 2 or 3, and the existing // columns of this rowkey is 2. } else if (cleanedMap.size() == 2) { Set<String> cn0 = null; Set<String> cn1 = null; // arr convert to list, do not use Arrays.asList(), it // could not use remove(). List<String> arrList = new ArrayList<String>(); for (String s : arr) { arrList.add(s); } if (arr.length == 3) { String key = getKeyWithEmptyValue(colNameValSetrMap); arrList.remove(key); cn0 = cleanedMap.get(arrList.get(0)); cn1 = cleanedMap.get(arrList.get(1)); } else if (arr.length == 2) { cn0 = cleanedMap.get(arr[0]); cn1 = cleanedMap.get(arr[1]); } for (String v0 : cn0) { for (String v1 : cn1) { String indexRowkey = column + Const.ROWKEY_DEFAULT_SEPARATOR + arrList.get(0) + Const.ROWKEY_DEFAULT_SEPARATOR + v0 + Const.ROWKEY_DEFAULT_SEPARATOR + arrList.get(1) + Const.ROWKEY_DEFAULT_SEPARATOR + v1; // Put put = new // Put(Bytes.toBytes(indexRowkey)); // put.add(cf, qualifier, rowkey); // context.write(row, put); k.set(indexRowkey + Const.FIELD_COMMON_SEPARATOR + ts); v.set(cf + Const.FIELD_COMMON_SEPARATOR + qualifier + Const.FIELD_COMMON_SEPARATOR + rowkey); context.write(k, v); } } } } } } catch (Exception e) { e.printStackTrace(); System.err.println("Error: " + e.getMessage() + ", Row: " + Bytes.toString(row.get()) + ", Value: " + json); } } private Map<String, Set<String>> removeEmptyEntry( Map<String, Set<String>> map) { Map<String, Set<String>> rst = new HashMap<String, Set<String>>(); if (null != map && map.size() > 0) { Set<Map.Entry<String, Set<String>>> set = map.entrySet(); for (Iterator<Map.Entry<String, Set<String>>> it = set.iterator(); it .hasNext();) { Map.Entry<String, Set<String>> entry = (Map.Entry<String, Set<String>>) it .next(); Set<String> value = entry.getValue(); if (value.size() > 0) rst.put(entry.getKey(), value); } } return rst; } private String getKeyWithEmptyValue(Map<String, Set<String>> map) { String rst = null; if (null != map && map.size() > 0) { Set<Map.Entry<String, Set<String>>> set = map.entrySet(); for (Iterator<Map.Entry<String, Set<String>>> it = set.iterator(); it .hasNext();) { Map.Entry<String, Set<String>> entry = (Map.Entry<String, Set<String>>) it .next(); Set<String> value = entry.getValue(); if (value.size() == 0) rst = entry.getKey(); } } return rst; } }