package meetup.beeno;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import meetup.beeno.mapping.IndexMapping;
import meetup.beeno.util.HUtil;
import meetup.beeno.util.PBUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
/**
* Generates updates for secondary index tables based on an update for
* the indexed table. This implementation generates index keys based
* on a primary property value (from which the index is mapped) and an
* optional date column, which can be appended to the key as is or as
* an inverted value for reverse chronological sorting.
*
* Extra columns mapped in the index will be stored in the index table
* row, along with the primary property value and the date column (if
* present), and can be used for filtering when scanning the index.
*
* TODO: Currently this implementation does not handle removing
* secondary index rows when a value is removed!!!
*
* @author garyh
*
*/
public class EntityIndexer {
    /** Column family in the index row that holds the back-reference to the indexed record. */
    static final byte[] INDEX_FAMILY = Bytes.toBytes("__idx__");
    /** Qualifier within {@link #INDEX_FAMILY} under which the original row key is stored. */
    static final byte[] INDEX_KEY_COLUMN = Bytes.toBytes("row");
    /** Separator written between the components of a generated index row key. */
    private static final byte[] ROW_KEY_SEP = Bytes.toBytes("-");

    private static final Logger log = Logger.getLogger(EntityIndexer.class);

    private final String indexTable;
    private final HUtil.HCol primaryField;
    /** Optional date column; {@code null} when the mapping supplies none. */
    private final HUtil.HCol dateField;
    private final boolean invertDate;
    /** Additional columns copied into the index row; never {@code null}. */
    private final List<HUtil.HCol> extraFields;
    private final IndexKeyFactory keyFactory;

    /**
     * Builds an indexer from the given index mapping.
     *
     * If the mapping supplies a key factory class, an instance of it is
     * created; otherwise a {@link DefaultKeyFactory} is used.
     *
     * @param mapping the index definition to read table, field and key
     *        factory settings from
     * @throws IllegalArgumentException if the mapped key factory class
     *         cannot be instantiated
     */
    public EntityIndexer(IndexMapping mapping) {
        this.indexTable = mapping.getTableName();
        this.primaryField = new HUtil.HCol(mapping.getPrimaryField().getFamily(),
                                           mapping.getPrimaryField().getColumn());
        this.dateField = mapping.getDateField();
        this.invertDate = mapping.isDateInverted();

        // normalize a missing extra-field list so later iteration cannot NPE
        List<HUtil.HCol> extras = mapping.getExtraFields();
        if (extras == null) {
            extras = new ArrayList<HUtil.HCol>(0);
        }
        this.extraFields = extras;

        IndexKeyFactory factory;
        if (mapping.getKeyFactory() != null) {
            try {
                factory = mapping.getKeyFactory().newInstance();
            }
            catch (Exception e) {
                throw new IllegalArgumentException("Unable to instantiate key factory class", e);
            }
        }
        else {
            factory = new DefaultKeyFactory();
        }
        this.keyFactory = factory;
    }

    /**
     * @return the name of the index table, as given by the mapping
     */
    public String getIndexTable() { return this.indexTable; }

    /**
     * Returns a set of updates for this index table, based on the
     * update to the underlying table.
     *
     * @param entityUpdate the update being applied to the indexed table
     * @return a list holding at most one index update; empty when the
     *         update carries no primary value
     */
    public List<Put> getIndexUpdates(Put entityUpdate) {
        List<Put> updates = new ArrayList<Put>(1);

        /* TODO: handle many index records to one base record.
         * For example, if the primary property value contains
         * a collection type, a new index record could be generated
         * for each value in the collection.
         */
        Put valUpdate = getUpdateForValue(entityUpdate);
        if (valUpdate != null) {
            updates.add(valUpdate);
        }

        return updates;
    }

    /**
     * Builds the index row update corresponding to a single entity update.
     *
     * The index row carries the primary value, the date value (when a
     * date field is mapped and a date could be read), any extra fields
     * present in the update, and the original row key under
     * {@link #INDEX_FAMILY}/{@link #INDEX_KEY_COLUMN}.
     *
     * @param entityUpdate the update being applied to the indexed table
     * @return the index update, or {@code null} if the entity update has
     *         no (or an empty) value for the primary field
     */
    protected Put getUpdateForValue(Put entityUpdate) {
        Map<byte[],List<KeyValue>> familyMap = entityUpdate.getFamilyMap();

        byte[] primaryVal = getValue(this.primaryField.family(), this.primaryField.column(), familyMap);
        if (primaryVal == null || primaryVal.length == 0) {
            // no update for primary value, skip
            if (log.isDebugEnabled()) {
                log.debug("No primary value for index "+getIndexTable());
            }
            return null;
        }

        // generate the index row key
        Long date = getDateValue(familyMap);
        Put put = new Put( createIndexKey(primaryVal, date, entityUpdate.getRow()) );
        // sync with base timestamp
        put.setTimeStamp( entityUpdate.getTimeStamp() );

        // store all specified values (when present)
        put.add(this.primaryField.family(), this.primaryField.column(), primaryVal);
        if (this.dateField != null && date != null) {
            put.add(this.dateField.family(), this.dateField.column(), PBUtil.toBytes(date));
        }

        // add any extra fields
        for (HUtil.HCol col : this.extraFields) {
            byte[] val = getValue(col.family(), col.column(), familyMap);
            if (val != null) {
                put.add(col.family(), col.column(), val);
            }
        }

        // store the orig record key
        put.add(INDEX_FAMILY, INDEX_KEY_COLUMN, entityUpdate.getRow());

        return put;
    }

    /**
     * Reads the mapped date column from the update, if any.
     *
     * @param familyMap the column values of the entity update, by family
     * @return the date value, or {@code null} when no date field is
     *         mapped, the update does not contain the date column, or the
     *         stored value does not decode to an INTEGER field
     */
    protected Long getDateValue(Map<byte[],List<KeyValue>> familyMap) {
        if (this.dateField == null) {
            return null;
        }

        byte[] rawDate = getValue(this.dateField.family(),
                                  this.dateField.column(), familyMap);
        if (rawDate == null) {
            // the update does not touch the date column; do not hand a
            // null buffer to the decoder (its null handling is not
            // visible from this class)
            return null;
        }

        HDataTypes.HField pbDate = PBUtil.readMessage(rawDate);
        // dates are assumed to be Long!
        if (pbDate != null && pbDate.getType() == HDataTypes.HField.Type.INTEGER) {
            return pbDate.getInteger();
        }
        return null;
    }

    /**
     * Looks up the value for a column in the update's family map.
     *
     * @param family the column family to search
     * @param col the column qualifier to match
     * @param familyMap the column values of the entity update, by family
     * @return the value of the first matching entry, or {@code null} if
     *         the family or column is not part of the update
     */
    protected byte[] getValue(byte[] family, byte[] col,
                              Map<byte[],List<KeyValue>> familyMap) {
        List<KeyValue> familyVals = familyMap.get(family);
        if (familyVals == null) {
            return null;
        }

        for (KeyValue kv : familyVals) {
            if (kv.matchingColumn(family, col)) {
                return kv.getValue();
            }
        }
        return null;
    }

    /**
     * Creates the index row key for the given values by delegating to
     * the configured {@link IndexKeyFactory}.
     *
     * The date is only passed on to the factory when a date field is
     * mapped for this index; otherwise the factory receives {@code null}.
     *
     * @param primaryVal the raw stored bytes of the primary field
     * @param date the date value, may be {@code null}
     * @param origRow the row key of the indexed record
     * @return the index row key
     */
    public byte[] createIndexKey(byte[] primaryVal, Long date, byte[] origRow) {
        Long keyDate = (this.dateField != null) ? date : null;
        return this.keyFactory.createKey(primaryVal, origRow, keyDate, this.invertDate);
    }

    /**
     * Default key layout: {@code <primary>[-<date>][-<rowKey>]}.
     *
     * A primary value that decodes to an INTEGER field is written using
     * {@code HUtil.toOrderedBytes}; any other value is written as its raw
     * bytes.
     */
    public static class DefaultKeyFactory implements IndexKeyFactory {
        @Override
        public byte[] createKey( byte[] primaryVal, byte[] rowKey, Long date, boolean invertDate ) {
            byte[] key = new byte[0];
            HDataTypes.HField pbVal = PBUtil.readMessage(primaryVal);
            // order numeric types
            if (pbVal != null && pbVal.getType() == HDataTypes.HField.Type.INTEGER) {
                key = Bytes.add(key, HUtil.toOrderedBytes(pbVal.getInteger()));
            }
            else {
                // just use raw bytes
                key = Bytes.add(key, primaryVal);
            }

            // add on date, if specified
            if (date != null) {
                key = Bytes.add(key,
                                Bytes.add(ROW_KEY_SEP, HUtil.toOrderedBytes(date, invertDate)) );
            }

            // add on the original row key to ensure uniqueness
            if (rowKey != null && rowKey.length > 0) {
                key = Bytes.add(key,
                                Bytes.add(ROW_KEY_SEP, rowKey));
            }

            return key;
        }
    }

    /**
     * Generates the same index keys as DefaultKeyFactory, but prefixed with the primary value mod 100 for
     * better row key distribution.
     *
     * This is designed specifically to avoid hot regions arising from frequently used indexes based off of
     * a sequentially incremented primary value.
     *
     * Only primary values that decode to an INTEGER field receive the
     * prefix; other values yield exactly the DefaultKeyFactory key.
     * NOTE: the prefix uses Java's {@code %} operator, so a negative
     * primary value yields a negative remainder in the prefix.
     * @author garyh
     *
     */
    public static class ModKeyFactory extends DefaultKeyFactory {
        /** Modulus used to compute the distribution prefix. */
        private static final int BASE = 100;

        @Override
        public byte[] createKey( byte[] primaryVal, byte[] rowKey, Long date, boolean invertDate) {
            byte[] key = new byte[0];
            HDataTypes.HField pbVal = PBUtil.readMessage(primaryVal);
            // prefix numeric values with "<value mod BASE>-"
            if (pbVal != null && pbVal.getType() == HDataTypes.HField.Type.INTEGER) {
                long val = pbVal.getInteger();
                key = Bytes.add(Bytes.toBytes(Long.toString( val % BASE )), ROW_KEY_SEP);
            }

            key = Bytes.add(key, super.createKey(primaryVal, rowKey, date, invertDate));
            return key;
        }
    }
}