package org.solbase;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.net.MalformedURLException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.ResourceBundle;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.SolbaseHTablePool;
import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.lucene.index.Term;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrInputDocument;
/**
* @author koh
*
* The following HBase tables need to be created first (HBase shell commands):
*
* create 'SI', 'info', {NAME=>'info',REPLICATION_SCOPE=>1,VERSION=>1}
* create 'Docs', 'field', 'allTerms', 'timestamp', {COMPRESSION=>'SNAPPY',NAME=>'field',VERSION=>1,REPLICATION_SCOPE=>1},{COMPRESSION=>'SNAPPY',NAME=>'allTerms',VERSION=>1,REPLICATION_SCOPE=>1},{COMPRESSION=>'SNAPPY',NAME=>'timestamp',VERSION=>1, REPLICATION_SCOPE=>1}
* d => document, t=>term, f=>field
* create 'TV', 'd', {COMPRESSION=>'SNAPPY',NAME=>'d',VERSION=>1, REPLICATION_SCOPE=>1}
* create 'DocKeyIdMap', 'docId',{COMPRESSION=>'SNAPPY',NAME=>'docId',VERSION=>1,REPLICATION_SCOPE=>1}
* create 'Sequence', 'id',{COMPRESSION=>'SNAPPY',NAME=>'id',VERSION=>1,REPLICATION_SCOPE=>1}
* create 'TVVersionId', 'timestamp', {COMPRESSION=>'SNAPPY',NAME=>'timestamp',VERSION=>1,REPLICATION_SCOPE=>1}
* create 'uniq_checksum_user_media', 'userMediaKey', {COMPRESSION=>'SNAPPY',NAME=>'userMediaKey',VERSION=>1,REPLICATION_SCOPE=>1}
*
* To load a Solr schema file into Solbase:
* curl http://localhost:8080/solbase/schema/pbimages --data-binary @image_schema.xml -H 'Content-type:text/xml; charset=utf-8'
*/
public final class SolbaseUtil {
// Four bytes of Byte.MAX_VALUE; used as the separator between the parts of a
// composite term key (see generateTermKey and the getDocumentId scanners).
public static final byte[] delimiter = {Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE, Byte.MAX_VALUE };
//public static final byte[] delimiter = {-17, -65, -65};
// Solbase epoch (2005/1/1) expressed in minutes since the Unix epoch; subtracted
// from "current time in minutes" values so the stored numbers need fewer bytes.
public static final long SolbaseEpochTime = 18408960l;
// Four zero bytes; appended after the delimiter by generateTermBeginKey.
public static final byte[] floorBytes = {0, 0, 0, 0 };
// HBase table names as bytes. All of them are assigned exactly once in the
// static initializer that follows; most carry an optional "_<postfix>" suffix.
// "TV" (+ postfix)
public static final byte[] termVectorTable;
// "Docs" (+ postfix)
public static final byte[] docTable;
// "SI" (never suffixed)
public static final byte[] schemaInfoTable;
// "DocKeyIdMap" (+ postfix)
public static final byte[] docKeyIdMapTable;
// "Sequence" (+ postfix)
public static final byte[] sequenceTable;
// "TVVersionId" (+ postfix)
public static final byte[] termVectorVersionIDTable;
// "uniq_checksum_user_media" (+ postfix)
public static final byte[] uniqChecksumUserMediaTable;
// "user_media" (never suffixed)
public static final byte[] userMediaTable;
static {
    // Resolve the optional table-name postfix. The JVM system property
    // "solbase.db.postfix" wins; otherwise the "db.postfix" key of the
    // "solbase" resource bundle is consulted.
    String dbPostfix = System.getProperty("solbase.db.postfix");
    if (dbPostfix == null) {
        try {
            // getBundle()/getString() never return null: they throw when the
            // bundle or the key is missing, so absence must be handled as an
            // exception rather than with a null check.
            dbPostfix = ResourceBundle.getBundle("solbase").getString("db.postfix");
        } catch (java.util.MissingResourceException ignored) {
            // Nothing configured: fall back to the un-suffixed table names
            // instead of failing class initialization.
            dbPostfix = null;
        }
    }
    // A configured postfix "x" turns e.g. "TV" into "TV_x".
    dbPostfix = (dbPostfix == null || dbPostfix.isEmpty()) ? "" : "_" + dbPostfix;
    termVectorTable = Bytes.toBytes("TV" + dbPostfix);
    docTable = Bytes.toBytes("Docs" + dbPostfix);
    // The schema-info table is shared and never suffixed.
    schemaInfoTable = Bytes.toBytes("SI");
    docKeyIdMapTable = Bytes.toBytes("DocKeyIdMap" + dbPostfix);
    sequenceTable = Bytes.toBytes("Sequence" + dbPostfix);
    termVectorVersionIDTable = Bytes.toBytes("TVVersionId" + dbPostfix);
    uniqChecksumUserMediaTable = Bytes.toBytes("uniq_checksum_user_media" + dbPostfix);
    // The user_media table is never suffixed either.
    userMediaTable = Bytes.toBytes("user_media");
}
// Amount by which generateUniqId() advances the sequence counter per call.
public static final int UNIQ_ID_CHUNK = 10000;
// Column family names (as bytes) used when creating and reading the tables.
// "timestamp": family of the TVVersionId table and one of the Docs families.
public static final byte[] timestampColumnFamilyName = Bytes.toBytes("timestamp");
// "d": family of the term vector (TV) table.
public static final byte[] termVectorDocColumnFamilyName = Bytes.toBytes("d");
// "docId": family of the DocKeyIdMap table.
public static final byte[] docIdColumnFamilyName = Bytes.toBytes("docId");
// "id": family of the Sequence table.
public static final byte[] idColumnFamilyName = Bytes.toBytes("id");
// "allTerms" and "field": the remaining two families of the Docs table.
public static final byte[] allTermsColumnFamilyName = Bytes.toBytes("allTerms");
public static final byte[] fieldColumnFamilyName = Bytes.toBytes("field");
// "userMediaKey": family of the uniq_checksum_user_media table.
public static final byte[] userMediaKeyColumnFamilyName = Bytes.toBytes("userMediaKey");
// Column qualifiers: "tombstoned" and the empty qualifier.
public static final byte[] tombstonedColumnFamilyQualifierBytes = Bytes.toBytes("tombstoned");
public static final byte[] emptyColumnFamilyQualifierBytes = Bytes.toBytes("");
// "info": family of the schema-info (SI) table.
public static final byte[] schemaInfoColumnFamilyName = Bytes.toBytes("info");
// Cache invalidation interval in milliseconds. Not read anywhere in this class.
// NOTE(review): public, static and non-final, so any caller can change it.
public static int cacheInvalidationInterval = 1000;//ms
// Size argument handed to the SolbaseHTablePool constructor.
private static int SOLBASE_HTABLE_POOL = 100; // per table. 3 (docs, tv, tvversionid) * 10 (num of region servers) * 100 = 3000 threads ~ 3.0G at most
// Shared table pool and HBase configuration; both are set in the static
// initializer that follows.
private static SolbaseHTablePool hTablePool;
private static Configuration conf;
static {
conf = HBaseConfiguration.create();
hTablePool = new SolbaseHTablePool(conf, SOLBASE_HTABLE_POOL);
}
/**
 * Borrows a table handle from the shared pool.
 *
 * @param tableName name of the HBase table, as bytes
 * @return the pooled table; hand it back with {@link #releaseTable(HTableInterface)}
 */
public static HTableInterface getTable(byte[] tableName) {
    final HTableInterface pooledTable = hTablePool.getTable(tableName);
    return pooledTable;
}
/**
 * Borrows a table from the shared pool as a concrete {@link HTable} and
 * switches auto-flush off on it before returning it.
 *
 * (An earlier revision also set a 12MB write buffer here; that code had
 * been commented out.)
 *
 * @param tableName name of the HBase table, as bytes
 * @return the pooled table, cast to {@code HTable}, with auto-flush disabled
 */
public static HTable getLocalTable(byte[] tableName){
    final HTableInterface pooledTable = hTablePool.getTable(tableName);
    final HTable localTable = (HTable) pooledTable;
    localTable.setAutoFlush(false);
    return localTable;
}
/** @return a pooled handle on the term vector table */
public static HTableInterface getTermVectorTable() {
    final HTableInterface table = SolbaseUtil.getTable(SolbaseUtil.termVectorTable);
    return table;
}
/** @return the term vector table as an {@code HTable} obtained through {@link #getLocalTable(byte[])} */
public static HTable getLocalTermVectorTable(){
    final HTable table = SolbaseUtil.getLocalTable(SolbaseUtil.termVectorTable);
    return table;
}
/** @return the term vector table name decoded to a String */
public static String getTermVectorTableName(){
    final String tableName = Bytes.toString(SolbaseUtil.termVectorTable);
    return tableName;
}
// TODO: uniqChecksumUserMediaTable is a PB-specific table
/** @return a pooled handle on the uniq_checksum_user_media table */
public static HTableInterface getUniqChecksumUserMediaTable() {
    final HTableInterface table = SolbaseUtil.getTable(SolbaseUtil.uniqChecksumUserMediaTable);
    return table;
}
/** @return the uniq_checksum_user_media table as an {@code HTable} obtained through {@link #getLocalTable(byte[])} */
public static HTable getLocalUniqChecksumUserMediaTable(){
    final HTable table = SolbaseUtil.getLocalTable(SolbaseUtil.uniqChecksumUserMediaTable);
    return table;
}
/** @return the uniq_checksum_user_media table name decoded to a String */
public static String getUniqChecksumUserMediaTableName(){
    final String tableName = Bytes.toString(SolbaseUtil.uniqChecksumUserMediaTable);
    return tableName;
}
/** @return a pooled handle on the term vector version id table */
public static HTableInterface getTermVectorVersionIDTable() {
    final HTableInterface table = SolbaseUtil.getTable(SolbaseUtil.termVectorVersionIDTable);
    return table;
}
/** @return the term vector version id table as an {@code HTable} obtained through {@link #getLocalTable(byte[])} */
public static HTable getLocalTermVectorVersionIDTable(){
    final HTable table = SolbaseUtil.getLocalTable(SolbaseUtil.termVectorVersionIDTable);
    return table;
}
/** @return the term vector version id table name decoded to a String */
public static String getTermVectorVersionIDTableName(){
    final String tableName = Bytes.toString(SolbaseUtil.termVectorVersionIDTable);
    return tableName;
}
/** @return a pooled handle on the document table */
public static HTableInterface getDocTable() {
    final HTableInterface table = SolbaseUtil.getTable(SolbaseUtil.docTable);
    return table;
}
/** @return the document table as an {@code HTable} obtained through {@link #getLocalTable(byte[])} */
public static HTable getLocalDocTable() {
    final HTable table = SolbaseUtil.getLocalTable(SolbaseUtil.docTable);
    return table;
}
/** @return the document table name decoded to a String */
public static String getDocTableName(){
    final String tableName = Bytes.toString(docTable);
    return tableName;
}
/** @return a pooled handle on the schema info table */
public static HTableInterface getSchemaInfoTable() {
    final HTableInterface table = SolbaseUtil.getTable(SolbaseUtil.schemaInfoTable);
    return table;
}
/** @return a pooled handle on the document key to doc id mapping table */
public static HTableInterface getDocKeyIdMapTable() {
    final HTableInterface table = SolbaseUtil.getTable(SolbaseUtil.docKeyIdMapTable);
    return table;
}
/** @return the DocKeyIdMap table as an {@code HTable} obtained through {@link #getLocalTable(byte[])} */
public static HTable getLocalDocKeyIdMapTable() {
    final HTable table = SolbaseUtil.getLocalTable(SolbaseUtil.docKeyIdMapTable);
    return table;
}
/** @return the DocKeyIdMap table name decoded to a String */
public static String getDocKeyIdMapTableName(){
    final String tableName = Bytes.toString(docKeyIdMapTable);
    return tableName;
}
/** @return a pooled handle on the user_media table */
public static HTableInterface getUserMediaTable() {
    final HTableInterface table = SolbaseUtil.getTable(SolbaseUtil.userMediaTable);
    return table;
}
/** @return the user_media table as an {@code HTable} obtained through {@link #getLocalTable(byte[])} */
public static HTable getLocalUserMediaTable() {
    final HTable table = SolbaseUtil.getLocalTable(SolbaseUtil.userMediaTable);
    return table;
}
/** @return a pooled handle on the sequence table */
public static HTableInterface getSequenceTable() {
    final HTableInterface table = SolbaseUtil.getTable(SolbaseUtil.sequenceTable);
    return table;
}
/** @return the sequence table name decoded to a String */
public static String getSequenceTableName(){
    final String tableName = Bytes.toString(sequenceTable);
    return tableName;
}
/**
 * Hands a table obtained from one of the getters back to the shared pool.
 *
 * @param table the table to return
 */
public static void releaseTable(HTableInterface table) {
    SolbaseUtil.hTablePool.putTable(table);
}
/**
 * Builds the key prefix for a term: field bytes, the delimiter, then the
 * term text bytes.
 *
 * @param term the term to encode
 * @return {@code field + delimiter + text}
 */
public static byte[] generateTermKey(Term term) {
    final byte[] field = Bytes.toBytes(term.field());
    final byte[] text = Bytes.toBytes(term.text());
    return Bytes.add(field, delimiter, text);
}
/**
 * Builds the term key followed by the delimiter and four zero bytes.
 *
 * @param term the term to encode
 * @return {@code termKey + delimiter + floorBytes}
 */
public static byte[] generateTermBeginKey(Term term) {
    final byte[] termKey = generateTermKey(term);
    return Bytes.add(termKey, delimiter, floorBytes);
}
/**
 * Builds the term key followed by the delimiter twice (eight bytes of
 * {@code Byte.MAX_VALUE}).
 *
 * @param term the term to encode
 * @return {@code termKey + delimiter + delimiter}
 */
public static byte[] generateTermEndKey(Term term) {
    final byte[] termKey = generateTermKey(term);
    return Bytes.add(termKey, delimiter, delimiter);
}
/**
 * Builds the term key followed by the delimiter and the 4-byte encoding of
 * a document id.
 *
 * @param term the term to encode
 * @param startDocId document id appended after the second delimiter
 * @return {@code termKey + delimiter + Bytes.toBytes(startDocId)}
 */
public static byte[] generateTermKey(Term term, int startDocId) {
    final byte[] termKey = generateTermKey(term);
    final byte[] docIdBytes = Bytes.toBytes(startDocId);
    return Bytes.add(termKey, delimiter, docIdBytes);
}
/**
 * Extracts the bytes that follow the second delimiter of a term/doc key
 * laid out as {@code field + delimiter + term + delimiter + docId}.
 *
 * A delimiter is four <em>consecutive</em> {@code Byte.MAX_VALUE} bytes. The
 * run counter is reset whenever another byte is seen, so an isolated 0x7F
 * inside the field or term no longer shifts the detected boundary.
 *
 * @param termDocKey the composite key
 * @return a copy of everything after the second delimiter, or {@code null}
 *         when the key does not contain two delimiters
 */
public static byte[] getDocumentId(byte[] termDocKey) {
    final int delimiterLength = 4; // length of the key delimiter
    int consecutiveMaxBytes = 0;
    int delimiterCount = 0;
    for (int i = 0; i < termDocKey.length; i++) {
        if (termDocKey[i] == Byte.MAX_VALUE) {
            consecutiveMaxBytes++;
        } else {
            // a delimiter must be an unbroken run of MAX_VALUE bytes
            consecutiveMaxBytes = 0;
        }
        if (consecutiveMaxBytes == delimiterLength) {
            delimiterCount++;
            consecutiveMaxBytes = 0;
            if (delimiterCount == 2) {
                return Arrays.copyOfRange(termDocKey, i + 1, termDocKey.length);
            }
        }
    }
    return null;
}
/**
 * Reads the document id stored after the second delimiter of a term/doc
 * key laid out as {@code field + delimiter + term + delimiter + docId}.
 *
 * A delimiter is four <em>consecutive</em> {@code Byte.MAX_VALUE} bytes. The
 * run counter is reset whenever another byte is seen, so an isolated 0x7F
 * inside the field or term no longer shifts the detected boundary.
 *
 * The buffer is consumed from its current position; on success its position
 * ends up just past the four doc id bytes.
 *
 * @param termDocKey buffer positioned at the start of the composite key
 * @return the int read after the second delimiter, or {@code null} when
 *         the buffer runs out before two delimiters are seen
 */
public static Integer getDocumentId(ByteBuffer termDocKey) {
    final int delimiterLength = 4; // length of the key delimiter
    int consecutiveMaxBytes = 0;
    int delimiterCount = 0;
    while (termDocKey.remaining() > 0) {
        byte currentValue = termDocKey.get();
        if (currentValue == Byte.MAX_VALUE) {
            consecutiveMaxBytes++;
        } else {
            // a delimiter must be an unbroken run of MAX_VALUE bytes
            consecutiveMaxBytes = 0;
        }
        if (consecutiveMaxBytes == delimiterLength) {
            delimiterCount++;
            consecutiveMaxBytes = 0;
            if (delimiterCount == 2) {
                return termDocKey.getInt();
            }
        }
    }
    return null;
}
/**
 * Finds the offset of the first byte after the second delimiter of a
 * term/doc key laid out as {@code field + delimiter + term + delimiter + docId}.
 *
 * A delimiter is four <em>consecutive</em> {@code Byte.MAX_VALUE} bytes. The
 * run counter is reset whenever another byte is seen, so an isolated 0x7F
 * inside the field or term no longer shifts the detected boundary.
 *
 * @param termDocKey the composite key
 * @return index of the byte following the second delimiter, or {@code -1}
 *         when the key does not contain two delimiters
 */
public static int findDocIdIndex(byte[] termDocKey) {
    final int delimiterLength = 4; // length of the key delimiter
    int consecutiveMaxBytes = 0;
    int delimiterCount = 0;
    for (int i = 0; i < termDocKey.length; i++) {
        if (termDocKey[i] == Byte.MAX_VALUE) {
            consecutiveMaxBytes++;
        } else {
            // a delimiter must be an unbroken run of MAX_VALUE bytes
            consecutiveMaxBytes = 0;
        }
        if (consecutiveMaxBytes == delimiterLength) {
            delimiterCount++;
            consecutiveMaxBytes = 0;
            if (delimiterCount == 2) {
                return i + 1;
            }
        }
    }
    return -1;
}
/**
 * Decodes a variable-length int from the buffer: seven payload bits per
 * byte, least significant group first, high bit set on every byte that is
 * followed by another one.
 *
 * Decoding never reads past the bytes that were remaining on entry; an
 * empty buffer yields 0 and a truncated value yields the bits read so far.
 *
 * @param buf buffer positioned at the first byte of the value
 * @return the decoded int
 */
public static int mreadVInt(ByteBuffer buf)
{
    final int available = buf.remaining();
    if (available == 0) {
        return 0;
    }
    byte current = buf.get();
    int value = current & 0x7F;
    int consumed = 1;
    int shift = 7;
    // keep going while the continuation bit is set and bytes are left
    while ((current & 0x80) != 0 && consumed < available) {
        current = buf.get();
        value |= (current & 0x7F) << shift;
        shift += 7;
        consumed++;
    }
    return value;
}
/**
 * Decodes a variable-length int from the stream: seven payload bits per
 * byte, least significant group first, high bit set on every byte that is
 * followed by another one.
 *
 * {@link InputStream#read()} signals end of stream with -1. That value
 * used to be narrowed to a byte with the continuation bit set, so an empty
 * or truncated stream made the loop spin forever; it is now reported as an
 * {@link IOException}.
 *
 * @param buf stream positioned at the first byte of the value
 * @return the decoded int
 * @throws IOException if reading fails or the stream ends before the value
 *         is complete
 */
public static int mreadVInt(InputStream buf) throws IOException
{
    int next = buf.read();
    if (next < 0) {
        throw new IOException("Unexpected end of stream while reading VInt");
    }
    int value = next & 0x7F;
    for (int shift = 7; (next & 0x80) != 0; shift += 7) {
        next = buf.read();
        if (next < 0) {
            throw new IOException("Unexpected end of stream while reading VInt");
        }
        value |= (next & 0x7F) << shift;
    }
    return value;
}
/**
 * Encodes an int in variable-length form: seven payload bits per byte,
 * least significant group first, high bit set on every byte except the
 * last. The value is treated as unsigned, so negative ints take five bytes.
 *
 * @param i value to encode
 * @return a new array of one to five bytes
 */
public static byte[] writeVInt(int i)
{
    // number of 7-bit groups needed for the significant bits, at least one
    final int significantBits = Integer.SIZE - Integer.numberOfLeadingZeros(i);
    final int byteCount = Math.max(1, (significantBits + 6) / 7);
    final byte[] encoded = new byte[byteCount];
    int remaining = i;
    for (int index = 0; index < byteCount - 1; index++) {
        // low seven bits plus the continuation flag
        encoded[index] = (byte) ((remaining & 0x7F) | 0x80);
        remaining >>>= 7;
    }
    encoded[byteCount - 1] = (byte) remaining;
    return encoded;
}
/**
 * Deserializes one object, using Java serialization, from the remaining
 * bytes of an array-backed buffer. The buffer's position is not changed.
 *
 * NOTE(review): this is native Java deserialization; it should only be fed
 * data from a trusted source.
 *
 * @param data array-backed buffer whose remaining bytes hold the object
 * @return the deserialized object
 * @throws IOException if the stream header or content cannot be read
 * @throws ClassNotFoundException if the serialized class cannot be resolved
 */
public static Object fromBytes(ByteBuffer data) throws IOException, ClassNotFoundException
{
    final byte[] backing = data.array();
    final int start = data.position() + data.arrayOffset();
    final int length = data.remaining();
    final ByteArrayInputStream rawBytes = new ByteArrayInputStream(backing, start, length);
    final ObjectInputStream objectStream = new ObjectInputStream(rawBytes);
    final Object decoded = objectStream.readObject();
    objectStream.close();
    return decoded;
}
/**
 * Serializes an object with Java serialization and wraps the resulting
 * bytes in a buffer.
 *
 * @param o object to serialize
 * @return a buffer wrapping the serialized form
 * @throws IOException if the object cannot be serialized
 */
public static ByteBuffer toBytes(Object o) throws IOException
{
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    final ObjectOutputStream objectStream = new ObjectOutputStream(sink);
    objectStream.writeObject(o);
    objectStream.close();
    final byte[] serialized = sink.toByteArray();
    return ByteBuffer.wrap(serialized);
}
// sequence doc id mapping to actual silo.picture_id
/**
 * Looks up the doc id stored for a document key in the DocKeyIdMap table
 * (family "docId", empty qualifier).
 *
 * @param key document key used as the row key
 * @return the stored doc id, or {@code null} when the row does not exist or
 *         has no value in the docId column
 * @throws IOException if the HBase read fails
 */
public static Integer getDocId(String key) throws IOException {
    HTableInterface docIdKeyMap = SolbaseUtil.getDocKeyIdMapTable();
    try {
        Result result = docIdKeyMap.get(new Get(Bytes.toBytes(key)));
        if (result.isEmpty()) {
            return null;
        }
        byte[] docId = result.getValue(docIdColumnFamilyName, emptyColumnFamilyQualifierBytes);
        if (docId == null) {
            // the row exists but carries no docId cell; treat it as "no mapping"
            // rather than letting Bytes.toInt fail on null
            return null;
        }
        return Bytes.toInt(docId);
    } finally {
        // always hand the pooled table back
        SolbaseUtil.releaseTable(docIdKeyMap);
    }
}
// sequence generator for generating doc id
/**
 * Allocates a doc id by incrementing the "sequence" row of the Sequence
 * table by one, then records the {@code key -> docId} mapping in the
 * DocKeyIdMap table.
 *
 * Each pooled table is released in its own {@code finally}, so a failure
 * while acquiring or releasing one table cannot leak the other.
 *
 * NOTE(review): the counter is a long narrowed to int; values beyond the
 * int range would wrap.
 *
 * @param key document key to map to the new id
 * @return the value returned by the increment, narrowed to int
 * @throws IOException if the increment or the put fails
 */
public static int generateDocId(String key) throws IOException {
    HTableInterface sequence = SolbaseUtil.getSequenceTable();
    try {
        HTableInterface docIdKeyMap = SolbaseUtil.getDocKeyIdMapTable();
        try {
            int docId = (int) sequence.incrementColumnValue(Bytes.toBytes("sequence"), idColumnFamilyName, emptyColumnFamilyQualifierBytes, 1, true);
            Put mapping = new Put(Bytes.toBytes(key));
            mapping.add(docIdColumnFamilyName, emptyColumnFamilyQualifierBytes, Bytes.toBytes(docId));
            docIdKeyMap.put(mapping);
            return docId;
        } finally {
            SolbaseUtil.releaseTable(docIdKeyMap);
        }
    } finally {
        SolbaseUtil.releaseTable(sequence);
    }
}
// return uniq id from sequence table
// mainly used for chunking with pristine indexing
/**
 * Advances the "sequence" row of the Sequence table by
 * {@link #UNIQ_ID_CHUNK} and returns the resulting counter value.
 *
 * @return the value returned by the increment, narrowed to int
 * @throws IOException if the increment fails
 */
public static int generateUniqId() throws IOException {
    final HTableInterface pooledSequence = SolbaseUtil.getSequenceTable();
    try {
        final long counter = pooledSequence.incrementColumnValue(Bytes.toBytes("sequence"), Bytes.toBytes("id"), Bytes.toBytes(""), SolbaseUtil.UNIQ_ID_CHUNK, true);
        return (int) counter;
    } finally {
        SolbaseUtil.releaseTable(pooledSequence);
    }
}
/**
 * Reads the current value of the "sequence" row of the Sequence table.
 * When the row does not exist yet, the counter is incremented by one and
 * that first value is returned instead.
 *
 * The pooled table is now released on every path; it used to be borrowed
 * and never returned.
 *
 * @return the counter narrowed to int, or {@code -1} if HBase access fails
 */
public static int getSequenceId(){
    HTableInterface sequence = SolbaseUtil.getSequenceTable();
    try {
        Result result = sequence.get(new Get(Bytes.toBytes("sequence")));
        if (result == null || result.isEmpty()) {
            // no counter row yet: the increment creates it
            return (int) sequence.incrementColumnValue(Bytes.toBytes("sequence"), idColumnFamilyName, emptyColumnFamilyQualifierBytes, 1, true);
        }
        byte[] val = result.getValue(idColumnFamilyName, emptyColumnFamilyQualifierBytes);
        return (int) Bytes.toLong(val);
    } catch (IOException e) {
        // best effort: report the failure and signal it with -1
        e.printStackTrace();
        return -1;
    } finally {
        SolbaseUtil.releaseTable(sequence);
    }
}
/** @return the current sequence value, exactly as reported by {@link #getSequenceId()} */
public static int getCurrentMaxId(){
    final int currentSequenceId = SolbaseUtil.getSequenceId();
    return currentSequenceId;
}
/**
 * Converts a minute count to minutes relative to the Solbase epoch.
 *
 * @param currentMinutes minute count on the same scale as {@link #SolbaseEpochTime}
 * @return {@code currentMinutes - SolbaseEpochTime}, narrowed to int
 */
public static int getEpochSinceSolbase(long currentMinutes){
    final long minutesSinceSolbaseEpoch = currentMinutes - SolbaseEpochTime;
    return (int) minutesSinceSolbaseEpoch;
}
/**
 * Converts a Solbase-relative minute count, given as text, to
 * {@code (SolbaseEpochTime + minutes) * 60}.
 *
 * @param currentMinutes decimal int string of minutes since the Solbase epoch
 * @return the converted value, or {@code 0} when the text is not a valid int
 */
public static long getCurrentTimeFromEpochSinceSolbase(String currentMinutes){
    final int minutesSinceSolbaseEpoch;
    try {
        minutesSinceSolbaseEpoch = Integer.parseInt(currentMinutes);
    } catch (NumberFormatException e) {
        // unparsable input is deliberately mapped to 0
        return 0;
    }
    return (SolbaseEpochTime + minutesSinceSolbaseEpoch) * 60;
}
/**
 * Creates a table from the descriptor. The table is pre-split into
 * {@code numberOfRegions} regions between {@code startKey} and
 * {@code endKey} only when all three of those arguments are non-null;
 * otherwise it is created without split arguments.
 *
 * NOTE(review): the HBaseAdmin created here is not closed - confirm
 * whether the HBase version in use requires it.
 *
 * @param desc table descriptor
 * @param startKey first split key, or {@code null}
 * @param endKey last split key, or {@code null}
 * @param numberOfRegions number of regions, or {@code null}
 * @throws IOException if the admin call fails
 */
public static void createTable(HTableDescriptor desc, byte[] startKey, byte[] endKey, Integer numberOfRegions) throws IOException{
    final HBaseAdmin hbaseAdmin = new HBaseAdmin(SolbaseUtil.conf);
    final boolean presplit = startKey != null && endKey != null && numberOfRegions != null;
    if (!presplit) {
        hbaseAdmin.createTable(desc);
        return;
    }
    hbaseAdmin.createTable(desc, startKey, endKey, numberOfRegions);
}
/**
 * Applies the settings shared by all Solbase column families: SNAPPY
 * compression, replication scope 1 and a single retained version.
 *
 * @param column the column family descriptor to configure in place
 */
public static void setupHColumnDescriptor(HColumnDescriptor column){
    final int replicationScope = 1;
    final int retainedVersions = 1;
    column.setCompressionType(Algorithm.SNAPPY);
    column.setScope(replicationScope);
    column.setMaxVersions(retainedVersions);
}
/**
 * Creates the schema info table with its single "info" column family.
 *
 * @throws IOException if the admin call fails
 */
public static void createSITable() throws IOException {
    final HColumnDescriptor infoFamily = new HColumnDescriptor(schemaInfoColumnFamilyName);
    final HTableDescriptor tableDescriptor = new HTableDescriptor(schemaInfoTable);
    setupHColumnDescriptor(infoFamily);
    tableDescriptor.addFamily(infoFamily);
    new HBaseAdmin(conf).createTable(tableDescriptor);
}
/**
 * Creates the term vector table with its "d" column family, pre-split at
 * the given keys.
 *
 * @param splits split keys passed to the admin
 * @throws IOException if the admin call fails
 */
public static void createTermVectorTable(byte[][] splits) throws IOException{
    final HTableDescriptor tableDescriptor = new HTableDescriptor(getTermVectorTableName());
    final HColumnDescriptor docFamily = new HColumnDescriptor(termVectorDocColumnFamilyName);
    setupHColumnDescriptor(docFamily);
    tableDescriptor.addFamily(docFamily);
    final HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
    hbaseAdmin.createTable(tableDescriptor, splits);
}
/**
 * Creates the term vector table with its "d" column family, delegating
 * region splitting to {@link #createTable}.
 *
 * @param startTerm first split key, or {@code null}
 * @param endTerm last split key, or {@code null}
 * @param numberOfRegions number of regions, or {@code null}
 * @throws IOException if the admin call fails
 */
public static void createTermVectorTable(byte[] startTerm, byte[] endTerm, Integer numberOfRegions) throws IOException {
    final HTableDescriptor tableDescriptor = new HTableDescriptor(getTermVectorTableName());
    final HColumnDescriptor docFamily = new HColumnDescriptor(termVectorDocColumnFamilyName);
    setupHColumnDescriptor(docFamily);
    tableDescriptor.addFamily(docFamily);
    createTable(tableDescriptor, startTerm, endTerm, numberOfRegions);
}
/**
 * Creates the term vector version id table with its "timestamp" column
 * family and no split arguments.
 *
 * @throws IOException if the admin call fails
 */
public static void createTermVectorVersionIDTable() throws IOException {
    final HTableDescriptor tableDescriptor = new HTableDescriptor(getTermVectorVersionIDTableName());
    final HColumnDescriptor timestampFamily = new HColumnDescriptor(timestampColumnFamilyName);
    setupHColumnDescriptor(timestampFamily);
    tableDescriptor.addFamily(timestampFamily);
    createTable(tableDescriptor, null, null, null);
}
/**
 * Creates the DocKeyIdMap table with its "docId" column family,
 * delegating region splitting to {@link #createTable}.
 *
 * @param start first split key, or {@code null}
 * @param end last split key, or {@code null}
 * @param numberOfRegions number of regions, or {@code null}
 * @throws IOException if the admin call fails
 */
public static void createDocKeyIdMapTable(byte [] start, byte[] end, Integer numberOfRegions) throws IOException {
    final HTableDescriptor tableDescriptor = new HTableDescriptor(getDocKeyIdMapTableName());
    final HColumnDescriptor docIdFamily = new HColumnDescriptor(docIdColumnFamilyName);
    setupHColumnDescriptor(docIdFamily);
    tableDescriptor.addFamily(docIdFamily);
    createTable(tableDescriptor, start, end, numberOfRegions);
}
/**
 * Creates the document table with the "field", "allTerms" and "timestamp"
 * column families (added in that order), delegating region splitting to
 * {@link #createTable}.
 *
 * @param start first split key, or {@code null}
 * @param end last split key, or {@code null}
 * @param numberOfRegions number of regions, or {@code null}
 * @throws IOException if the admin call fails
 */
public static void createDocTable(byte[] start, byte[] end, Integer numberOfRegions) throws IOException {
    final HTableDescriptor docDescriptor = new HTableDescriptor(getDocTableName());
    final byte[][] familyNames = {
        fieldColumnFamilyName, allTermsColumnFamilyName, timestampColumnFamilyName
    };
    for (byte[] familyName : familyNames) {
        HColumnDescriptor family = new HColumnDescriptor(familyName);
        setupHColumnDescriptor(family);
        docDescriptor.addFamily(family);
    }
    createTable(docDescriptor, start, end, numberOfRegions);
}
/**
 * Creates the document table with the "field", "allTerms" and "timestamp"
 * column families (added in that order), pre-split at the given keys.
 *
 * @param splits split keys passed to the admin
 * @throws IOException if the admin call fails
 */
public static void createDocTable(byte[][] splits) throws IOException{
    final HTableDescriptor docDescriptor = new HTableDescriptor(getDocTableName());
    final byte[][] familyNames = {
        fieldColumnFamilyName, allTermsColumnFamilyName, timestampColumnFamilyName
    };
    for (byte[] familyName : familyNames) {
        HColumnDescriptor family = new HColumnDescriptor(familyName);
        setupHColumnDescriptor(family);
        docDescriptor.addFamily(family);
    }
    final HBaseAdmin hbaseAdmin = new HBaseAdmin(conf);
    hbaseAdmin.createTable(docDescriptor, splits);
}
/**
 * Creates the sequence table with its "id" column family and no split
 * arguments.
 *
 * @throws IOException if the admin call fails
 */
public static void createSequenceTable() throws IOException {
    final HTableDescriptor tableDescriptor = new HTableDescriptor(getSequenceTableName());
    final HColumnDescriptor idFamily = new HColumnDescriptor(idColumnFamilyName);
    setupHColumnDescriptor(idFamily);
    tableDescriptor.addFamily(idFamily);
    createTable(tableDescriptor, null, null, null);
}
/**
 * Creates the uniq_checksum_user_media table with its "userMediaKey"
 * column family, delegating region splitting to {@link #createTable}.
 *
 * @param start first split key, or {@code null}
 * @param end last split key, or {@code null}
 * @param numberOfRegions number of regions, or {@code null}
 * @throws IOException if the admin call fails
 */
public static void createUniqChecksumUserMediaTable(byte[] start, byte[] end, Integer numberOfRegions) throws IOException {
    final HTableDescriptor tableDescriptor = new HTableDescriptor(getUniqChecksumUserMediaTableName());
    final HColumnDescriptor userMediaKeyFamily = new HColumnDescriptor(userMediaKeyColumnFamilyName);
    setupHColumnDescriptor(userMediaKeyFamily);
    tableDescriptor.addFamily(userMediaKeyFamily);
    createTable(tableDescriptor, start, end, numberOfRegions);
}
/**
 * Empties a table by disabling it, deleting it, and creating it again from
 * the descriptor it had before. The recreate call passes only the
 * descriptor, no split keys.
 *
 * @param admin admin client to run the operations on
 * @param tableName name of the table to truncate
 * @throws RuntimeException wrapping any {@link IOException} from the admin calls
 */
public static void truncateTable(HBaseAdmin admin, byte[] tableName){
    try {
        // capture the schema before the table disappears
        final HTableDescriptor savedDescriptor = admin.getTableDescriptor(tableName);
        admin.disableTable(tableName);
        admin.deleteTable(tableName);
        admin.createTable(savedDescriptor);
    } catch (IOException failure) {
        throw new RuntimeException(failure);
    }
}
/**
 * Encodes a doc id as four bytes and reverses their order.
 *
 * @param docId the doc id; must not be {@code null}
 * @return a new array holding the id's bytes in reversed order
 */
public static byte[] randomize(Integer docId){
    final byte[] reversed = Bytes.toBytes(docId.intValue());
    ArrayUtils.reverse(reversed);
    return reversed;
}
/**
 * Reverses the given array in place.
 *
 * @param docId bytes to reverse; the caller's array is modified
 * @return the same array instance that was passed in
 */
public static byte[] randomize(byte[] docId){
    final byte[] sameArray = docId;
    ArrayUtils.reverse(sameArray);
    return sameArray;
}
}