/*
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.index;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.bizosys.hsearch.common.IStorable;
import com.bizosys.hsearch.common.RecordScalar;
import com.bizosys.hsearch.common.Storable;
import com.bizosys.hsearch.hbase.HbaseLog;
import com.bizosys.hsearch.hbase.HReader;
import com.bizosys.hsearch.hbase.HWriter;
import com.bizosys.hsearch.hbase.NV;
import com.bizosys.hsearch.hbase.NVBytes;
import com.bizosys.hsearch.inpipe.SaveToIndexRecord;
import com.bizosys.hsearch.schema.ILanguageMap;
import com.bizosys.hsearch.schema.IOConstants;
import com.bizosys.oneline.SystemFault;
/**
 * Groups multiple term families, keyed by their single-character table name,
 * under one bucket. Also provides static helpers for bucket-id generation
 * and document-serial allocation.
 * @author karan
 *
 */
public class TermTables {

    /** Row key under which the global bucket-id counter is stored in the config table. */
    private static final byte[] BUCKET_COUNTER_BYTES = "BUCKET_COUNTER".getBytes();

    /** Make sure the bucket counter row exists before this class is first used. */
    static { init(); }

    /** The bucket this group of term tables belongs to; null until assigned. */
    public IStorable bucketId = null;

    /** Term families keyed by their single-character table name; lazily created. */
    public Map<Character, TermFamilies> tables = null;

    public TermTables() {
    }

    /**
     * @param bucketId The bucket these term tables belong to
     */
    public TermTables(IStorable bucketId) {
        this.bucketId = bucketId;
    }

    /**
     * Adds a term to the table resolved via the language map, creating the
     * table map and the per-table {@link TermFamilies} lazily.
     * @param aTerm The term to add
     * @param lang Language map used to resolve the table for the term text
     */
    public void add(Term aTerm, ILanguageMap lang) {
        if ( null == tables) tables = new HashMap<Character, TermFamilies>();
        Character table = lang.getTableName(aTerm.term);
        TermFamilies block = tables.get(table);
        if ( null == block) {
            block = new TermFamilies();
            tables.put(table, block);
        }
        block.add(aTerm, lang);
    }

    /**
     * Merges another TermTables into this one. Succeeds only when both sides
     * have a bucket id and the ids match.
     * @param another The term tables to merge in
     * @return true when merged (or nothing to merge), false when either bucket
     *         id is missing or the ids differ
     */
    public boolean add(TermTables another) {
        if ( null == another.bucketId) return false;
        // Fix: original NPEd when this.bucketId was null; treat as non-mergeable.
        if ( null == this.bucketId) return false;
        byte[] anotherPK = another.bucketId.toBytes();
        if ( !Storable.compareBytes(this.bucketId.toBytes(), anotherPK) ) return false;
        /**
         * Both belong to same bucket zone
         */
        if ( null == another.tables) return true; // nothing to merge
        // Fix: original NPEd when this.tables was still null (default ctor).
        if ( null == this.tables) this.tables = new HashMap<Character, TermFamilies>();
        for (Character otherTable : another.tables.keySet()) {
            TermFamilies otherFamilies = another.tables.get(otherTable);
            TermFamilies thisFamilies = this.tables.get(otherTable);
            if ( null != thisFamilies) {
                thisFamilies.add(otherFamilies);
            } else {
                this.tables.put(otherTable, otherFamilies);
            }
        }
        return true;
    }

    /**
     * Propagates the document position to every term family.
     * @param docPos The document position inside the bucket
     */
    public void assignDocumentPosition(int docPos) {
        if ( null == tables) return;
        for ( TermFamilies tf : tables.values()) {
            if ( null == tf ) continue;
            tf.assignDocumentPosition(docPos);
        }
    }

    /**
     * Persists all term families, writing one index record per table.
     * @param merge NOTE(review): currently unused — every write goes through
     *              HWriter.merge regardless. TODO confirm whether a plain
     *              insert path was intended for merge == false.
     * @throws SystemFault wraps any underlying storage failure
     */
    public void persist(boolean merge) throws SystemFault {
        if ( null == tables) return; // nothing to persist; original NPEd here
        try {
            for ( Character tableName : tables.keySet()) {
                TermFamilies termFamilies = tables.get(tableName);
                SaveToIndexRecord record = new SaveToIndexRecord(bucketId);
                record.setTermFamilies(termFamilies);
                if (HbaseLog.l.isDebugEnabled())
                    HbaseLog.l.debug("TermTables.persist Table " + tableName + record.toString());
                HWriter.merge(tableName.toString(), record);
            }
        } catch (Exception ex) {
            throw new SystemFault(ex);
        }
    }

    /**
     * Populates each term list with the bytes already stored for its
     * family/column pair in the given table row, so new terms can be merged
     * with the existing index data.
     * @param tableName The table to read the existing row from
     * @param termFamilies The families whose columns are to be populated
     * @throws SystemFault on read failure
     */
    public void setExistingValue(String tableName,
            TermFamilies termFamilies) throws SystemFault {

        List<NVBytes> existingB =
            HReader.getCompleteRow(tableName, bucketId.toBytes());
        if ( null == existingB) return;

        for (char family: termFamilies.families.keySet()) {
            TermColumns cols = termFamilies.families.get(family);
            for (char col : cols.columns.keySet()) {
                TermList terms = cols.columns.get(col);
                // Match the stored cell by its one-byte family and column name.
                for (NVBytes bytes : existingB) {
                    if ( bytes.family[0] == family && bytes.name[0] == col) {
                        terms.setExistingBytes(bytes.data);
                        break;
                    }
                }
            }
        }
    }

    /**
     * Get the Running bucket Id
     * @return The bucket id the counter currently points at
     * @throws SystemFault on read failure
     */
    public static long getCurrentBucketId() throws SystemFault {
        HbaseLog.l.info("TermTables > acquiring the running bucket.");
        NV nv = new NV(IOConstants.NAME_VALUE_BYTES,IOConstants.NAME_VALUE_BYTES);
        RecordScalar scalar = new RecordScalar(BUCKET_COUNTER_BYTES, nv);
        HReader.getScalar(IOConstants.TABLE_CONFIG,scalar);
        long currentBucket = Storable.getLong(0, nv.data.toBytes());
        if ( HbaseLog.l.isInfoEnabled())
            HbaseLog.l.info("TermTables > Running bucket = " + currentBucket);
        return currentBucket;
    }

    /**
     * This creates bucket Id, unique across machines.
     * @return The bucket Id
     * @throws SystemFault
     */
    public static long createBucketId() throws SystemFault {
        HbaseLog.l.debug("TermBucket > Creating a new bucket Zone");

        /**
         * Get next bucket Id
         */
        NV nv = new NV(IOConstants.NAME_VALUE_BYTES,IOConstants.NAME_VALUE_BYTES);
        RecordScalar scalar = new RecordScalar(BUCKET_COUNTER_BYTES, nv);
        long bucketId = HReader.generateKeys(IOConstants.TABLE_CONFIG,scalar,1);

        /**
         * Put the bucket as a row for counting document serials.
         * The per-bucket serial counter starts at Short.MIN_VALUE.
         */
        HbaseLog.l.debug("TermBucket > Setting serial counter for this bucket :" + bucketId);
        long startPos = Short.MIN_VALUE;
        nv.data = new Storable(startPos);
        RecordScalar docSerial = new RecordScalar(
            Storable.putLong(bucketId), nv);
        try {
            HWriter.insertScalar(IOConstants.TABLE_CONFIG, docSerial);
            HbaseLog.l.info("TermBucket > Bucket setup completed :" + bucketId);
            return bucketId;
        } catch (IOException ex) {
            HbaseLog.l.fatal("TermBucket > Setting serial counter Failed:" + bucketId, ex);
            throw new SystemFault(ex);
        }
    }

    /**
     * This create document serial no inside a bucket id, unique across machines
     * @param bucketId The current bucket id
     * @param amount Amount of documents to be added
     * @return Moved document serial position
     * @throws SystemFault
     * @throws BucketIsFullException when the bucket has no more serials left
     */
    public static short createDocumentSerialIds(long bucketId, int amount)
    throws SystemFault, BucketIsFullException {

        /**
         * Generate Ids for this bucket
         */
        HbaseLog.l.debug("Generating buckets keys");
        NV nv = new NV(IOConstants.NAME_VALUE_BYTES,IOConstants.NAME_VALUE_BYTES);
        byte[] pkBucketId = Storable.putLong(bucketId);
        RecordScalar scalar = new RecordScalar(pkBucketId, nv);
        long bucketMaxPos =
            HReader.generateKeys(IOConstants.TABLE_CONFIG,scalar,amount);
        HbaseLog.l.debug("Buckets keys generated :" + bucketMaxPos);

        // A bucket holds at most (Short.MAX_VALUE - Short.MIN_VALUE) documents.
        int maxValue = Short.MAX_VALUE - Short.MIN_VALUE;
        if ( bucketMaxPos >= maxValue) {
            HbaseLog.l.warn("Crossed the bucket limit of storage :" + bucketMaxPos);
            BucketIsFullException bife = new BucketIsFullException(bucketMaxPos);
            throw bife;
        }
        // Narrowing cast; identical to Long.shortValue() without the boxing.
        return (short) bucketMaxPos;
    }

    /**
     * This gives all the rows from all tables.
     * @param bucketId Bucket Id
     * @return List of name-value bytes, or null when no table has the row
     * @throws SystemFault
     */
    public static List<NVBytes> get(long bucketId) throws SystemFault {
        List<NVBytes> allFields = null;
        byte[] pk = Storable.putLong(bucketId);
        for (Character c : ILanguageMap.ALL_TABLES) {
            List<NVBytes> nvs = HReader.getCompleteRow(c.toString(), pk);
            if ( null == nvs) continue;
            // Fix: original did both "allFields = nvs" AND "allFields.addAll(nvs)"
            // for the first non-null result, duplicating its entries.
            if ( null == allFields) allFields = nvs;
            else allFields.addAll(nvs);
        }
        return allFields;
    }

    /**
     * Initializes the term buckets
     * Initial System: There will be no bucket. Start from Long.MIN_VALUE
     * Second time onwards : Continue
     */
    public static void init() {
        try {
            NV nv = new NV(IOConstants.NAME_VALUE_BYTES,IOConstants.NAME_VALUE_BYTES);
            if ( ! HReader.exists(IOConstants.TABLE_CONFIG, BUCKET_COUNTER_BYTES)) {
                HbaseLog.l.info("Bucket Counter setup is not there. Setting up bucket id counter.");
                RecordScalar bucketCounter = new RecordScalar(new Storable(BUCKET_COUNTER_BYTES), nv);
                nv.data = new Storable(Long.MIN_VALUE);
                HWriter.insertScalar(IOConstants.TABLE_CONFIG, bucketCounter);
                HbaseLog.l.info("Bucket Counter setup is complete.");
            }
        } catch (IOException ex) {
            // Counter creation is a hard prerequisite; abort rather than index blindly.
            HbaseLog.l.fatal("TermBucket > Bucket Counter Creation Failure:", ex);
            System.exit(1);
        } catch (SystemFault ex) {
            HbaseLog.l.fatal("TermBucket > Bucket Counter Creation Failure:", ex);
            System.exit(1);
        }
    }
}