/**
* Copyright 2010 TransPac Software, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bixolabs.simpledb;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.bixolabs.aws.AWSException;
import com.bixolabs.aws.SimpleDB;
/**
 * Static helpers for working with a logical "table" that is spread across
 * several SimpleDB domains (shards) named {@code <table>-<n>-of-<total>}:
 * building and parsing shard names, mapping item names to shards via a
 * Jenkins one-at-a-time hash, counting items, and deleting a table's shards.
 */
public class SimpleDBUtils {

    /** Value for the {@code limit} argument of {@link #getItemCount} meaning "no limit clause". */
    public static final int NO_SELECT_LIMIT = -1;

    // Name of attribute used to store item name hash, for slicing a shard into chunks.
    public static final String ITEM_HASH_ATTR_NAME = SimpleDBUtils.class.getSimpleName() + "-itemHash";

    // Smallest value the 32-bit hash can take, widened so offsets never overflow.
    private static final long MIN_HASH = (long)Integer.MIN_VALUE;

    // Integer.MAX_VALUE - Integer.MIN_VALUE, i.e. the largest offset of a hash from MIN_HASH.
    private static final long HASH_RANGE = (long)Integer.MAX_VALUE - MIN_HASH;

    // Number of decimal digits needed to print HASH_RANGE; used as the zero-padded width.
    private static final int HASH_DIGITS = Long.toString(HASH_RANGE).length();

    // Negative hashes get a leading '0' and non-negative ones a leading '1', so that
    // every negative hash string sorts before every non-negative one.
    private static final String NEGATIVE_HASH_FORMAT = "0%0" + HASH_DIGITS + "d";
    private static final String POSITIVE_HASH_FORMAT = "1%0" + HASH_DIGITS + "d";

    // These two must be kept in sync.
    private static final String DOMAIN_NAME_FORMAT = "%s-%d-of-%d";
    private static final Pattern DOMAIN_NAME_PATTERN = Pattern.compile("(.+)-(\\d+)-of-(\\d+)");

    // Capture groups of DOMAIN_NAME_PATTERN.
    private static final int TABLE_GROUP = 1;
    private static final int SHARD_NUMBER_GROUP = 2;
    private static final int SHARD_COUNT_GROUP = 3;

    /**
     * Build the domain names for every shard of a table.
     *
     * @param baseDomainName table (base domain) name
     * @param numShards total number of shards
     * @return names for shards 1..numShards, in order (empty if numShards is 0)
     */
    public static List<String> getShardNames(String baseDomainName, int numShards) {
        List<String> result = new ArrayList<String>(numShards);
        for (int shard = 1; shard <= numShards; shard++) {
            // Locale.ROOT keeps the digits ASCII regardless of the JVM's default locale,
            // so the generated names always match DOMAIN_NAME_PATTERN.
            result.add(String.format(Locale.ROOT, DOMAIN_NAME_FORMAT, baseDomainName, shard, numShards));
        }

        return result;
    }

    /**
     * Given a domain name, return the total shard count (n from x-of-n pattern)
     *
     * @param domain domain name
     * @return total shards, or -1 if domain name format isn't valid.
     */
    public static int getShardCount(String domain) {
        return getShardCount(null, domain);
    }

    /**
     * Given a domain name, return the total shard count (n from x-of-n pattern)
     *
     * @param table table name that the domain must belong to, or null to accept any table
     * @param domain domain name
     * @return total shards, or -1 if domain name format isn't valid (or it belongs
     *         to a different table).
     */
    public static int getShardCount(String table, String domain) {
        return parseDomainField(table, domain, SHARD_COUNT_GROUP);
    }

    /**
     * Given a domain name, return the shard number (1..n)
     *
     * @param domain domain name
     * @return shard number, or -1 if domain name format isn't valid.
     */
    public static int getShardNumber(String domain) {
        return getShardNumber(null, domain);
    }

    /**
     * Given a domain name, return the shard number (1..n)
     *
     * @param table table name that the domain must belong to, or null to accept any table
     * @param domain domain name
     * @return shard number, or -1 if domain name format isn't valid (or it belongs
     *         to a different table).
     */
    public static int getShardNumber(String table, String domain) {
        return parseDomainField(table, domain, SHARD_NUMBER_GROUP);
    }

    /**
     * Extract one numeric field from a shard domain name.
     *
     * @param table expected table name, or null to skip the table check
     * @param domain domain name to parse
     * @param group capture group of DOMAIN_NAME_PATTERN holding the wanted number
     * @return the parsed number, or -1 if the domain is null, doesn't match the
     *         pattern, belongs to another table, or the number doesn't fit in an int.
     */
    private static int parseDomainField(String table, String domain, int group) {
        if (domain == null) {
            return -1;
        }

        Matcher m = DOMAIN_NAME_PATTERN.matcher(domain);
        if (!m.matches()) {
            return -1;
        }

        // Case of table = x, and domain is x-y-1-of-3
        if ((table != null) && !table.equals(m.group(TABLE_GROUP))) {
            return -1;
        }

        try {
            return Integer.parseInt(m.group(group));
        } catch (NumberFormatException e) {
            // The pattern accepts arbitrarily long digit runs; one that overflows an
            // int is treated as an invalid name, as the -1 contract promises.
            return -1;
        }
    }

    /**
     * Determine how many shards a table has by inspecting the existing domains,
     * verifying that all of its shards agree on the total and that none is missing.
     *
     * @param sdb SimpleDB client used to list domains
     * @param table table name
     * @return number of shards, or 0 if no shard domain exists for the table
     * @throws IllegalStateException if shards disagree on the total count, a shard
     *         number is outside 1..total, or a shard is missing
     * @throws IOException
     * @throws AWSException
     * @throws InterruptedException
     */
    public static int getNumShardsForTable(SimpleDB sdb, String table) throws IOException, AWSException, InterruptedException {
        int numShards = 0;
        boolean[] shardFound = null;

        for (String domain : sdb.listDomains()) {
            if (!domain.startsWith(table)) {
                continue;
            }

            int totalShards = getShardCount(table, domain);
            int shardNumber = getShardNumber(table, domain);
            if ((totalShards == -1) || (shardNumber == -1)) {
                // Shares the prefix but isn't a shard of this table.
                continue;
            }

            if (shardFound == null) {
                numShards = totalShards;
                shardFound = new boolean[numShards];
            } else if (numShards != totalShards) {
                throw new IllegalStateException(String.format("Table %s has shard %s with a different total count than a previous shard", table, domain));
            }

            // Names such as x-0-of-3 or x-5-of-3 match the pattern but would index
            // outside the array, so report them as an inconsistent table instead.
            if ((shardNumber < 1) || (shardNumber > numShards)) {
                throw new IllegalStateException(String.format("Table %s has shard %s with a shard number outside the valid range", table, domain));
            }

            shardFound[shardNumber - 1] = true;
        }

        for (int i = 0; i < numShards; i++) {
            if (!shardFound[i]) {
                throw new IllegalStateException(String.format(Locale.ROOT, "Table %s is missing shard #%d", table, i + 1));
            }
        }

        return numShards;
    }

    /**
     * Map an item value to the zero-based index of the shard that holds it.
     *
     * @param itemValue value to hash (not consulted when numShards is 1)
     * @param numShards total number of shards, must be at least 1
     * @return shard index in the range 0..numShards-1
     * @throws IllegalArgumentException if numShards is less than 1
     */
    public static int getShardIndex(String itemValue, int numShards) {
        if (numShards < 1) {
            throw new IllegalArgumentException("numShards must be at least 1, got " + numShards);
        }

        if (numShards == 1) {
            return 0;
        }

        long shardRange = HASH_RANGE / numShards;
        long absoluteHash = (long)joaat_hash(itemValue) - MIN_HASH;
        long index = absoluteHash / shardRange;

        // Because shardRange is rounded down, hashes at the very top of the range
        // would otherwise land on index == numShards. Fold them into the last shard;
        // every index that was already in range is unchanged.
        return (int)Math.min(index, (long)numShards - 1);
    }

    /**
     * Return the fixed-width string form of an item name's hash.
     *
     * The result is a one character sign marker ('0' for negative, '1' otherwise)
     * followed by the zero-padded magnitude of the hash. Note that because the
     * magnitude is used, negative hashes sort by absolute value within their group.
     *
     * @param itemName item name to hash
     * @return encoded hash string
     */
    public static String getItemHash(String itemName) {
        int hash = joaat_hash(itemName);
        if (hash < 0) {
            // Widen before negating: Math.abs(Integer.MIN_VALUE) is still negative,
            // which would put a '-' sign into the formatted value.
            return String.format(Locale.ROOT, NEGATIVE_HASH_FORMAT, -((long)hash));
        }

        return String.format(Locale.ROOT, POSITIVE_HASH_FORMAT, hash);
    }

    /**
     * Return the total count for items in {@code domainName}, optionally selected with
     * {@code expression} and limited to {@code limit} items.
     *
     * @param sdb SimpleDB client used to run the select
     * @param domainName domain to count items in
     * @param expression expression to use with selection, or ""/null.
     * @param limit limit to result, or NO_SELECT_LIMIT
     * @return count of total items matching selection criteria
     * @throws RuntimeException if a response has no usable "Count" attribute
     * @throws IOException
     * @throws AWSException
     * @throws InterruptedException
     */
    public static int getItemCount(SimpleDB sdb, String domainName, String expression,
                    int limit) throws IOException, AWSException, InterruptedException {
        // FUTURE KKr - if there's no query, then just make a metadata call as that's
        // going to be faster than the select.
        StringBuilder query = new StringBuilder("select count(*) from `").append(domainName).append('`');
        if ((expression != null) && (expression.length() > 0)) {
            query.append(" where ").append(expression);
        }

        boolean limited = limit != NO_SELECT_LIMIT;
        if (limited) {
            // append(int) always emits ASCII digits, independent of the default locale.
            query.append(" limit ").append(limit);
        }

        String selectStr = query.toString();
        int result = 0;
        String nextToken = null;

        // TODO KKr - seems like the select call will keep returning results past what
        // we specified with the limit parameter...verify that this is the expected behavior.
        // I think the fix is to reduce the limit by the returned count, each time through
        // the loop.
        do {
            List<Map<String, String[]>> selectResult = sdb.select(selectStr, nextToken);
            nextToken = sdb.getLastToken();

            String numItemsStr = findCountValue(selectResult);
            if (numItemsStr == null) {
                throw new RuntimeException("SimpleDB select didn't return count");
            }

            try {
                result += Integer.parseInt(numItemsStr);
            } catch (NumberFormatException e) {
                throw new RuntimeException("SimpleDB select returned invalid count: " + numItemsStr, e);
            }
        } while ((nextToken != null) && (!limited || (result < limit)));

        // Partial counts from several pages can add up to more than the limit.
        return limited ? Math.min(result, limit) : result;
    }

    /**
     * Find the first non-empty "Count" attribute in a select result.
     *
     * @param selectResult items returned by the select call
     * @return first value of the first "Count" attribute found, or null if there is none
     */
    private static String findCountValue(List<Map<String, String[]>> selectResult) {
        for (Map<String, String[]> attributes : selectResult) {
            String[] values = attributes.get("Count");
            if ((values != null) && (values.length > 0)) {
                return values[0];
            }
        }

        return null;
    }

    /**
     * Delete every shard domain belonging to a table.
     *
     * Only domains that parse as a shard of exactly this table (the same test
     * used by {@link #getNumShardsForTable}) are deleted, so domains of other
     * tables that merely share the name prefix are left alone. Deletion is best
     * effort: a failure to delete one domain doesn't stop the remaining deletes.
     *
     * @param sdb SimpleDB client
     * @param table table name
     * @throws IOException
     * @throws AWSException
     * @throws InterruptedException if the thread is interrupted while listing or deleting
     */
    public static void deleteTable(SimpleDB sdb, String table) throws IOException, AWSException, InterruptedException {
        for (String domain : sdb.listDomains()) {
            // startsWith is a cheap pre-filter and also rejects a null table, which
            // the shard parser would otherwise treat as "match any table".
            if (!domain.startsWith(table) || (getShardNumber(table, domain) == -1)) {
                continue;
            }

            try {
                sdb.deleteDomain(domain);
            } catch (Exception e) {
                // Never swallow an interrupt; the caller asked us to stop.
                if (e instanceof InterruptedException) {
                    throw (InterruptedException)e;
                }

                // Otherwise ignore, and keep going with the remaining shards.
            }
        }
    }

    /**
     * Hash the UTF-8 bytes of a string with the Jenkins one-at-a-time function.
     *
     * @param key string to hash
     * @return 32-bit hash value
     */
    private static int joaat_hash(String key) {
        try {
            return joaat_hash(key.getBytes("UTF-8"));
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("Impossible error", e);
        }
    }

    /**
     * Jenkins one-at-a-time hash over a byte array.
     *
     * @param key bytes to hash
     * @return 32-bit hash value (0 for an empty array)
     */
    private static int joaat_hash(byte[] key) {
        int hash = 0;

        for (byte b : key) {
            // Mask so the byte is added as an unsigned value 0..255.
            hash += (b & 0xFF);
            hash += (hash << 10);
            hash ^= (hash >>> 6);
        }

        // Final mixing steps.
        hash += (hash << 3);
        hash ^= (hash >>> 11);
        hash += (hash << 15);

        return hash;
    }
}