package org.apache.nutchbase.util.hbase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.conf.*;
import org.apache.nutch.util.NutchConfiguration;
/**
 * Command-line tool that creates an HBase table and populates its descriptor
 * with the webtable column families listed in {@link TableColumns}.
 *
 * <p>Usage: {@code WebTableCreator <webtable>}
 */
public class WebTableCreator extends Configured implements Tool {

  public static final Log LOG = LogFactory.getLog(WebTableCreator.class);

  /**
   * Runs the tool through {@link ToolRunner} with a Nutch configuration and
   * exits the JVM with the status code returned by {@link #run(String[])}.
   *
   * @param args command-line arguments; exactly one is expected, the table name
   * @throws Exception if {@link ToolRunner} fails to run the tool
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(NutchConfiguration.create(),
        new WebTableCreator(), args);
    System.exit(res);
  }

  /**
   * Adds the required column families to the target table. This method does
   * not commit any changes to the table; it simply adds the required
   * descriptors to the given {@link HTableDescriptor}.
   *
   * @param targetTable the HTableDescriptor for the table you wish to add the
   *                    webtable column families to.
   */
  public static void addColumnFamilies(HTableDescriptor targetTable) {
    targetTable.addFamily(new HColumnDescriptor(TableColumns.BASE_URL));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.STATUS));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.FETCH_TIME));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.RETRIES));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.FETCH_INTERVAL));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.SCORE));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.MODIFIED_TIME));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.SIGNATURE));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.CONTENT));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.CONTENT_TYPE));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.TITLE));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.OUTLINKS));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.INLINKS));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.PARSE_STATUS));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.PROTOCOL_STATUS));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.TEXT));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.REPR_URL));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.HEADERS));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.METADATA));
    // Hackish solution to access previous versions of some columns
    targetTable.addFamily(new HColumnDescriptor(TableColumns.PREV_SIGNATURE));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.PREV_FETCH_TIME));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.PAGERANK));
    targetTable.addFamily(new HColumnDescriptor(TableColumns.VOTES));
  }

  /**
   * Creates the table named by the single command-line argument, with all
   * webtable column families attached.
   *
   * @param args command-line arguments; must contain exactly one element,
   *             the name of the table to create
   * @return 0 if the table was created; -1 if the argument count is wrong or
   *         if any exception was raised while creating the table (the
   *         exception is logged, not rethrown)
   * @throws Exception declared by the {@link Tool} contract; exceptions raised
   *                   inside the creation step are caught and logged here
   */
  public int run(String[] args) throws Exception {
    if (args.length != 1) {
      System.err.println("Usage: WebTableCreator <webtable>");
      return -1;
    }
    try {
      // NOTE(review): a fresh HBaseConfiguration is built here; the
      // configuration supplied through ToolRunner (getConf()) is not consulted.
      HBaseConfiguration hbaseConf = new HBaseConfiguration();
      LOG.debug("Creating table: " + args[0]);
      HTableDescriptor desc = new HTableDescriptor(args[0]);
      addColumnFamilies(desc);
      HBaseAdmin admin = new HBaseAdmin(hbaseConf);
      // Routine progress message, so it is logged at INFO rather than WARN.
      LOG.info("Calling createTable");
      admin.createTable(desc);
      return 0;
    } catch (Exception e) {
      LOG.fatal("WebTableCreator: " + StringUtils.stringifyException(e));
      return -1;
    }
  }
}