package org.apache.nutchbase.util.hbase;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.Writable;
import org.apache.nutch.crawl.Inlink;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.protocol.ProtocolStatus;
/**
 * Typed read accessors over the columns of a wrapped {@link RowResult}.
 *
 * <p>Column names (for example {@code STATUS} or {@code OUTLINKS_STR}) are
 * referenced unqualified; presumably they are constants inherited from
 * {@link TableColumns} (that interface is not visible here, so confirm).
 *
 * <p>Accessors that return an object generally return {@code null} when the
 * column is absent. Accessors that return a primitive either document a
 * default value or throw {@link NullPointerException} when the column is
 * absent; see the individual methods.
 */
public class ImmutableRowPart implements Writable, TableColumns {

  /** Length of the inlink column prefix, used to strip it from column names. */
  protected static final int INLINKS_STR_LEN = INLINKS_STR.length();

  /** Length of the outlink column prefix, used to strip it from column names. */
  protected static final int OUTLINKS_STR_LEN = OUTLINKS_STR.length();

  /** The wrapped row that every accessor reads from. */
  protected RowResult rowResult;

  /** For Writable. Do not use directly. */
  public ImmutableRowPart() {
    rowResult = new RowResult();
  }

  /**
   * Wraps an existing row. The given instance is stored as-is (not copied).
   *
   * @param rowResult row to read from
   */
  public ImmutableRowPart(RowResult rowResult) {
    this.rowResult = rowResult;
  }

  /**
   * Creates a part for the given row id, backed by a new empty cell map.
   *
   * @param rowId id of the row
   */
  public ImmutableRowPart(final byte[] rowId) {
    rowResult = new RowResult(rowId, new HbaseMapWritable<byte[], Cell>());
  }

  /**
   * Converts a cell's value to a string.
   *
   * @param c cell to convert, may be null
   * @return the cell value passed through {@link Bytes#toString(byte[])},
   *         or null if the cell itself is null
   */
  protected String stringify(Cell c) {
    if (c == null) {
      return null;
    }
    return Bytes.toString(c.getValue());
  }

  /**
   * Looks up the raw value of a column.
   *
   * @param column column name
   * @return the cell value, or null if the row has no cell for the column
   */
  protected byte[] get(byte[] column) {
    final Cell c = rowResult.get(column);
    return c == null ? null : c.getValue();
  }

  /** Populates the wrapped row by delegating to {@link RowResult#readFields}. */
  public void readFields(DataInput in) throws IOException {
    rowResult.readFields(in);
  }

  /** Serializes the wrapped row by delegating to {@link RowResult#write}. */
  public void write(DataOutput out) throws IOException {
    rowResult.write(out);
  }

  /** @return the row id of the wrapped row */
  public byte[] getRowId() {
    return rowResult.getRow();
  }

  /** Checks if row has the specified column.
   *
   * @param col Column to be checked
   * @return true if given column exists in row
   */
  public boolean hasColumn(byte[] col) {
    return rowResult.containsKey(col);
  }

  /** @return the BASE_URL column as a string, or null if it is absent */
  public String getBaseUrl() {
    return stringify(rowResult.get(BASE_URL));
  }

  /**
   * @return the first byte of the STATUS column
   * @throws NullPointerException if the row has no STATUS column
   */
  public byte getStatus() {
    return rowResult.get(STATUS).getValue()[0];
  }

  /** @return the SIGNATURE column value, or null if it is absent */
  public byte[] getSignature() {
    return get(SIGNATURE);
  }

  /** @return the PREV_SIGNATURE column value, or null if it is absent */
  public byte[] getPrevSignature() {
    return get(PREV_SIGNATURE);
  }

  /**
   * @return the FETCH_TIME column decoded as a long
   * @throws NullPointerException if the row has no FETCH_TIME column
   */
  public long getFetchTime() {
    return Bytes.toLong(rowResult.get(FETCH_TIME).getValue());
  }

  /** @return the PREV_FETCH_TIME column decoded as a long, or 0 if it is absent */
  public long getPrevFetchTime() {
    final byte[] val = get(PREV_FETCH_TIME);
    return val == null ? 0L : Bytes.toLong(val);
  }

  /**
   * @return the MODIFIED_TIME column decoded as a long
   * @throws NullPointerException if the row has no MODIFIED_TIME column
   */
  public long getModifiedTime() {
    return Bytes.toLong(rowResult.get(MODIFIED_TIME).getValue());
  }

  /**
   * @return the FETCH_INTERVAL column decoded as an int
   * @throws NullPointerException if the row has no FETCH_INTERVAL column
   */
  public int getFetchInterval() {
    return Bytes.toInt(rowResult.get(FETCH_INTERVAL).getValue());
  }

  /**
   * @return the RETRIES column decoded as an int
   * @throws NullPointerException if the row has no RETRIES column
   */
  public int getRetriesSinceFetch() {
    return Bytes.toInt(rowResult.get(RETRIES).getValue());
  }

  /**
   * Deserializes the PROTOCOL_STATUS column.
   *
   * @return the protocol status, or null if the column is absent
   * @throws RuntimeException wrapping the IOException if the stored bytes
   *         cannot be deserialized
   */
  public ProtocolStatus getProtocolStatus() {
    final byte[] val = get(PROTOCOL_STATUS);
    if (val == null) {
      // Absent column: report null like the other object-returning getters
      // instead of failing with a NullPointerException.
      return null;
    }
    try {
      return (ProtocolStatus) Writables.getWritable(val, new ProtocolStatus());
    } catch (final IOException e) {
      throw new RuntimeException(e);
    }
  }

  /** @return the SCORE column decoded as a float, or 0 if it is absent */
  public float getScore() {
    final byte[] val = get(SCORE);
    return val == null ? 0.0f : TableUtil.toFloat(val);
  }

  /** @return the PAGERANK column decoded as a float, or 0 if it is absent */
  public float getPagerank() {
    final byte[] val = get(PAGERANK);
    return val == null ? 0.0f : TableUtil.toFloat(val);
  }

  /** @return the VOTES column decoded as a float, or 0 if it is absent */
  public float getVotes() {
    final byte[] val = get(VOTES);
    return val == null ? 0.0f : TableUtil.toFloat(val);
  }

  /** @return the CONTENT column value, or null if it is absent */
  public byte[] getContent() {
    return get(CONTENT);
  }

  /** @return the CONTENT_TYPE column as a string, or null if it is absent */
  public String getContentType() {
    return stringify(rowResult.get(CONTENT_TYPE));
  }

  /** @return the TEXT column as a string, or null if it is absent */
  public String getText() {
    return stringify(rowResult.get(TEXT));
  }

  /** @return the TITLE column as a string, or null if it is absent */
  public String getTitle() {
    return stringify(rowResult.get(TITLE));
  }

  /**
   * Deserializes the PARSE_STATUS column.
   *
   * @return the parse status, or null if the column is absent or the stored
   *         bytes cannot be deserialized
   */
  public ParseStatus getParseStatus() {
    final byte[] val = get(PARSE_STATUS);
    if (val == null) {
      return null;
    }
    try {
      return (ParseStatus) Writables.getWritable(val, new ParseStatus());
    } catch (final IOException ignored) {
      // Existing contract kept for callers: an unreadable value yields null.
      // NOTE(review): getProtocolStatus rethrows in the same situation;
      // confirm which of the two behaviours is intended.
      return null;
    }
  }

  /** @return the REPR_URL column as a string, or null if it is absent */
  public String getReprUrl() {
    return stringify(rowResult.get(REPR_URL));
  }

  /**
   * Collects the outlinks stored in this row. Every column whose name starts
   * with OUTLINKS_STR yields one outlink: the rest of the column name is the
   * target url and the cell value is the anchor.
   *
   * @return a new list of outlinks, empty if there is no such column
   */
  public Collection<Outlink> getOutlinks() {
    final List<Outlink> outlinks = new ArrayList<Outlink>();
    for (final byte[] col : rowResult.keySet()) {
      final String column = Bytes.toString(col);
      if (column.startsWith(OUTLINKS_STR)) {
        final String toUrl = column.substring(OUTLINKS_STR_LEN);
        final String anchor = stringify(rowResult.get(col));
        outlinks.add(new Outlink(toUrl, anchor));
      }
    }
    return outlinks;
  }

  /**
   * Collects the inlinks stored in this row. Every column whose name starts
   * with INLINKS_STR yields one inlink: the rest of the column name is the
   * source url and the cell value is the anchor.
   *
   * @return a new list of inlinks, empty if there is no such column
   */
  public Collection<Inlink> getInlinks() {
    final List<Inlink> inlinks = new ArrayList<Inlink>();
    for (final byte[] col : rowResult.keySet()) {
      final String column = Bytes.toString(col);
      if (column.startsWith(INLINKS_STR)) {
        final String fromUrl = column.substring(INLINKS_STR_LEN);
        final String anchor = stringify(rowResult.get(col));
        inlinks.add(new Inlink(fromUrl, anchor));
      }
    }
    return inlinks;
  }

  /** Returns a header.
   * @param key Header-key
   * @return the header value if it exists, null otherwise
   */
  public String getHeader(String key) {
    // stringify maps a missing cell to null, so no separate existence check.
    return stringify(rowResult.get(Bytes.toBytes(HEADERS_STR + key)));
  }

  /** Checks if a metadata key exists in "metadata" column.
   * @param metaKey Key to search in metadata column
   * @return true if key exists
   */
  public boolean hasMeta(String metaKey) {
    return hasColumn(Bytes.toBytes(METADATA_STR + metaKey));
  }

  /** Read a metadata key from "metadata" column.
   * @param metaKey Key to search in metadata column
   * @return Value in byte array form or null if metadata doesn't exist
   */
  public byte[] getMeta(String metaKey) {
    return get(Bytes.toBytes(METADATA_STR + metaKey));
  }

  /** Read a metadata key from "metadata" column.
   * @param metaKey Key to search in metadata column
   * @return Value in string form or null if metadata doesn't exist
   */
  public String getMetaAsString(String metaKey) {
    final byte[] val = getMeta(metaKey);
    return val == null ? null : Bytes.toString(val);
  }

  /**
   * Reads the value of an arbitrary column of this row.
   * @param key the column name as a String
   * @return the value as a byte array, or null if there is no value for the key
   */
  public byte[] getColumn(String key) {
    // Encode the key once and do a single lookup.
    return get(Bytes.toBytes(key));
  }

  /**
   * Returns the column names present in this row.
   *
   * @return the key set of the wrapped row; it is returned directly, no copy
   *         is made here
   */
  public Set<byte[]> getColumns() {
    return rowResult.keySet();
  }

  /**
   * Reads the value of an arbitrary column of this row as a string.
   * @param key the column name as a String
   * @return the value as a String, or null if there is no value for the key
   */
  public String getColumnAsString(String key) {
    final byte[] val = getColumn(key);
    return val == null ? null : Bytes.toString(val);
  }
}