/**
* (c) Copyright 2012 WibiData, Inc.
*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kiji.mapreduce.kvstore.lib;
import java.io.IOException;
import java.util.Map;
import com.google.common.collect.ImmutableMap;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.kiji.annotations.ApiAudience;
import org.kiji.annotations.ApiStability;
import org.kiji.mapreduce.kvstore.KeyValueStore;
import org.kiji.mapreduce.kvstore.KeyValueStoreReader;
import org.kiji.mapreduce.kvstore.framework.KeyValueStoreConfiguration;
import org.kiji.mapreduce.util.LruCache;
import org.kiji.schema.EntityId;
import org.kiji.schema.Kiji;
import org.kiji.schema.KijiColumnName;
import org.kiji.schema.KijiDataRequest;
import org.kiji.schema.KijiDataRequestBuilder;
import org.kiji.schema.KijiRowData;
import org.kiji.schema.KijiRowKeyComponents;
import org.kiji.schema.KijiTable;
import org.kiji.schema.KijiTableReader;
import org.kiji.schema.KijiTableReaderPool;
import org.kiji.schema.KijiTableReaderPool.Builder.WhenExhaustedAction;
import org.kiji.schema.KijiURI;
import org.kiji.schema.KijiURIException;
import org.kiji.schema.layout.ColumnReaderSpec;
import org.kiji.schema.util.ResourceUtils;
/**
* KeyValueStore lookup implementation based on a Kiji table.
*
* <p>Allows you to use a single fully-qualified column of a Kiji table as a
* key-value store (using a KijiRowKeyComponents that corresponds to the
* entity id associated with each row as the key).</p>
*
* <p>This operates over the most recent timestamped value (within the optionally-configured
* timestamp range associated with the Store).</p>
*
* <p>This implementation assumes that the column is immutable while being used in this
* fashion. It may cache values to improve repeated read performance. You can set the
* cache size with {@link KijiTableKeyValueStore.Builder#withCacheLimit(int)}.</p>
*
* <h3>XML Configuration</h3>
*
* <p>When configuring a KijiTableKeyValueStore from a kvstores XML file, the following
* properties may be used to specify the behavior of this class:</p>
* <ul>
* <li><tt>table.uri</tt> - The Kiji URI for the table backing this store.</li>
* <li><tt>column</tt> - The family and qualifier of the column representing the values
* in this store. e.g., <tt>info:name</tt></li>
* <li><tt>min.ts</tt> - A <tt>long</tt> value representing the minimum timestamp to
* include in the results for this KeyValueStore.</li>
* <li><tt>max.ts</tt> - A <tt>long</tt> value representing the maximum timestamp to
* include in the results for this KeyValueStore.</li>
* <li><tt>cache.size</tt> - An <tt>int</tt> value representing the number of results
* to cache locally. (Default is 100; set to 0 to disable caching.)</li>
* <li><tt>avro.reader.schema</tt> - The common Avro reader schema used to
* deserialize values from
* the value column to return them to the client.</li>
* </ul>
*
* <h3>Default Values</h3>
* <ul>
* <li>You must specify the {@link KijiURI} of the table to read; it is an error to
* leave this unconfigured.</li>
* <li>You must specify the column to read as the value; it is an error to leave
* this unconfigured.</li>
* <li>HBase tables will be accessed through a new <tt>Configuration</tt> object
* if none is explicitly specified.</li>
* <li>The minimum timestamp to read defaults to <tt>0</tt>.</li>
* <li>The maximum timestamp to read is "infinity" (no maximum). This can be specified
* as <tt>HConstants.LATEST_TIMESTAMP</tt> (<tt>Long.MAX_VALUE</tt>).</li>
* <li>The number of rowkey-to-value mappings to cache locally defaults to 100. You
* can disable caching by setting the cache size to 0.</li>
* <li>The Avro reader schema is optional. When one is specified, it is applied as a
* generic Avro reader schema override for the value column when readers are created.
* When none is specified, no override is applied for that column.</li>
* </ul>
*
* @param <V> the value type returned by this key-value store.
*/
@ApiAudience.Public
@ApiStability.Evolving
public final class KijiTableKeyValueStore<V>
implements Configurable, KeyValueStore<KijiRowKeyComponents, V> {
/** Cache the most recent 100 lookups in memory. */
private static final int DEFAULT_MAX_OBJECTS_TO_CACHE = 100;
// See javadoc for this class to understand the definitions of these configuration keys.
// These keys are written by storeToConf() and read back by initFromConf().
private static final String CONF_TABLE_URI = "table.uri";
private static final String CONF_COLUMN = "column";
private static final String CONF_MIN_TS = "min.ts";
private static final String CONF_MAX_TS = "max.ts";
private static final String CONF_CACHE_SIZE = "cache.size";
private static final String CONF_READER_SCHEMA = "avro.reader.schema";
/** URI of the Kiji table backing this store. */
private KijiURI mTableUri;
/** Column whose most recent value is returned for each row key. */
private KijiColumnName mColumn;
/** Lower bound of the time range used in the data request for lookups. */
private long mMinTs;
/** Upper bound of the time range used in the data request for lookups. */
private long mMaxTs;
/** Size passed to the LRU cache created by each reader. */
private int mMaxObjectsToCache = DEFAULT_MAX_OBJECTS_TO_CACHE;
/** Optional Avro reader schema; when non-null it is applied as a column reader override. */
private Schema mReaderSchema;
/** Configuration used to open the Kiji instance; getConf() hands out copies of it. */
private Configuration mConf;
/** true if the user has called open() on this object. */
private boolean mOpened;
/**
 * A Builder-pattern class that configures and creates new KijiTableKeyValueStore
 * instances. You should use this to specify the input to this KeyValueStore.
 * Call the build() method to return a new KijiTableKeyValueStore instance.
 */
@ApiAudience.Public
@ApiStability.Evolving
public static final class Builder {
  /** URI of the Kiji table backing the store; required. */
  private KijiURI mTableUri;
  /** Fully-qualified column to read values from; required. */
  private KijiColumnName mColumn;
  /** Oldest timestamp to consider. */
  private long mMinTs;
  /** Newest timestamp to consider. */
  private long mMaxTs;
  /** Maximum number of lookups to cache in memory. */
  private int mMaxObjectsToCache;
  /** Optional reader schema for the value column; may be null. */
  private Schema mReaderSchema;
  /** Configuration used to connect to Kiji. */
  private Configuration mConf;

  /**
   * Private, default constructor. Call the builder() method of this KeyValueStore
   * to get a new builder instance.
   */
  private Builder() {
    mMaxObjectsToCache = DEFAULT_MAX_OBJECTS_TO_CACHE;
    mMinTs = 0;
    mMaxTs = HConstants.LATEST_TIMESTAMP;
    mConf = HBaseConfiguration.create();
  }

  /**
   * Sets the Configuration to use to connect to Kiji.
   *
   * @param conf the Configuration to use.
   * @return this builder instance.
   */
  public Builder withConfiguration(Configuration conf) {
    mConf = conf;
    return this;
  }

  /**
   * Sets the table to use as the backing store.
   *
   * @param tableUri the Kiji table URI to use.
   * @return this builder instance.
   * @throws IllegalArgumentException if the URI is null or does not name both
   *     a Kiji instance and a table.
   */
  public Builder withTable(KijiURI tableUri) {
    checkTableUri(tableUri);
    mTableUri = tableUri;
    return this;
  }

  /**
   * Sets the column to retrieve values from.
   *
   * @param colName the column to use.
   * @return this builder instance.
   * @throws IllegalArgumentException if the column is not fully qualified.
   */
  public Builder withColumn(KijiColumnName colName) {
    if (!colName.isFullyQualified()) {
      throw new IllegalArgumentException("Must specify a fully-qualified column, not a map.");
    }
    mColumn = colName;
    return this;
  }

  /**
   * Sets the column to retrieve values from.
   *
   * @param family the column family to use.
   * @param qualifier the column qualifier to use.
   * @return this builder instance.
   */
  public Builder withColumn(String family, String qualifier) {
    return withColumn(new KijiColumnName(family, qualifier));
  }

  /**
   * Sets the oldest timestamp to retrieve values from in the column.
   *
   * @param timestamp the oldest timestamp to consider.
   * @return this builder instance.
   */
  public Builder withMinTimestamp(long timestamp) {
    mMinTs = timestamp;
    return this;
  }

  /**
   * Sets the newest timestamp to retrieve values from in the column.
   *
   * @param timestamp the newest timestamp to consider.
   * @return this builder instance.
   */
  public Builder withMaxTimestamp(long timestamp) {
    mMaxTs = timestamp;
    return this;
  }

  /**
   * Sets the maximum number of lookups to cache in memory.
   *
   * <p>Defaults to 100.</p>
   *
   * @param numValues the maximum number of values to keep in the cache.
   * @return this builder instance.
   */
  public Builder withCacheLimit(int numValues) {
    mMaxObjectsToCache = numValues;
    return this;
  }

  /**
   * Sets the reader schema to use when deserializing values from the value column.
   * If set to null, will use the common reader schema associated with the column.
   *
   * @param schema the reader schema to use.
   * @return this builder instance.
   */
  public Builder withReaderSchema(Schema schema) {
    mReaderSchema = schema;
    return this;
  }

  /**
   * Throws an IllegalArgumentException if tableUri is invalid. The URI
   * is valid iff it is not null, specifies a Kiji instance, and specifies
   * a Kiji table within that instance to read.
   *
   * @param tableUri a Kiji URI that must specify an instance and table.
   * @throws IllegalArgumentException if the uri is invalid.
   */
  private void checkTableUri(KijiURI tableUri) {
    if (null == tableUri) {
      throw new IllegalArgumentException("Must specify non-null table URI");
    }
    final String tableName = tableUri.getTable();
    if (null == tableName || tableName.isEmpty()) {
      throw new IllegalArgumentException("Must specify a non-empty table name");
    }
    // The documented contract also requires an instance; reject URIs that lack one here
    // instead of failing later when the Kiji instance is opened.
    final String instanceName = tableUri.getInstance();
    if (null == instanceName || instanceName.isEmpty()) {
      throw new IllegalArgumentException("Must specify a non-empty instance name");
    }
  }

  /**
   * Build a new KijiTableKeyValueStore instance.
   *
   * <p>Validates the configured table URI, column and timestamp range, then opens
   * (and immediately releases) the table to verify that it can be opened.</p>
   *
   * @param <V> The value type for the KeyValueStore.
   * @return the initialized KeyValueStore.
   * @throws IllegalArgumentException if a required attribute is missing or invalid, or
   *     if the table cannot be opened.
   */
  public <V> KijiTableKeyValueStore<V> build() {
    checkTableUri(mTableUri);
    if (null == mColumn || !mColumn.isFullyQualified()) {
      throw new IllegalArgumentException("Must specify a fully-qualified column");
    }
    if (mMinTs > mMaxTs) {
      throw new IllegalArgumentException("Minimum timestamp must be less than max timestamp");
    }

    // Check that the table exists, so users can fail fast before finding this
    // IO error after the MapReduce job has started.
    Kiji kiji = null;
    KijiTable kijiTable = null;
    try {
      kiji = Kiji.Factory.open(mTableUri, mConf);
      kijiTable = kiji.openTable(mTableUri.getTable());
    } catch (IOException ioe) {
      throw new IllegalArgumentException("Could not open table: " + mTableUri, ioe);
    } finally {
      ResourceUtils.releaseOrLog(kijiTable);
      ResourceUtils.releaseOrLog(kiji);
    }

    return new KijiTableKeyValueStore<V>(this);
  }
}
/**
 * Factory method for {@link Builder}: returns a fresh builder that can be used
 * to configure and then create a new KeyValueStore.
 *
 * @return a new Builder instance.
 */
public static Builder builder() {
  final Builder freshBuilder = new Builder();
  return freshBuilder;
}
/**
 * No-argument constructor intended for reflective instantiation only. The resulting
 * store carries the defaults of a freshly created builder. To create and configure
 * a KijiTableKeyValueStore yourself, call KijiTableKeyValueStore.builder() and use
 * the returned builder instead.
 */
public KijiTableKeyValueStore() {
  this(new Builder());
}
/**
 * Constructor that sets up this KeyValueStore by copying every setting from a builder.
 *
 * @param builder the builder instance to read configuration from.
 */
private KijiTableKeyValueStore(Builder builder) {
  // Connection settings.
  mConf = builder.mConf;
  mTableUri = builder.mTableUri;
  // What to read.
  mColumn = builder.mColumn;
  mReaderSchema = builder.mReaderSchema;
  mMinTs = builder.mMinTs;
  mMaxTs = builder.mMaxTs;
  // Caching.
  mMaxObjectsToCache = builder.mMaxObjectsToCache;
}
/**
 * Replaces the Configuration this store refers to. Only permitted before the
 * first call to open().
 *
 * @param conf the Configuration object to use.
 */
@Override
public void setConf(Configuration conf) {
  if (!mOpened) {
    mConf = conf;
    return;
  }
  // Once reads may have started, the store must not be mutated any more.
  throw new IllegalStateException(
      "Cannot set the configuration after a reader has been opened");
}
/** @return a new Configuration object copied from the one this store is using. */
@Override
public Configuration getConf() {
  final Configuration copy = new Configuration(mConf);
  return copy;
}
/**
 * {@inheritDoc}
 *
 * <p>Fails with an IOException if the table URI or column is unset, or if the
 * column is not fully qualified. The reader schema is written only when set.</p>
 */
@Override
public void storeToConf(KeyValueStoreConfiguration conf) throws IOException {
  // Validate the required attributes before writing anything.
  final boolean haveTableUri = (null != mTableUri);
  if (!haveTableUri) {
    throw new IOException("Required attribute not set: table URI");
  }
  final boolean haveColumn = (null != mColumn);
  if (!haveColumn) {
    throw new IOException("Required attribute not set: column");
  }
  final boolean fullyQualified = mColumn.isFullyQualified();
  if (!fullyQualified) {
    throw new IOException("Column must be fully qualified");
  }

  final String tableUriText = mTableUri.toString();
  final String columnText = mColumn.toString();
  conf.set(CONF_TABLE_URI, tableUriText);
  conf.set(CONF_COLUMN, columnText);
  conf.setLong(CONF_MIN_TS, mMinTs);
  conf.setLong(CONF_MAX_TS, mMaxTs);
  conf.setInt(CONF_CACHE_SIZE, mMaxObjectsToCache);

  // The reader schema is optional; leave the key absent when there is none.
  if (mReaderSchema != null) {
    final String schemaJson = mReaderSchema.toString();
    conf.set(CONF_READER_SCHEMA, schemaJson);
  }
}
/**
 * {@inheritDoc}
 *
 * <p>Not permitted once open() has been called on this store.</p>
 */
@Override
public void initFromConf(KeyValueStoreConfiguration conf) throws IOException {
  if (mOpened) {
    // The store is frozen as soon as it may be in use for reads.
    throw new IllegalStateException(
        "Cannot set the configuration after a reader has been opened");
  }

  final String tableUriText = conf.get(CONF_TABLE_URI);
  try {
    mTableUri = KijiURI.newBuilder(tableUriText).build();
  } catch (KijiURIException kue) {
    throw new IOException("Error parsing input URI: " + kue.getMessage(), kue);
  }

  final String columnText = conf.get(CONF_COLUMN);
  mColumn = new KijiColumnName(columnText);

  mMinTs = conf.getLong(CONF_MIN_TS, 0);
  mMaxTs = conf.getLong(CONF_MAX_TS, Long.MAX_VALUE);
  mMaxObjectsToCache = conf.getInt(CONF_CACHE_SIZE, DEFAULT_MAX_OBJECTS_TO_CACHE);

  // An absent key means no reader schema was stored.
  final String schemaJson = conf.get(CONF_READER_SCHEMA);
  mReaderSchema = (null == schemaJson) ? null : new Schema.Parser().parse(schemaJson);

  // Adopt the job Configuration so that we get connection parameters to the Kiji table.
  final Configuration jobConf = conf.getDelegate();
  setConf(jobConf);
}
/** {@inheritDoc} */
@Override
public KeyValueStoreReader<KijiRowKeyComponents, V> open() throws IOException {
  // Mark the store as opened before the reader is created; from here on
  // setConf() and initFromConf() refuse to modify it.
  mOpened = true;
  final KeyValueStoreReader<KijiRowKeyComponents, V> reader = new TableKVReader();
  return reader;
}
/**
 * {@inheritDoc}
 *
 * <p>Two stores are equal when they are of the same class and agree on table URI,
 * column, timestamp range, cache size and reader schema. The Configuration is
 * not part of the comparison.</p>
 */
@Override
public boolean equals(Object otherObj) {
  if (this == otherObj) {
    return true;
  }
  if (null == otherObj || !otherObj.getClass().equals(getClass())) {
    return false;
  }

  @SuppressWarnings("unchecked")
  final KijiTableKeyValueStore<V> that = (KijiTableKeyValueStore<V>) otherObj;

  // Null-safe comparison of each object field; mConf is deliberately left out.
  return ((null == mTableUri) ? (null == that.mTableUri) : mTableUri.equals(that.mTableUri))
      && ((null == mColumn) ? (null == that.mColumn) : mColumn.equals(that.mColumn))
      && (mMinTs == that.mMinTs)
      && (mMaxTs == that.mMaxTs)
      && (mMaxObjectsToCache == that.mMaxObjectsToCache)
      && ((null == mReaderSchema)
          ? (null == that.mReaderSchema)
          : mReaderSchema.equals(that.mReaderSchema));
}
/**
 * {@inheritDoc}
 *
 * <p>Derived from the table URI and the column only.</p>
 */
@Override
public int hashCode() {
  final int uriHash = (null != mTableUri) ? mTableUri.hashCode() : 0;
  final int columnHash = (null != mColumn) ? mColumn.hashCode() : 0;
  return 17 + (uriHash * 31) + (columnHash * 31);
}
/** KeyValueStoreReader implementation that reads from a Kiji table. */
@ApiAudience.Private
private final class TableKVReader implements KeyValueStoreReader<KijiRowKeyComponents, V> {
  /** Kiji table being read. Reset to null by close(); isOpen() tests this field. */
  private KijiTable mKijiTable;

  /** Pool of KijiTableReaders used to read the table. */
  private KijiTableReaderPool mTableReaderPool;

  /** Data request to use for all lookups. */
  private final KijiDataRequest mDataReq;

  /** Lookup cache, or null if result caching is disabled (cache size of 0 or less). */
  private final Map<KijiRowKeyComponents, V> mResultCache;

  /**
   * Creates a new TableKVReader.
   *
   * <p>Opens the backing table, then builds the data request, the optional result
   * cache and the reader pool. If any step after opening the table fails, the table
   * is released again before the exception propagates.</p>
   *
   * @throws IOException if there's an error opening the Kiji table.
   */
  private TableKVReader() throws IOException {
    final Configuration conf = getConf();
    final Kiji kiji = Kiji.Factory.open(mTableUri, conf);
    try {
      mKijiTable = kiji.openTable(mTableUri.getTable());
    } finally {
      kiji.release();
    }

    boolean initialized = false;
    try {
      final KijiDataRequestBuilder dataReqBuilder = KijiDataRequest.builder()
          .withTimeRange(mMinTs, mMaxTs);
      dataReqBuilder
          .newColumnsDef().withMaxVersions(1).add(mColumn.getFamily(), mColumn.getQualifier());
      mDataReq = dataReqBuilder.build();

      // Any positive size enables the cache; only 0 (or less) disables it.
      if (mMaxObjectsToCache > 0) {
        mResultCache = LruCache.create(mMaxObjectsToCache);
      } else {
        mResultCache = null;
      }

      // The pool is built last so that nothing can fail after it has been created.
      final KijiTableReaderPool.Builder poolBuilder = KijiTableReaderPool.Builder.create()
          .withReaderFactory(mKijiTable.getReaderFactory())
          .withExhaustedAction(WhenExhaustedAction.GROW);
      if (null != mReaderSchema) {
        poolBuilder.withColumnReaderSpecOverrides(ImmutableMap.of(
            mColumn, ColumnReaderSpec.avroReaderSchemaGeneric(mReaderSchema)));
      }
      mTableReaderPool = poolBuilder.build();
      initialized = true;
    } finally {
      if (!initialized) {
        // Do not leak the table when construction fails part-way through.
        ResourceUtils.releaseOrLog(mKijiTable);
        mKijiTable = null;
      }
    }
  }

  /**
   * Get the row for the given EntityId.
   *
   * @param eid Entity for which to get the row.
   * @return the row for the given Entity.
   * @throws IOException in case of an error reading from the table.
   */
  private KijiRowData getRow(
      final EntityId eid
  ) throws IOException {
    try {
      final KijiTableReader reader = mTableReaderPool.borrowObject();
      try {
        return reader.get(eid, mDataReq);
      } finally {
        reader.close();
      }
    } catch (IOException ioe) {
      throw ioe;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  /** {@inheritDoc} */
  @Override
  public boolean isOpen() {
    return null != mKijiTable;
  }

  /** {@inheritDoc} */
  @Override
  public V get(KijiRowKeyComponents rowKey) throws IOException {
    if (!isOpen()) {
      throw new IOException("Closed");
    }

    // Check the cache first. A cached null records that the column had no value.
    if (null != mResultCache && mResultCache.containsKey(rowKey)) {
      return mResultCache.get(rowKey);
    }

    // Now do a full lookup.
    final EntityId eid = rowKey.getEntityIdForTable(mKijiTable);
    final KijiRowData rowData = getRow(eid);
    if (null == rowData) {
      return null;
    }

    if (rowData.containsColumn(mColumn.getFamily(), mColumn.getQualifier())) {
      // A non-null mReaderSchema was already applied as a column reader override when
      // the reader pool was built, so no schema handling is needed here.
      final V val = rowData.<V>getMostRecentValue(mColumn.getFamily(), mColumn.getQualifier());
      if (null != mResultCache) {
        mResultCache.put(rowKey, val);
      }
      return val;
    } else {
      // Remember the miss so that repeated get() calls for this key skip the table read.
      if (null != mResultCache) {
        mResultCache.put(rowKey, null);
      }
      return null;
    }
  }

  /** {@inheritDoc} */
  @Override
  public boolean containsKey(KijiRowKeyComponents rowKey) throws IOException {
    if (!isOpen()) {
      throw new IOException("Closed");
    }

    // Only a cached non-null value proves the key exists. get() also caches null for
    // absent columns, so a null entry must fall through to a real lookup.
    if (null != mResultCache && null != mResultCache.get(rowKey)) {
      return true; // Cache hit.
    }

    final EntityId eid = rowKey.getEntityIdForTable(mKijiTable);
    final KijiRowData rowData = getRow(eid);
    if (null == rowData) {
      return false;
    }
    return rowData.containsColumn(mColumn.getFamily(), mColumn.getQualifier());
  }

  /**
   * {@inheritDoc}
   *
   * <p>Closing an already-closed reader has no effect.</p>
   */
  @Override
  public void close() throws IOException {
    if (!isOpen()) {
      return;
    }
    // Clear the field first so that isOpen() reports false from now on and the table
    // is released exactly once, even if closing the pool throws.
    final KijiTable table = mKijiTable;
    mKijiTable = null;
    try {
      mTableReaderPool.close();
    } catch (IOException ioe) {
      throw ioe;
    } catch (Exception e) {
      throw new RuntimeException(e);
    } finally {
      table.release();
    }
  }
}
}