/*
* Copyright © 2014-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data2.dataset2;
import co.cask.cdap.api.common.RuntimeArguments;
import co.cask.cdap.api.common.Scope;
import co.cask.cdap.api.data.DatasetContext;
import co.cask.cdap.api.data.DatasetInstantiationException;
import co.cask.cdap.api.dataset.Dataset;
import co.cask.cdap.api.dataset.DatasetDefinition;
import co.cask.cdap.api.metrics.MetricsContext;
import co.cask.cdap.common.lang.ClassLoaders;
import co.cask.cdap.data.dataset.SystemDatasetInstantiator;
import co.cask.cdap.data2.metadata.lineage.AccessType;
import co.cask.cdap.proto.id.NamespaceId;
import co.cask.tephra.TransactionAware;
import co.cask.tephra.TransactionContext;
import co.cask.tephra.TransactionSystemClient;
import com.google.common.base.Objects;
import com.google.common.base.Supplier;
import com.google.common.io.Closeables;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;
/**
* Implementation of {@link DatasetContext} that allows to dynamically load datasets
* into a started {@link TransactionContext}. Datasets acquired from this context are
* distinct from any Datasets instantiated outside this class. Datasets are cached,
* such that repeated calls to {@link #getDataset(String)} (or one of its overloads) for
* the same dataset and arguments return the same instance.
*
* The cache also maintains a transaction context and adds all acquired datasets to that
* context, so that they participate in the transactions executed with that context. If a
* dataset is dismissed during the course of a transaction, then this context delays the
* dismissal until the transaction is complete.
*
* Optionally, this cache can have a set of static datasets that are added to every
* transaction context created by the cache. Static datasets cannot be dismissed.
*
* Also, transaction-aware "datasets" that were not created by this DynamicDatasetCache
* can be added to the transaction context. This is useful for transaction-awares that
* do not implement a Dataset (such as queue consumers etc.).
*/
public abstract class DynamicDatasetCache implements DatasetContext, Supplier<TransactionContext> {

  protected final SystemDatasetInstantiator instantiator;
  protected final TransactionSystemClient txClient;
  protected final NamespaceId namespace;
  protected final Map<String, String> runtimeArguments;
  protected final MetricsContext metricsContext;

  /**
   * Create a dynamic dataset factory.
   *
   * @param instantiator the instantiator held by this cache; it is closed by {@link #close()}
   * @param txClient the transaction system client to use for new transaction contexts
   * @param namespace the {@link NamespaceId} in which all datasets are instantiated
   * @param runtimeArguments all runtime arguments that are available to datasets in the context. Runtime arguments
   *                         are expected to be scoped so that arguments for one dataset do not override arguments
   *                         of other datasets.
   * @param metricsContext if non-null, this context is used as the context for dataset metrics,
   *                       with an additional tag for the dataset name.
   */
  public DynamicDatasetCache(SystemDatasetInstantiator instantiator,
                             TransactionSystemClient txClient,
                             NamespaceId namespace,
                             Map<String, String> runtimeArguments,
                             @Nullable MetricsContext metricsContext) {
    this.instantiator = instantiator;
    this.txClient = txClient;
    this.namespace = namespace;
    this.runtimeArguments = runtimeArguments;
    this.metricsContext = metricsContext;
  }

  /**
   * Get an instance of the specified dataset without extra arguments. Delegates to
   * {@link #getDataset(String, Map)} with {@link DatasetDefinition#NO_ARGUMENTS}.
   *
   * @param name the name of the dataset
   * @param <T> the type of the dataset
   * @return the dataset instance
   * @throws DatasetInstantiationException if the dataset cannot be instantiated
   */
  @Override
  public final <T extends Dataset> T getDataset(String name)
    throws DatasetInstantiationException {
    return getDataset(name, DatasetDefinition.NO_ARGUMENTS);
  }

  /**
   * Get an instance of the specified dataset through the cache (no bypass).
   *
   * @param name the name of the dataset
   * @param arguments arguments for the dataset
   * @param <T> the type of the dataset
   * @return the dataset instance
   * @throws DatasetInstantiationException if the dataset cannot be instantiated
   */
  @Override
  public final <T extends Dataset> T getDataset(String name, Map<String, String> arguments)
    throws DatasetInstantiationException {
    return getDataset(name, arguments, false);
  }

  /**
   * Instantiate a dataset, allowing to bypass the cache. This means that the dataset will not be added to
   * the in-progress transactions, and it will also not be closed when the cache is closed.
   * The access type defaults to {@link AccessType#UNKNOWN}.
   *
   * @param name the name of the dataset
   * @param arguments arguments for the dataset
   * @param bypass whether to bypass the cache
   * @param <T> the type of the dataset
   * @return the dataset instance
   * @throws DatasetInstantiationException if the dataset cannot be instantiated
   */
  public final <T extends Dataset> T getDataset(String name, Map<String, String> arguments, boolean bypass)
    throws DatasetInstantiationException {
    return getDataset(name, arguments, bypass, AccessType.UNKNOWN);
  }

  /**
   * Get an instance of the specified dataset, with the specified access type. The cache is not bypassed.
   *
   * @param name the name of the dataset
   * @param arguments arguments for the dataset
   * @param accessType the accessType
   * @param <T> the type of the dataset
   * @return the dataset instance
   * @throws DatasetInstantiationException if the dataset cannot be instantiated
   */
  public final <T extends Dataset> T getDataset(String name, Map<String, String> arguments,
                                                AccessType accessType) throws DatasetInstantiationException {
    return getDataset(name, arguments, false, accessType);
  }

  /**
   * Instantiate a dataset, allowing to bypass the cache. This means that the dataset will not be added to
   * the in-progress transactions, and it will also not be closed when the cache is closed.
   *
   * The effective dataset arguments are the dataset-scoped entries of this cache's runtime arguments,
   * overlaid with the given {@code arguments}.
   *
   * @param name the name of the dataset
   * @param arguments arguments for the dataset; {@code null} is treated as "no arguments"
   * @param bypass whether to bypass the cache
   * @param accessType the accessType
   * @param <T> the type of the dataset
   * @return the dataset instance
   * @throws DatasetInstantiationException if the dataset cannot be instantiated
   */
  public final <T extends Dataset> T getDataset(String name, Map<String, String> arguments, boolean bypass,
                                                AccessType accessType) throws DatasetInstantiationException {
    // Apply actual runtime arguments on top of the context's runtime arguments for this dataset.
    // The merge happens in a fresh map so that this method never writes into whatever map
    // extractScope() hands back (which may be shared or immutable - not verifiable from here).
    Map<String, String> scopedArguments = RuntimeArguments.extractScope(Scope.DATASET, name, runtimeArguments);
    Map<String, String> dsArguments = scopedArguments == null
      ? new HashMap<String, String>()
      : new HashMap<String, String>(scopedArguments);
    if (arguments != null) {
      dsArguments.putAll(arguments);
    }

    // Need to switch the context classloader to the CDAP system since getting dataset instance is in CDAP context
    // The internal of the dataset instantiate may switch the context class loader to a different one when necessary
    ClassLoader previousClassLoader = ClassLoaders.setContextClassLoader(getClass().getClassLoader());
    try {
      return getDataset(new DatasetCacheKey(name, dsArguments, accessType), bypass);
    } finally {
      // always restore the caller's context class loader, even if instantiation failed
      ClassLoaders.setContextClassLoader(previousClassLoader);
    }
  }

  /**
   * Release a dataset by delegating to {@code discardDataset(Dataset)}.
   *
   * @param dataset the dataset to release
   */
  @Override
  public void releaseDataset(Dataset dataset) {
    discardDataset(dataset);
  }

  /**
   * To be implemented by subclasses.
   *
   * @param key the cache key identifying the dataset (name, arguments and access type)
   * @param bypass if true, bypass the dataset cache, and do not add this to the transaction.
   * @param <T> the type of the dataset
   * @return the dataset instance
   * @throws DatasetInstantiationException if the dataset cannot be instantiated
   */
  protected abstract <T extends Dataset> T getDataset(DatasetCacheKey key, boolean bypass)
    throws DatasetInstantiationException;

  /**
   * Return a new transaction context for the current thread. All transaction-aware static datasets and all
   * extra transaction-awares are added to this transaction initially. Also, any transaction-aware that was
   * previously dynamically acquired, and that has not been garbage-collected, is added to the transaction.
   * Any transaction-aware datasets that will subsequently be obtained via {@link #getDataset(String)} (or one
   * of its overloads) will then also be added to this transaction context and thus participate in its
   * transaction. These datasets can also be retrieved using {@link #getTransactionAwares()}.
   *
   * @return a new transaction context
   */
  public abstract TransactionContext newTransactionContext();

  /**
   * Dismiss the current transaction context. This releases the references to the context's
   * transaction-aware datasets so that they can be collected by the garbage collector (if no one
   * else is holding a reference to them). The static datasets and the extra transaction-awares,
   * however, will not be made available to garbage collection, and will participate in the
   * next transaction (created by {@link #newTransactionContext()}).
   */
  public abstract void dismissTransactionContext();

  /**
   * {@link Supplier} view of this cache: each call returns {@link #newTransactionContext()}.
   *
   * @return a new transaction context
   */
  @Override
  public TransactionContext get() {
    return newTransactionContext();
  }

  /**
   * @return the static datasets that are transaction-aware. This is the same independent of whether a
   *         transaction context was started using {@link #newTransactionContext()}.
   */
  public abstract Iterable<TransactionAware> getStaticTransactionAwares();

  /**
   * @return the transaction-aware datasets that participate in the current transaction. If
   *         {@link #newTransactionContext()} has not been called (or {@link #dismissTransactionContext()} has been
   *         called), then there is no transaction and this will return an empty iterable.
   */
  public abstract Iterable<TransactionAware> getTransactionAwares();

  /**
   * Add an extra transaction aware to the static datasets. This is a transaction aware that
   * is not instantiated through this factory, but needs to participate in every transaction.
   * Note that if a transaction is in progress, then this transaction aware will join that transaction.
   *
   * @param txAware the transaction aware to add
   */
  public abstract void addExtraTransactionAware(TransactionAware txAware);

  /**
   * Remove a transaction-aware that was added via {@link #addExtraTransactionAware(TransactionAware)}.
   * Note that if a transaction is in progress, then this transaction aware will leave that transaction.
   *
   * @param txAware the transaction aware to remove
   */
  public abstract void removeExtraTransactionAware(TransactionAware txAware);

  /**
   * Close and dismiss all datasets that were obtained through this factory, and destroy the factory.
   * If any extra transaction-awares were added to this cache (and not removed), then they will also
   * be closed.
   *
   * This base implementation only closes the instantiator, quietly (failures are not propagated);
   * closing the datasets themselves is left to subclasses that override this method.
   */
  public void close() {
    Closeables.closeQuietly(instantiator);
  }

  /**
   * Close and dismiss all datasets that were obtained through this factory. This can be used to ensure
   * that all resources held by datasets are released, even though the factory may still be used for
   * subsequent execution.
   */
  public abstract void invalidate();

  /**
   * A key used by implementations of {@link DynamicDatasetCache} to cache Datasets. Includes the dataset name, its
   * arguments, and its {@link AccessType}. All three take part in {@link #equals(Object)} and {@link #hashCode()}.
   */
  protected static final class DatasetCacheKey {

    private final String name;
    private final Map<String, String> arguments;
    private final AccessType accessType;

    /**
     * Create a key with {@link AccessType#UNKNOWN}.
     *
     * @param name the name of the dataset
     * @param arguments the dataset arguments; {@code null} is replaced by {@link DatasetDefinition#NO_ARGUMENTS}
     */
    protected DatasetCacheKey(String name, @Nullable Map<String, String> arguments) {
      this(name, arguments, AccessType.UNKNOWN);
    }

    /**
     * Create a key for the given dataset name, arguments and access type.
     *
     * @param name the name of the dataset
     * @param arguments the dataset arguments; {@code null} is replaced by {@link DatasetDefinition#NO_ARGUMENTS}
     * @param accessType the access type
     */
    protected DatasetCacheKey(String name, @Nullable Map<String, String> arguments, AccessType accessType) {
      this.name = name;
      this.arguments = arguments == null ? DatasetDefinition.NO_ARGUMENTS : arguments;
      this.accessType = accessType;
    }

    /**
     * @return the name of the dataset
     */
    public String getName() {
      return name;
    }

    /**
     * @return the dataset arguments; never {@code null}
     */
    public Map<String, String> getArguments() {
      return arguments;
    }

    /**
     * @return the access type of this key
     */
    public AccessType getAccessType() {
      return accessType;
    }

    /**
     * Two keys are equal if they have the same name, arguments and access type.
     */
    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }
      DatasetCacheKey that = (DatasetCacheKey) o;
      // The access type is part of the key's identity: otherwise an entry cached for one access type
      // would be returned for a request made with a different access type.
      return Objects.equal(this.name, that.name) &&
        Objects.equal(this.arguments, that.arguments) &&
        Objects.equal(this.accessType, that.accessType);
    }

    /**
     * Hash over the same fields that {@link #equals(Object)} compares.
     */
    @Override
    public int hashCode() {
      return Objects.hashCode(name, arguments, accessType);
    }

    @Override
    public String toString() {
      return Objects.toStringHelper(this)
        .add("name", name)
        .add("arguments", arguments)
        .add("accessType", accessType)
        .toString();
    }
  }
}