/* * Copyright © 2014-2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.dataset2; import co.cask.cdap.api.data.DatasetContext; import co.cask.cdap.api.data.DatasetInstantiationException; import co.cask.cdap.api.dataset.Dataset; import co.cask.cdap.api.dataset.metrics.MeteredDataset; import co.cask.cdap.api.metrics.MetricsContext; import co.cask.cdap.common.conf.Constants; import co.cask.cdap.data.dataset.SystemDatasetInstantiator; import co.cask.cdap.proto.id.NamespaceId; import co.cask.tephra.Transaction; import co.cask.tephra.TransactionAware; import co.cask.tephra.TransactionContext; import co.cask.tephra.TransactionFailureException; import co.cask.tephra.TransactionSystemClient; import com.google.common.base.Preconditions; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.ForwardingLoadingCache; import com.google.common.cache.LoadingCache; import com.google.common.cache.RemovalListener; import com.google.common.cache.RemovalNotification; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import com.google.common.io.Closeables; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.Closeable; import java.util.Collection; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutionException; import javax.annotation.Nullable; import javax.annotation.ParametersAreNonnullByDefault; /** * Implementation of {@link DatasetContext} that allows to dynamically load datasets * into a started {@link TransactionContext}. Datasets acquired from this context are distinct from any * Datasets instantiated outside this class. */ public class SingleThreadDatasetCache extends DynamicDatasetCache { private static final Logger LOG = LoggerFactory.getLogger(SingleThreadDatasetCache.class); private static final Iterable<TransactionAware> NO_TX_AWARES = ImmutableList.of(); private final LoadingCache<DatasetCacheKey, Dataset> datasetCache; private final CacheLoader<DatasetCacheKey, Dataset> datasetLoader; private final Map<DatasetCacheKey, TransactionAware> activeTxAwares = new HashMap<>(); private final Map<DatasetCacheKey, Dataset> staticDatasets = new HashMap<>(); private final Set<TransactionAware> extraTxAwares = Sets.newIdentityHashSet(); private DelayedDiscardingTransactionContext txContext = null; /** * See {@link DynamicDatasetCache}. * * @param staticDatasets if non-null, a map from dataset name to runtime arguments. These datasets will be * instantiated immediately, and they will participate in every transaction started * through {@link #newTransactionContext()}. */ public SingleThreadDatasetCache(final SystemDatasetInstantiator instantiator, final TransactionSystemClient txClient, final NamespaceId namespace, Map<String, String> runtimeArguments, @Nullable final MetricsContext metricsContext, @Nullable Map<String, Map<String, String>> staticDatasets) { super(instantiator, txClient, namespace, runtimeArguments, metricsContext); this.datasetLoader = new CacheLoader<DatasetCacheKey, Dataset>() { @Override @ParametersAreNonnullByDefault public Dataset load(DatasetCacheKey key) throws Exception { Dataset dataset = instantiator.getDataset(namespace.dataset(key.getName()).toId(), key.getArguments(), key.getAccessType()); if (dataset instanceof MeteredDataset && metricsContext != null) { ((MeteredDataset) dataset).setMetricsCollector( metricsContext.childContext(Constants.Metrics.Tag.DATASET, key.getName())); } return dataset; } }; LoadingCache<DatasetCacheKey, Dataset> delegate = CacheBuilder.newBuilder().removalListener( new RemovalListener<DatasetCacheKey, Dataset>() { @Override @ParametersAreNonnullByDefault public void onRemoval(RemovalNotification<DatasetCacheKey, Dataset> notification) { closeDataset(notification.getKey(), notification.getValue()); } }) .build(datasetLoader); this.datasetCache = new LineageRecordingDatasetCache(delegate, instantiator, namespace); // add all the static datasets to the cache. This makes sure that a) the cache is preloaded and // b) if any static datasets cannot be loaded, the problem show right away (and not later). See // also the javadoc of this c'tor, which states that all static datasets get loaded right away. if (staticDatasets != null) { for (Map.Entry<String, Map<String, String>> entry : staticDatasets.entrySet()) { this.staticDatasets.put(new DatasetCacheKey(entry.getKey(), entry.getValue()), getDataset(entry.getKey(), entry.getValue())); } } } /** * Cache that records lineage for a dataset access each time the dataset is requested. */ private static final class LineageRecordingDatasetCache extends ForwardingLoadingCache.SimpleForwardingLoadingCache<DatasetCacheKey, Dataset> { private final SystemDatasetInstantiator instantiator; private final NamespaceId namespaceId; protected LineageRecordingDatasetCache(LoadingCache<DatasetCacheKey, Dataset> delegate, SystemDatasetInstantiator instantiator, NamespaceId namespaceId) { super(delegate); this.instantiator = instantiator; this.namespaceId = namespaceId; } @Override public Dataset get(DatasetCacheKey key) throws ExecutionException { // write lineage information on each get call instantiator.writeLineage(namespaceId.dataset(key.getName()).toId(), key.getAccessType()); return super.get(key); } } private void closeDataset(DatasetCacheKey key, Dataset dataset) { // close the dataset if (dataset != null) { try { dataset.close(); } catch (Throwable e) { LOG.warn(String.format("Error closing dataset '%s' of type %s", String.valueOf(key), dataset.getClass().getName()), e); } } } @Override public <T extends Dataset> T getDataset(DatasetCacheKey key, boolean bypass) throws DatasetInstantiationException { Dataset dataset; try { if (bypass) { dataset = datasetLoader.load(key); } else { try { dataset = datasetCache.get(key); } catch (ExecutionException e) { throw e.getCause(); } } } catch (Throwable t) { throw new DatasetInstantiationException( String.format("Could not instantiate dataset '%s'", key.getName()), t); } // make sure the dataset exists and is of the right type if (dataset == null) { throw new DatasetInstantiationException(String.format("Dataset '%s' does not exist", key.getName())); } T typedDataset; try { @SuppressWarnings("unchecked") T t = (T) dataset; typedDataset = t; } catch (Throwable t) { // must be ClassCastException throw new DatasetInstantiationException( String.format("Could not cast dataset '%s' to requested type. Actual type is %s.", key.getName(), dataset.getClass().getName()), t); } // any transaction aware that is not in the active tx-awares is added to the current tx context (if there is one). if (!bypass && dataset instanceof TransactionAware) { TransactionAware txAware = (TransactionAware) dataset; TransactionAware existing = activeTxAwares.get(key); if (existing == null) { activeTxAwares.put(key, txAware); if (txContext != null) { txContext.addTransactionAware(txAware); } } else if (existing != dataset) { // this better be the same dataset, otherwise the cache did not work throw new IllegalStateException( String.format("Unexpected state: Cache returned %s for %s, which is different from the " + "active transaction aware %s for the same key. This should never happen.", dataset, key, existing)); } } return typedDataset; } @Override public void discardDataset(Dataset dataset) { Preconditions.checkNotNull(dataset); // static datasets cannot be discarded if (staticDatasets.containsValue(dataset)) { LOG.warn("Attempt to discard static dataset {} from dataset cache", dataset); return; } if (txContext == null || !(dataset instanceof TransactionAware)) { discardSafely(dataset); } else { // it is a tx-aware: it may participate in a transaction, so mark it as to be discarded after the tx: // the transaction context will call discardSafely() for this dataset when the tx is complete. txContext.discardAfterTx((TransactionAware) dataset); } // remove from activeTxAwares in any case - a discarded dataset does not need to participate in external tx // iterates over all datasets but we do not expect this map to become large for (Map.Entry<DatasetCacheKey, TransactionAware> entry : activeTxAwares.entrySet()) { if (dataset == entry.getValue()) { activeTxAwares.remove(entry.getKey()); return; } } } /** * Discard a dataset when it is known that no transaction is going on. * * @param dataset this is an Object because we need to pass in TransactionAware or Dataset */ public void discardSafely(Object dataset) { // iterates over all datasets but we do not expect this map to become large for (Map.Entry<DatasetCacheKey, Dataset> entry : datasetCache.asMap().entrySet()) { if (dataset == entry.getValue()) { datasetCache.invalidate(entry.getKey()); return; } } // we can only hope that dataset.toString() is meaningful LOG.warn("Attempt to discard a dataset that was not acquired through this context: {}", dataset); } @Override public TransactionContext newTransactionContext() { dismissTransactionContext(); txContext = new DelayedDiscardingTransactionContext(activeTxAwares.values(), extraTxAwares); return txContext; } @Override public void dismissTransactionContext() { if (txContext != null) { txContext.cleanup(); txContext = null; } } @Override public Iterable<TransactionAware> getStaticTransactionAwares() { return Iterables.filter(staticDatasets.values(), TransactionAware.class); } @Override public Iterable<TransactionAware> getTransactionAwares() { if (txContext == null) { return NO_TX_AWARES; } return Iterables.concat(extraTxAwares, activeTxAwares.values()); } @Override public void addExtraTransactionAware(TransactionAware txAware) { extraTxAwares.add(txAware); if (txContext != null) { txContext.addTransactionAware(txAware); } } @Override public void removeExtraTransactionAware(TransactionAware txAware) { extraTxAwares.remove(txAware); if (txContext != null) { txContext.removeTransactionAware(txAware); } } @Override public void invalidate() { dismissTransactionContext(); activeTxAwares.clear(); try { datasetCache.invalidateAll(); } catch (Throwable t) { LOG.error("Error invalidating dataset cache", t); } try { datasetCache.cleanUp(); } catch (Throwable t) { LOG.error("Error cleaning up dataset cache", t); } } @Override public void close() { for (TransactionAware txAware : extraTxAwares) { if (txAware instanceof Closeable) { Closeables.closeQuietly((Closeable) txAware); } } invalidate(); super.close(); } /** * This is an implementation of TransactionContext that delays the discarding of a transaction-aware * dataset until after the transaction is complete. This is needed in cases where a client calls * {@link DatasetContext#discardDataset(Dataset)} in the middle of a transaction: The client indicates * that it does not need that dataset any more. But it is participating in the current transaction, * and needs to continue to do so until the transaction has ended. Therefore this class will put * that dataset on a toDiscard set, which is inspected after every transaction. */ private class DelayedDiscardingTransactionContext extends TransactionContext { private final Collection<TransactionAware> txAwares; private final Collection<TransactionAware> toDiscard; private TransactionContext txContext; /** * Constructs the context from the transaction system client (needed by TransactionContext). */ private DelayedDiscardingTransactionContext(Collection<TransactionAware> txAwares, Collection<TransactionAware> extraTxAwares) { super(txClient); this.toDiscard = Sets.newIdentityHashSet(); this.txAwares = Sets.newIdentityHashSet(); this.txAwares.addAll(txAwares); this.txAwares.addAll(extraTxAwares); } @Override public boolean addTransactionAware(TransactionAware txAware) { if (!txAwares.add(txAware)) { return false; // it must already be in the actual tx-context } // this is new, add it to current tx context if (txContext != null) { txContext.addTransactionAware(txAware); } // in case this was marked for discarding, remove that mark toDiscard.remove(txAware); return true; } @Override public boolean removeTransactionAware(TransactionAware txAware) { // if the actual tx-context is non-null, we are in the middle of a transaction, and can't remove the tx-aware // so just remove this from the tx-awares here, and the next transaction will be started without it. return txAwares.remove(txAware); } /** * Mark a tx-aware for discarding after the transaction is complete. */ public void discardAfterTx(TransactionAware txAware) { toDiscard.add(txAware); txAwares.remove(txAware); } /** * Discards all datasets marked for discarding, through the dataset cache, and set the tx context to null. */ public void cleanup() { for (TransactionAware txAware : toDiscard) { SingleThreadDatasetCache.this.discardSafely(txAware); } toDiscard.clear(); txContext = null; } @Override public void start() throws TransactionFailureException { if (txContext != null && txContext.getCurrentTransaction() != null) { LOG.warn("Starting a new transaction while the previous transaction {} is still on-going. ", txContext.getCurrentTransaction().getTransactionId()); cleanup(); } txContext = new TransactionContext(SingleThreadDatasetCache.this.txClient, txAwares); txContext.start(); } @Override public void finish() throws TransactionFailureException { // copied from TransactionContext so it behaves exactly the same in this case Preconditions.checkState(txContext != null, "Cannot finish tx that has not been started"); try { txContext.finish(); } finally { cleanup(); } } @Override public void checkpoint() throws TransactionFailureException { // copied from TransactionContext so it behaves exactly the same in this case Preconditions.checkState(txContext != null, "Cannot checkpoint tx that has not been started"); txContext.checkpoint(); } @Nullable @Override public Transaction getCurrentTransaction() { return txContext == null ? null : txContext.getCurrentTransaction(); } @Override public void abort(TransactionFailureException cause) throws TransactionFailureException { if (txContext == null) { // same behavior as Tephra's TransactionContext // might be called by some generic exception handler even though already aborted/finished - we allow that return; } try { txContext.abort(cause); } finally { cleanup(); } } } }