/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.metadata.lineage; import co.cask.cdap.api.dataset.DatasetDefinition; import co.cask.cdap.api.dataset.DatasetManagementException; import co.cask.cdap.api.dataset.DatasetProperties; import co.cask.cdap.data.runtime.DataSetsModules; import co.cask.cdap.data2.datafabric.dataset.DatasetsUtil; import co.cask.cdap.data2.dataset2.DatasetFramework; import co.cask.cdap.data2.transaction.Transactions; import co.cask.cdap.proto.Id; import co.cask.tephra.TransactionExecutor; import co.cask.tephra.TransactionExecutorFactory; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Predicate; import com.google.common.base.Throwables; import com.google.inject.Inject; import com.google.inject.name.Named; import java.io.IOException; import java.util.List; import java.util.Set; import javax.annotation.Nullable; /** * Store for storing/retrieving lineage information for a Dataset. */ public class LineageStore { private static final Id.DatasetInstance LINEAGE_DATASET_ID = Id.DatasetInstance.from(Id.Namespace.SYSTEM, "lineage"); private final TransactionExecutorFactory executorFactory; private final DatasetFramework datasetFramework; private final Id.DatasetInstance lineageDatasetId; @Inject public LineageStore(TransactionExecutorFactory executorFactory, @Named(DataSetsModules.BASIC_DATASET_FRAMEWORK) DatasetFramework datasetFramework) { this(executorFactory, datasetFramework, LINEAGE_DATASET_ID); } @VisibleForTesting public LineageStore(TransactionExecutorFactory executorFactory, DatasetFramework datasetFramework, Id.DatasetInstance lineageDatasetId) { this.executorFactory = executorFactory; this.datasetFramework = datasetFramework; this.lineageDatasetId = lineageDatasetId; } /** * Add a program-dataset access. * * @param run program run information * @param datasetInstance dataset accessed by the program * @param accessType access type * @param accessTimeMillis time of access */ public void addAccess(Id.Run run, Id.DatasetInstance datasetInstance, AccessType accessType, long accessTimeMillis) { addAccess(run, datasetInstance, accessType, accessTimeMillis, null); } /** * Add a program-dataset access. * * @param run program run information * @param datasetInstance dataset accessed by the program * @param accessType access type * @param accessTimeMillis time of access * @param component program component such as flowlet id, etc. */ public void addAccess(final Id.Run run, final Id.DatasetInstance datasetInstance, final AccessType accessType, final long accessTimeMillis, @Nullable final Id.NamespacedId component) { execute(new TransactionExecutor.Procedure<LineageDataset>() { @Override public void apply(LineageDataset input) throws Exception { input.addAccess(run, datasetInstance, accessType, accessTimeMillis, component); } }); } /** * Add a program-stream access. * * @param run program run information * @param stream stream accessed by the program * @param accessType access type * @param accessTimeMillis time of access */ public void addAccess(Id.Run run, Id.Stream stream, AccessType accessType, long accessTimeMillis) { addAccess(run, stream, accessType, accessTimeMillis, null); } /** * Add a program-stream access. * * @param run program run information * @param stream stream accessed by the program * @param accessType access type * @param accessTimeMillis time of access * @param component program component such as flowlet id, etc. */ public void addAccess(final Id.Run run, final Id.Stream stream, final AccessType accessType, final long accessTimeMillis, @Nullable final Id.NamespacedId component) { execute(new TransactionExecutor.Procedure<LineageDataset>() { @Override public void apply(LineageDataset input) throws Exception { input.addAccess(run, stream, accessType, accessTimeMillis, component); } }); } /** * @return a set of entities (program and data it accesses) associated with a program run. */ public Set<Id.NamespacedId> getEntitiesForRun(final Id.Run run) { return execute(new TransactionExecutor.Function<LineageDataset, Set<Id.NamespacedId>>() { @Override public Set<Id.NamespacedId> apply(LineageDataset input) throws Exception { return input.getEntitiesForRun(run); } }); } /** * Fetch program-dataset access information for a dataset for a given period. * * @param datasetInstance dataset for which to fetch access information * @param start start time period * @param end end time period * @param filter filter to be applied on result set * @return program-dataset access information */ public Set<Relation> getRelations(final Id.DatasetInstance datasetInstance, final long start, final long end, final Predicate<Relation> filter) { return execute(new TransactionExecutor.Function<LineageDataset, Set<Relation>>() { @Override public Set<Relation> apply(LineageDataset input) throws Exception { return input.getRelations(datasetInstance, start, end, filter); } }); } /** * Fetch program-stream access information for a dataset for a given period. * * @param stream stream for which to fetch access information * @param start start time period * @param end end time period * @param filter filter to be applied on result set * @return program-stream access information */ public Set<Relation> getRelations(final Id.Stream stream, final long start, final long end, final Predicate<Relation> filter) { return execute(new TransactionExecutor.Function<LineageDataset, Set<Relation>>() { @Override public Set<Relation> apply(LineageDataset input) throws Exception { return input.getRelations(stream, start, end, filter); } }); } /** * Fetch program-dataset access information for a program for a given period. * * @param program program for which to fetch access information * @param start start time period * @param end end time period * @param filter filter to be applied on result set * @return program-dataset access information */ public Set<Relation> getRelations(final Id.Program program, final long start, final long end, final Predicate<Relation> filter) { return execute(new TransactionExecutor.Function<LineageDataset, Set<Relation>>() { @Override public Set<Relation> apply(LineageDataset input) throws Exception { return input.getRelations(program, start, end, filter); } }); } /** * @return a set of access times (for program and data it accesses) associated with a program run. */ @VisibleForTesting public List<Long> getAccessTimesForRun(final Id.Run run) { return execute(new TransactionExecutor.Function<LineageDataset, List<Long>>() { @Override public List<Long> apply(LineageDataset input) throws Exception { return input.getAccessTimesForRun(run); } }); } private <T> T execute(TransactionExecutor.Function<LineageDataset, T> func) { LineageDataset lineageDataset = newLineageDataset(); TransactionExecutor txExecutor = Transactions.createTransactionExecutor(executorFactory, lineageDataset); return txExecutor.executeUnchecked(func, lineageDataset); } private void execute(TransactionExecutor.Procedure<LineageDataset> func) { LineageDataset lineageDataset = newLineageDataset(); TransactionExecutor txExecutor = Transactions.createTransactionExecutor(executorFactory, lineageDataset); txExecutor.executeUnchecked(func, lineageDataset); } private LineageDataset newLineageDataset() { try { return DatasetsUtil.getOrCreateDataset( datasetFramework, lineageDatasetId, LineageDataset.class.getName(), DatasetProperties.EMPTY, DatasetDefinition.NO_ARGUMENTS, null); } catch (Exception e) { throw Throwables.propagate(e); } } /** * Adds datasets and types to the given {@link DatasetFramework}. Used by the upgrade tool to upgrade Lineage Dataset. * * @param framework framework to add types and datasets to */ public static void setupDatasets(DatasetFramework framework) throws IOException, DatasetManagementException { framework.addInstance(LineageDataset.class.getName(), LINEAGE_DATASET_ID, DatasetProperties.EMPTY); } }