/* * Copyright 2015-2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.metadata.store; import co.cask.cdap.api.dataset.DatasetDefinition; import co.cask.cdap.api.dataset.DatasetManagementException; import co.cask.cdap.api.dataset.DatasetProperties; import co.cask.cdap.data.runtime.DataSetsModules; import co.cask.cdap.data2.audit.AuditPublisher; import co.cask.cdap.data2.audit.AuditPublishers; import co.cask.cdap.data2.audit.payload.builder.MetadataPayloadBuilder; import co.cask.cdap.data2.datafabric.dataset.DatasetsUtil; import co.cask.cdap.data2.dataset2.DatasetFramework; import co.cask.cdap.data2.metadata.dataset.Metadata; import co.cask.cdap.data2.metadata.dataset.MetadataDataset; import co.cask.cdap.data2.metadata.dataset.MetadataEntry; import co.cask.cdap.data2.metadata.indexer.Indexer; import co.cask.cdap.data2.metadata.publisher.MetadataChangePublisher; import co.cask.cdap.data2.transaction.Transactions; import co.cask.cdap.proto.Id; import co.cask.cdap.proto.audit.AuditType; import co.cask.cdap.proto.metadata.MetadataChangeRecord; import co.cask.cdap.proto.metadata.MetadataRecord; import co.cask.cdap.proto.metadata.MetadataScope; import co.cask.cdap.proto.metadata.MetadataSearchResultRecord; import co.cask.cdap.proto.metadata.MetadataSearchTargetType; import co.cask.tephra.TransactionExecutor; import co.cask.tephra.TransactionExecutorFactory; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import com.google.inject.Inject; import com.google.inject.name.Named; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import javax.annotation.Nullable; /** * Implementation of {@link MetadataStore} used in distributed mode. */ public class DefaultMetadataStore implements MetadataStore { private static final Logger LOG = LoggerFactory.getLogger(DefaultMetadataStore.class); private static final Id.DatasetInstance BUSINESS_METADATA_INSTANCE_ID = Id.DatasetInstance.from(Id.Namespace.SYSTEM, "business.metadata"); private static final Id.DatasetInstance SYSTEM_METADATA_INSTANCE_ID = Id.DatasetInstance.from(Id.Namespace.SYSTEM, "system.metadata"); private static final Map<String, String> EMPTY_PROPERTIES = ImmutableMap.of(); private static final Set<String> EMPTY_TAGS = ImmutableSet.of(); private static final int BATCH_SIZE = 1000; private static final Comparator<Map.Entry<Id.NamespacedId, Integer>> SEARCH_RESULT_DESC_SCORE_COMPARATOR = new Comparator<Map.Entry<Id.NamespacedId, Integer>>() { @Override public int compare(Map.Entry<Id.NamespacedId, Integer> o1, Map.Entry<Id.NamespacedId, Integer> o2) { // sort in descending order return o2.getValue() - o1.getValue(); } }; private final TransactionExecutorFactory txExecutorFactory; private final DatasetFramework dsFramework; private final MetadataChangePublisher changePublisher; private AuditPublisher auditPublisher; @Inject DefaultMetadataStore(TransactionExecutorFactory txExecutorFactory, @Named(DataSetsModules.BASIC_DATASET_FRAMEWORK) DatasetFramework dsFramework, MetadataChangePublisher changePublisher) { this.txExecutorFactory = txExecutorFactory; this.dsFramework = dsFramework; this.changePublisher = changePublisher; } @SuppressWarnings("unused") @Inject(optional = true) public void setAuditPublisher(AuditPublisher auditPublisher) { this.auditPublisher = auditPublisher; } @Override public void setProperties(MetadataScope scope, Id.NamespacedId entityId, Map<String, String> properties) { setProperties(scope, entityId, properties, null); } /** * Adds/updates metadata for the specified {@link Id.NamespacedId}. */ @Override public void setProperties(final MetadataScope scope, final Id.NamespacedId entityId, final Map<String, String> properties, @Nullable final Indexer indexer) { final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>(); execute(new TransactionExecutor.Procedure<MetadataDataset>() { @Override public void apply(MetadataDataset input) throws Exception { Map<String, String> existingProperties = input.getProperties(entityId); Set<String> existingTags = input.getTags(entityId); previousRef.set(new MetadataRecord(entityId, scope, existingProperties, existingTags)); for (Map.Entry<String, String> entry : properties.entrySet()) { input.setProperty(entityId, entry.getKey(), entry.getValue(), indexer); } } }, scope); final ImmutableMap.Builder<String, String> propAdditions = ImmutableMap.builder(); final ImmutableMap.Builder<String, String> propDeletions = ImmutableMap.builder(); MetadataRecord previousRecord = previousRef.get(); // Iterating over properties all over again, because we want to move the diff calculation outside the transaction. for (Map.Entry<String, String> entry : properties.entrySet()) { String existingValue = previousRecord.getProperties().get(entry.getKey()); if (existingValue != null && existingValue.equals(entry.getValue())) { // Value already exists and is the same as the value being passed. No update necessary. continue; } // At this point, its either an update of an existing property (1 addition + 1 deletion) or a new property. // If it is an update, then mark a single deletion. if (existingValue != null) { propDeletions.put(entry.getKey(), existingValue); } // In both update or new cases, mark a single addition. propAdditions.put(entry.getKey(), entry.getValue()); } publish(previousRecord, new MetadataRecord(entityId, scope, propAdditions.build(), EMPTY_TAGS), new MetadataRecord(entityId, scope, propDeletions.build(), EMPTY_TAGS)); } /** * Adds tags for the specified {@link Id.NamespacedId}. */ @Override public void addTags(final MetadataScope scope, final Id.NamespacedId entityId, final String... tagsToAdd) { final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>(); execute(new TransactionExecutor.Procedure<MetadataDataset>() { @Override public void apply(MetadataDataset input) throws Exception { Map<String, String> existingProperties = input.getProperties(entityId); Set<String> existingTags = input.getTags(entityId); previousRef.set(new MetadataRecord(entityId, scope, existingProperties, existingTags)); input.addTags(entityId, tagsToAdd); } }, scope); publish(previousRef.get(), new MetadataRecord(entityId, scope, EMPTY_PROPERTIES, Sets.newHashSet(tagsToAdd)), new MetadataRecord(entityId, scope)); } @Override public Set<MetadataRecord> getMetadata(Id.NamespacedId entityId) { return ImmutableSet.of(getMetadata(MetadataScope.USER, entityId), getMetadata(MetadataScope.SYSTEM, entityId)); } @Override public MetadataRecord getMetadata(final MetadataScope scope, final Id.NamespacedId entityId) { return execute(new TransactionExecutor.Function<MetadataDataset, MetadataRecord>() { @Override public MetadataRecord apply(MetadataDataset input) throws Exception { Map<String, String> properties = input.getProperties(entityId); Set<String> tags = input.getTags(entityId); return new MetadataRecord(entityId, scope, properties, tags); } }, scope); } /** * @return a set of {@link MetadataRecord}s representing all the metadata (including properties and tags) * for the specified set of {@link Id.NamespacedId}s. */ @Override public Set<MetadataRecord> getMetadata(final MetadataScope scope, final Set<Id.NamespacedId> entityIds) { return execute(new TransactionExecutor.Function<MetadataDataset, Set<MetadataRecord>>() { @Override public Set<MetadataRecord> apply(MetadataDataset input) throws Exception { Set<MetadataRecord> metadataRecords = new HashSet<>(entityIds.size()); for (Id.NamespacedId entityId : entityIds) { Map<String, String> properties = input.getProperties(entityId); Set<String> tags = input.getTags(entityId); metadataRecords.add(new MetadataRecord(entityId, scope, properties, tags)); } return metadataRecords; } }, scope); } @Override public Map<String, String> getProperties(Id.NamespacedId entityId) { return ImmutableMap.<String, String>builder() .putAll(getProperties(MetadataScope.USER, entityId)) .putAll(getProperties(MetadataScope.SYSTEM, entityId)) .build(); } /** * @return the metadata for the specified {@link Id.NamespacedId} */ @Override public Map<String, String> getProperties(MetadataScope scope, final Id.NamespacedId entityId) { return execute(new TransactionExecutor.Function<MetadataDataset, Map<String, String>>() { @Override public Map<String, String> apply(MetadataDataset input) throws Exception { return input.getProperties(entityId); } }, scope); } @Override public Set<String> getTags(Id.NamespacedId entityId) { return ImmutableSet.<String>builder() .addAll(getTags(MetadataScope.USER, entityId)) .addAll(getTags(MetadataScope.SYSTEM, entityId)) .build(); } /** * @return the tags for the specified {@link Id.NamespacedId} */ @Override public Set<String> getTags(MetadataScope scope, final Id.NamespacedId entityId) { return execute(new TransactionExecutor.Function<MetadataDataset, Set<String>>() { @Override public Set<String> apply(MetadataDataset input) throws Exception { return input.getTags(entityId); } }, scope); } @Override public void removeMetadata(Id.NamespacedId entityId) { removeMetadata(MetadataScope.USER, entityId); removeMetadata(MetadataScope.SYSTEM, entityId); } /** * Removes all metadata (including properties and tags) for the specified {@link Id.NamespacedId}. */ @Override public void removeMetadata(final MetadataScope scope, final Id.NamespacedId entityId) { final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>(); execute(new TransactionExecutor.Procedure<MetadataDataset>() { @Override public void apply(MetadataDataset input) throws Exception { previousRef.set(new MetadataRecord(entityId, scope, input.getProperties(entityId), input.getTags(entityId))); input.removeProperties(entityId); input.removeTags(entityId); } }, scope); MetadataRecord previous = previousRef.get(); publish(previous, new MetadataRecord(entityId, scope), new MetadataRecord(previous)); } /** * Removes all properties for the specified {@link Id.NamespacedId}. */ @Override public void removeProperties(final MetadataScope scope, final Id.NamespacedId entityId) { final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>(); execute(new TransactionExecutor.Procedure<MetadataDataset>() { @Override public void apply(MetadataDataset input) throws Exception { previousRef.set(new MetadataRecord(entityId, scope, input.getProperties(entityId), input.getTags(entityId))); input.removeProperties(entityId); } }, scope); publish(previousRef.get(), new MetadataRecord(entityId, scope), new MetadataRecord(entityId, scope, previousRef.get().getProperties(), EMPTY_TAGS)); } /** * Removes the specified properties of the {@link Id.NamespacedId}. */ @Override public void removeProperties(final MetadataScope scope, final Id.NamespacedId entityId, final String... keys) { final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>(); final ImmutableMap.Builder<String, String> deletesBuilder = ImmutableMap.builder(); execute(new TransactionExecutor.Procedure<MetadataDataset>() { @Override public void apply(MetadataDataset input) throws Exception { previousRef.set(new MetadataRecord(entityId, scope, input.getProperties(entityId), input.getTags(entityId))); for (String key : keys) { MetadataEntry record = input.getProperty(entityId, key); if (record == null) { continue; } deletesBuilder.put(record.getKey(), record.getValue()); } input.removeProperties(entityId, keys); } }, scope); publish(previousRef.get(), new MetadataRecord(entityId, scope), new MetadataRecord(entityId, scope, deletesBuilder.build(), EMPTY_TAGS)); } /** * Removes all the tags from the {@link Id.NamespacedId} */ @Override public void removeTags(final MetadataScope scope, final Id.NamespacedId entityId) { final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>(); execute(new TransactionExecutor.Procedure<MetadataDataset>() { @Override public void apply(MetadataDataset input) throws Exception { previousRef.set(new MetadataRecord(entityId, scope, input.getProperties(entityId), input.getTags(entityId))); input.removeTags(entityId); } }, scope); MetadataRecord previous = previousRef.get(); publish(previous, new MetadataRecord(entityId, scope), new MetadataRecord(entityId, scope, EMPTY_PROPERTIES, previous.getTags())); } /** * Removes the specified tags from the {@link Id.NamespacedId} */ @Override public void removeTags(final MetadataScope scope, final Id.NamespacedId entityId, final String ... tagsToRemove) { final AtomicReference<MetadataRecord> previousRef = new AtomicReference<>(); execute(new TransactionExecutor.Procedure<MetadataDataset>() { @Override public void apply(MetadataDataset input) throws Exception { previousRef.set(new MetadataRecord(entityId, scope, input.getProperties(entityId), input.getTags(entityId))); input.removeTags(entityId, tagsToRemove); } }, scope); publish(previousRef.get(), new MetadataRecord(entityId, scope), new MetadataRecord(entityId, scope, EMPTY_PROPERTIES, Sets.newHashSet(tagsToRemove))); } @Override public Set<MetadataSearchResultRecord> searchMetadata(String namespaceId, String searchQuery) { return ImmutableSet.<MetadataSearchResultRecord>builder() .addAll(searchMetadata(MetadataScope.USER, namespaceId, searchQuery)) .addAll(searchMetadata(MetadataScope.SYSTEM, namespaceId, searchQuery)) .build(); } @Override public Set<MetadataSearchResultRecord> searchMetadata(MetadataScope scope, String namespaceId, String searchQuery) { return searchMetadataOnType(scope, namespaceId, searchQuery, ImmutableSet.of(MetadataSearchTargetType.ALL)); } @Override public Set<MetadataSearchResultRecord> searchMetadataOnType(String namespaceId, String searchQuery, Set<MetadataSearchTargetType> types) { return ImmutableSet.<MetadataSearchResultRecord>builder() .addAll(searchMetadataOnType(MetadataScope.USER, namespaceId, searchQuery, types)) .addAll(searchMetadataOnType(MetadataScope.SYSTEM, namespaceId, searchQuery, types)) .build(); } @Override public Set<MetadataSearchResultRecord> searchMetadataOnType(final MetadataScope scope, final String namespaceId, final String searchQuery, final Set<MetadataSearchTargetType> types) { // Execute search query Iterable<MetadataEntry> results = execute(new TransactionExecutor.Function<MetadataDataset, Iterable<MetadataEntry>>() { @Override public Iterable<MetadataEntry> apply(MetadataDataset input) throws Exception { return input.search(namespaceId, searchQuery, types); } }, scope); // Score results final Map<Id.NamespacedId, Integer> weightedResults = new HashMap<>(); for (MetadataEntry metadataEntry : results) { Integer score = weightedResults.get(metadataEntry.getTargetId()); score = score == null ? 0 : score; weightedResults.put(metadataEntry.getTargetId(), score + 1); } // Sort the results by score List<Map.Entry<Id.NamespacedId, Integer>> resultList = new ArrayList<>(weightedResults.entrySet()); Collections.sort(resultList, SEARCH_RESULT_DESC_SCORE_COMPARATOR); // Fetch metadata for entities in the result list // Note: since the fetch is happening in a different transaction, the metadata for entities may have been // removed. It is okay not to have metadata for some results in case this happens. Map<Id.NamespacedId, Metadata> systemMetadata = fetchMetadata(weightedResults.keySet(), MetadataScope.SYSTEM); Map<Id.NamespacedId, Metadata> userMetadata = fetchMetadata(weightedResults.keySet(), MetadataScope.USER); return addMetadataToResults(resultList, systemMetadata, userMetadata); } private Map<Id.NamespacedId, Metadata> fetchMetadata(final Set<Id.NamespacedId> entityIds, MetadataScope scope) { Set<Metadata> metadataSet = execute(new TransactionExecutor.Function<MetadataDataset, Set<Metadata>>() { @Override public Set<Metadata> apply(MetadataDataset input) throws Exception { return input.getMetadata(entityIds); } }, scope); Map<Id.NamespacedId, Metadata> metadataMap = new HashMap<>(); for (Metadata m : metadataSet) { metadataMap.put(m.getEntityId(), m); } return metadataMap; } Set<MetadataSearchResultRecord> addMetadataToResults(List<Map.Entry<Id.NamespacedId, Integer>> results, Map<Id.NamespacedId, Metadata> systemMetadata, Map<Id.NamespacedId, Metadata> userMetadata) { Set<MetadataSearchResultRecord> result = new LinkedHashSet<>(); for (Map.Entry<Id.NamespacedId, Integer> entry : results) { ImmutableMap.Builder<MetadataScope, co.cask.cdap.proto.metadata.Metadata> builder = ImmutableMap.builder(); // Add system metadata Metadata metadata = systemMetadata.get(entry.getKey()); if (metadata != null) { builder.put(MetadataScope.SYSTEM, new co.cask.cdap.proto.metadata.Metadata(metadata.getProperties(), metadata.getTags())); } // Add user metadata metadata = userMetadata.get(entry.getKey()); if (metadata != null) { builder.put(MetadataScope.USER, new co.cask.cdap.proto.metadata.Metadata(metadata.getProperties(), metadata.getTags())); } // Create result result.add(new MetadataSearchResultRecord(entry.getKey(), builder.build())); } return result; } @Override public Set<MetadataRecord> getSnapshotBeforeTime(final Set<Id.NamespacedId> entityIds, final long timeMillis) { return ImmutableSet.<MetadataRecord>builder() .addAll(getSnapshotBeforeTime(MetadataScope.USER, entityIds, timeMillis)) .addAll(getSnapshotBeforeTime(MetadataScope.SYSTEM, entityIds, timeMillis)) .build(); } @Override public Set<MetadataRecord> getSnapshotBeforeTime(MetadataScope scope, final Set<Id.NamespacedId> entityIds, final long timeMillis) { Set<Metadata> metadataHistoryEntries = execute(new TransactionExecutor.Function<MetadataDataset, Set<Metadata>>() { @Override public Set<Metadata> apply(MetadataDataset input) throws Exception { return input.getSnapshotBeforeTime(entityIds, timeMillis); } }, scope); ImmutableSet.Builder<MetadataRecord> builder = ImmutableSet.builder(); for (Metadata metadata : metadataHistoryEntries) { builder.add(new MetadataRecord(metadata.getEntityId(), scope, metadata.getProperties(), metadata.getTags())); } return builder.build(); } @Override public void rebuildIndexes() { byte[] row = null; while ((row = rebuildIndex(row, MetadataScope.SYSTEM)) != null) { LOG.debug("Completed a batch for rebuilding system metadata indexes."); } while ((row = rebuildIndex(row, MetadataScope.USER)) != null) { LOG.debug("Completed a batch for rebuilding business metadata indexes."); } } @Override public void deleteAllIndexes() { while (deleteBatch(MetadataScope.SYSTEM) != 0) { LOG.debug("Deleted a batch of system metadata indexes."); } while (deleteBatch(MetadataScope.USER) != 0) { LOG.debug("Deleted a batch of business metadata indexes."); } } private void publish(MetadataRecord previous, MetadataRecord additions, MetadataRecord deletions) { MetadataChangeRecord.MetadataDiffRecord diff = new MetadataChangeRecord.MetadataDiffRecord(additions, deletions); MetadataChangeRecord changeRecord = new MetadataChangeRecord(previous, diff, System.currentTimeMillis()); changePublisher.publish(changeRecord); publishAudit(previous, additions, deletions); } private void publishAudit(MetadataRecord previous, MetadataRecord additions, MetadataRecord deletions) { MetadataPayloadBuilder builder = new MetadataPayloadBuilder(); builder.addPrevious(previous); builder.addAdditions(additions); builder.addDeletions(deletions); AuditPublishers.publishAudit(auditPublisher, previous.getEntityId(), AuditType.METADATA_CHANGE, builder.build()); } private <T> T execute(TransactionExecutor.Function<MetadataDataset, T> func, MetadataScope scope) { MetadataDataset metadataDataset = newMetadataDataset(scope); TransactionExecutor txExecutor = Transactions.createTransactionExecutor(txExecutorFactory, metadataDataset); return txExecutor.executeUnchecked(func, metadataDataset); } private void execute(TransactionExecutor.Procedure<MetadataDataset> func, MetadataScope scope) { MetadataDataset metadataDataset = newMetadataDataset(scope); TransactionExecutor txExecutor = Transactions.createTransactionExecutor(txExecutorFactory, metadataDataset); txExecutor.executeUnchecked(func, metadataDataset); } private byte[] rebuildIndex(final byte[] startRowKey, MetadataScope scope) { return execute(new TransactionExecutor.Function<MetadataDataset, byte[]>() { @Override public byte[] apply(MetadataDataset input) throws Exception { return input.rebuildIndexes(startRowKey, BATCH_SIZE); } }, scope); } private int deleteBatch(MetadataScope scope) { return execute(new TransactionExecutor.Function<MetadataDataset, Integer>() { @Override public Integer apply(MetadataDataset input) throws Exception { return input.deleteAllIndexes(BATCH_SIZE); } }, scope); } private MetadataDataset newMetadataDataset(MetadataScope scope) { try { return DatasetsUtil.getOrCreateDataset( dsFramework, getMetadataDatasetInstance(scope), MetadataDataset.class.getName(), DatasetProperties.EMPTY, DatasetDefinition.NO_ARGUMENTS, null); } catch (Exception e) { throw Throwables.propagate(e); } } private Id.DatasetInstance getMetadataDatasetInstance(MetadataScope scope) { return MetadataScope.USER == scope ? BUSINESS_METADATA_INSTANCE_ID : SYSTEM_METADATA_INSTANCE_ID; } /** * Adds datasets and types to the given {@link DatasetFramework}. Used by the upgrade tool to upgrade Metadata * Datasets. * * @param framework Dataset framework to add types and datasets to */ public static void setupDatasets(DatasetFramework framework) throws IOException, DatasetManagementException { framework.addInstance(MetadataDataset.class.getName(), BUSINESS_METADATA_INSTANCE_ID, DatasetProperties.EMPTY); framework.addInstance(MetadataDataset.class.getName(), SYSTEM_METADATA_INSTANCE_ID, DatasetProperties.EMPTY); } }