/* * Copyright 2015-2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.data2.metadata.dataset; import co.cask.cdap.api.common.Bytes; import co.cask.cdap.api.dataset.lib.AbstractDataset; import co.cask.cdap.api.dataset.lib.IndexedTable; import co.cask.cdap.api.dataset.table.Delete; import co.cask.cdap.api.dataset.table.Put; import co.cask.cdap.api.dataset.table.Row; import co.cask.cdap.api.dataset.table.Scan; import co.cask.cdap.api.dataset.table.Scanner; import co.cask.cdap.common.utils.ImmutablePair; import co.cask.cdap.data2.dataset2.lib.table.FuzzyRowFilter; import co.cask.cdap.data2.dataset2.lib.table.MDSKey; import co.cask.cdap.data2.metadata.indexer.DefaultValueIndexer; import co.cask.cdap.data2.metadata.indexer.Indexer; import co.cask.cdap.data2.metadata.indexer.SchemaIndexer; import co.cask.cdap.proto.Id; import co.cask.cdap.proto.codec.NamespacedIdCodec; import co.cask.cdap.proto.metadata.MetadataSearchTargetType; import com.google.common.base.Joiner; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.base.Splitter; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; import javax.annotation.Nullable; /** * Dataset that manages Metadata using an {@link IndexedTable}. */ public class MetadataDataset extends AbstractDataset { private static final Logger LOG = LoggerFactory.getLogger(MetadataDataset.class); private static final Gson GSON = new GsonBuilder() .registerTypeAdapter(Id.NamespacedId.class, new NamespacedIdCodec()) .create(); private static final Pattern SPACE_SEPARATOR_PATTERN = Pattern.compile("\\s+"); private static final String HISTORY_COLUMN = "h"; // column for metadata history private static final String VALUE_COLUMN = "v"; // column for metadata value private static final String TAGS_SEPARATOR = ","; // Fuzzy key is of form <row key, key mask>. We want to compare row keys. private static final Comparator<ImmutablePair<byte[], byte[]>> FUZZY_KEY_COMPARATOR = new Comparator<ImmutablePair<byte[], byte[]>>() { @Override public int compare(ImmutablePair<byte[], byte[]> o1, ImmutablePair<byte[], byte[]> o2) { return Bytes.compareTo(o1.getFirst(), o2.getFirst()); } }; static final String INDEX_COLUMN = "i"; // column for metadata indexes public static final String TAGS_KEY = "tags"; public static final String KEYVALUE_SEPARATOR = ":"; private final IndexedTable indexedTable; public MetadataDataset(IndexedTable indexedTable) { super("metadataDataset", indexedTable); this.indexedTable = indexedTable; } /** * Add new metadata. * * @param metadataEntry The value of the metadata to be saved. * @param indexer the indexer to use to create indexes for this {@link MetadataEntry} */ private void setMetadata(MetadataEntry metadataEntry, @Nullable Indexer indexer) { Id.NamespacedId targetId = metadataEntry.getTargetId(); // Put to the default column. write(targetId, metadataEntry, indexer == null ? new DefaultValueIndexer() : indexer); } /** * Sets a metadata property for the specified {@link Id.NamespacedId}. * * @param targetId The target Id: {@link Id.Application} / {@link Id.Program} / * {@link Id.DatasetInstance}/{@link Id.Stream} * @param key The metadata key to be added * @param value The metadata value to be added */ public void setProperty(Id.NamespacedId targetId, String key, String value) { setProperty(targetId, key, value, null); } /** * Sets a metadata property for the specified {@link Id.NamespacedId}. * * @param targetId The target Id: {@link Id.Application} / {@link Id.Program} / * {@link Id.DatasetInstance}/{@link Id.Stream} * @param key The metadata key to be added * @param value The metadata value to be added * @param indexer the indexer to use to create indexes for this key-value property */ public void setProperty(Id.NamespacedId targetId, String key, String value, @Nullable Indexer indexer) { setMetadata(new MetadataEntry(targetId, key, value), indexer); } /** * Replaces existing tags of the specified {@link Id.NamespacedId} with a new set of tags. * * @param targetId The target Id: app-id(ns+app) / program-id(ns+app+pgtype+pgm) / * dataset-id(ns+dataset)/stream-id(ns+stream) * @param tags the tags to set */ private void setTags(Id.NamespacedId targetId, String ... tags) { MetadataEntry tagsEntry = new MetadataEntry(targetId, TAGS_KEY, Joiner.on(TAGS_SEPARATOR).join(tags)); setMetadata(tagsEntry, null); } /** * Adds a new tag for the specified {@link Id.NamespacedId}. * * @param targetId the target Id: app-id(ns+app) / program-id(ns+app+pgtype+pgm) / * dataset-id(ns+dataset)/stream-id(ns+stream). * @param tagsToAdd the tags to add */ public void addTags(Id.NamespacedId targetId, String ... tagsToAdd) { Set<String> existingTags = getTags(targetId); Iterable<String> newTags = Iterables.concat(existingTags, Arrays.asList(tagsToAdd)); MetadataEntry newTagsEntry = new MetadataEntry(targetId, TAGS_KEY, Joiner.on(TAGS_SEPARATOR).join(newTags)); setMetadata(newTagsEntry, null); } /** * Return metadata based on target id, and key. * * @param targetId The id of the target * @param key The metadata key to get * @return instance of {@link MetadataEntry} for the target type, id, and key */ @Nullable private MetadataEntry getMetadata(Id.NamespacedId targetId, String key) { MDSKey mdsKey = MdsKey.getMDSValueKey(targetId, key); Row row = indexedTable.get(mdsKey.getKey()); if (row.isEmpty()) { return null; } byte[] value = row.get(VALUE_COLUMN); if (value == null) { // This can happen when all tags are moved one by one. The row still exists, but the value is null. return null; } return new MetadataEntry(targetId, key, Bytes.toString(value)); } /** * Retrieve the {@link MetadataEntry} corresponding to the specified key for the {@link Id.NamespacedId}. * * @param targetId the {@link Id.NamespacedId} for which the {@link MetadataEntry} is to be retrieved * @param key the property key for which the {@link MetadataEntry} is to be retrieved * @return the {@link MetadataEntry} corresponding to the specified key for the {@link Id.NamespacedId} */ @Nullable public MetadataEntry getProperty(Id.NamespacedId targetId, String key) { return getMetadata(targetId, key); } /** * Retrieves the metadata for the specified {@link Id.NamespacedId}. * * @param targetId the specified {@link Id.NamespacedId} * @return a Map representing the metadata for the specified {@link Id.NamespacedId} */ private Map<String, String> getMetadata(Id.NamespacedId targetId) { String targetType = KeyHelper.getTargetType(targetId); MDSKey mdsKey = MdsKey.getMDSValueKey(targetId, null); byte[] startKey = mdsKey.getKey(); byte[] stopKey = Bytes.stopKeyForPrefix(startKey); Map<String, String> metadata = new HashMap<>(); Scanner scan = indexedTable.scan(startKey, stopKey); try { Row next; while ((next = scan.next()) != null) { String key = MdsKey.getMetadataKey(targetType, next.getRow()); byte[] value = next.get(VALUE_COLUMN); if (key == null || value == null) { continue; } metadata.put(key, Bytes.toString(value)); } return metadata; } finally { scan.close(); } } /** * Retrieves all the properties for the specified {@link Id.NamespacedId}. * * @param targetId the {@link Id.NamespacedId} for which properties are to be retrieved * @return the properties of the specified {@link Id.NamespacedId} */ public Map<String, String> getProperties(Id.NamespacedId targetId) { Map<String, String> properties = getMetadata(targetId); properties.remove(TAGS_KEY); // remove tags return properties; } /** * Retrieves all the tags for the specified {@link Id.NamespacedId}. * * @param targetId the {@link Id.NamespacedId} for which tags are to be retrieved * @return the tags of the specified {@link Id.NamespacedId} */ public Set<String> getTags(Id.NamespacedId targetId) { MetadataEntry tags = getMetadata(targetId, TAGS_KEY); if (tags == null) { return new HashSet<>(); } return splitTags(tags.getValue()); } private static HashSet<String> splitTags(String tags) { return Sets.newHashSet(Splitter.on(TAGS_SEPARATOR).omitEmptyStrings().trimResults().split(tags)); } /** * Removes all metadata for the specified {@link Id.NamespacedId}. * * @param targetId the {@link Id.NamespacedId} for which metadata is to be removed */ private void removeMetadata(Id.NamespacedId targetId) { removeMetadata(targetId, Predicates.<String>alwaysTrue()); } /** * Removes the specified keys from the metadata of the specified {@link Id.NamespacedId}. * * @param targetId the {@link Id.NamespacedId} for which the specified metadata keys are to be removed * @param keys the keys to remove from the metadata of the specified {@link Id.NamespacedId} */ private void removeMetadata(Id.NamespacedId targetId, String ... keys) { final Set<String> keySet = Sets.newHashSet(keys); removeMetadata(targetId, new Predicate<String>() { @Override public boolean apply(String input) { return keySet.contains(input); } }); } /** * Removes all keys that satisfy a given predicate from the metadata of the specified {@link Id.NamespacedId}. * * @param targetId the {@link Id.NamespacedId} for which keys are to be removed * @param filter the {@link Predicate} that should be satisfied to remove a key */ private void removeMetadata(Id.NamespacedId targetId, Predicate<String> filter) { String targetType = KeyHelper.getTargetType(targetId); MDSKey mdsKey = MdsKey.getMDSValueKey(targetId, null); byte[] prefix = mdsKey.getKey(); byte[] stopKey = Bytes.stopKeyForPrefix(prefix); List<String> deletedMetadataKeys = new LinkedList<>(); Scanner scan = indexedTable.scan(prefix, stopKey); try { Row next; while ((next = scan.next()) != null) { String value = next.getString(VALUE_COLUMN); if (value == null) { continue; } String metadataKey = MdsKey.getMetadataKey(targetType, next.getRow()); if (filter.apply(metadataKey)) { indexedTable.delete(new Delete(next.getRow())); // store the key to delete its indexes later deletedMetadataKeys.add(metadataKey); } } } finally { scan.close(); } // delete all the indexes for all deleted metadata key for (String deletedMetadataKey : deletedMetadataKeys) { deleteIndexes(targetId, deletedMetadataKey); } writeHistory(targetId); } /** * Deletes all indexes associated with a metadata key * * @param targetId the {@link Id.NamespacedId} for which keys are to be removed * @param metadataKey the key to remove from the metadata of the specified {@link Id.NamespacedId} */ private void deleteIndexes(Id.NamespacedId targetId, String metadataKey) { MDSKey mdsKey = MdsKey.getMDSIndexKey(targetId, metadataKey, null); byte[] startKey = mdsKey.getKey(); byte[] stopKey = Bytes.stopKeyForPrefix(startKey); Scanner scan = indexedTable.scan(startKey, stopKey); try { Row next; while ((next = scan.next()) != null) { deleteIndexRow(next); } } finally { scan.close(); } } /** * Removes the specified keys from the metadata properties of an entity. * * @param targetId the {@link Id.NamespacedId} from which to remove the specified keys * @param keys the keys to remove */ public void removeProperties(Id.NamespacedId targetId, String ... keys) { removeMetadata(targetId, keys); } /** * Removes the specified tags from the specified entity. * * @param targetId the {@link Id.NamespacedId} from which to remove the specified tags * @param tagsToRemove the tags to remove */ public void removeTags(Id.NamespacedId targetId, String... tagsToRemove) { Set<String> existingTags = getTags(targetId); if (existingTags.isEmpty()) { // nothing to remove return; } Iterables.removeAll(existingTags, Arrays.asList(tagsToRemove)); // call remove metadata for tags which will delete all the existing indexes for tags of this targetId removeMetadata(targetId, TAGS_KEY); setTags(targetId, Iterables.toArray(existingTags, String.class)); } /** * Removes all properties from the specified entity. * * @param targetId the {@link Id.NamespacedId} for which to remove the properties */ public void removeProperties(Id.NamespacedId targetId) { removeMetadata(targetId, new Predicate<String>() { @Override public boolean apply(String input) { return !TAGS_KEY.equals(input); } }); } /** * Removes all tags from the specified entity. * * @param targetId the {@link Id.NamespacedId} for which to remove the tags */ public void removeTags(Id.NamespacedId targetId) { removeMetadata(targetId, new Predicate<String>() { @Override public boolean apply(String input) { return TAGS_KEY.equals(input); } }); } /** * Returns the snapshot of the metadata for entities on or before the given time. * @param targetIds entity ids * @param timeMillis time in milliseconds * @return the snapshot of the metadata for entities on or before the given time */ public Set<Metadata> getSnapshotBeforeTime(Set<Id.NamespacedId> targetIds, long timeMillis) { ImmutableSet.Builder<Metadata> builder = ImmutableSet.builder(); for (Id.NamespacedId entityId : targetIds) { builder.add(getSnapshotBeforeTime(entityId, timeMillis)); } return builder.build(); } private Metadata getSnapshotBeforeTime(Id.NamespacedId targetId, long timeMillis) { byte[] scanStartKey = MdsHistoryKey.getMdsScanStartKey(targetId, timeMillis).getKey(); byte[] scanEndKey = MdsHistoryKey.getMdsScanEndKey(targetId).getKey(); // TODO: add limit to scan, we need only one row Scanner scanner = indexedTable.scan(scanStartKey, scanEndKey); try { Row next = scanner.next(); if (next != null) { return GSON.fromJson(next.getString(HISTORY_COLUMN), Metadata.class); } else { return new Metadata(targetId); } } finally { scanner.close(); } } /** * Returns metadata for a given set of entities * * @param targetIds entities for which metadata is required * @return map of entitiyId to set of metadata for that entity */ public Set<Metadata> getMetadata(Set<? extends Id.NamespacedId> targetIds) { if (targetIds.isEmpty()) { return Collections.emptySet(); } List<ImmutablePair<byte [], byte []>> fuzzyKeys = new ArrayList<>(); for (Id.NamespacedId targetId : targetIds) { fuzzyKeys.add(getFuzzyKeyFor(targetId)); } // Sort fuzzy keys Collections.sort(fuzzyKeys, FUZZY_KEY_COMPARATOR); // Scan using fuzzy filter. Scan returns one row per property. // Group the rows on entityId Multimap<Id.NamespacedId, MetadataEntry> metadataMap = HashMultimap.create(); byte[] start = fuzzyKeys.get(0).getFirst(); byte[] end = Bytes.stopKeyForPrefix(fuzzyKeys.get(fuzzyKeys.size() - 1).getFirst()); Scanner scan = indexedTable.scan(new Scan(start, end, new FuzzyRowFilter(fuzzyKeys))); try { Row next; while ((next = scan.next()) != null) { MetadataEntry metadataEntry = convertRow(next); if (metadataEntry != null) { metadataMap.put(metadataEntry.getTargetId(), metadataEntry); } } } finally { scan.close(); } // Create metadata objects for each entity from grouped rows Set<Metadata> metadataSet = new HashSet<>(); for (Map.Entry<Id.NamespacedId, Collection<MetadataEntry>> entry : metadataMap.asMap().entrySet()) { Map<String, String> properties = new HashMap<>(); Set<String> tags = Collections.emptySet(); for (MetadataEntry metadataEntry : entry.getValue()) { if (TAGS_KEY.equals(metadataEntry.getKey())) { tags = splitTags(metadataEntry.getValue()); } else { properties.put(metadataEntry.getKey(), metadataEntry.getValue()); } } metadataSet.add(new Metadata(entry.getKey(), properties, tags)); } return metadataSet; } @Nullable private MetadataEntry convertRow(Row row) { byte[] rowKey = row.getRow(); String targetType = MdsKey.getTargetType(rowKey); Id.NamespacedId namespacedId = MdsKey.getNamespacedIdFromKey(targetType, rowKey); String key = MdsKey.getMetadataKey(targetType, rowKey); byte[] value = row.get(VALUE_COLUMN); if (key == null || value == null) { return null; } return new MetadataEntry(namespacedId, key, Bytes.toString(value)); } private ImmutablePair<byte[], byte[]> getFuzzyKeyFor(Id.NamespacedId targetId) { // We need to create fuzzy pairs to match the first part of the key containing targetId MDSKey mdsKey = MdsKey.getMDSValueKey(targetId, null); byte[] keyBytes = mdsKey.getKey(); // byte array is automatically initialized to 0, which implies fixed match in fuzzy info // the row key after targetId doesn't need to be a match. // Workaround for HBASE-15676, need to have at least one 1 in the fuzzy filter byte[] infoBytes = new byte[keyBytes.length + 1]; infoBytes[infoBytes.length - 1] = 1; // the key array size and mask array size has to be equal so increase the size by 1 return new ImmutablePair<>(Bytes.concat(keyBytes, new byte[1]), infoBytes); } /** * Searches entities that match the specified search query in the specified namespace and {@link Id.Namespace#SYSTEM} * for the specified {@link MetadataSearchTargetType}. * * @param namespaceId the namespace to search in * @param searchQuery the search query, which could be of two forms: [key]:[value] or just [value] and can have '*' * at the end for a prefix search * @param types the {@link MetadataSearchTargetType} to restrict the search to, if empty all types are searched */ public List<MetadataEntry> search(String namespaceId, String searchQuery, Set<MetadataSearchTargetType> types) { boolean includeAllTypes = types.isEmpty() || types.contains(MetadataSearchTargetType.ALL); List<MetadataEntry> results = new ArrayList<>(); for (String searchTerm : getSearchTerms(namespaceId, searchQuery)) { Scanner scanner; if (searchTerm.endsWith("*")) { // if prefixed search get start and stop key byte[] startKey = Bytes.toBytes(searchTerm.substring(0, searchTerm.lastIndexOf("*"))); byte[] stopKey = Bytes.stopKeyForPrefix(startKey); scanner = indexedTable.scanByIndex(Bytes.toBytes(INDEX_COLUMN), startKey, stopKey); } else { byte[] value = Bytes.toBytes(searchTerm); scanner = indexedTable.readByIndex(Bytes.toBytes(INDEX_COLUMN), value); } try { Row next; while ((next = scanner.next()) != null) { String rowValue = next.getString(INDEX_COLUMN); if (rowValue == null) { continue; } final byte[] rowKey = next.getRow(); String targetType = MdsKey.getTargetType(rowKey); // Filter on target type if not set to include all types if (!includeAllTypes && !types.contains(MetadataSearchTargetType.valueOfSerializedForm(targetType))) { continue; } Id.NamespacedId targetId = MdsKey.getNamespacedIdFromKey(targetType, rowKey); String key = MdsKey.getMetadataKey(targetType, rowKey); MetadataEntry entry = getMetadata(targetId, key); results.add(entry); } } finally { scanner.close(); } } return results; } /** * Prepares search terms from the specified search query by * <ol> * <li>Splitting on {@link #SPACE_SEPARATOR_PATTERN} and trimming</li> * <li>Handling {@link #KEYVALUE_SEPARATOR}, so searches of the pattern key:value* can be supported</li> * <li>Prepending the result with the specified namespaceId and {@link Id.Namespace#SYSTEM} so the search can * be restricted to entities in the specified namespace and {@link Id.Namespace#SYSTEM}.</li> * </ol>t * * @param namespaceId the namespaceId to search in * @param searchQuery the user specified search query * @return formatted search query which is namespaced */ private Iterable<String> getSearchTerms(String namespaceId, String searchQuery) { List<String> searchTerms = new ArrayList<>(); for (String term : Splitter.on(SPACE_SEPARATOR_PATTERN).omitEmptyStrings().trimResults().split(searchQuery)) { String formattedSearchTerm = term.toLowerCase(); // if this is a key:value search remove spaces around the separator too if (formattedSearchTerm.contains(KEYVALUE_SEPARATOR)) { // split the search query in two parts on first occurrence of KEYVALUE_SEPARATOR and the trim the key and value String[] split = formattedSearchTerm.split(KEYVALUE_SEPARATOR, 2); formattedSearchTerm = split[0].trim() + KEYVALUE_SEPARATOR + split[1].trim(); } searchTerms.add(namespaceId + KEYVALUE_SEPARATOR + formattedSearchTerm); // for non-system namespaces, also add the system namespace, so entities from system namespace are surfaced // in the search results as well if (!Id.Namespace.SYSTEM.getId().equals(namespaceId)) { searchTerms.add(Id.Namespace.SYSTEM.getId() + KEYVALUE_SEPARATOR + formattedSearchTerm); } } return searchTerms; } private void write(Id.NamespacedId targetId, MetadataEntry entry, Indexer indexer) { String key = entry.getKey(); MDSKey mdsValueKey = MdsKey.getMDSValueKey(targetId, key); Put put = new Put(mdsValueKey.getKey()); // add the metadata value put.add(Bytes.toBytes(VALUE_COLUMN), Bytes.toBytes(entry.getValue())); indexedTable.put(put); storeIndexes(targetId, entry.getKey(), indexer.getIndexes(entry)); writeHistory(targetId); } /** * Store indexes for a {@link MetadataEntry} * * @param targetId the {@link Id.NamespacedId} from which the metadata indexes has to be stored * @param metadataKey the metadata key for which the indexes are to be stored * @param indexes {@link Set<String>} of indexes to store for this {@link MetadataEntry} */ private void storeIndexes(Id.NamespacedId targetId, String metadataKey, Set<String> indexes) { // Delete existing indexes for targetId-key deleteIndexes(targetId, metadataKey); for (String index : indexes) { // store the index with key of the metadata, so that we allow searches of the form [key]:[value] indexedTable.put(getIndexPut(targetId, metadataKey, metadataKey + KEYVALUE_SEPARATOR + index)); // store just the index value indexedTable.put(getIndexPut(targetId, metadataKey, index)); } } /** * Creates a {@link Put} for the a metadata index * * @param targetId the {@link Id.NamespacedId} from which the metadata index has to be created * @param metadataKey the key of the metadata entry * @param index the index for this metadata * @return {@link Put} which is a index row with the value to be indexed in the {@link #INDEX_COLUMN} */ private Put getIndexPut(Id.NamespacedId targetId, String metadataKey, String index) { MDSKey mdsIndexKey = MdsKey.getMDSIndexKey(targetId, metadataKey, index.toLowerCase()); String namespacedIndex = MdsKey.getNamespaceId(mdsIndexKey) + KEYVALUE_SEPARATOR + index.toLowerCase(); Put put = new Put(mdsIndexKey.getKey()); put.add(Bytes.toBytes(INDEX_COLUMN), Bytes.toBytes(namespacedIndex)); return put; } /** * Snapshots the metadata for the given targetId at the given time. * @param targetId target id for which metadata needs snapshotting */ private void writeHistory(Id.NamespacedId targetId) { Map<String, String> properties = getProperties(targetId); Set<String> tags = getTags(targetId); Metadata metadata = new Metadata(targetId, properties, tags); byte[] row = MdsHistoryKey.getMdsKey(targetId, System.currentTimeMillis()).getKey(); indexedTable.put(row, Bytes.toBytes(HISTORY_COLUMN), Bytes.toBytes(GSON.toJson(metadata))); } /** * Rebuilds all the indexes in the {@link MetadataDataset} in batches. * * @param startRowKey the key of the row to start the scan for the current batch with * @param limit the batch size * @return the row key of the last row scanned in the current batch, {@code null} if there are no more rows to scan. */ @Nullable public byte[] rebuildIndexes(@Nullable byte[] startRowKey, int limit) { // Now rebuild indexes for all values in the metadata dataset byte[] valueRowPrefix = MdsKey.getValueRowPrefix(); // If startRow is null, start at the beginning, else start at the provided start row startRowKey = startRowKey == null ? valueRowPrefix : startRowKey; // stopRowKey will always be the last row key with the valueRowPrefix byte[] stopRowKey = Bytes.stopKeyForPrefix(valueRowPrefix); Scanner scanner = indexedTable.scan(startRowKey, stopRowKey); Row row; try { while ((limit > 0) && (row = scanner.next()) != null) { byte[] rowKey = row.getRow(); String targetType = MdsKey.getTargetType(rowKey); Id.NamespacedId namespacedId = MdsKey.getNamespacedIdFromKey(targetType, rowKey); String metadataKey = MdsKey.getMetadataKey(targetType, rowKey); Indexer indexer = getIndexerForKey(metadataKey); MetadataEntry metadataEntry = getMetadata(namespacedId, metadataKey); if (metadataEntry == null) { LOG.warn("Found null metadata entry for a known metadata key {} for entity {} which has an index stored. " + "Ignoring.", metadataKey, namespacedId); continue; } Set<String> indexes = indexer.getIndexes(metadataEntry); // storeIndexes deletes old indexes storeIndexes(namespacedId, metadataKey, indexes); limit--; } Row startRowForNextBatch = scanner.next(); if (startRowForNextBatch == null) { return null; } return startRowForNextBatch.getRow(); } finally { scanner.close(); } } /** * Delete all indexes in the metadata dataset. * * @param limit the number of rows (indexes) to delete * @return the offset at which to start deletion */ public int deleteAllIndexes(int limit) { byte[] indexStartPrefix = MdsKey.getIndexRowPrefix(); byte[] indexStopPrefix = Bytes.stopKeyForPrefix(indexStartPrefix); int count = 0; Scanner scanner = indexedTable.scan(indexStartPrefix, indexStopPrefix); Row row; try { while (count < limit && ((row = scanner.next()) != null)) { if (deleteIndexRow(row)) { count++; } } } finally { scanner.close(); } return count; } // TODO: CDAP-5663 The entire logic of mapping between Indexers and keys should be made internal to MetadataDataset private Indexer getIndexerForKey(String key) { if ("schema".equals(key)) { return new SchemaIndexer(); } return new DefaultValueIndexer(); } /** * Deletes a row if the value in the index column is non-null. This is necessary because at least in the * InMemoryTable implementation, after deleting the index row, the index column still has a {@code null} value in it. * A {@link Scanner} on the table after the delete returns the deleted rows with {@code null} values. * * @param row the row to delete * @return {@code true} if the row was deleted, {@code false} otherwise */ private boolean deleteIndexRow(Row row) { if (row.get(INDEX_COLUMN) == null) { return false; } indexedTable.delete(new Delete(row.getRow())); return true; } }