// SecondaryIndexManager.java example
//
// Explorer
// CassandraQoS-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db.index;

import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.*;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.config.ColumnDefinition;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.compaction.CompactionManager;
import org.apache.cassandra.db.filter.ExtendedFilter;
import org.apache.cassandra.exceptions.ConfigurationException;
import org.apache.cassandra.io.sstable.ReducingKeyIterator;
import org.apache.cassandra.io.sstable.SSTableReader;
import org.apache.cassandra.thrift.IndexExpression;
import org.apache.cassandra.thrift.IndexType;
import org.apache.cassandra.utils.FBUtilities;

/**
 * Manages all the indexes associated with a given ColumnFamilyStore (CFS).
 * Different types of indexes can be created across the same column family (CF).
 */
public class SecondaryIndexManager
{
    private static final Logger logger = LoggerFactory.getLogger(SecondaryIndexManager.class);

    /**
     * Shared no-op {@link Updater}: every callback has an empty body. It is what
     * {@link #updaterFor(DecoratedKey, ColumnFamily)} hands out when no index is registered.
     */
    public static final Updater nullUpdater = new Updater()
    {
        @Override
        public void insert(Column column)
        {
            // intentionally empty
        }

        @Override
        public void update(Column oldColumn, Column column)
        {
            // intentionally empty
        }

        @Override
        public void remove(Column current)
        {
            // intentionally empty
        }

        @Override
        public void updateRowLevelIndexes()
        {
            // intentionally empty
        }
    };

    /**
     * Maps each indexed column name to the index instance registered for it.
     * Entries are added by {@link #addIndexedColumn} and removed by {@link #removeIndexedColumn}.
     */
    private final ConcurrentNavigableMap<ByteBuffer, SecondaryIndex> indexesByColumn;

    /**
     * Keeps a single instance of a SecondaryIndex for many columns when the index
     * is a {@link PerRowSecondaryIndex}; the map is keyed by the index's concrete class.
     *
     * This allows updates to happen to an entire row at once.
     */
    private final ConcurrentMap<Class<? extends SecondaryIndex>, SecondaryIndex> rowLevelIndexMap;

    /**
     * Keeps all secondary index instances, either per-column or per-row.
     * Being a set, an instance shared by several columns is held only once.
     */
    private final Set<SecondaryIndex> allIndexes;

    /**
     * The underlying column family containing the source data for these indexes.
     */
    public final ColumnFamilyStore baseCfs;

    public SecondaryIndexManager(ColumnFamilyStore baseCfs)
    {
        indexesByColumn = new ConcurrentSkipListMap<>();
        rowLevelIndexMap = new ConcurrentHashMap<>();
        allIndexes = Collections.newSetFromMap(new ConcurrentHashMap<SecondaryIndex, Boolean>());

        this.baseCfs = baseCfs;
    }

    /**
     * Synchronizes the registered indexes with the current metadata of the base CF:
     * drops indexes whose column is gone or no longer indexed, registers indexes for
     * newly indexed columns, then calls {@code reload()} on every remaining index.
     */
    public void reload()
    {
        // figure out what needs to be added and dropped.
        // future: if/when we have modifiable settings for secondary indexes,
        // they'll need to be handled here.

        // keySet() is a view of the map, so the removals and additions below show through it.
        Collection<ByteBuffer> currentlyIndexed = indexesByColumn.keySet();

        for (ByteBuffer columnName : currentlyIndexed)
        {
            ColumnDefinition definition = baseCfs.metadata.getColumnDefinition(columnName);
            boolean stillIndexed = definition != null && definition.getIndexType() != null;
            if (!stillIndexed)
                removeIndexedColumn(columnName);
        }

        // TODO: allow all ColumnDefinition type
        for (ColumnDefinition candidate : baseCfs.metadata.allColumns())
        {
            if (candidate.getIndexType() == null)
                continue;
            if (currentlyIndexed.contains(candidate.name))
                continue;
            addIndexedColumn(candidate);
        }

        for (SecondaryIndex registered : allIndexes)
        {
            registered.reload();
        }
    }

    /**
     * @return a new set holding the {@code getIndexName()} of every registered index
     */
    public Set<String> allIndexesNames()
    {
        Set<String> collected = new HashSet<>(allIndexes.size());
        for (SecondaryIndex registered : allIndexes)
        {
            String indexName = registered.getIndexName();
            collected.add(indexName);
        }
        return collected;
    }

    /**
     * Does a full, blocking rebuild of the named indexes from the given sstables.
     * Does nothing if no index name survives the filtering done by {@code filterByColumn}.
     *
     * Caller must acquire and release references to the sstables used here.
     *
     * @param sstables the data to build from
     * @param idxNames the names of the indexes to build; names that do not match a registered
     *                 index covering at least one column of the base CF are dropped
     */
    public void maybeBuildSecondaryIndexes(Collection<SSTableReader> sstables, Set<String> idxNames)
    {
        idxNames = filterByColumn(idxNames);
        if (idxNames.isEmpty())
            return;

        // Parameterized logging: same message text, formatting left to the logging framework.
        logger.info("Submitting index build of {} for data in {}", idxNames, StringUtils.join(sstables, ", "));

        SecondaryIndexBuilder builder = new SecondaryIndexBuilder(baseCfs, idxNames, new ReducingKeyIterator(sstables));
        Future<?> future = CompactionManager.instance.submitIndexBuild(builder);
        // Wait for the submitted build before flushing.
        FBUtilities.waitOnFuture(future);

        flushIndexesBlocking();

        logger.info("Index build of {} complete", idxNames);
    }

    /**
     * Tells whether any index of the supplied set reports that it indexes the given column name.
     *
     * @param name the column name to look for
     * @param indexes the indexes to consult
     * @return true as soon as one index answers {@code indexes(name)} with true, false otherwise
     */
    public boolean indexes(ByteBuffer name, Set<SecondaryIndex> indexes)
    {
        for (SecondaryIndex candidate : indexes)
        {
            if (candidate.indexes(name))
                return true;
        }
        return false;
    }

    /**
     * Collects the indexes of the supplied set that report indexing the given column name.
     *
     * @param name the column name to look for
     * @param indexes the indexes to consult
     * @return a new set of the matching indexes, or the shared immutable empty set when none match
     */
    public Set<SecondaryIndex> indexFor(ByteBuffer name, Set<SecondaryIndex> indexes)
    {
        // Allocated lazily so that the common no-match case creates no garbage.
        Set<SecondaryIndex> found = null;

        for (SecondaryIndex candidate : indexes)
        {
            if (!candidate.indexes(name))
                continue;

            if (found == null)
                found = new HashSet<>();
            found.add(candidate);
        }

        if (found == null)
            return Collections.<SecondaryIndex>emptySet();
        return found;
    }

    /**
     * @param column the column whose name is checked
     * @return true if any registered index indexes the column's name
     */
    public boolean indexes(Column column)
    {
        ByteBuffer columnName = column.name();
        return indexes(columnName);
    }

    /**
     * @param name the column name to look for
     * @return true if any registered index indexes the given column name
     */
    public boolean indexes(ByteBuffer name)
    {
        Set<SecondaryIndex> registered = allIndexes;
        return indexes(name, registered);
    }

    /**
     * @param name the column name to look for
     * @return the registered indexes that index the given column name (possibly empty)
     */
    public Set<SecondaryIndex> indexFor(ByteBuffer name)
    {
        Set<SecondaryIndex> registered = allIndexes;
        return indexFor(name, registered);
    }

    /**
     * @param clause the index expressions of a query; may be null
     * @return true if at least one searcher built for the clause reports that it is
     *         indexing it; false for a null or empty clause
     */
    public boolean hasIndexFor(List<IndexExpression> clause)
    {
        boolean nothingToMatch = clause == null || clause.isEmpty();
        if (nothingToMatch)
            return false;

        // An empty searcher list simply falls through to the final return.
        for (SecondaryIndexSearcher candidate : getIndexSearchersForQuery(clause))
        {
            if (candidate.isIndexing(clause))
                return true;
        }
        return false;
    }

    /**
     * Removes the index registered for a column, if there is one.
     * A row-level index instance stays registered until its last column definition is removed.
     *
     * @param column the indexed column to remove
     */
    public void removeIndexedColumn(ByteBuffer column)
    {
        SecondaryIndex removed = indexesByColumn.remove(column);
        if (removed == null)
            return;

        if (!(removed instanceof PerRowSecondaryIndex))
        {
            // Per-column index: one instance per column, so it leaves the set right away.
            allIndexes.remove(removed);
        }
        else
        {
            // Row-level index: detach this column from the shared instance first ...
            removed.removeColumnDef(column);

            // ... and only unregister the instance once no column refers to it any more.
            boolean noColumnsLeft = removed.getColumnDefs().isEmpty();
            if (noColumnsLeft)
            {
                allIndexes.remove(removed);
                rowLevelIndexMap.remove(removed.getClass());
            }
        }

        removed.removeIndex(column);

        String systemName = removed.getNameForSystemKeyspace(column);
        SystemKeyspace.setIndexRemoved(baseCfs.metadata.ksName, systemName);
    }

    /**
     * Adds and builds an index for a column.
     *
     * @param cdef the column definition holding the index data
     * @return a future the caller can optionally block on until the index is built, or
     *         {@code null} when the column is already registered or the index reports
     *         itself as already built
     * @throws RuntimeException if the index cannot be instantiated, or if a CUSTOM index
     *         is a subclass of AbstractSimplePerColumnSecondaryIndex
     */
    public synchronized Future<?> addIndexedColumn(ColumnDefinition cdef)
    {
        // Already registered for this column: nothing to do.
        if (indexesByColumn.containsKey(cdef.name))
            return null;

        assert cdef.getIndexType() != null;

        SecondaryIndex created;
        try
        {
            created = SecondaryIndex.createInstance(baseCfs, cdef);
        }
        catch (ConfigurationException e)
        {
            throw new RuntimeException(e);
        }

        // The instance that ends up registered; differs from 'created' only when a
        // row-level index of the same class already exists.
        SecondaryIndex index = created;

        if (created instanceof PerRowSecondaryIndex)
        {
            // Keep a single instance of the index per-cf for row level indexes
            // since we want all columns to be under the index
            SecondaryIndex shared = rowLevelIndexMap.get(created.getClass());

            if (shared != null)
            {
                index = shared;
                index.addColumnDef(cdef);
                logger.info("Creating new index : {}",cdef);
            }
            else
            {
                rowLevelIndexMap.put(created.getClass(), created);
                created.init();
            }
        }
        else
        {
            // TODO: We should do better than throw a RuntimeException
            boolean cfsBackedCustom = cdef.getIndexType() == IndexType.CUSTOM
                                      && created instanceof AbstractSimplePerColumnSecondaryIndex;
            if (cfsBackedCustom)
                throw new RuntimeException("Cannot use a subclass of AbstractSimplePerColumnSecondaryIndex as a CUSTOM index, as they assume they are CFS backed");

            created.init();
        }

        // link in indexedColumns. this means that writes will add new data to
        // the index immediately, so we don't have to lock everything while we do
        // the build. it's up to the operator to wait until the index is actually
        // built before using in queries.
        indexesByColumn.put(cdef.name, index);

        // Add to all indexes set:
        allIndexes.add(index);

        // if we're just linking in the index to indexedColumns on an
        // already-built index post-restart, we're done
        return index.isIndexBuilt(cdef.name) ? null : index.buildIndexAsync();
    }

    /**
     * Looks up the index registered for a column.
     *
     * @param column the name of the indexed column
     * @return the index registered for that column, or null if there is none
     */
    public SecondaryIndex getIndexForColumn(ByteBuffer column)
    {
        SecondaryIndex registered = indexesByColumn.get(column);
        return registered;
    }

    /**
     * Calls {@code invalidate()} on every registered index.
     */
    public void invalidate()
    {
        for (SecondaryIndex registered : allIndexes)
        {
            registered.invalidate();
        }
    }

    /**
     * Flushes all indexes by calling {@code forceBlockingFlush()} on each of them in turn.
     */
    public void flushIndexesBlocking()
    {
        for (SecondaryIndex registered : allIndexes)
        {
            registered.forceBlockingFlush();
        }
    }

    /**
     * @return the names of the indexes that report being built for their column (ready to use);
     *         one entry is produced per indexed column
     */
    public List<String> getBuiltIndexes()
    {
        List<String> built = new ArrayList<>();

        for (Map.Entry<ByteBuffer, SecondaryIndex> registration : indexesByColumn.entrySet())
        {
            ByteBuffer columnName = registration.getKey();
            SecondaryIndex index = registration.getValue();

            if (!index.isIndexBuilt(columnName))
                continue;

            built.add(index.getIndexName());
        }

        return built;
    }

    /**
     * @return the backing CFS of every index that has one, i.e. whose
     *         {@code getIndexCfs()} is non-null (KEYS)
     */
    public Set<ColumnFamilyStore> getIndexesBackedByCfs()
    {
        Set<ColumnFamilyStore> backingStores = new HashSet<>();

        for (SecondaryIndex registered : allIndexes)
        {
            ColumnFamilyStore backing = registered.getIndexCfs();
            if (backing == null)
                continue;
            backingStores.add(backing);
        }

        return backingStores;
    }

    /**
     * @return all indexes which do *not* use a backing CFS internally, i.e. whose
     *         {@code getIndexCfs()} is null
     */
    public Set<SecondaryIndex> getIndexesNotBackedByCfs()
    {
        // we use identity map because per row indexes use same instance across many columns
        Map<SecondaryIndex, Boolean> identityBacking = new IdentityHashMap<>();
        Set<SecondaryIndex> withoutCfs = Collections.newSetFromMap(identityBacking);

        for (SecondaryIndex registered : allIndexes)
        {
            boolean hasBackingCfs = registered.getIndexCfs() != null;
            if (!hasBackingCfs)
                withoutCfs.add(registered);
        }
        return withoutCfs;
    }

    /**
     * @return every registered secondary index, whether or not it is backed by a
     *         secondary ColumnFamilyStore. This is the manager's own set, not a copy.
     */
    public Set<SecondaryIndex> getIndexes()
    {
        return this.allIndexes;
    }

    /**
     * @return true if at least one column has an index registered for this table
     */
    public boolean hasIndexes()
    {
        boolean noneRegistered = indexesByColumn.isEmpty();
        return !noneRegistered;
    }

    /**
     * @return the sum of {@code getLiveSize()} over all indexes (total current ram size)
     */
    public long getTotalLiveSize()
    {
        long sum = 0;
        for (SecondaryIndex registered : getIndexes())
        {
            long liveSize = registered.getLiveSize();
            sum += liveSize;
        }
        return sum;
    }

    /**
     * When building an index against existing data, add the given row to the index.
     *
     * Row-level indexes receive the whole row once per index class; per-column indexes
     * receive each live column whose name they index.
     *
     * @param key the row key
     * @param cf the current rows data
     */
    public void indexRow(ByteBuffer key, ColumnFamily cf)
    {
        // Update entire row only once per row level index
        Set<Class<? extends SecondaryIndex>> appliedRowLevelIndexes = null;

        // Read the clock once so that every column of the row is judged live or not
        // against the same instant for all indexes, rather than re-reading it for
        // each column of each index.
        long now = System.currentTimeMillis();

        for (SecondaryIndex index : allIndexes)
        {
            if (index instanceof PerRowSecondaryIndex)
            {
                if (appliedRowLevelIndexes == null)
                    appliedRowLevelIndexes = new HashSet<>();

                // add() returns false when this index class was already handled for the row
                if (appliedRowLevelIndexes.add(index.getClass()))
                    ((PerRowSecondaryIndex)index).index(key, cf);
            }
            else
            {
                for (Column column : cf)
                    if (column.isLive(now) && index.indexes(column.name()))
                        ((PerColumnSecondaryIndex) index).insert(key, column);
            }
        }
    }

    /**
     * Delete all columns from all indexes for this row.  For when cleanup rips a row out entirely.
     *
     * @param key the row key
     * @param indexedColumnsInRow the columns of the row to remove from the indexes
     */
    public void deleteFromIndexes(DecoratedKey key, List<Column> indexedColumnsInRow)
    {
        // Row-level index classes already cleaned for this row (allocated on first use),
        // so that each of them deletes the row only once.
        Set<Class<? extends SecondaryIndex>> handledRowLevel = null;

        for (Column column : indexedColumnsInRow)
        {
            for (SecondaryIndex index : indexFor(column.name()))
            {
                if (!(index instanceof PerRowSecondaryIndex))
                {
                    ((PerColumnSecondaryIndex) index).deleteForCleanup(key.key, column);
                    continue;
                }

                if (handledRowLevel == null)
                    handledRowLevel = new HashSet<>();

                boolean firstVisit = handledRowLevel.add(index.getClass());
                if (firstVisit)
                    ((PerRowSecondaryIndex) index).delete(key);
            }
        }
    }

    /**
     * This helper acts as a closure around the indexManager
     * and updated cf data to ensure that down in
     * Memtable's ColumnFamily implementation, the index
     * can get updated. Note: only a CF backed by AtomicSortedColumns implements
     * this behaviour fully, other types simply ignore the index updater.
     *
     * @return {@link #nullUpdater} when no per-column and no row-level index is
     *         registered, otherwise a new updater bound to the given key and cf
     */
    public Updater updaterFor(DecoratedKey key, ColumnFamily cf)
    {
        boolean nothingToUpdate = indexesByColumn.isEmpty() && rowLevelIndexMap.isEmpty();
        if (nothingToUpdate)
            return nullUpdater;

        return new StandardUpdater(key, cf);
    }

    /**
     * Updater closure with only the modified row key; delegates to
     * {@link #updaterFor(DecoratedKey, ColumnFamily)} with a null column family.
     */
    public Updater updaterFor(DecoratedKey key)
    {
        ColumnFamily noRowData = null;
        return updaterFor(key, noRowData);
    }

    /**
     * Get a list of IndexSearchers from the union of expression index types.
     * Expressions on columns without a registered index are skipped.
     *
     * @param clause the query clause
     * @return the searchers needed to query the index, one per index grouping type
     */
    private List<SecondaryIndexSearcher> getIndexSearchersForQuery(List<IndexExpression> clause)
    {
        // grouping type (index.indexTypeForGrouping()) -> clause columns handled by that group
        Map<String, Set<ByteBuffer>> columnsByGroup = new HashMap<>();

        for (IndexExpression expression : clause)
        {
            SecondaryIndex index = getIndexForColumn(expression.column_name);
            if (index != null)
            {
                Set<ByteBuffer> group = columnsByGroup.get(index.indexTypeForGrouping());
                if (group == null)
                {
                    group = new HashSet<>();
                    columnsByGroup.put(index.indexTypeForGrouping(), group);
                }
                group.add(expression.column_name);
            }
        }

        List<SecondaryIndexSearcher> searchers = new ArrayList<>(columnsByGroup.size());

        // create searcher per type, using the index of any one column of the group
        for (Set<ByteBuffer> group : columnsByGroup.values())
        {
            ByteBuffer representative = group.iterator().next();
            SecondaryIndex groupIndex = getIndexForColumn(representative);
            searchers.add(groupIndex.createSecondaryIndexSearcher(group));
        }

        return searchers;
    }

    /**
     * Performs a search across a number of column indexes
     * TODO: add support for querying across index types
     *
     * @param filter the column range to restrict to
     * @return found indexed rows; an empty list when no index applies to the filter's clause
     * @throws RuntimeException if the clause spans more than one index type
     */
    public List<Row> search(ExtendedFilter filter)
    {
        List<SecondaryIndexSearcher> searchers = getIndexSearchersForQuery(filter.getClause());

        switch (searchers.size())
        {
            case 0:
                return Collections.emptyList();
            case 1:
                return searchers.get(0).search(filter);
            default:
                // We currently don't support searching across multiple index types
                throw new RuntimeException("Unable to search across multiple secondary index types");
        }
    }

    /**
     * @param idxNames the index names to select
     * @return a new set of the registered indexes whose {@code getIndexName()} is in idxNames
     */
    public Set<SecondaryIndex> getIndexesByNames(Set<String> idxNames)
    {
        Set<SecondaryIndex> selected = new HashSet<>();
        for (SecondaryIndex registered : allIndexes)
        {
            boolean requested = idxNames.contains(registered.getIndexName());
            if (requested)
                selected.add(registered);
        }
        return selected;
    }

    /**
     * Calls {@code setIndexBuilt()} on every registered index named in idxNames.
     *
     * @param idxNames the names of the indexes to mark
     */
    public void setIndexBuilt(Set<String> idxNames)
    {
        Set<SecondaryIndex> selected = getIndexesByNames(idxNames);
        for (SecondaryIndex index : selected)
        {
            index.setIndexBuilt();
        }
    }

    /**
     * Calls {@code setIndexRemoved()} on every registered index named in idxNames.
     *
     * @param idxNames the names of the indexes to mark
     */
    public void setIndexRemoved(Set<String> idxNames)
    {
        Set<SecondaryIndex> selected = getIndexesByNames(idxNames);
        for (SecondaryIndex index : selected)
        {
            index.setIndexRemoved();
        }
    }

    /**
     * Asks every index covering the column's name to validate the column.
     *
     * @param rowKey the key of the row the column belongs to
     * @param column the column to validate
     * @return the first index whose {@code validate} rejects the column, or null if none does
     */
    public SecondaryIndex validate(ByteBuffer rowKey, Column column)
    {
        for (SecondaryIndex index : indexFor(column.name()))
        {
            boolean accepted = index.validate(rowKey, column);
            if (!accepted)
                return index;
        }
        return null;
    }

    /**
     * Narrows a set of index names down to those of registered indexes that index
     * at least one column definition of the base CF.
     *
     * @param idxNames the index names to filter
     * @return a new set with the names that passed the filter
     */
    private Set<String> filterByColumn(Set<String> idxNames)
    {
        Set<SecondaryIndex> candidates = getIndexesByNames(idxNames);
        Set<String> retained = new HashSet<>(idxNames.size());

        for (SecondaryIndex candidate : candidates)
        {
            boolean coversSomeColumn = false;
            for (ColumnDefinition definition : baseCfs.metadata.allColumns())
            {
                if (candidate.indexes(definition))
                {
                    coversSomeColumn = true;
                    break;
                }
            }

            if (coversSomeColumn)
                retained.add(candidate.getIndexName());
        }
        return retained;
    }

    /**
     * Callbacks through which column changes on a row are propagated to the indexes.
     */
    public static interface Updater
    {
        /**
         * Called when constructing the index against pre-existing data.
         *
         * @param column the column to add to the index
         */
        void insert(Column column);

        /**
         * Called when updating the index from a memtable.
         *
         * @param oldColumn the column being replaced
         * @param column the replacing column
         */
        void update(Column oldColumn, Column column);

        /**
         * Called when lazy-updating the index during compaction (CASSANDRA-2897).
         *
         * @param current the column to remove from the index
         */
        void remove(Column current);

        /**
         * Called after memtable updates are complete (CASSANDRA-5397).
         */
        void updateRowLevelIndexes();
    }

    /**
     * {@link Updater} bound to one row: forwards column changes to the per-column indexes
     * covering the column's name, and the whole row to every row-level index.
     */
    private class StandardUpdater implements Updater
    {
        private final DecoratedKey key;
        private final ColumnFamily cf;

        /**
         * @param key the key of the row being updated
         * @param cf the row data handed to row-level indexes; null when created
         *           through {@code updaterFor(DecoratedKey)}
         */
        public StandardUpdater(DecoratedKey key, ColumnFamily cf)
        {
            this.key = key;
            this.cf = cf;
        }

        /**
         * Inserts the column into every per-column index covering its name, unless the
         * column is marked for delete as of now.
         */
        public void insert(Column column)
        {
            boolean deleted = column.isMarkedForDelete(System.currentTimeMillis());
            if (deleted)
                return;

            for (SecondaryIndex index : indexFor(column.name()))
            {
                if (!(index instanceof PerColumnSecondaryIndex))
                    continue;
                ((PerColumnSecondaryIndex) index).insert(key.key, column);
            }
        }

        /**
         * Replaces oldColumn by column in every per-column index covering the column's name.
         * Does nothing when both columns are equal.
         */
        public void update(Column oldColumn, Column column)
        {
            if (oldColumn.equals(column))
                return;

            for (SecondaryIndex index : indexFor(column.name()))
            {
                if (!(index instanceof PerColumnSecondaryIndex))
                    continue;

                PerColumnSecondaryIndex perColumn = (PerColumnSecondaryIndex) index;

                // insert the new value before removing the old one, so we never have a period
                // where the row is invisible to both queries (the opposite seems preferable); see CASSANDRA-5540
                boolean newValueDeleted = column.isMarkedForDelete(System.currentTimeMillis());
                if (!newValueDeleted)
                    perColumn.insert(key.key, column);

                // Usually we want to delete the old value from the index, except when
                // name/value/timestamp are all equal, but the columns themselves
                // are not (as is the case when overwriting expiring columns with
                // identical values and ttl) Then, we don't want to delete as the
                // tombstone will hide the new value we just inserted; see CASSANDRA-7268
                if (shouldCleanupOldValue(oldColumn, column))
                    perColumn.delete(key.key, oldColumn);
            }
        }

        /**
         * Deletes the column from every per-column index covering its name, unless the
         * column is marked for delete as of now.
         */
        public void remove(Column column)
        {
            boolean deleted = column.isMarkedForDelete(System.currentTimeMillis());
            if (deleted)
                return;

            for (SecondaryIndex index : indexFor(column.name()))
            {
                if (!(index instanceof PerColumnSecondaryIndex))
                    continue;
                ((PerColumnSecondaryIndex) index).delete(key.key, column);
            }
        }

        /**
         * Hands the row (key and cf) to every registered row-level index.
         */
        public void updateRowLevelIndexes()
        {
            for (SecondaryIndex rowLevel : rowLevelIndexMap.values())
            {
                PerRowSecondaryIndex perRow = (PerRowSecondaryIndex) rowLevel;
                perRow.index(key.key, cf);
            }
        }

        /**
         * @return true unless name, value and timestamp of both columns are all equal
         */
        private boolean shouldCleanupOldValue(Column oldColumn, Column newColumn)
        {
            // If any one of name/value/timestamp are different, then we
            // should delete from the index. If not, then we can infer that
            // at least one of the columns is an ExpiringColumn and that the
            // difference is in the expiry time. In this case, we don't want to
            // delete the old value from the index as the tombstone we insert
            // will just hide the inserted value.
            // Completely identical columns (including expiring columns with
            // identical ttl & localExpirationTime) will not get this far due
            // to the oldColumn.equals(newColumn) in StandardUpdater.update
            boolean differsOnlyInExpiry = oldColumn.name().equals(newColumn.name())
                                          && oldColumn.value().equals(newColumn.value())
                                          && oldColumn.timestamp() == newColumn.timestamp();
            return !differsOnlyInExpiry;
        }
    }
}