/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db;

import java.nio.ByteBuffer;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;

import com.google.common.base.Function;

import edu.stanford.ppl.concurrent.SnapTreeMap;

import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.db.filter.ColumnSlice;
import org.apache.cassandra.db.index.SecondaryIndexManager;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.utils.*;

/**
 * A thread-safe and atomic ISortedColumns implementation.
 * Operations (in particular addAll) on this implementation are atomic and
 * isolated (in the sense of ACID). Typically an addAll is guaranteed that no
 * other thread can see the state where only parts but not all columns have
 * been added.
 *
 * The implementation uses snaptree (https://github.com/nbronson/snaptree),
 * and in particular its copy-on-write clone operation to achieve its
 * atomicity guarantee.
 *
 * WARNING: removing elements through getSortedColumns().iterator() is *not*
 * isolated from other operations and could actually be fully ignored in the
 * face of a concurrent write. Don't use it unless in a non-concurrent context.
*/
public class AtomicSortedColumns extends ColumnFamily
{
    // The current immutable snapshot (column map + deletion info). Every
    // mutation builds a new Holder and publishes it with a compare-and-swap.
    private final AtomicReference<Holder> ref;

    // Opt-in flag: when true, addAllWithSizeDelta additionally reports the
    // minimum timestamp delta observed between reconciled columns.
    private static final boolean enableColUpdateTimeDelta =
        Boolean.parseBoolean(System.getProperty("cassandra.enableColUpdateTimeDelta", "false"));

    public static final ColumnFamily.Factory<AtomicSortedColumns> factory = new Factory<AtomicSortedColumns>()
    {
        public AtomicSortedColumns create(CFMetaData metadata, boolean insertReversed)
        {
            // insertReversed is ignored: this implementation is never reversed
            // (see isInsertReversed() below).
            return new AtomicSortedColumns(metadata);
        }
    };

    private AtomicSortedColumns(CFMetaData metadata)
    {
        this(metadata, new Holder(metadata.comparator));
    }

    private AtomicSortedColumns(CFMetaData metadata, Holder holder)
    {
        super(metadata);
        this.ref = new AtomicReference<>(holder);
    }

    public AbstractType<?> getComparator()
    {
        return (AbstractType<?>)ref.get().map.comparator();
    }

    public ColumnFamily.Factory getFactory()
    {
        return factory;
    }

    public ColumnFamily cloneMe()
    {
        // Cheap: relies on SnapTreeMap's O(1) copy-on-write clone.
        return new AtomicSortedColumns(metadata, ref.get().cloneMe());
    }

    public DeletionInfo deletionInfo()
    {
        return ref.get().deletionInfo;
    }

    public void delete(DeletionTime delTime)
    {
        delete(new DeletionInfo(delTime));
    }

    protected void delete(RangeTombstone tombstone)
    {
        delete(new DeletionInfo(tombstone, getComparator()));
    }

    public void delete(DeletionInfo info)
    {
        if (info.isLive())
            return;

        // Merge the incoming deletion info into the current one (keeping the
        // max markedForDeleteAt value), retrying the CAS until it succeeds so
        // no concurrent update is lost.
        while (true)
        {
            Holder current = ref.get();
            DeletionInfo newDelInfo = current.deletionInfo.copy().add(info);
            if (ref.compareAndSet(current, current.with(newDelInfo)))
                break;
        }
    }

    public void setDeletionInfo(DeletionInfo newInfo)
    {
        // NOTE(review): get-then-set is not atomic (unlike the CAS loops in
        // the other mutators) — presumably only called from non-concurrent
        // contexts; confirm with callers.
        ref.set(ref.get().with(newInfo));
    }

    public void purgeTombstones(int gcBefore)
    {
        // Drop tombstones older than gcBefore; retry on CAS failure, bailing
        // out as soon as nothing purgeable remains.
        while (true)
        {
            Holder current = ref.get();
            if (!current.deletionInfo.hasPurgeableTombstones(gcBefore))
                break;

            DeletionInfo purgedInfo = current.deletionInfo.copy();
            purgedInfo.purge(gcBefore);
            if (ref.compareAndSet(current, current.with(purgedInfo)))
                break;
        }
    }

    public void addColumn(Column column, Allocator allocator)
    {
        // Clone-modify-CAS: each attempt works on a private clone so a failed
        // CAS leaves the published state untouched.
        Holder current, modified;
        do
        {
            current = ref.get();
            modified = current.cloneMe();
            modified.addColumn(column, allocator, SecondaryIndexManager.nullUpdater);
        }
        while (!ref.compareAndSet(current, modified));
    }

    public void addAll(ColumnFamily cm, Allocator allocator, Function<Column, Column> transformation)
    {
        addAllWithSizeDelta(cm, allocator, transformation, SecondaryIndexManager.nullUpdater);
    }

    /**
     * This is only called by Memtable.resolve, so only AtomicSortedColumns needs to implement it.
     *
     * @return the difference in size seen after merging the given columns and
     *         the minimum time delta between timestamps of two reconciled columns
     *         (Long.MAX_VALUE when cassandra.enableColUpdateTimeDelta is disabled).
     */
    public Pair<Long, Long> addAllWithSizeDelta(ColumnFamily cm, Allocator allocator, Function<Column, Column> transformation, SecondaryIndexManager.Updater indexer)
    {
        /*
         * This operation needs atomicity and isolation. To that end, we add
         * the new columns to a copy of the map (a cheap O(1) snapTree clone)
         * and atomically compare-and-swap when everything has been added; the
         * deletion times are merged into the new Holder as well.
         *
         * In case we are adding a lot of columns, failing the final compare
         * and swap could be expensive. To mitigate, we check we haven't been
         * beaten by another thread after every column addition. If we have,
         * we bail early, avoiding unnecessary work if possible.
         */
        Holder current, modified;
        long sizeDelta;
        long timeDelta;

        main_loop:
        do
        {
            // Restart from scratch: accumulators are per-attempt.
            sizeDelta = 0;
            timeDelta = Long.MAX_VALUE;
            current = ref.get();
            DeletionInfo newDelInfo = current.deletionInfo;
            if (cm.deletionInfo().mayModify(newDelInfo))
            {
                newDelInfo = current.deletionInfo.copy().add(cm.deletionInfo());
                sizeDelta += newDelInfo.dataSize() - current.deletionInfo.dataSize();
            }
            modified = new Holder(current.map.clone(), newDelInfo);

            for (Column column : cm)
            {
                final Pair<Integer, Long> pair = modified.addColumn(transformation.apply(column), allocator, indexer);
                sizeDelta += pair.left;
                // We will store the minimum delta for all columns if enabled.
                if (enableColUpdateTimeDelta)
                    timeDelta = Math.min(pair.right, timeDelta);
                // Bail early if we know we've been beaten.
                if (ref.get() != current)
                    continue main_loop;
            }
        }
        while (!ref.compareAndSet(current, modified));

        indexer.updateRowLevelIndexes();

        return Pair.create(sizeDelta, timeDelta);
    }

    public boolean replace(Column oldColumn, Column newColumn)
    {
        if (!oldColumn.name().equals(newColumn.name()))
            throw new IllegalArgumentException();

        // Clone-modify-CAS; `replaced` reflects the outcome of the attempt
        // that finally won the CAS.
        Holder current, modified;
        boolean replaced;
        do
        {
            current = ref.get();
            modified = current.cloneMe();
            replaced = modified.map.replace(oldColumn.name(), oldColumn, newColumn);
        }
        while (!ref.compareAndSet(current, modified));
        return replaced;
    }

    public void clear()
    {
        Holder current, modified;
        do
        {
            current = ref.get();
            modified = current.clear();
        }
        while (!ref.compareAndSet(current, modified));
    }

    public Column getColumn(ByteBuffer name)
    {
        return ref.get().map.get(name);
    }

    public SortedSet<ByteBuffer> getColumnNames()
    {
        return ref.get().map.keySet();
    }

    public Collection<Column> getSortedColumns()
    {
        return ref.get().map.values();
    }

    public Collection<Column> getReverseSortedColumns()
    {
        return ref.get().map.descendingMap().values();
    }

    public int getColumnCount()
    {
        return ref.get().map.size();
    }

    public Iterator<Column> iterator(ColumnSlice[] slices)
    {
        return new ColumnSlice.NavigableMapIterator(ref.get().map, slices);
    }

    public Iterator<Column> reverseIterator(ColumnSlice[] slices)
    {
        return new ColumnSlice.NavigableMapIterator(ref.get().map.descendingMap(), slices);
    }

    public boolean isInsertReversed()
    {
        return false;
    }

    /**
     * Immutable snapshot of the column map plus deletion info; the enclosing
     * class swaps whole Holder instances atomically via {@code ref}.
     */
    private static class Holder
    {
        // This is a small optimization: DeletionInfo is mutable, but we know that we will always copy it in that class,
        // so we can safely alias one DeletionInfo.live() reference and avoid some allocations.
        private static final DeletionInfo LIVE = DeletionInfo.live();

        final SnapTreeMap<ByteBuffer, Column> map;
        final DeletionInfo deletionInfo;

        Holder(AbstractType<?> comparator)
        {
            this(new SnapTreeMap<ByteBuffer, Column>(comparator), LIVE);
        }

        Holder(SnapTreeMap<ByteBuffer, Column> map, DeletionInfo deletionInfo)
        {
            this.map = map;
            this.deletionInfo = deletionInfo;
        }

        Holder cloneMe()
        {
            return with(map.clone());
        }

        Holder with(DeletionInfo info)
        {
            return new Holder(map, info);
        }

        Holder with(SnapTreeMap<ByteBuffer, Column> newMap)
        {
            return new Holder(newMap, deletionInfo);
        }

        // There is no point in cloning the underlying map to clear it
        // afterwards.
        Holder clear()
        {
            return new Holder(new SnapTreeMap<ByteBuffer, Column>(map.comparator()), LIVE);
        }

        /**
         * Inserts or reconciles a column, retrying on concurrent modification.
         *
         * @return (size delta in bytes, timestamp delta between the old and new
         *         column when enableColUpdateTimeDelta is set, else Long.MAX_VALUE)
         */
        Pair<Integer, Long> addColumn(Column column, Allocator allocator, SecondaryIndexManager.Updater indexer)
        {
            ByteBuffer name = column.name();
            while (true)
            {
                Column oldColumn = map.putIfAbsent(name, column);
                if (oldColumn == null)
                {
                    // Fresh insert: full data size counts, no reconcile delta.
                    indexer.insert(column);
                    return Pair.create(column.dataSize(), Long.MAX_VALUE);
                }

                Column reconciledColumn = column.reconcile(oldColumn, allocator);
                if (map.replace(name, oldColumn, reconciledColumn))
                {
                    indexer.update(oldColumn, reconciledColumn);
                    return Pair.create(reconciledColumn.dataSize() - oldColumn.dataSize(),
                                       enableColUpdateTimeDelta? Math.abs(oldColumn.timestamp - column.timestamp): Long.MAX_VALUE );
                }
                // We failed to replace column due to a concurrent update or a concurrent removal. Keep trying.
                // (Currently, concurrent removal should not happen (only updates), but let us support that anyway.)
            }
        }
    }
}