/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cassandra.db; import java.io.IOException; import java.io.IOError; import java.util.*; import com.google.common.collect.Iterables; import com.google.common.collect.PeekingIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.config.CFMetaData; import org.apache.cassandra.db.rows.*; import org.apache.cassandra.io.util.DataInputPlus; import org.apache.cassandra.io.util.FileDataInput; import org.apache.cassandra.net.MessagingService; /** * Helper class to deserialize Unfiltered object from disk efficiently. * * More precisely, this class is used by the low-level reader to ensure * we don't do more work than necessary (i.e. we don't allocate/deserialize * objects for things we don't care about). 
*/
public abstract class UnfilteredDeserializer
{
    private static final Logger logger = LoggerFactory.getLogger(UnfilteredDeserializer.class);

    protected final CFMetaData metadata;
    protected final DataInputPlus in;
    protected final SerializationHelper helper;

    protected UnfilteredDeserializer(CFMetaData metadata,
                                     DataInputPlus in,
                                     SerializationHelper helper)
    {
        this.metadata = metadata;
        this.in = in;
        this.helper = helper;
    }

    /**
     * Creates the deserializer appropriate for the serialization version: the current
     * (3.0+) format deserializer when {@code helper.version >= VERSION_30}, the legacy
     * one otherwise.
     *
     * @param partitionDeletion the partition-level deletion; only used by the legacy
     *                          deserializer (to skip shadowed atoms).
     * @param readAllAsDynamic  legacy-format option, passed through to
     *                          {@code LegacyLayout.readLegacyAtom}.
     */
    public static UnfilteredDeserializer create(CFMetaData metadata,
                                                DataInputPlus in,
                                                SerializationHeader header,
                                                SerializationHelper helper,
                                                DeletionTime partitionDeletion,
                                                boolean readAllAsDynamic)
    {
        if (helper.version >= MessagingService.VERSION_30)
            return new CurrentDeserializer(metadata, in, header, helper);
        else
            return new OldFormatDeserializer(metadata, in, helper, partitionDeletion, readAllAsDynamic);
    }

    /**
     * Whether or not there are more atoms to read.
     */
    public abstract boolean hasNext() throws IOException;

    /**
     * Compare the provided bound to the next atom to read on disk.
     *
     * This will not read/deserialize the whole atom but only what is necessary for the
     * comparison. Whenever we know what to do with this atom (read it or skip it),
     * readNext or skipNext should be called.
     */
    public abstract int compareNextTo(Slice.Bound bound) throws IOException;

    /**
     * Returns whether the next atom is a row or not.
     */
    public abstract boolean nextIsRow() throws IOException;

    /**
     * Returns whether the next atom is the static row or not.
     */
    public abstract boolean nextIsStatic() throws IOException;

    /**
     * Returns the next atom.
     */
    public abstract Unfiltered readNext() throws IOException;

    /**
     * Clears any state in this deserializer.
     */
    public abstract void clearState() throws IOException;

    /**
     * Skips the next atom.
     */
    public abstract void skipNext() throws IOException;

    /**
     * For the legacy layout deserializer, we have to deal with the fact that a row can span multiple index blocks and that
     * the call to hasNext() reads the next element upfront. We must take that into account when we check in AbstractSSTableIterator if
     * we're past the end of an index block boundary as that check expects to account for only consumed data (that is, if hasNext has
     * been called and made us cross an index boundary but neither readNext() nor skipNext() has yet been called, we shouldn't consider
     * the index block boundary crossed yet).
     *
     * TODO: we don't care about this for the current file format because a row can never span multiple index blocks (further, hasNext()
     * only just basically reads 2 bytes from disk in that case). So once we drop backward compatibility with pre-3.0 sstables, we should
     * remove this.
     */
    public abstract long bytesReadForUnconsumedData();

    /**
     * Deserializer for the current (3.0+) sstable format. Reads the atom flags upfront
     * in prepareNext() so that compareNextTo()/nextIsRow() can answer without
     * deserializing the atom body.
     */
    private static class CurrentDeserializer extends UnfilteredDeserializer
    {
        private final ClusteringPrefix.Deserializer clusteringDeserializer;
        private final SerializationHeader header;

        // Flags of the next atom, valid once prepareNext() has run (isReady == true).
        private int nextFlags;
        private int nextExtendedFlags;
        // isReady: the next atom's flags/clustering have been prepared but the atom
        // itself is not yet consumed. isDone: the end-of-partition marker was read.
        private boolean isReady;
        private boolean isDone;

        // Reused for every deserialized row (newRow() is called before each use).
        private final Row.Builder builder;

        private CurrentDeserializer(CFMetaData metadata,
                                    DataInputPlus in,
                                    SerializationHeader header,
                                    SerializationHelper helper)
        {
            super(metadata, in, helper);
            this.header = header;
            this.clusteringDeserializer = new ClusteringPrefix.Deserializer(metadata.comparator, in, header);
            this.builder = BTreeRow.sortedBuilder();
        }

        public boolean hasNext() throws IOException
        {
            if (isReady)
                return true;

            prepareNext();
            return !isDone;
        }

        // Reads the flags (and prepares the clustering deserializer) of the next atom,
        // or flips isDone if the end-of-partition marker is found.
        private void prepareNext() throws IOException
        {
            if (isDone)
                return;

            nextFlags = in.readUnsignedByte();
            if (UnfilteredSerializer.isEndOfPartition(nextFlags))
            {
                isDone = true;
                isReady = false;
                return;
            }

            nextExtendedFlags = UnfilteredSerializer.readExtendedFlags(in, nextFlags);
            clusteringDeserializer.prepare(nextFlags, nextExtendedFlags);
            isReady = true;
        }

        public int compareNextTo(Slice.Bound bound) throws IOException
        {
            if (!isReady)
                prepareNext();

            assert !isDone;
            return clusteringDeserializer.compareNextTo(bound);
        }

        public boolean nextIsRow() throws IOException
        {
            if (!isReady)
                prepareNext();

            return UnfilteredSerializer.kind(nextFlags) == Unfiltered.Kind.ROW;
        }

        public boolean nextIsStatic() throws IOException
        {
            // This exists only for the sake of the OldFormatDeserializer
            throw new UnsupportedOperationException();
        }

        public Unfiltered readNext() throws IOException
        {
            isReady = false;
            if (UnfilteredSerializer.kind(nextFlags) == Unfiltered.Kind.RANGE_TOMBSTONE_MARKER)
            {
                RangeTombstone.Bound bound = clusteringDeserializer.deserializeNextBound();
                return UnfilteredSerializer.serializer.deserializeMarkerBody(in, header, bound);
            }
            else
            {
                builder.newRow(clusteringDeserializer.deserializeNextClustering());
                return UnfilteredSerializer.serializer.deserializeRowBody(in, header, helper, nextFlags, nextExtendedFlags, builder);
            }
        }

        public void skipNext() throws IOException
        {
            isReady = false;
            clusteringDeserializer.skipNext();
            if (UnfilteredSerializer.kind(nextFlags) == Unfiltered.Kind.RANGE_TOMBSTONE_MARKER)
            {
                UnfilteredSerializer.serializer.skipMarkerBody(in);
            }
            else
            {
                UnfilteredSerializer.serializer.skipRowBody(in);
            }
        }

        public void clearState()
        {
            isReady = false;
            isDone = false;
        }

        public long bytesReadForUnconsumedData()
        {
            // In theory, hasNext() does consume 2-3 bytes, but we don't care about this for the current file format so returning
            // 0 to mean "do nothing".
            return 0;
        }
    }

    /**
     * Deserializer for the legacy (pre-3.0) sstable format. Groups legacy atoms (cells
     * and range tombstones) into proper Unfiltered objects, tracking open range
     * tombstones to emit close/boundary markers and to skip shadowed atoms.
     */
    public static class OldFormatDeserializer extends UnfilteredDeserializer
    {
        private final boolean readAllAsDynamic;
        private boolean skipStatic;

        // The next Unfiltered to return, computed by hasNext()
        private Unfiltered next;
        // A temporary storage for an unfiltered that isn't returned next but should be looked at just afterwards
        private Unfiltered saved;

        private boolean isFirst = true;

        // The Unfiltered as read from the old format input
        private final UnfilteredIterator iterator;

        // The position in the input after the last data consumption (readNext/skipNext).
        private long lastConsumedPosition;

        private OldFormatDeserializer(CFMetaData metadata,
                                      DataInputPlus in,
                                      SerializationHelper helper,
                                      DeletionTime partitionDeletion,
                                      boolean readAllAsDynamic)
        {
            super(metadata, in, helper);
            this.iterator = new UnfilteredIterator(partitionDeletion);
            this.readAllAsDynamic = readAllAsDynamic;
            this.lastConsumedPosition = currentPosition();
        }

        // Once set, any static row produced by the underlying iterator is dropped by hasNext().
        public void setSkipStatic()
        {
            this.skipStatic = true;
        }

        private boolean isStatic(Unfiltered unfiltered)
        {
            return unfiltered.isRow() && ((Row)unfiltered).isStatic();
        }

        public boolean hasNext() throws IOException
        {
            try
            {
                while (next == null)
                {
                    if (saved == null && !iterator.hasNext())
                        return false;

                    next = saved == null ? iterator.next() : saved;
                    saved = null;

                    // The sstable iterators assume that if there is one, the static row is the first thing this deserializer will return.
                    // However, in the old format, a range tombstone with an empty start would sort before any static cell. So we should
                    // detect that case and return the static parts first if necessary.
                    if (isFirst && iterator.hasNext() && isStatic(iterator.peek()))
                    {
                        saved = next;
                        next = iterator.next();
                    }
                    isFirst = false;

                    // When reading old tables, we sometimes want to skip static data (due to how statically defined columns of compact
                    // tables are handled).
                    if (skipStatic && isStatic(next))
                        next = null;
                }
                return true;
            }
            catch (IOError e)
            {
                // AtomIterator wraps IOException in IOError (Iterator methods can't throw
                // checked exceptions); unwrap it here so callers see the IOException.
                if (e.getCause() != null && e.getCause() instanceof IOException)
                    throw (IOException)e.getCause();
                throw e;
            }
        }

        // A legacy atom counts as (part of) a row if it's a cell, a collection tombstone
        // or a row deletion; other range tombstones become range tombstone markers.
        private boolean isRow(LegacyLayout.LegacyAtom atom)
        {
            if (atom.isCell())
                return true;

            LegacyLayout.LegacyRangeTombstone tombstone = atom.asRangeTombstone();
            return tombstone.isCollectionTombstone() || tombstone.isRowDeletion(metadata);
        }

        public int compareNextTo(Slice.Bound bound) throws IOException
        {
            if (!hasNext())
                throw new IllegalStateException();
            return metadata.comparator.compare(next.clustering(), bound);
        }

        public boolean nextIsRow() throws IOException
        {
            if (!hasNext())
                throw new IllegalStateException();
            return next.isRow();
        }

        public boolean nextIsStatic() throws IOException
        {
            return nextIsRow() && ((Row)next).isStatic();
        }

        private long currentPosition()
        {
            // We return a bogus value if the input is not file based, but check we never rely
            // on that value in that case in bytesReadForUnconsumedData
            return in instanceof FileDataInput ? ((FileDataInput)in).getFilePointer() : 0;
        }

        public Unfiltered readNext() throws IOException
        {
            if (!hasNext())
                throw new IllegalStateException();
            Unfiltered toReturn = next;
            next = null;
            lastConsumedPosition = currentPosition();
            return toReturn;
        }

        public void skipNext() throws IOException
        {
            if (!hasNext())
                throw new UnsupportedOperationException();
            next = null;
            lastConsumedPosition = currentPosition();
        }

        public long bytesReadForUnconsumedData()
        {
            if (!(in instanceof FileDataInput))
                throw new AssertionError();

            return currentPosition() - lastConsumedPosition;
        }

        public void clearState()
        {
            next = null;
            saved = null;
            iterator.clearState();
            lastConsumedPosition = currentPosition();
        }

        // Groups atoms from the input into proper Unfiltered.
        // Note: this could use guava AbstractIterator except that we want to be able to clear
        // the internal state of the iterator so it's cleaner to do it ourselves.
        private class UnfilteredIterator implements PeekingIterator<Unfiltered>
        {
            private final AtomIterator atoms;
            private final LegacyLayout.CellGrouper grouper;
            private final TombstoneTracker tombstoneTracker;

            // The next Unfiltered to return, computed lazily by hasNext().
            private Unfiltered next;

            private UnfilteredIterator(DeletionTime partitionDeletion)
            {
                this.grouper = new LegacyLayout.CellGrouper(metadata, helper);
                this.tombstoneTracker = new TombstoneTracker(partitionDeletion);
                this.atoms = new AtomIterator(tombstoneTracker);
            }

            public boolean hasNext()
            {
                // Note that we loop on next == null because TombstoneTracker.openNew() could return null below or the atom might be shadowed.
                while (next == null)
                {
                    if (atoms.hasNext())
                    {
                        // If a range tombstone closes strictly before the next row/RT, we need to return that close (or boundary) marker first.
                        if (tombstoneTracker.hasClosingMarkerBefore(atoms.peek()))
                        {
                            next = tombstoneTracker.popClosingMarker();
                        }
                        else
                        {
                            LegacyLayout.LegacyAtom atom = atoms.next();
                            if (!tombstoneTracker.isShadowed(atom))
                                next = isRow(atom) ? readRow(atom) : tombstoneTracker.openNew(atom.asRangeTombstone());
                        }
                    }
                    else if (tombstoneTracker.hasOpenTombstones())
                    {
                        // Input exhausted but tombstones remain open: emit their close markers.
                        next = tombstoneTracker.popClosingMarker();
                    }
                    else
                    {
                        return false;
                    }
                }
                return next != null;
            }

            // Groups 'first' and all following atoms belonging to the same row into a Row.
            private Unfiltered readRow(LegacyLayout.LegacyAtom first)
            {
                LegacyLayout.CellGrouper grouper = first.isStatic()
                                                 ? LegacyLayout.CellGrouper.staticGrouper(metadata, helper)
                                                 : this.grouper;
                grouper.reset();
                grouper.addAtom(first);
                // As long as atoms are part of the same row, consume them. Note that the call to addAtom() uses
                // atoms.peek() so that the atom is only consumed (by next) if it's part of the row (addAtom returns true)
                while (atoms.hasNext() && grouper.addAtom(atoms.peek()))
                {
                    atoms.next();
                }
                return grouper.getRow();
            }

            public Unfiltered next()
            {
                if (!hasNext())
                    throw new UnsupportedOperationException();
                Unfiltered toReturn = next;
                next = null;
                return toReturn;
            }

            public Unfiltered peek()
            {
                if (!hasNext())
                    throw new UnsupportedOperationException();
                return next;
            }

            public void clearState()
            {
                atoms.clearState();
                tombstoneTracker.clearState();
                next = null;
            }

            public void remove()
            {
                throw new UnsupportedOperationException();
            }
        }

        // Wraps the input of the deserializer to provide an iterator (and skip shadowed atoms).
        // Note: this could use guava AbstractIterator except that we want to be able to clear
        // the internal state of the iterator so it's cleaner to do it ourselves.
        private class AtomIterator implements PeekingIterator<LegacyLayout.LegacyAtom>
        {
            private final TombstoneTracker tombstoneTracker;
            private boolean isDone;
            private LegacyLayout.LegacyAtom next;

            private AtomIterator(TombstoneTracker tombstoneTracker)
            {
                this.tombstoneTracker = tombstoneTracker;
            }

            public boolean hasNext()
            {
                if (isDone)
                    return false;

                if (next == null)
                {
                    next = readAtom();
                    // readAtom() returning null signals the end of the input.
                    if (next == null)
                    {
                        isDone = true;
                        return false;
                    }
                }
                return true;
            }

            // Reads the next legacy atom from the input; wraps IOException in IOError since
            // Iterator methods can't throw checked exceptions (unwrapped in
            // OldFormatDeserializer.hasNext()).
            private LegacyLayout.LegacyAtom readAtom()
            {
                try
                {
                    return LegacyLayout.readLegacyAtom(metadata, in, readAllAsDynamic);
                }
                catch (IOException e)
                {
                    throw new IOError(e);
                }
            }

            public LegacyLayout.LegacyAtom next()
            {
                if (!hasNext())
                    throw new UnsupportedOperationException();
                LegacyLayout.LegacyAtom toReturn = next;
                next = null;
                return toReturn;
            }

            public LegacyLayout.LegacyAtom peek()
            {
                if (!hasNext())
                    throw new UnsupportedOperationException();
                return next;
            }

            public void clearState()
            {
                this.next = null;
                this.isDone = false;
            }

            public void remove()
            {
                throw new UnsupportedOperationException();
            }
        }

        /**
         * Tracks which range tombstones are open when deserializing the old format.
         */
        private class TombstoneTracker
        {
            private final DeletionTime partitionDeletion;

            // Open tombstones sorted by their closing bound (i.e. first tombstone is the first to close).
            // As we only track non-fully-shadowed ranges, the first range is necessarily the currently
            // open tombstone (the one with the higher timestamp).
            private final SortedSet<LegacyLayout.LegacyRangeTombstone> openTombstones;

            public TombstoneTracker(DeletionTime partitionDeletion)
            {
                this.partitionDeletion = partitionDeletion;
                this.openTombstones = new TreeSet<>((rt1, rt2) -> metadata.comparator.compare(rt1.stop.bound, rt2.stop.bound));
            }

            /**
             * Checks if the provided atom is fully shadowed by the open tombstones of this tracker (or the partition deletion).
             */
            public boolean isShadowed(LegacyLayout.LegacyAtom atom)
            {
                assert !hasClosingMarkerBefore(atom);
                long timestamp = atom.isCell() ? atom.asCell().timestamp : atom.asRangeTombstone().deletionTime.markedForDeleteAt();

                if (partitionDeletion.deletes(timestamp))
                    return true;

                // For a row atom, any open tombstone may cover it; for a range tombstone, only
                // those open tombstones closing at or after it can shadow it fully.
                SortedSet<LegacyLayout.LegacyRangeTombstone> coveringTombstones = isRow(atom) ? openTombstones : openTombstones.tailSet(atom.asRangeTombstone());
                return Iterables.any(coveringTombstones, tombstone -> tombstone.deletionTime.deletes(timestamp));
            }

            /**
             * Whether the currently open marker closes strictly before the provided row/RT.
             */
            public boolean hasClosingMarkerBefore(LegacyLayout.LegacyAtom atom)
            {
                return !openTombstones.isEmpty()
                    && metadata.comparator.compare(openTombstones.first().stop.bound, atom.clustering()) < 0;
            }

            /**
             * Returns the unfiltered corresponding to closing the currently open marker (and updates the tracker accordingly).
             */
            public Unfiltered popClosingMarker()
            {
                assert !openTombstones.isEmpty();

                Iterator<LegacyLayout.LegacyRangeTombstone> iter = openTombstones.iterator();
                LegacyLayout.LegacyRangeTombstone first = iter.next();
                iter.remove();

                // If that was the last open tombstone, we just want to close it. Otherwise, we have a boundary with the
                // next tombstone
                if (!iter.hasNext())
                    return new RangeTombstoneBoundMarker(first.stop.bound, first.deletionTime);

                LegacyLayout.LegacyRangeTombstone next = iter.next();
                return RangeTombstoneBoundaryMarker.makeBoundary(false, first.stop.bound, first.stop.bound.invert(), first.deletionTime, next.deletionTime);
            }

            /**
             * Updates the tracker given the provided newly opened tombstone. This returns the Unfiltered corresponding to the opening
             * of said tombstone: this can be a simple open mark, a boundary (if there was an open tombstone superseded by this new one)
             * or even null (if the new tombstone start is superseded by the currently open tombstone).
             *
             * Note that this method assumes the added tombstone is not fully shadowed, i.e. that !isShadowed(tombstone). It also
             * assumes no opened tombstone closes before that tombstone (so !hasClosingMarkerBefore(tombstone)).
             */
            public Unfiltered openNew(LegacyLayout.LegacyRangeTombstone tombstone)
            {
                if (openTombstones.isEmpty())
                {
                    openTombstones.add(tombstone);
                    return new RangeTombstoneBoundMarker(tombstone.start.bound, tombstone.deletionTime);
                }

                Iterator<LegacyLayout.LegacyRangeTombstone> iter = openTombstones.iterator();
                LegacyLayout.LegacyRangeTombstone first = iter.next();
                if (tombstone.deletionTime.supersedes(first.deletionTime))
                {
                    // We're superseding the currently open tombstone, so we should produce a boundary that closes the currently open
                    // one and opens the new one. We should also add the tombstone, but if it stops after the first one, we should
                    // also remove that first tombstone as it won't be useful anymore.
                    if (metadata.comparator.compare(tombstone.stop.bound, first.stop.bound) >= 0)
                        iter.remove();

                    openTombstones.add(tombstone);
                    return RangeTombstoneBoundaryMarker.makeBoundary(false, tombstone.start.bound.invert(), tombstone.start.bound, first.deletionTime, tombstone.deletionTime);
                }
                else
                {
                    // If the new tombstone doesn't supersede the currently open tombstone, we don't have anything to return, we
                    // just add the new tombstone (because we know tombstone is not fully shadowed, this implies the new tombstone
                    // simply extends after the first one and we'll deal with it later)

                    // changed scylla-enterprise #103. This assert seems wrong. An extending tombstone should be valid
                    // with an equal comparison here as well (strictly speaking not, but the comparator does not
                    // care (enough) about inclusive/exclusive)...
                    //assert metadata.comparator.compare(tombstone.start.bound, first.stop.bound) > 0;

                    // See above. We need to deal with "extending" TS that have connecting bounds.
                    // These should be treated similarly to above, i.e. remove the currently pending
                    // tombstone (which we assume generated either RangeTombstoneBoundMarker or RangeTombstoneBoundaryMarker)
                    if (metadata.comparator.compare(tombstone.start.bound, first.stop.bound) == 0)
                    {
                        iter.remove();
                    }
                    openTombstones.add(tombstone);
                    return null;
                }
            }

            public boolean hasOpenTombstones()
            {
                return !openTombstones.isEmpty();
            }

            // Whether a closing tombstone and an opening one share the exact same bound
            // (and would thus form a boundary marker). NOTE(review): currently unused in
            // this file — possibly kept for callers elsewhere; confirm before removing.
            private boolean formBoundary(LegacyLayout.LegacyRangeTombstone close, LegacyLayout.LegacyRangeTombstone open)
            {
                return metadata.comparator.compare(close.stop.bound, open.start.bound) == 0;
            }

            public void clearState()
            {
                openTombstones.clear();
            }
        }
    }
}