/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.usergrid.persistence.graph.serialization.impl.shard.impl;


import java.util.Collections;
import java.util.Iterator;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.usergrid.persistence.core.consistency.TimeService;
import org.apache.usergrid.persistence.core.scope.ApplicationScope;
import org.apache.usergrid.persistence.core.util.ValidationUtils;
import org.apache.usergrid.persistence.graph.GraphFig;
import org.apache.usergrid.persistence.graph.MarkedEdge;
import org.apache.usergrid.persistence.graph.SearchByEdgeType;
import org.apache.usergrid.persistence.graph.exception.GraphRuntimeException;
import org.apache.usergrid.persistence.graph.serialization.impl.shard.*;
import org.apache.usergrid.persistence.graph.serialization.util.GraphValidation;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.inject.Inject;
import com.netflix.astyanax.MutationBatch;
import com.netflix.astyanax.connectionpool.exceptions.ConnectionException;
import com.netflix.astyanax.util.TimeUUIDUtils;


/**
 * Implementation of the node shard monitor and allocation
 */
public class NodeShardAllocationImpl implements NodeShardAllocation {

    private static final Logger logger = LoggerFactory.getLogger( NodeShardAllocationImpl.class );

    private final EdgeShardSerialization edgeShardSerialization;
    private final EdgeColumnFamilies edgeColumnFamilies;
    private final ShardedEdgeSerialization shardedEdgeSerialization;
    private final TimeService timeService;
    private final GraphFig graphFig;
    private final ShardGroupCompaction shardGroupCompaction;
    private final NodeShardCache nodeShardCache;


    @Inject
    public NodeShardAllocationImpl( final EdgeShardSerialization edgeShardSerialization,
                                    final EdgeColumnFamilies edgeColumnFamilies,
                                    final ShardedEdgeSerialization shardedEdgeSerialization,
                                    final TimeService timeService, final GraphFig graphFig,
                                    final ShardGroupCompaction shardGroupCompaction,
                                    final NodeShardCache nodeShardCache ) {
        this.edgeShardSerialization = edgeShardSerialization;
        this.edgeColumnFamilies = edgeColumnFamilies;
        this.shardedEdgeSerialization = shardedEdgeSerialization;
        this.timeService = timeService;
        this.graphFig = graphFig;
        this.shardGroupCompaction = shardGroupCompaction;
        this.nodeShardCache = nodeShardCache;
    }


    @Override
    public Iterator<ShardEntryGroup> getShards( final ApplicationScope scope,
                                                final DirectedEdgeMeta directedEdgeMeta ) {

        ValidationUtils.validateApplicationScope( scope );
        GraphValidation.validateDirectedEdgeMeta( directedEdgeMeta );

        Iterator<Shard> existingShards;

        //it's a new node, so we don't need to check Cassandra; no shards can exist for it yet
        if ( isNewNode( directedEdgeMeta ) ) {
            existingShards = Collections.singleton( Shard.MIN_SHARD ).iterator();
        }
        else {
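            //existing node: read any shard metadata previously persisted for this edge meta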
            existingShards = edgeShardSerialization.getShardMetaData( scope, Optional.absent(), directedEdgeMeta );

            /**
             * We didn't get anything out of Cassandra, so we need to create the minimum shard
             */
            if ( existingShards == null || !existingShards.hasNext() ) {

                final MutationBatch batch =
                        edgeShardSerialization.writeShardMeta( scope, Shard.MIN_SHARD, directedEdgeMeta );
                try {
                    batch.execute();
                }
                catch ( ConnectionException e ) {
                    throw new RuntimeException( "Unable to connect to Cassandra", e );
                }

                existingShards = Collections.singleton( Shard.MIN_SHARD ).iterator();
            }
        }

        return new ShardEntryGroupIterator( existingShards, graphFig.getShardMinDelta(), shardGroupCompaction, scope,
                directedEdgeMeta );
    }


    @Override
    public boolean auditShard( final ApplicationScope scope, final ShardEntryGroup shardEntryGroup,
                               final DirectedEdgeMeta directedEdgeMeta ) {

        ValidationUtils.validateApplicationScope( scope );
        GraphValidation.validateShardEntryGroup( shardEntryGroup );
        GraphValidation.validateDirectedEdgeMeta( directedEdgeMeta );

        Preconditions.checkNotNull( shardEntryGroup, "shardEntryGroup cannot be null" );

        /**
         * Nothing to do: the group is still compacting, so we don't create a new shard
         */
        if ( shardEntryGroup.isCompactionPending() ) {
            if ( logger.isTraceEnabled() ) {
                logger.trace( "Shard entry group {} is compacting, not auditing", shardEntryGroup );
            }
            return false;
        }

        //we can't allocate: we currently have more than 1 write shard, and we need to compact first
        if ( shardEntryGroup.entrySize() != 1 ) {
            if ( logger.isTraceEnabled() ) {
                logger.trace( "Shard entry group {} does not have 1 entry, not allocating", shardEntryGroup );
            }
            return false;
        }

        /**
         * Check the min shard in our system
         */
        final Shard shard = shardEntryGroup.getMinShard();

        final long minTime = getMinTime();

        if ( shard.getCreatedTime() >= minTime ) {
            if ( logger.isTraceEnabled() ) {
                logger.trace( "Shard {} in shard entry group {} was created after the minimum created time of {}. "
                        + "Not allocating", shard, shardEntryGroup, minTime );
            }
            return false;
        }

        /**
         * Check whether this shard holds enough edges to warrant an allocation
         */
        final long shardSize = graphFig.getShardSize();

        /**
         * We want to allocate a new shard as close to the max value as possible. This way, if we're filling up a
         * shard rapidly, we split it near the head of the values. Further checks on this group will result in more
         * splits, similar to building a tree and splitting each node.
         *
         * This also means the lower shard can be re-split later if it is still too large. The pivot selection below
         * truncates to a split point below our current max, approximately the pivot we would reach if we split from
         * the lower bound and moved forward. Doing this stops the current shard from expanding and avoids a state
         * where we can never compact down to the correct shard size.
         */

        /**
         * Allocate the shard
         */
        final Iterator<MarkedEdge> edges = directedEdgeMeta
                .loadEdges( shardedEdgeSerialization, edgeColumnFamilies, scope, Collections.singletonList( shard ),
                        0, SearchByEdgeType.Order.ASCENDING );

        if ( !edges.hasNext() ) {
            if ( logger.isTraceEnabled() ) {
                logger.trace( "Tried to allocate a new shard for edge meta data {}, but no max value could be found "
                        + "in that row", directedEdgeMeta );
            }
            return false;
        }

        MarkedEdge marked = null;

        /**
         * Advance to the pivot point we should use. Once the group is compacted, we can split again.
         * We either take the first edge (unlikely) or our total count minus the shard size.
         * If that difference is negative, we're approaching our max count for this shard, so the first
         * element will suffice.
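         *
         * As a concrete illustration (with a hypothetical shard size of 10,000): for a row holding 25,000 edges,
         * the loop below keeps the edges at positions 10,000 and 20,000, and the last one kept (position 20,000)
         * becomes the pivot, so at most one shard's worth of edges remains above it.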
         */
        long edgeCount = 0;

        for ( long i = 1; edges.hasNext(); i++ ) {

            //we hit a pivot position; keep this edge, since it could be the last pivot we encounter
            if ( i % shardSize == 0 ) {
                marked = edges.next();
            }
            else {
                edges.next();
            }

            edgeCount++;
        }

        /**
         * Sanity check in case we audit before we have a full shard
         */
        if ( marked == null ) {
            if ( logger.isTraceEnabled() ) {
                logger.trace( "Shard {} in shard group {} not full, not splitting. Edge count: {}", shard,
                        shardEntryGroup, edgeCount );
            }
            return false;
        }

        final long createTimestamp = timeService.getCurrentTime();

        final Shard newShard = new Shard( marked.getTimestamp(), createTimestamp, false );

        if ( logger.isTraceEnabled() ) {
            logger.trace( "Allocating new shard {} for edge meta {}", newShard, directedEdgeMeta );
        }

        final MutationBatch batch = this.edgeShardSerialization.writeShardMeta( scope, newShard, directedEdgeMeta );

        try {
            batch.execute();

            if ( logger.isTraceEnabled() ) {
                logger.trace( "Clearing shard cache" );
            }

            //invalidate the shard cache so we can be sure that all read shards are up to date
            nodeShardCache.invalidate( scope, directedEdgeMeta );
        }
        catch ( ConnectionException e ) {
            throw new RuntimeException( "Unable to connect to Cassandra", e );
        }

        return true;
    }


    @Override
    public long getMinTime() {

        final long minimumAllowed = ( long ) ( 2.5 * graphFig.getShardCacheTimeout() );

        final long minDelta = graphFig.getShardMinDelta();

        if ( minDelta < minimumAllowed ) {
            throw new GraphRuntimeException( String.format(
                    "You must configure the property %s to be >= 2.5 x %s. Otherwise you risk losing data",
                    GraphFig.SHARD_MIN_DELTA, GraphFig.SHARD_CACHE_TIMEOUT ) );
        }

        return timeService.getCurrentTime() - minDelta;
    }


    /**
     * Return true if the node was created within our cache timeout. If so, we don't need to check Cassandra;
     * we know no shards can exist for it yet.
     */
    private boolean isNewNode( DirectedEdgeMeta directedEdgeMeta ) {

        //The timeout is in milliseconds, while a time UUID tick is 100 nanoseconds (1/10000 of a milli), so we
        //rely on TimeUUIDUtils to convert the UUID timestamp into milliseconds before comparing
        final long timeoutDelta = graphFig.getShardCacheTimeout();

        final long timeNow = timeService.getCurrentTime();

        boolean isNew = true;

        for ( DirectedEdgeMeta.NodeMeta node : directedEdgeMeta.getNodes() ) {

            //short circuit if a previous node was old, or if this id is not a type 1 (time-based) UUID
            if ( !isNew || node.getId().getUuid().version() != 1 ) {
                return false;
            }

            final long uuidTime = TimeUUIDUtils.getTimeFromUUID( node.getId().getUuid() );

            final long newExpirationTimeout = uuidTime + timeoutDelta;

            //the node's creation time plus the timeout is still in the future, so treat it as new
            isNew = isNew && newExpirationTimeout > timeNow;
        }

        return isNew;
    }
}