BTreeMetadata.java example

Explorer
blazegraph-master
- database-master
package com.bigdata.resources;

import java.lang.ref.SoftReference;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.log4j.Logger;

import com.bigdata.btree.BTree;
import com.bigdata.btree.BTreeCounters;
import com.bigdata.btree.ILocalBTreeView;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.IndexSegment;
import com.bigdata.journal.AbstractJournal;
import com.bigdata.mdi.IResourceMetadata;
import com.bigdata.mdi.LocalPartitionMetadata;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.resources.StoreManager.ManagedJournal;

/**
 * Class encapsulates a bunch of metadata used to make decisions about how to
 * handle an index partition during asynchronous overflow.
 * <p>
 * Note: This class uses {@link SoftReference}s to hold onto the mutable
 * {@link BTree}. The {@link SoftReference} was chosen because it is important
 * to keep these {@link BTree}s open so that we do not loose their buffers
 * until we have finish asynchronous overflow for a given {@link BTree}. Once
 * asynchronous overflow processing is complete for the {@link BTree} you SHOULD
 * use {@link #clearRef()} to release your hold those buffers.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
class BTreeMetadata {

    static protected final Logger log = Logger.getLogger(BTreeMetadata.class);
    
    /**
     * The object which may be used to (re-)open the {@link BTree} and the index
     * partition view.
     */
    protected final ResourceManager resourceManager;

    /**
     * The commit time associated with the {@link BTree} and the index partition
     * view.
     */
    public final long commitTime;
    
    /**
     * Name of the local index (the index partition name).
     */
    public final String name;
    
    /**
     * A {@link SoftReference} is used to cache the {@link BTree} reference
     * since we really want to hold onto the reference until we get around to
     * finishing overflow processing for this index partition. However, you
     * SHOULD clear the reference using {@link #clearRef()} as soon as you
     * have handled asynchronous overflow for this view.
     */
    private volatile SoftReference<BTree> ref;

    /**
     * Open the mutable {@link BTree}. The {@link BTree} reference is cached by
     * a {@link SoftReference}. If the reference has been cleared then the
     * {@link BTree} is re-opened from the backing journal.
     */
    final public BTree getBTree() {

        // double checked locking.
        BTree btree = ref == null ? null : ref.get();

        if (btree == null) {

            synchronized (this) {

                btree = ref == null ? null : ref.get();

                /*
                 * The mutable btree on the journal associated with the
                 * commitTime, not the full view of that index.
                 */

                final AbstractJournal store = resourceManager
                        .getJournal(commitTime);

                btree = (BTree) resourceManager.getIndexOnStore(name,
                        commitTime, store);

                if (btree == null)
                    throw new IllegalArgumentException();

                ref = new SoftReference<BTree>(btree);

            }
            
        }

        return btree;

    }

    /**
     * Release the {@link SoftReference} for the {@link BTree}.
     */
    public void clearRef() {
        
        synchronized(this) {
            
            if(ref != null) { 
                
                ref.clear();
        
            }

        }
        
    }

    public final IndexMetadata indexMetadata;
    
    public final LocalPartitionMetadata pmd;
    
    /**
     * The #of journals and index segments in the view.
     */
    public final int sourceCount;
    
    /**
     * The #of journals in the view.
     */
    public final int sourceJournalCount;

    /**
     * The #of index segments in the view.
     */
    public final int sourceSegmentCount;

    /**
     * The sum of the size on disk across the index segments in the view.
     */
    public final long sumSegBytes;

    /**
     * These constants are used to compute the {@link #mergePriority}.
     * 
     * See src/architecture/mergePriority.xls.
     */
    final private int A = 3, B = 1;//, C = 10;

    /**
     * This is the inverse of the {@link #mergePriority}. If the merge priority
     * is ZERO (0), then this is 1.0 (which is greater than the build priority
     * which associated with any index partition with a non-zero merge
     * priority).
     */
    public final double buildPriority;

    /**
     * The computed merge priority is based on the complexity of the view. This
     * is ZERO (0) if there is no reason to perform a merge.
     */
    public final double mergePriority;

//    /**
//     * The split priority is based solely on {@link #sumSegBytes} (it is the
//     * ratio of that value to the nominal shard size) and is ZERO (0) until
//     * {@link #sumSegBytes} is GTE the {@link OverflowManager#nominalShardSize}.
//     *
//     * @deprecated This did not consider the adjusted nominal shard size.
//     */
//    public final double splitPriority;

    /**
     * <code>true</code> iff this index partition meets the criteria for a
     * mandatory compacting merge (too many journals in the view, too many index
     * segments in the view, or too many sources in the view).
     * 
     * @deprecated by {@link #mergePriority}.
     *             <p>
     *             Note: This field can be dropped once we are running split,
     *             tailSplit, and scatterSplit as merge after actions. At the
     *             same time, make sure that those tasks will not accept a view
     *             unless it is a {@link #compactView}.
     */
    public final boolean mandatoryMerge;

    /**
     * <code>true</code> iff there are two sources in the view and the second
     * source is an {@link IndexSegment} (the first will be the {@link BTree} on
     * some {@link ManagedJournal}).
     */
    public final boolean compactView;
    
    /**
     * The entry count for the {@link BTree} itself NOT the view.
     */
    public final long entryCount;

    /**
     * The counters for the index partition view.
     */
    public final BTreeCounters btreeCounters;
    
    /**
     * The percentage of leaf splits which occurred at or near the head of the
     * {@link BTree}.
     */
    public final double percentHeadSplits;

    /**
     * The percentage of leaf splits which occurred at or near the tail of the
     * {@link BTree}.
     */
    public final double percentTailSplits;

    /**
     * A package private lock used to ensure that decisions concerning which
     * index partition operation (build, merge, split, etc) to execute are
     * serialized.
     * 
     * @see OverflowMetadata#setAction(String, OverflowActionEnum)
     */
    final ReentrantLock lock = new ReentrantLock();

    /**
     * The action taken and <code>null</code> if no action has been taken for
     * this local index.
     * <p>
     * Note: An {@link AtomicReference} is used to permit inspection of the
     * value without holding the {@link #lock}. If you want to make an atomic
     * decision based on this value, then make sure that you are holding the
     * {@link #lock} before you look at the value.
     */
    private final AtomicReference<OverflowActionEnum> actionRef = new AtomicReference<OverflowActionEnum>();

    /**
     * The action taken and <code>null</code> if no action has been taken for
     * this local index (non-blocking). 
     * <p>
     * Note: An {@link AtomicReference} is used to permit inspection of the
     * value without holding the {@link #lock}. If you want to make an atomic
     * decision based on this value, then make sure that you are holding the
     * {@link #lock} before you look at the value.
     */
    public OverflowActionEnum getAction() {
        
        return actionRef.get();
        
    }

    /**
     * Set the action to be taken.
     * 
     * @param action
     *            The action.
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     * @throws IllegalMonitorStateException
     *             unless the {@link #lock} is held by the caller.
     * @throws IllegalStateException
     *             if the action has already been set.
     */
    public void setAction(final OverflowActionEnum action) {
        
        if(action == null)
            throw new IllegalArgumentException();

        if (!lock.isHeldByCurrentThread())
            throw new IllegalMonitorStateException();

        if (actionRef.get() != null) {

            throw new IllegalStateException("Already set: " + actionRef.get()
                    + ", given=" + action);
            
        }
        
        actionRef.set(action);

    }

    /**
     * Used to force clear a {@link OverflowActionEnum#Copy} action
     * when we will force a compacting merge.  This allows us to do
     * compacting merges on shard views which would otherwise simply
     * be copied onto the new journal.
     */
    void clearCopyAction() {

	lock.lock();
	try {
	    if(actionRef.get().equals(OverflowActionEnum.Copy)) {
		actionRef.set(null/*clear*/);
	    }
	} finally {
	    lock.unlock();
	}

    }
    
    /**
     * 
     * @param resourceManager
     *            Used to (re-)open the {@link BTree} as necessary.
     * @param commitTime
     *            The commit time corresponding to the desired commit point.
     * @param name
     *            The name of the {@link BTree}.
     * @param btreeCounters
     *            The aggregated counters for the {@link BTree} or the
     *            {@link ILocalBTreeView index partition view} as reported by
     *            the {@link IndexManager}
     */
    public BTreeMetadata(final ResourceManager resourceManager,
            final long commitTime, final String name,
            final BTreeCounters btreeCounters) {

        if (resourceManager == null)
            throw new IllegalArgumentException();

        if (name == null)
            throw new IllegalArgumentException();

        if (btreeCounters == null)
            throw new IllegalArgumentException();
        
        this.resourceManager = resourceManager;

        this.commitTime = commitTime;
        
        this.name = name;

        // eager resolution to put a SoftReference in place.
        final BTree btree = getBTree();

        // index metadata for that index partition.
        indexMetadata = btree.getIndexMetadata();

        // index partition metadata
        pmd = indexMetadata.getPartitionMetadata();

        if (pmd == null)
            log.warn("Not an index partition: " + name);

        // #of sources in the view (very fast).
        int sourceCount = 0, sourceJournalCount = 0, sourceSegmentCount = 0;
        long sumSegBytes = 0L;
        boolean secondSourceIsSeg = false;
        if (pmd != null) {
            for (IResourceMetadata x : pmd.getResources()) {
                sourceCount++;
                if (x.isJournal()) {
                    sourceJournalCount++;
                } else {
                    sourceSegmentCount++;
                    /*
                     * Note: This opens the backing segment store in order to
                     * determine its size on the disk. This is a fairly light
                     * weight operation (the nodes region is not read, just the
                     * checkpoint record and the IndexMetadata record).
                     * 
                     * Note: This does not use IResourceMetadata#getFile() to
                     * determine the size of the file in the backing file system
                     * because it is not an absolute file path.
                     */
                    final IRawStore store = resourceManager.openStore(x.getUUID());
                    sumSegBytes += store.size();//new File(x.getFile()).length();
                    if (sourceCount == 2) {
                        secondSourceIsSeg = true;
                    }
                }
            }
        }
        this.sourceCount = sourceCount;
        this.sourceJournalCount = sourceJournalCount;
        this.sourceSegmentCount = sourceSegmentCount;
        this.sumSegBytes = sumSegBytes;
        this.compactView = sourceCount == 2 && secondSourceIsSeg;

        if (sourceJournalCount + sourceSegmentCount < 2) {
            /*
             * Nothing to merge. The build priority is 1.0 for this case. The
             * maximum build priority for an index partition with a non-zero
             * mergePriority will always be less than one.
             */
            this.mergePriority = 0d;
            this.buildPriority = 1d;
        } else {
            /*
             * Compute a score that will be used to prioritize compacting merges
             * vs builds for index partitions where either option is allowable.
             * The higher the score, the more we want to make sure that we do a
             * compacting merge for that index.
             * 
             * Note: The main purpose of an index partition build is to convert
             * from a write-order to a read-order and permit the release of the
             * old journal. However, applications which require frequent access
             * to historical commit points on the old journals will continue to
             * rely on the write-order journals.
             * 
             * Note: I have removed the sumSegBytes term from this formula since
             * that would tend to cause the priority of merge to increase for a
             * view until a split is performed, with the likelihood that
             * repeated merges would be performed for the same view just when it
             * is nearing its largest extent. Instead I have modified the
             * formula to consider only the view complexity for merge.
             * 
             * @todo if the application requires access to modest amounts of
             * history then consider a policy where the buffers are retained for
             * old journals up to the minReleaseAge. Of course, this can run
             * into memory constraints so that needs to be traded off against
             * IOWAIT.
             */
            this.mergePriority = (sourceJournalCount - 1) * A
                    + (sourceSegmentCount * B)
                    //+ ((sumSegBytes / resourceManager.nominalShardSize) * C)
                    ;
            this.buildPriority = 1. / mergePriority;
        }

//        /*
//         * The splitPriority considers only sumSegBytes.
//         * 
//         * @todo This does not consider the adjustedNominalShardSize.
//         */
//        this.splitPriority = (sumSegBytes < resourceManager.nominalShardSize) ? 0
//                : (sumSegBytes / (double) resourceManager.nominalShardSize);

        this.mandatoryMerge //
            =  sourceJournalCount >= resourceManager.maximumJournalsPerView //
            || sourceSegmentCount >= resourceManager.maximumSegmentsPerView //
        ;
        
        // BTree's directly maintained entry count (very fast).
        this.entryCount = btree.getEntryCount();

        this.btreeCounters = btreeCounters;

        // Note: +1 in the denominator to avoid divide by zero.
        this.percentHeadSplits = btreeCounters.headSplit
                / (btreeCounters.leavesSplit + 1d);

        // Note: +1 in the denominator to avoid divide by zero.
        this.percentTailSplits = btreeCounters.tailSplit
                / (btreeCounters.leavesSplit + 1d);

    }
        
    public String toString() {

        final StringBuilder sb = new StringBuilder();

        sb.append("name=" + name);

        sb.append(", action=" + actionRef.get());
        
        sb.append(", entryCount=" + entryCount);

        sb.append(", sumSegBytes=" + sumSegBytes);

        sb.append(", mergePriority=" + mergePriority);

//        sb.append(", splitPriority=" + splitPriority);

        sb.append(", manditoryMerge=" + mandatoryMerge);
        
        sb.append(", sourceCounts=" + "{all=" + sourceCount + ",journals="
                + sourceJournalCount + ",segments=" + sourceSegmentCount + "}");
        
        sb.append(", #leafSplit=" + btreeCounters.leavesSplit);
        
        sb.append(", #headSplit=" + btreeCounters.headSplit);
        
        sb.append(", #tailSplit=" + btreeCounters.tailSplit);

        sb.append(", percentHeadSplits=" + percentHeadSplits);

        sb.append(", percentTailSplits=" + percentTailSplits);
        
        toString(sb);

        return sb.toString();

    }

    /**
     * Permits extension of {@link #toString()} in subclass.
     * 
     * @param sb
     */
    protected void toString(final StringBuilder sb) {
        
        // NOP
        
    }
    
}