/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.btree;

import com.bigdata.btree.data.IAbstractNodeData;
import com.bigdata.btree.data.ILeafData;
import com.bigdata.rawstore.IRawStore;

/**
 * Class reports various summary statistics for nodes and leaves.
 */
abstract public class PageStats extends BaseIndexStats {

    /** Number of nodes/leaves visited so far. */
    public long nvisited;

    /** The #of bytes in the raw records for the nodes visited. */
    public long nodeBytes;

    /** The #of bytes in the raw records for the leaves visited. */
    public long leafBytes;

    /** The min/max bytes per node. */
    public long minNodeBytes, maxNodeBytes;

    /** The min/max bytes per leaf. */
    public long minLeafBytes, maxLeafBytes;

    /**
     * Histogram of the allocation slot sizes based on {@link #SLOT_SIZES}. The
     * indices into this array are correlated with the indices into the
     * {@link #SLOT_SIZES} array. If the allocation is larger than the maximum
     * value in {@link #SLOT_SIZES}, then it is recorded in {@link #blobs}
     * instead.
     */
    public final long[] histogram;

    /**
     * The #of allocations that are larger than the maximum slot size in
     * {@link #SLOT_SIZES}.
     */
    public long blobs;

    /** The #of errors encountered during traversal. */
    public long nerrors;

    /**
     * The allocation slot sizes used to report the {@link #histogram} of pages
     * based on the actual byte count of the user data in an allocation when
     * the backing slot size is not directly available. Allocations larger than
     * the maximum slot size in this array are reported under {@link #blobs}.
     */
    public static final int[] SLOT_SIZES = new int[] { 64, 128, 192, 320, 512,
            768, 1024, 2048, 3072, 4096, 8192 };

    /**
     * The number of raw record allocations and the byte size of those raw
     * record allocations.
     *
     * TODO We could also use a histogram over this information (raw record
     * sizes).
     */
    public long nrawRecs = 0, rawRecBytes;

    public PageStats() {

        histogram = new long[SLOT_SIZES.length];

    }

    /**
     * Track the histogram of allocation sizes.
     *
     * @param allocationSize
     *            The size of some allocation.
     *
     * @see #histogram
     * @see #blobs
     * @see #SLOT_SIZES
     */
    protected void trackSlotSize(final long allocationSize) {

        for (int i = 0; i < SLOT_SIZES.length; i++) {

            if (allocationSize <= SLOT_SIZES[i]) {

                histogram[i]++;

                return;

            }

        }

        blobs++;

    }

    /**
     * Return {@link #nodeBytes} plus {@link #leafBytes} plus
     * {@link #rawRecBytes}.
     */
    public long getTotalBytes() {

        return nodeBytes + leafBytes + rawRecBytes;

    }

    /** The average bytes per node. */
    public long getBytesPerNode() {

        return (nnodes == 0 ? 0 : nodeBytes / nnodes);

    }

    /** The average bytes per leaf. */
    public long getBytesPerLeaf() {

        return (nleaves == 0 ? 0 : leafBytes / nleaves);

    }

    /** The average bytes per raw record. */
    public long getBytesPerRawRecord() {

        return (nrawRecs == 0 ? 0 : rawRecBytes / nrawRecs);

    }

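    /*
     * Illustrative sketch (added commentary, not part of the original source):
     * trackSlotSize() records each allocation under the smallest entry of
     * SLOT_SIZES that can hold it. For example, a 200 byte page increments
     * histogram[3] (the 320 byte bucket), while a 10000 byte page exceeds the
     * largest slot size (8192) and increments blobs instead:
     *
     *   trackSlotSize(200);   // histogram[3]++ (slot size 320)
     *   trackSlotSize(10000); // blobs++
     */
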
    @Override
    public String toString() {

        final StringBuilder sb = new StringBuilder();

        sb.append(getClass().getName());
        sb.append("{indexType=" + indexType);
        sb.append(",m=" + m);
        sb.append(",nnodes=" + nnodes);
        sb.append(",nleaves=" + nleaves);
        sb.append(",nrawRecs=" + nrawRecs);
        sb.append(",nodeBytes=" + nodeBytes);
        sb.append(",minNodeBytes=" + minNodeBytes);
        sb.append(",maxNodeBytes=" + maxNodeBytes);
        sb.append(",leafBytes=" + leafBytes);
        sb.append(",minLeafBytes=" + minLeafBytes);
        sb.append(",maxLeafBytes=" + maxLeafBytes);
        sb.append(",rawRecBytes=" + rawRecBytes);
        sb.append(",bytesPerNode=" + getBytesPerNode());
        sb.append(",bytesPerLeaf=" + getBytesPerLeaf());
        sb.append(",bytesPerRawRec=" + getBytesPerRawRecord());
        sb.append(",nerrors=" + nerrors);

        final long npages = (nleaves + nnodes);

        for (int i = 0; i < SLOT_SIZES.length; i++) {

            final long slotsThisSize = histogram[i];

            final double percentSlotSize = ((double) slotsThisSize) / npages;

            sb.append(",slot_" + SLOT_SIZES[i] + "=" + round(percentSlotSize));

        }

        {
            final double percentBlobs = ((double) blobs) / npages;

            sb.append(",blobs=" + round(percentBlobs));
        }

        sb.append(",newM=" + getRecommendedBranchingFactor());

        sb.append("}");

        return sb.toString();

    }

    /**
     * Round the value to two decimal places.
     */
    private static double round(final double d) {

        return ((int) (100 * d)) / 100d;

    }

    /**
     * Return the header row for a table.
     *
     * @return The header row.
     */
    @Override
    public String getHeaderRow() {

        final StringBuilder sb = new StringBuilder();

        sb.append("name");
        sb.append('\t');
        sb.append("indexType");
        sb.append('\t');
        sb.append("m");
        sb.append('\t');
        sb.append("height");
        sb.append('\t');
        sb.append("nnodes");
        sb.append('\t');
        sb.append("nleaves");
        sb.append('\t');
        sb.append("nentries");
        sb.append('\t');
        sb.append("nrawRecs");
        sb.append('\t');
        sb.append("nerrors");
        sb.append('\t');
        sb.append("nodeBytes");
        sb.append('\t');
        sb.append("leafBytes");
        sb.append('\t');
        sb.append("rawRecBytes");
        sb.append('\t');
        sb.append("totalBytes");
        sb.append('\t');
        sb.append("avgNodeBytes");
        sb.append('\t');
        sb.append("avgLeafBytes");
        sb.append('\t');
        sb.append("avgRawRecBytes");
        sb.append('\t');
        sb.append("minNodeBytes");
        sb.append('\t');
        sb.append("maxNodeBytes");
        sb.append('\t');
        sb.append("minLeafBytes");
        sb.append('\t');
        sb.append("maxLeafBytes");

        // One column for each slot size in the histogram. The data are
        // written out as percentages.
        for (int i = 0; i < PageStats.SLOT_SIZES.length; i++) {
            sb.append('\t');
            sb.append(PageStats.SLOT_SIZES[i]);
        }

        // #of blob allocations.
        sb.append('\t');
        sb.append("blobs");

        // Recommended branching factor for the index.
        sb.append('\t');
        sb.append("newM");

        // Current branching factor.
        sb.append('\t');
        sb.append("curM");

        return sb.toString();

    }

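    /*
     * Illustrative sketch (assumption, not part of the original source): since
     * getHeaderRow() and getDataRow() emit tab-delimited columns, a report
     * over several indices can be assembled as follows, assuming a
     * hypothetical list "statsList" of already populated PageStats objects:
     *
     *   final StringBuilder report = new StringBuilder();
     *   report.append(statsList.get(0).getHeaderRow()).append('\n');
     *   for (PageStats s : statsList) {
     *       report.append(s.getDataRow()).append('\n');
     *   }
     */
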
    /**
     * Return a row of data for an index as aggregated by this
     * {@link PageStats} object.
     *
     * @see #getHeaderRow()
     */
    @Override
    public String getDataRow() {

        final PageStats stats = this;

        final StringBuilder sb = new StringBuilder();

        sb.append(name);
        sb.append('\t');
        sb.append(indexType);
        sb.append('\t');
        sb.append(stats.m);
        sb.append('\t');
        sb.append(stats.height);
        sb.append('\t');
        sb.append(stats.nnodes);
        sb.append('\t');
        sb.append(stats.nleaves);
        sb.append('\t');
        sb.append(stats.ntuples);
        sb.append('\t');
        sb.append(stats.nrawRecs);
        sb.append('\t');
        sb.append(stats.nerrors);
        sb.append('\t');
        sb.append(stats.nodeBytes);
        sb.append('\t');
        sb.append(stats.leafBytes);
        sb.append('\t');
        sb.append(stats.rawRecBytes);
        sb.append('\t');
        sb.append(stats.getTotalBytes());
        sb.append('\t');
        sb.append(stats.getBytesPerNode());
        sb.append('\t');
        sb.append(stats.getBytesPerLeaf());
        sb.append('\t');
        sb.append(stats.getBytesPerRawRecord());
        sb.append('\t');
        sb.append(stats.minNodeBytes);
        sb.append('\t');
        sb.append(stats.maxNodeBytes);
        sb.append('\t');
        sb.append(stats.minLeafBytes);
        sb.append('\t');
        sb.append(stats.maxLeafBytes);

        final long npages = (stats.nleaves + stats.nnodes);

        for (int i = 0; i < PageStats.SLOT_SIZES.length; i++) {
            sb.append('\t');
            final long slotsThisSize = stats.histogram[i];
            final double percentSlotSize = ((double) slotsThisSize) / npages;
            sb.append(percentSlotSize);
        }

        sb.append('\t');
        sb.append(((double) stats.blobs) / npages);

        sb.append('\t');
        sb.append(stats.getRecommendedBranchingFactor());

        sb.append('\t');
        sb.append(stats.m);

        return sb.toString();

    }

    /**
     * This computes the recommended branching factor for the index based on an
     * examination of the current branching factor, an assumed nominal page
     * size of 8k, the min, max, and average node and leaf sizes, and the
     * histogram of the allocation sizes for the index.
     *
     * @return The recommended branching factor.
     *
     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/592">
     *      Optimize RWStore allocator sizes </a>
     *
     *      TODO This says "branching factor", but {@link #m} is overloaded and
     *      is the branching factor for the BTree and the addressBits for the
     *      HTree. This method should be renamed (or moved into the concrete
     *      instances of this class).
     */
    abstract public int getRecommendedBranchingFactor();

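    /*
     * Illustrative sketch (assumption; the concrete subclasses define the
     * actual policy): one simple way to derive a recommended branching factor
     * is to scale the current branching factor towards the nominal 8k page
     * size using the average leaf size:
     *
     *   final long avgLeafBytes = getBytesPerLeaf();
     *   final int newM = avgLeafBytes == 0 ? m
     *           : (int) (m * (8192.0 / avgLeafBytes));
     */
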
    /**
     * Visit a node or leaf, updating the {@link PageStats}.
     * <p>
     * Note: This method MUST be extended to capture at least the
     * initialization of the {@link #ntuples}, {@link #nnodes},
     * {@link #nleaves}, and {@link #m} fields.
     *
     * @param ndx
     *            The index.
     * @param node
     *            A node or leaf in that index.
     */
    public void visit(final ISimpleTreeIndexAccess ndx,
            final IAbstractNodeData node) {

        final PageStats stats = this;

        if (stats.nvisited == 0) {

            stats.name = ((ICheckpointProtocol) ndx).getIndexMetadata()
                    .getName();

            stats.indexType = ((ICheckpointProtocol) ndx).getCheckpoint()
                    .getIndexType();

        }

        final IIdentityAccess po = (IIdentityAccess) node;

        if (po.isPersistent()) {

            /*
             * We can only report on storage for persistent nodes (those
             * associated with an address in a backing store). This test works
             * around a problem that would otherwise exist if you attempt to
             * collect the PageStats on an index that has not been
             * checkpointed.
             */

            final long addrSelf = po.getIdentity();

            final IRawStore store = ndx.getStore();

            final long nbytes = store.getByteCount(addrSelf);

            stats.trackSlotSize(nbytes);

            final boolean isLeaf = node.isLeaf();

            if (isLeaf) {

                stats.nleaves++;
                stats.leafBytes += nbytes;

                if (stats.minLeafBytes > nbytes || stats.minLeafBytes == 0)
                    stats.minLeafBytes = nbytes;

                if (stats.maxLeafBytes < nbytes)
                    stats.maxLeafBytes = nbytes;

                if (node instanceof ILeafData) {

                    final ILeafData data = (ILeafData) node;

                    if (data.hasRawRecords()) {

                        for (int i = 0; i < data.getKeys().size(); i++) {

                            final long rawAddr = data.getRawRecord(i);

                            if (rawAddr != IRawStore.NULL) {

                                stats.nrawRecs++;

                                stats.rawRecBytes += store
                                        .getByteCount(rawAddr);

                            }

                        }

                    }

                }

            } else {

                stats.nnodes++;
                stats.nodeBytes += nbytes;

                if (stats.minNodeBytes > nbytes || stats.minNodeBytes == 0)
                    stats.minNodeBytes = nbytes;

                if (stats.maxNodeBytes < nbytes)
                    stats.maxNodeBytes = nbytes;

            }

        } // if (isPersistent())

        stats.nvisited++;

    }

}
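
/*
 * Usage sketch (illustrative only; the traversal shown here is an assumption
 * and is not defined in this file): a caller typically instantiates a concrete
 * PageStats subclass, invokes visit(ndx, node) for each node and leaf reached
 * while scanning the index, and then reports the aggregated statistics:
 *
 *   final PageStats stats = ...; // concrete subclass for a BTree or HTree
 *   // for each node or leaf reached during a scan of ndx:
 *   //     stats.visit(ndx, node);
 *   System.out.println(stats.getHeaderRow());
 *   System.out.println(stats.getDataRow());
 */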