MarkovCostModel.java example

Explorer
h-store-master
package edu.brown.costmodel;

import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.collections15.map.ListOrderedMap;
import org.apache.log4j.Logger;
import org.voltdb.CatalogContext;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Statement;
import org.voltdb.types.QueryType;
import org.voltdb.utils.Pair;

import edu.brown.catalog.CatalogUtil;
import edu.brown.hstore.HStoreConstants;
import edu.brown.hstore.conf.HStoreConf;
import edu.brown.hstore.estimators.Estimate;
import edu.brown.hstore.estimators.markov.MarkovEstimate;
import edu.brown.hstore.estimators.markov.MarkovEstimator;
import edu.brown.hstore.estimators.markov.MarkovEstimatorState;
import edu.brown.hstore.txns.TransactionUtil;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.markov.EstimationThresholds;
import edu.brown.markov.MarkovGraph;
import edu.brown.markov.MarkovProbabilityCalculator;
import edu.brown.markov.MarkovUtil;
import edu.brown.markov.MarkovVertex;
import edu.brown.markov.containers.MarkovGraphsContainerUtil;
import edu.brown.markov.containers.MarkovGraphsContainer;
import edu.brown.profilers.ProfileMeasurement;
import edu.brown.statistics.ObjectHistogram;
import edu.brown.utils.ArgumentsParser;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.PartitionEstimator;
import edu.brown.utils.PartitionSet;
import edu.brown.utils.StringUtil;
import edu.brown.utils.ThreadUtil;
import edu.brown.workload.TransactionTrace;
import edu.brown.workload.Workload;
import edu.brown.workload.filters.Filter;

public class MarkovCostModel extends AbstractCostModel {
    private static final Logger LOG = Logger.getLogger(MarkovCostModel.class);
    private static final LoggerBoolean debug = new LoggerBoolean();
    private static final LoggerBoolean trace = new LoggerBoolean();
    static {
        LoggerUtil.attachObserver(LOG, debug, trace);
    }

    public enum MarkovOptimization {
        OP1_BASEPARTITION, OP2_PARTITIONS, OP3_ABORTS, OP4_FINISHED;

        private final Set<Penalty> penalties = new TreeSet<Penalty>();
    }

    /**
     * Cost Model Penalties
     */
    public enum Penalty {
        // ----------------------------------------------------------------------------
        // OP1_BASEPARTITION
        // ----------------------------------------------------------------------------
        /**
         * The most access partition is not the same as the PartitionEstimator's
         * base partition
         */
        WRONG_BASE_PARTITION(MarkovOptimization.OP1_BASEPARTITION, 0.5d),

        // ----------------------------------------------------------------------------
        // OP2_PARTITIONS
        // ----------------------------------------------------------------------------
        /**
         * The transaction did not declare it would read at a partition
         */
        MISSED_READ_PARTITION(MarkovOptimization.OP2_PARTITIONS, 0.5d),
        /**
         * The transaction did not declare it would write at a partition
         */
        MISSED_WRITE_PARTITION(MarkovOptimization.OP2_PARTITIONS, 0.5d),
        /**
         * The transaction said it was going to read at a partition but it never
         * did And it would have executed as single-partitioned if we didn't say
         * it was going to!
         */
        UNUSED_READ_PARTITION_SINGLE(MarkovOptimization.OP2_PARTITIONS, 0.5d),
        /**
         * The transaction said it was going to write at a partition but it
         * never did And it would have executed as single-partitioned if we
         * didn't say it was going to!
         */
        UNUSED_WRITE_PARTITION_SINGLE(MarkovOptimization.OP2_PARTITIONS, 0.5d),
        /**
         * The transaction said it was going to read at a partition but it never
         * did
         */
        UNUSED_READ_PARTITION_MULTI(MarkovOptimization.OP2_PARTITIONS, 0.1d),
        /**
         * The transaction said it was going to write at a partition but it
         * never did
         */
        UNUSED_WRITE_PARTITION_MULTI(MarkovOptimization.OP2_PARTITIONS, 0.1d),

        // ----------------------------------------------------------------------------
        // OP3_ABORTS
        // ----------------------------------------------------------------------------
        /**
         * The transaction is single-partitioned and it aborts when we predicted
         * that it wouldn't. Evan says that this is the worst!
         */
        MISSED_ABORT_SINGLE(MarkovOptimization.OP3_ABORTS, 1.0d),
        /**
         * The transaction is multi-partitioned and it aborts when we predicted
         * that it wouldn't.
         */
        MISSED_ABORT_MULTI(MarkovOptimization.OP3_ABORTS, 0.8d),
        /**
         * A transaction will never abort after a certain point in its
         * execution, but we failed to identify that we could have disabled undo
         * buffers
         */
        MISSED_NO_UNDO_BUFFER(MarkovOptimization.OP3_ABORTS, 0.25d),

        // ----------------------------------------------------------------------------
        // OP4_FINISHED
        // ----------------------------------------------------------------------------
        /**
         * The transaction goes back to read at a partition after it declared it
         * was done with it
         */
        RETURN_READ_PARTITION(MarkovOptimization.OP4_FINISHED, 0.25d),
        /**
         * The transaction goes back to write at a partition after it declared
         * it was done with it
         */
        RETURN_WRITE_PARTITION(MarkovOptimization.OP4_FINISHED, 0.25d),
        /**
         * The transaction is done with a partition but we don't identify it
         * until later in the execution path
         */
        LATE_DONE_PARTITION(MarkovOptimization.OP4_FINISHED, 0.05d), ;

        private final double cost;
        private final MarkovOptimization group;

        private Penalty(MarkovOptimization group, double cost) {
            this.group = group;
            this.group.penalties.add(this);
            this.cost = cost;
        }

        public double getCost() {
            return this.cost;
        }

        public MarkovOptimization getGroup() {
            return this.group;
        }
    }

    // ----------------------------------------------------------------------------
    // DATA MEMBERS
    // ----------------------------------------------------------------------------

    private final CatalogContext catalogContext;
    private final EstimationThresholds thresholds;
    private final MarkovEstimator t_estimator;
    private boolean force_full_comparison = false;
    private boolean force_regenerate_markovestimates = false;

    private ObjectHistogram<Procedure> fast_path_counter = new ObjectHistogram<Procedure>();
    private ObjectHistogram<Procedure> full_path_counter = new ObjectHistogram<Procedure>();

    // ----------------------------------------------------------------------------
    // INVOCATION DATA MEMBERS
    // ----------------------------------------------------------------------------

    /**
     * The list of penalties accrued for this transaction
     */
    private transient final List<Penalty> penalties = new ArrayList<Penalty>();

    private transient final PartitionSet done_partitions = new PartitionSet();
    private transient final ObjectHistogram<Integer> idle_partition_ctrs = new ObjectHistogram<Integer>();

    private transient final PartitionSet e_all_partitions = new PartitionSet();
    private transient final PartitionSet e_read_partitions = new PartitionSet();
    private transient final PartitionSet e_write_partitions = new PartitionSet();

    private transient final PartitionSet a_all_partitions = new PartitionSet();
    private transient final PartitionSet a_read_partitions = new PartitionSet();
    private transient final PartitionSet a_write_partitions = new PartitionSet();

    /**
     * Constructor
     * 
     * @param catalogContext
     * @param p_estimator
     */
    public MarkovCostModel(CatalogContext catalogContext, PartitionEstimator p_estimator, MarkovEstimator t_estimator, EstimationThresholds thresholds) {
        super(MarkovCostModel.class, catalogContext, p_estimator);
        this.catalogContext = catalogContext;
        this.thresholds = thresholds;
        this.t_estimator = t_estimator;

        assert (this.t_estimator != null) : "Missing TransactionEstimator";
    }

    /**
     * Get the penalties for the last TransactionTrace processed Not thread-safe
     * 
     * @return
     */
    protected List<Penalty> getLastPenalties() {
        return this.penalties;
    }

    protected PartitionSet getLastEstimatedAllPartitions() {
        return (this.e_all_partitions);
    }

    protected PartitionSet getLastEstimatedReadPartitions() {
        return (this.e_read_partitions);
    }

    protected PartitionSet getLastEstimatedWritePartitions() {
        return (this.e_write_partitions);
    }

    protected PartitionSet getLastActualAllPartitions() {
        return (this.a_all_partitions);
    }

    protected PartitionSet getLastActualReadPartitions() {
        return (this.a_read_partitions);
    }

    protected PartitionSet getLastActualWritePartitions() {
        return (this.a_write_partitions);
    }

    protected MarkovCostModel forceFullPathComparison() {
        this.force_full_comparison = true;
        return (this);
    }

    protected MarkovCostModel forceRegenerateMarkovEstimates() {
        this.force_regenerate_markovestimates = true;
        return (this);
    }

    @Override
    public synchronized double estimateTransactionCost(CatalogContext catalogContext, Workload workload, Filter filter, TransactionTrace txn_trace) throws Exception {
        // Throw the txn at the estimator and let it come up with the initial
        // path estimation.
        // Now execute the queries and see what path the txn actually takes
        // I don't think it matters whether we do this in batches, but it
        // probably doesn't hurt
        // to do it right in case we need it later
        // At this point we know what the transaction actually would do using
        // the TransactionEstimator's
        // internal Markov models.
        MarkovEstimatorState s = this.t_estimator.processTransactionTrace(txn_trace);
        assert (s != null);
        Procedure catalog_proc = txn_trace.getCatalogItem(catalogContext.database);

        if (debug.val) {
            LOG.debug("Measuring MarkovEstimate Accuracy: " + txn_trace);
            if (trace.val) {
                LOG.trace("Estimated: " + ((MarkovEstimate)s.getInitialEstimate()).getMarkovPath());
                LOG.trace("Actual:    " + s.getActualPath());
            }
        }

        double cost = 0.0d;

        this.e_read_partitions.clear();
        this.e_write_partitions.clear();
        MarkovEstimate initialEst = s.getInitialEstimate();
        assert(initialEst != null);
        assert(initialEst.isInitialized());
        for (Integer partition : initialEst.getTouchedPartitions(this.thresholds)) {
            if (initialEst.isDonePartition(this.thresholds, partition.intValue()) == false) {
                for (MarkovVertex v : initialEst.getMarkovPath()) {
                    if (v.getPartitions().contains(partition) == false)
                        continue;
                    if (((Statement) v.getCatalogItem()).getReadonly()) {
                        this.e_read_partitions.add(partition);
                    } else {
                        this.e_write_partitions.add(partition);
                    }
                } // FOR
            }
        } // FOR

        List<MarkovVertex> initialPath = initialEst.getMarkovPath();
        List<MarkovVertex> actualPath = s.getActualPath();
        this.a_read_partitions.clear();
        this.a_write_partitions.clear();
        MarkovUtil.getReadWritePartitions(actualPath, this.a_read_partitions, this.a_write_partitions);

        // Try fast version
        try {
            if (this.force_full_comparison || !this.comparePathsFast(CollectionUtil.last(initialPath), actualPath)) {
                // Otherwise we have to do the full path comparison to figure
                // out just how wrong we are
                cost = this.comparePathsFull(s);
                this.full_path_counter.put(catalog_proc);
            } else {
                this.fast_path_counter.put(catalog_proc);
            }
        } catch (Throwable ex) {
            System.err.println(txn_trace.debug(catalogContext.database));
            System.err.println("COST = " + cost);
            System.err.println("BASE PARTITION = " + s.getBasePartition());
            System.err.println("PENALTIES = " + this.penalties);
            System.err.println("ESTIMATED PARTITIONS: " + this.e_all_partitions);
            System.err.println("ACTUAL PARTITIONS: " + this.a_all_partitions);
            System.err.println("MARKOV GRAPH: " + MarkovUtil.exportGraphviz(s.getMarkovGraph(), true, null).writeToTempFile(catalog_proc));
            System.err.println();

            String e_path = "ESTIMATED PATH:\n" + StringUtil.join("\n", initialEst.getMarkovPath());
            String a_path = "ACTUAL PATH:\n" + StringUtil.join("\n", s.getActualPath());
            System.err.println(StringUtil.columns(e_path, a_path));
            System.err.println("MARKOV ESTIMATE:\n" + s.getInitialEstimate());

            throw new RuntimeException(ex);
        }

        this.t_estimator.destroyEstimatorState(s);
        return (cost);
    }

    /**
     * Quickly compare the two paths and return true if they are similar enough
     * We don't care if they execute different queries just as long as the
     * read/write partitions match and that the estimated path either
     * commits/aborts as same as actual path
     * 
     * @param estimated
     * @param actual
     * @return
     */
    protected boolean comparePathsFast(List<MarkovVertex> estimated, List<MarkovVertex> actual) {
        if (trace.val)
            LOG.trace(String.format("Fast Path Compare: Estimated [size=%d] vs. Actual [size=%d]", estimated.size(), actual.size()));
        this.e_read_partitions.clear();
        this.e_write_partitions.clear();
        MarkovUtil.getReadWritePartitions(estimated, this.e_read_partitions, this.e_write_partitions);
        this.a_read_partitions.clear();
        this.a_write_partitions.clear();
        MarkovUtil.getReadWritePartitions(actual, this.a_read_partitions, this.a_write_partitions);
        return (this.comparePathsFast(CollectionUtil.last(estimated), actual));
    }

    /**
     * @param e_last
     * @param actual
     * @return
     */
    private boolean comparePathsFast(MarkovVertex e_last, List<MarkovVertex> actual) {
        // (1) Check that the MarkovEstimate's last state matches the actual
        // path (commit vs abort)
        assert (e_last != null);
        MarkovVertex a_last = CollectionUtil.last(actual);
        assert (a_last != null);
        assert (a_last.isEndingVertex());
        if (trace.val) {
            LOG.trace("Estimated Last Vertex: " + e_last);
            LOG.trace("Actual Last Vertex: " + a_last);
        }
        if (e_last.getType() != a_last.getType()) {
            return (false);
        }

        // (2) Check that the partitions that we predicted that the txn would
        // read/write are the same
        if (trace.val) {
            LOG.trace("Estimated Read Partitions:  " + this.e_read_partitions);
            LOG.trace("Estimated Write Partitions: " + this.e_write_partitions);
            LOG.trace("Actual Read Partitions:     " + this.a_read_partitions);
            LOG.trace("Actual Write Partitions:    " + this.a_write_partitions);
        }

        if (this.e_read_partitions.equals(this.a_read_partitions) == false ||
            this.e_write_partitions.equals(this.a_write_partitions) == false) {
            return (false);
        }

        // All clear!
        return (true);
    }

    /**
     * Calculate relative cost difference the estimated and actual execution
     * paths
     * 
     * @param estimated
     * @param actual
     * @return
     */
    protected double comparePathsFull(MarkovEstimatorState s) {
        if (debug.val)
            LOG.debug("Performing full comparison of Transaction #" + s.getTransactionId());

        this.penalties.clear();

        MarkovEstimate initialEst = s.getInitialEstimate();
        List<MarkovVertex> estimated = initialEst.getMarkovPath();
        this.e_all_partitions.clear();
        this.e_all_partitions.addAll(this.e_read_partitions);
        this.e_all_partitions.addAll(this.e_write_partitions);
        MarkovVertex e_last = CollectionUtil.last(estimated);
        assert (e_last != null);

        List<MarkovVertex> actual = s.getActualPath();
        this.a_all_partitions.clear();
        this.a_all_partitions.addAll(this.a_read_partitions);
        this.a_all_partitions.addAll(this.a_write_partitions);
        MarkovVertex a_last = CollectionUtil.last(actual);
        assert (a_last != null);
        assert (a_last.isEndingVertex());

        MarkovEstimate initial_est = s.getInitialEstimate();
        assert (initial_est != null);
        MarkovEstimate last_est = s.getLastEstimate();
        assert (last_est != null);
        MarkovGraph markov = s.getMarkovGraph();
        assert (markov != null);

        final int base_partition = s.getBasePartition();
        final int num_estimates = s.getEstimateCount();
        List<Estimate> estimates = null;

        // This is strictly for the paper so that we can show how slow it would
        // be to have calculate probabilities through a traversal for each batch
        if (this.force_regenerate_markovestimates) {
            if (debug.val) {
                String name = TransactionUtil.formatTxnName(markov.getProcedure(), s.getTransactionId());
                LOG.debug("Using " + MarkovProbabilityCalculator.class.getSimpleName() + " to calculate MarkoEstimates for " + name);
            }
            estimates = new ArrayList<Estimate>();
            for (Estimate e : s.getEstimates()) {
                MarkovEstimate est = (MarkovEstimate)e; 
                MarkovVertex v = est.getVertex();
                MarkovEstimate new_est = MarkovProbabilityCalculator.generate(this.catalogContext, markov, v);
                assert (new_est != null);
                estimates.add(est);
            } // FOR
        } else {
            estimates = s.getEstimates();
        }

        boolean e_singlepartitioned = initial_est.isSinglePartitioned(this.thresholds);
        boolean a_singlepartitioned = (this.a_all_partitions.size() == 1);

        boolean first_penalty = true;

        if (trace.val) {
            LOG.trace("Estimated Read Partitions:  " + this.e_read_partitions);
            LOG.trace("Estimated Write Partitions: " + this.e_write_partitions);
            LOG.trace("Actual Read Partitions:     " + this.a_read_partitions);
            LOG.trace("Actual Write Partitions:    " + this.a_write_partitions);
        }

        // ----------------------------------------------------------------------------
        // BASE PARTITION
        // ----------------------------------------------------------------------------
        PartitionSet most_touched = initial_est.getMostTouchedPartitions(this.thresholds);
        Integer e_base_partition = null;
        if (most_touched.size() > 1) {
            e_base_partition = CollectionUtil.random(most_touched);
        } else {
            e_base_partition = CollectionUtil.first(most_touched);
        }
        if (e_base_partition == null || e_base_partition != base_partition) {
            if (trace.val) {
                LOG.trace(String.format("Estimated base partition for txn #%d was %d but PartitionEstimator says it should be %d", s.getTransactionId(), e_base_partition, base_partition));
            }
            this.penalties.add(Penalty.WRONG_BASE_PARTITION);
            // assert(false) : e_base_partition + " != " + base_partition + "  "
            // + most_touched;
        }

        // ----------------------------------------------------------------------------
        // ABORTS
        // If the transaction was predicted to be single-partitioned and we
        // don't predict that it's going to
        // abort when it actually did, then that's bad! Really bad!
        // ----------------------------------------------------------------------------
        first_penalty = true;
        if (initial_est.isAbortable(this.thresholds) == false && a_last.isAbortVertex()) {
            if (trace.val) {
                if (first_penalty) {
                    LOG.trace("PENALTY: " + MarkovOptimization.OP3_ABORTS);
                    first_penalty = false;
                }
                LOG.trace(String.format("Txn #%d aborts but we predicted that it would never!", s.getTransactionId()));
            }
            this.penalties.add(a_singlepartitioned ? Penalty.MISSED_ABORT_SINGLE : Penalty.MISSED_ABORT_MULTI);
        }

        // For each MarkovEstimate, check whether there is a path in the graph
        // for the current vertex
        // to the abort state. If there isn't, then we need to check whether
        // This should match ExecutionSite.executeLocalPlan()
        MarkovVertex abort_v = markov.getAbortVertex();
        boolean last_hadAbortPath = true;
        first_penalty = true;
        for (Estimate e : estimates) {
            MarkovEstimate est = (MarkovEstimate)e;
            assert(est.isInitialized()) : "Uninitialized MarkovEstimate from " + s;
            MarkovVertex v = est.getVertex();
            assert (v != null) : "No vertex?\n" + est;
            boolean isAbortable = est.isAbortable(this.thresholds);
            boolean isReadOnly = est.isReadOnlyPartition(this.thresholds, base_partition);
            boolean hasAbortPath;
            synchronized (markov) {
                hasAbortPath = (markov.getPath(v, abort_v).isEmpty() == false);
            } // SYNCH

            // Make sure that we didn't have a path for the last MarkovEstimate
            // but
            // we somehow have one now
            if (hasAbortPath && last_hadAbortPath == false) {
                LOG.info("MARKOV: " + MarkovUtil.exportGraphviz(markov, false, markov.getPath(v, abort_v)).writeToTempFile());
                assert (last_hadAbortPath);
            }

            // If the path is not empty, then this txn could still abort
            if (hasAbortPath)
                continue;

            // Otherwise check whether our estimate still says to go with undo
            // buffers when
            // we're going to be read-only for the rest of the transaction
            // This is would be considered wasted work
            if (isAbortable && isReadOnly) {
                if (trace.val) {
                    if (first_penalty) {
                        LOG.trace("PENALTY: " + MarkovOptimization.OP3_ABORTS);
                        first_penalty = false;
                    }
                    LOG.trace(String.format("Txn #%d will never abort but we failed to disable undo buffers!", s.getTransactionId()));
                }
                this.penalties.add(Penalty.MISSED_NO_UNDO_BUFFER);
            }
            last_hadAbortPath = false;
        } // FOR

        // ----------------------------------------------------------------------------
        // MISSED PARTITIONS
        // The transaction actually reads/writes at more partitions than it originally predicted
        // This is expensive because it means that we have to abort+restart the txn
        // ----------------------------------------------------------------------------
        first_penalty = true;
        for (Integer p : this.a_read_partitions) {
            if (this.e_read_partitions.contains(p) == false) {
                if (trace.val) {
                    if (first_penalty) {
                        LOG.trace("PENALTY: " + MarkovOptimization.OP2_PARTITIONS);
                        first_penalty = false;
                    }
                    LOG.trace(String.format("Txn #%d failed to predict that it was READING at partition %d", s.getTransactionId(), p));
                }
                this.penalties.add(Penalty.MISSED_READ_PARTITION);
            }
        } // FOR
        for (Integer p : this.a_write_partitions) {
            if (this.e_write_partitions.contains(p) == false) {
                if (trace.val) {
                    if (first_penalty) {
                        LOG.trace("PENALTY: " + MarkovOptimization.OP2_PARTITIONS);
                        first_penalty = false;
                    }
                    LOG.trace(String.format("Txn #%d failed to predict that it was WRITING at partition %d", s.getTransactionId(), p));
                }
                this.penalties.add(Penalty.MISSED_WRITE_PARTITION);
            }
        } // FOR
        // if (this.penalties.size() > 0) {
        // LOG.info("Estimated Read Partitions:  " + this.e_read_partitions);
        // LOG.info("Estimated Write Partitions: " + this.e_write_partitions);
        // LOG.info("Actual Read Partitions:     " + this.a_read_partitions);
        // LOG.info("Actual Write Partitions:    " + this.a_write_partitions);
        //
        // LOG.info("IS ABORTABLE:    " +
        // initial_est.isAbortable(this.thresholds));
        // LOG.info("ABORT THRESHOLD: " + this.thresholds.getAbortThreshold());
        // LOG.info("Current State\n" + actual.get(1).debug());
        // LOG.info("MarkovEstimate\n" + initial_est.toString());
        // // LOG.info("GRAPH: " + MarkovUtil.exportGraphviz(s.getMarkovGraph(),
        // false, true, false, null).writeToTempFile());
        // System.exit(1);
        // }

        // ----------------------------------------------------------------------------
        // RETURN TO PARTITIONS
        // We declared that we were done at a partition but then later we
        // actually needed it. This can happen if there is a path that a has
        // very low probability of us taking it, but then ended up taking it anyway
        //
        // LATE FINISHED PARTITIONS
        // We keep track of the last batch round that we finished with a partition.
        // We then count how long it takes before we realize that we are finished.
        // We declare that the MarkovEstimate was late if we don't mark it as finished 
        // immediately in the next batch
        // ----------------------------------------------------------------------------
        first_penalty = true;
        boolean first_penalty5 = true;

        this.done_partitions.clear();
        int last_est_idx = 0;
        PartitionSet touched_partitions = new PartitionSet();
        PartitionSet new_touched_partitions = new PartitionSet();

        // Reset the idle counters
        this.idle_partition_ctrs.clear();

        for (int i = 0; i < num_estimates; i++) {
            MarkovEstimate est = (MarkovEstimate)estimates.get(i);
            MarkovVertex est_v = est.getVertex();

            // Get the path of vertices
            int start = last_est_idx;
            int stop = actual.indexOf(est_v);
            
            // So this is just a hack so that our test cases still work
            if (stop == -1) {
                LOG.warn("Failed to find MarkovVertex " + est_v + " in path!");
                continue;
            }
            assert(stop != -1);

            new_touched_partitions.clear();
            for (; start <= stop; start++) {
                MarkovVertex v = actual.get(start);
                assert (v != null);

                Statement catalog_stmt = v.getCatalogItem();
                QueryType qtype = QueryType.get(catalog_stmt.getQuerytype());
                Penalty ptype = (qtype == QueryType.SELECT ? Penalty.RETURN_READ_PARTITION : Penalty.RETURN_WRITE_PARTITION);
                for (Integer p : v.getPartitions()) {
                    // Check if we read/write at any partition that was
                    // previously declared as done
                    if (this.done_partitions.contains(p)) {
                        if (trace.val) {
                            if (first_penalty) {
                                LOG.trace("PENALTY: " + MarkovOptimization.OP4_FINISHED);
                                first_penalty = false;
                            }
                            LOG.trace(String.format("Txn #%d said that it was done at partition %d but it executed a %s", s.getTransactionId(), p, qtype.name()));
                        }
                        this.penalties.add(ptype);
                        this.done_partitions.remove(p);
                    }
                } // FOR
                new_touched_partitions.addAll(v.getPartitions());

                // For each partition that we don't touch here, we want to
                // increase their idle counter
                this.idle_partition_ctrs.put(this.catalogContext.getAllPartitionIds());
            } // FOR
            last_est_idx = stop;
            touched_partitions.addAll(new_touched_partitions);

            // This is the key part: We will only add a partition to our set of
            // "done" partitions if we touched it in the past. Otherwise, we will 
            // always mark every partition as done if there is a conditional clause
            // that causes the partition to get touched. This is because our initial 
            // estimation of what partitions we are done at will be based on the total
            // path estimation and not directly on the finished probabilities
            for (Integer finished_p : est.getDonePartitions(this.thresholds)) {
                if (touched_partitions.contains(finished_p)) {
                    // We are late with identifying that a partition is finished
                    // if it was idle for more than one batch round
                    if (this.idle_partition_ctrs.get(finished_p, 0) > 0) {
                        if (trace.val) {
                            if (first_penalty5) {
                                LOG.trace("PENALTY: " + MarkovOptimization.OP4_FINISHED);
                                first_penalty5 = false;
                            }
                            LOG.trace(String.format("Txn #%d kept partition %d idle for %d batch rounds before declaring it was done", s.getTransactionId(), finished_p,
                                    this.idle_partition_ctrs.get(finished_p)));
                        }
                        this.penalties.add(Penalty.LATE_DONE_PARTITION);
                        // Set it to basically negative infinity so that we are
                        // never penalized more than once for this partition
                        // FIXME this.idle_partition_ctrs.put(finished_p, Integer.MIN_VALUE);
                    }
                    if (this.done_partitions.contains(finished_p) == false) {
                        if (trace.val)
                            LOG.trace(String.format("Marking touched partition %d as finished for the first time in MarkovEstimate #%d", finished_p.intValue(), i));
                        this.done_partitions.add(finished_p);
                    }
                }
            } // FOR
        } // FOR

        // ----------------------------------------------------------------------------
        // UNUSED PARTITIONS
        // Check whether the transaction has declared that they would read/write
        // at a partition
        // but then they never actually did so
        // The penalty is higher if it was predicted as multi-partitioned but it
        // was actually single-partitioned
        // ----------------------------------------------------------------------------
        first_penalty = true;
        boolean could_be_singlepartitioned = (e_singlepartitioned == false && a_singlepartitioned == true);
        for (Integer p : this.e_read_partitions) {
            if (this.a_read_partitions.contains(p) == false) {
                if (trace.val) {
                    if (first_penalty) {
                        LOG.trace("PENALTY: " + MarkovOptimization.OP2_PARTITIONS);
                        first_penalty = false;
                    }
                    LOG.trace(String.format("Txn #%d predicted it would READ at partition %d but it never did", s.getTransactionId(), p));
                }
                this.penalties.add(could_be_singlepartitioned ? Penalty.UNUSED_READ_PARTITION_SINGLE : Penalty.UNUSED_READ_PARTITION_MULTI);
            }
        } // FOR
        for (Integer p : this.e_write_partitions) {
            if (this.a_write_partitions.contains(p) == false) {
                if (trace.val) {
                    if (first_penalty) {
                        LOG.trace("PENALTY: " + MarkovOptimization.OP2_PARTITIONS);
                        first_penalty = false;
                    }
                    LOG.trace(String.format("Txn #%d predicted it would WRITE at partition %d but it never did", s.getTransactionId(), p));
                }
                this.penalties.add(could_be_singlepartitioned ? Penalty.UNUSED_WRITE_PARTITION_SINGLE : Penalty.UNUSED_WRITE_PARTITION_MULTI);
            }
        } // FOR

        if (trace.val)
            LOG.info(String.format("Number of Penalties %d: %s", this.penalties.size(), this.penalties));
        double cost = 0.0d;
        for (Penalty p : this.penalties)
            cost += p.getCost();

        // if (this.penalties.isEmpty() == false) throw new
        // RuntimeException("Missed optimizations for " + s.getFormattedName());
        return (cost);
    }

    @Override
    public void clear(boolean force) {
        super.clear(force);
        this.fast_path_counter.clear();
        this.full_path_counter.clear();
        this.penalties.clear();
    }

    @Override
    public void invalidateCache(String catalogKey) {
        // Nothing...
    }

    @Override
    public void prepareImpl(CatalogContext catalogContext) {
        // This is the start of a new run through the workload, so we need to re-init 
        // our PartitionEstimator so that we are getting the proper catalog objects back
        this.p_estimator.initCatalog(catalogContext);
    }

    /**
     * @param args
     */
    @SuppressWarnings("unchecked")
    public static void main(String vargs[]) throws Exception {
        final ArgumentsParser args = ArgumentsParser.load(vargs);
        args.require(ArgumentsParser.PARAM_CATALOG,
                     ArgumentsParser.PARAM_MARKOV,
                     ArgumentsParser.PARAM_WORKLOAD,
                     ArgumentsParser.PARAM_MAPPINGS,
                     ArgumentsParser.PARAM_MARKOV_THRESHOLDS);
        HStoreConf.initArgumentsParser(args);
        final int num_partitions = args.catalogContext.numberOfPartitions;
        final int base_partition = (args.workload_base_partitions.size() == 1 ? CollectionUtil.first(args.workload_base_partitions) : HStoreConstants.NULL_PARTITION_ID);
        final int num_threads = ThreadUtil.getMaxGlobalThreads();
        final boolean stop_on_error = true;
        final boolean force_fullpath = true;
        final boolean force_regenerate = true;
        final boolean skip_processing = false;

        final ObjectHistogram<Procedure> total_h = new ObjectHistogram<Procedure>();
        final ObjectHistogram<Procedure> missed_h = new ObjectHistogram<Procedure>();
        final ObjectHistogram<Procedure> accurate_h = new ObjectHistogram<Procedure>();
        final ObjectHistogram<MarkovOptimization> optimizations_h = new ObjectHistogram<MarkovOptimization>();
        final ObjectHistogram<Penalty> penalties_h = new ObjectHistogram<Penalty>();
        final Map<Procedure, ObjectHistogram<MarkovOptimization>> proc_penalties_h = new ConcurrentHashMap<Procedure, ObjectHistogram<MarkovOptimization>>();

        final AtomicInteger total = new AtomicInteger(0);
        final AtomicInteger failures = new AtomicInteger(0);
        final List<Runnable> runnables = new ArrayList<Runnable>();
        final List<Thread> processing_threads = new ArrayList<Thread>();
        final AtomicInteger thread_finished = new AtomicInteger(0);

        // Only load the MarkovGraphs that we actually need
        final int num_transactions = args.workload.getTransactionCount();
        assert (num_transactions > 0) : "No TransactionTraces";
        final int marker = Math.max(1, (int) (num_transactions * 0.10));
        final Set<Procedure> procedures = args.workload.getProcedures(args.catalog_db);
        PartitionSet partitions = null;
        if (base_partition != HStoreConstants.NULL_PARTITION_ID) {
            partitions = new PartitionSet(base_partition);
        } else {
            partitions = args.catalogContext.getAllPartitionIds();
        }

        final File input_path = args.getFileParam(ArgumentsParser.PARAM_MARKOV);
        final Map<Integer, MarkovGraphsContainer> m = MarkovGraphsContainerUtil.load(args.catalogContext,
                                                                                     input_path, procedures, partitions);
        assert (m != null);
        final boolean global = m.containsKey(MarkovUtil.GLOBAL_MARKOV_CONTAINER_ID);
        final Map<Integer, MarkovGraphsContainer> thread_markovs[] = (Map<Integer, MarkovGraphsContainer>[]) new Map<?, ?>[num_threads];

        // If this is a GLOBAL model, load up a copy for each thread so that
        // there is no thread contention
        if (global && num_threads > 2) {
            LOG.info("Loading multiple copies of GLOBAL MarkovGraphsContainer");
            for (int i = 0; i < num_threads; i++) {
                final int thread_id = i;
                runnables.add(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            thread_markovs[thread_id] = MarkovGraphsContainerUtil.load(args.catalogContext,
                                                                                       input_path, procedures, null);
                        } catch (Throwable ex) {
                            throw new RuntimeException(ex);
                        }
                        assert (thread_markovs[thread_id].containsKey(MarkovUtil.GLOBAL_MARKOV_CONTAINER_ID));
                        LOG.info(String.format("Loading Thread Finished %d / %d", thread_finished.incrementAndGet(), num_threads));
                    }
                });
            } // FOR
            ThreadUtil.runNewPool(runnables, procedures.size());
            thread_finished.set(0);
            runnables.clear();
        } else {
            for (int i = 0; i < num_threads; i++) {
                thread_markovs[i] = m;
            } // FOR
        }

        final PartitionEstimator p_estimator = new PartitionEstimator(args.catalogContext);
        final MarkovCostModel thread_costmodels[][] = new MarkovCostModel[num_threads][num_partitions];
        final ProfileMeasurement profilers[] = new ProfileMeasurement[num_threads];
        final LinkedBlockingDeque<Pair<Integer, TransactionTrace>> queues[] = (LinkedBlockingDeque<Pair<Integer, TransactionTrace>>[]) new LinkedBlockingDeque<?>[num_threads];
        for (int i = 0; i < num_threads; i++) {
            profilers[i] = new ProfileMeasurement("ESTIMATION");
            queues[i] = new LinkedBlockingDeque<Pair<Integer, TransactionTrace>>();
        } // FOR
        LOG.info(String.format("Estimating the accuracy of the MarkovGraphs using %d transactions [threads=%d]", args.workload.getTransactionCount(), num_threads));
        LOG.info("THRESHOLDS: " + args.thresholds);

        // QUEUING THREAD
        final AtomicBoolean queued_all = new AtomicBoolean(false);
        runnables.add(new Runnable() {
            @Override
            public void run() {
                List<TransactionTrace> all_txns = new ArrayList<TransactionTrace>(args.workload.getTransactions());
                Collections.shuffle(all_txns);
                int ctr = 0;
                for (TransactionTrace txn_trace : all_txns) {
                    // Make sure it goes to the right base partition
                    int partition = HStoreConstants.NULL_PARTITION_ID;
                    try {
                        partition = p_estimator.getBasePartition(txn_trace);
                    } catch (Exception ex) {
                        throw new RuntimeException(ex);
                    }
                    assert(partition != HStoreConstants.NULL_PARTITION_ID) : "Failed to get base partition for " + txn_trace + "\n" + txn_trace.debug(args.catalog_db);
                    if (base_partition != HStoreConstants.NULL_PARTITION_ID && base_partition != partition)
                        continue;

                    int queue_idx = (global ? ctr : partition) % num_threads;
                    queues[queue_idx].add(Pair.of(partition, txn_trace));
                    if (++ctr % marker == 0)
                        LOG.info(String.format("Queued %d/%d transactions", ctr, num_transactions));
                } // FOR
                queued_all.set(true);

                // Poke all our threads just in case they finished
                for (Thread t : processing_threads)
                    t.interrupt();
            }
        });

        // PROCESSING THREADS
        for (int i = 0; i < num_threads; i++) {
            final int thread_id = i;
            runnables.add(new Runnable() {
                @Override
                public void run() {
                    Thread self = Thread.currentThread();
                    processing_threads.add(self);

                    Pair<Integer, TransactionTrace> pair = null;
                    final Set<MarkovOptimization> penalty_groups = new HashSet<MarkovOptimization>();
                    final Set<Penalty> penalties = new HashSet<Penalty>();
                    ObjectHistogram<MarkovOptimization> proc_h = null;

                    ProfileMeasurement profiler = profilers[thread_id];
                    assert (profiler != null);

                    MarkovCostModel costmodels[] = thread_costmodels[thread_id];
                    for (int p = 0; p < num_partitions; p++) {
                        MarkovGraphsContainer markovs = (global ? thread_markovs[thread_id].get(MarkovUtil.GLOBAL_MARKOV_CONTAINER_ID) : thread_markovs[thread_id].get(p));
                        MarkovEstimator t_estimator = new MarkovEstimator(args.catalogContext, p_estimator, markovs);
                        costmodels[p] = new MarkovCostModel(args.catalogContext, p_estimator, t_estimator, args.thresholds);
                        if (force_fullpath)
                            thread_costmodels[thread_id][p].forceFullPathComparison();
                        if (force_regenerate)
                            thread_costmodels[thread_id][p].forceRegenerateMarkovEstimates();
                    } // FOR

                    int thread_ctr = 0;
                    while (true) {
                        try {
                            if (queued_all.get()) {
                                pair = queues[thread_id].poll();
                            } else {
                                pair = queues[thread_id].take();

                                // Steal work
                                if (pair == null) {
                                    for (int i = 0; i < num_threads; i++) {
                                        if (i == thread_id)
                                            continue;
                                        pair = queues[i].take();
                                        if (pair != null)
                                            break;
                                    } // FOR
                                }
                            }
                        } catch (InterruptedException ex) {
                            continue;
                        }
                        if (pair == null)
                            break;

                        int partition = pair.getFirst();
                        TransactionTrace txn_trace = pair.getSecond();
                        Procedure catalog_proc = txn_trace.getCatalogItem(args.catalog_db);
                        total_h.put(catalog_proc);
                        if (debug.val)
                            LOG.debug(String.format("Processing %s [%d / %d]", txn_trace, thread_ctr, thread_ctr + queues[thread_id].size()));

                        proc_h = proc_penalties_h.get(catalog_proc);
                        if (proc_h == null) {
                            synchronized (proc_penalties_h) {
                                proc_h = proc_penalties_h.get(catalog_proc);
                                if (proc_h == null) {
                                    proc_h = new ObjectHistogram<MarkovOptimization>();
                                    proc_penalties_h.put(catalog_proc, proc_h);
                                }
                            } // SYNCH
                        }

                        double cost = 0.0d;
                        Throwable error = null;
                        try {
                            profiler.start();
                            if (skip_processing == false) {
                                cost = costmodels[partition].estimateTransactionCost(args.catalogContext, txn_trace);
                            }
                        } catch (Throwable ex) {
                            error = ex;
                        } finally {
                            profiler.stop();
                        }
                        if (error != null) {
                            failures.getAndIncrement();
                            String msg = "Failed to estimate transaction cost for " + txn_trace;
                            if (stop_on_error)
                                throw new RuntimeException(msg, error);
                            LOG.warn(msg, error);
                            continue;
                        }

                        if (cost > 0) {
                            penalty_groups.clear();
                            penalties.clear();
                            for (Penalty p : costmodels[partition].getLastPenalties()) {
                                penalty_groups.add(p.getGroup());
                                penalties.add(p);
                            } // FOR
                            proc_h.put(penalty_groups);
                            optimizations_h.put(penalty_groups);
                            penalties_h.put(penalties);
                            missed_h.put(catalog_proc);
                        } else {
                            accurate_h.put(catalog_proc);
                        }
                        int global_ctr = total.incrementAndGet();
                        if (global_ctr % marker == 0)
                            LOG.info(String.format("Processed %d/%d transactions %s", global_ctr, num_transactions, (failures.get() > 0 ? String.format("[failures=%d]", failures.get()) : "")));
                        thread_ctr++;
                    } // WHILE
                    LOG.info(String.format("Processing Thread Finished %d / %d", thread_finished.incrementAndGet(), num_threads));
                }
            });
        } // FOR
        ThreadUtil.runGlobalPool(runnables);

        Map<Object, String> debugLabels = CatalogUtil.getHistogramLabels(args.catalog_db.getProcedures());

        ObjectHistogram<Procedure> fastpath_h = new ObjectHistogram<Procedure>();
        fastpath_h.setDebugLabels(debugLabels);
        ObjectHistogram<Procedure> fullpath_h = new ObjectHistogram<Procedure>();
        fullpath_h.setDebugLabels(debugLabels);
        ProfileMeasurement total_time = new ProfileMeasurement("ESTIMATION");

        for (int i = 0; i < num_threads; i++) {
            for (int p = 0; p < num_partitions; p++) {
                MarkovCostModel mc = thread_costmodels[i][p];
                fastpath_h.put(mc.fast_path_counter);
                fullpath_h.put(mc.full_path_counter);
                total_time.appendTime(profilers[i]);
            }
        } // FOR

        int accurate_cnt = total.get() - (int) missed_h.getSampleCount();
        assert (accurate_cnt == accurate_h.getSampleCount());

        // ---------------------------------------------------------------------------------------------

        Map<String, Object> m0 = new ListOrderedMap<String, Object>();
        m0.put("PARTITIONS", num_partitions);
        m0.put("FORCE FULLPATH", force_fullpath);
        m0.put("FORCE REGENERATE", force_regenerate);
        m0.put("COMPUTATION TIME", String.format("%.2f ms total / %.2f ms avg", total_time.getTotalThinkTimeMS(), total_time.getAverageThinkTimeMS()));
        m0.put("TRANSACTION COUNTS", total_h.setDebugLabels(debugLabels));

        Map<String, Object> m1 = new ListOrderedMap<String, Object>();
        m1.put("ACCURATE TRANSACTIONS", accurate_h.setDebugLabels(debugLabels));
        m1.put("MISSED TRANSACTIONS", missed_h.setDebugLabels(debugLabels));

        Map<String, Object> m2 = new ListOrderedMap<String, Object>();
        m2.put("FAST PATH", fastpath_h);
        m2.put("FULL PATH", fullpath_h);

        Map<String, Object> m3 = new ListOrderedMap<String, Object>();
        m3.put("ESTIMATE ACCURACY", String.format("%5d / %05d [%.03f]", accurate_cnt, total.get(), (accurate_cnt / (double) total.get())));
        if (failures.get() > 0) {
            m3.put("ERRORS", String.format("%5d / %05d [%.03f]", failures.get(), total_h.getSampleCount(), (failures.get() / (double) total_h.getSampleCount())));
        }

        ListOrderedMap<String, String> m4 = new ListOrderedMap<String, String>();
        final String f = "%5d [%.03f]";
        for (MarkovOptimization pg : MarkovOptimization.values()) {
            long cnt = optimizations_h.get(pg, 0);
            m4.put(pg.toString(), String.format(f, cnt, cnt / (double) total.get()));
            for (Penalty p : pg.penalties) {
                cnt = penalties_h.get(p, 0);
                m4.put(String.format("  + %s", p), String.format(f, cnt, cnt / (double) total.get()));
            } // FOR
            m4.put(m4.lastKey(), m4.get(m4.lastKey()) + "\n"); // .concat("\n");
        } // FOR

        ListOrderedMap<String, Object> m5 = new ListOrderedMap<String, Object>();
        for (Entry<Procedure, ObjectHistogram<MarkovOptimization>> e : proc_penalties_h.entrySet()) {
            if (e.getValue().isEmpty() == false)
                m5.put(e.getKey().getName(), e.getValue());
        }

        System.err.println(StringUtil.formatMaps(m0, m1, m2) + StringUtil.DOUBLE_LINE + StringUtil.formatMaps(m3, m4) + StringUtil.DOUBLE_LINE + StringUtil.formatMaps(m5));
    }
}