MarkovConflictChecker.java example

Explorer
h-store-master
package edu.brown.hstore.specexec.checkers;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;

import org.apache.log4j.Logger;
import org.voltdb.CatalogContext;
import org.voltdb.ParameterSet;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.ConflictPair;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Statement;
import org.voltdb.catalog.StmtParameter;
import org.voltdb.catalog.Table;

import edu.brown.catalog.CatalogUtil;
import edu.brown.catalog.conflicts.ConflictSetUtil;
import edu.brown.catalog.special.CountedStatement;
import edu.brown.hstore.estimators.Estimate;
import edu.brown.hstore.estimators.EstimatorState;
import edu.brown.hstore.txns.AbstractTransaction;
import edu.brown.hstore.txns.LocalTransaction;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.mappings.ParameterMapping;
import edu.brown.mappings.ParameterMappingsSet;
import edu.brown.mappings.ParametersUtil;
import edu.brown.markov.EstimationThresholds;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.PredicatePairs;
import edu.brown.utils.StringUtil;

/**
 * A more fine-grained ConflictChecker based on estimations of what 
 * rows the txns will read/write.
 * <p>
 * The checker first defers to the table-level check in the parent class. Only
 * when that check reports a conflict does it compare the estimated query lists
 * of the two transactions and the procedure parameter values that feed the
 * primary-key predicates of the conflicting statements.
 * @author pavlo
 */
public class MarkovConflictChecker extends TableConflictChecker {
    private static final Logger LOG = Logger.getLogger(MarkovConflictChecker.class);
    private static final LoggerBoolean debug = new LoggerBoolean();
    private static final LoggerBoolean trace = new LoggerBoolean();
    static {
        LoggerUtil.attachObserver(LOG, debug, trace);
    }
    
    private final ParameterMappingsSet paramMappings;
    @SuppressWarnings("unused")
    private EstimationThresholds thresholds;

    // ----------------------------------------------------------------------------
    // PRE-COMPUTED CACHE
    // ----------------------------------------------------------------------------

    /**
     * Per-Statement information that is computed once in the constructor
     * and then only read when checking for conflicts.
     */
    protected static class StatementCache {
        /**
         * We maintain a mapping to another Statement that it conflicts
         * with and its corresponding ConflictPair that represents that conflict
         */
        final Map<Statement, ConflictPair> conflicts = new HashMap<Statement, ConflictPair>();  

        /**
         * We maintain a list of the StmtParameters that are used in predicates with 
         * the target table's primary key. A column may be mapped to a null
         * StmtParameter when the Statement has no predicate on that column.
         */
        final Map<Column, StmtParameter> colParams = new IdentityHashMap<Column, StmtParameter>();
    } // CLASS
    
    /**
     * We have a separate cache object for each Statement
     */
    protected final Map<Statement, StatementCache> stmtCache = new HashMap<Statement, StatementCache>();
    
    /**
     * Table -> Primary Keys
     */
    protected final Map<Table, Column[]> pkeysCache = new HashMap<Table, Column[]>();
    
    // ----------------------------------------------------------------------------
    // INITIALIZATION
    // ----------------------------------------------------------------------------
    
    /**
     * Constructor. Pre-computes the primary key columns for every data table and
     * a StatementCache for every Statement of every regular Procedure.
     * If the catalog has no ParameterMappingsSet, the checker marks itself as disabled.
     * @param catalogContext
     * @param thresholds
     */
    protected MarkovConflictChecker(CatalogContext catalogContext, EstimationThresholds thresholds) {
        super(catalogContext);
        this.paramMappings = catalogContext.paramMappings;
        this.thresholds = thresholds;
        
        if (this.paramMappings == null) {
            LOG.warn(String.format("Disabling %s because the %s in the %s is null",
                     this.getClass().getSimpleName(),
                     ParameterMappingsSet.class.getSimpleName(),
                     catalogContext.getClass().getSimpleName()));
            this.disabled = true;
        }
        
        for (Table catalog_tbl : CatalogUtil.getDataTables(this.catalogContext.database)) {
            this.pkeysCache.put(catalog_tbl, CatalogUtil.getPrimaryKeyColumns(catalog_tbl).toArray(new Column[0]));
        } // FOR (table)
        
        for (Procedure proc : this.catalogContext.getRegularProcedures()) {
            Collection<ConflictPair> conflicts = ConflictSetUtil.getAllConflictPairs(proc);
            for (Statement stmt : proc.getStatements()) {
                this.stmtCache.put(stmt, this.buildStatementCache(stmt, conflicts));
            } // FOR (stmt)
        } // FOR (proc)
    }

    /**
     * Build the StatementCache for a single Statement.
     * @param stmt the Statement to build the cache for
     * @param conflicts all of the ConflictPairs of the Statement's Procedure
     * @return a new StatementCache for the given Statement
     */
    private StatementCache buildStatementCache(Statement stmt, Collection<ConflictPair> conflicts) {
        StatementCache cache = new StatementCache();
        
        // Only the pairs where this Statement is the first element are recorded
        for (ConflictPair cp : conflicts) {
            if (cp.getStatement0().equals(stmt)) {
                cache.conflicts.put(cp.getStatement1(), cp);
            }
        } // FOR
        
        PredicatePairs cset = CatalogUtil.extractStatementPredicates(stmt, false);
        for (Table tbl : CatalogUtil.getReferencedTables(stmt)) {
            Column pkeys[] = this.pkeysCache.get(tbl);
            if (trace.val) LOG.trace(tbl + " => " + Arrays.toString(pkeys));
            if (pkeys == null) {
                LOG.warn("Unexpected null primary keys for " + tbl);
                continue;
            }
            for (Column col : pkeys) {
                Collection<StmtParameter> params = cset.findAllForOther(StmtParameter.class, col);
                // If there are more than one, then it should always conflict
                if (params.size() > 1) {
                    // TODO: Nothing is actually marked here. The column is simply
                    // left out of colParams, so it is not compared in canExecute()
                    LOG.warn(String.format("There are %d %s mapped to the primary key column %s. " +
                             "Marking %s as always conflicting with %s",
                             params.size(), StmtParameter.class.getSimpleName(), col.fullName(),
                             tbl, stmt.fullName()));
                }
                else {
                    // If there are no references, then there is nothing else that 
                    // we need to do here. The StmtParameter that we get back will be null
                    StmtParameter stmtParam = CollectionUtil.first(params);
                    cache.colParams.put(col, stmtParam);
                }
            } // FOR (col)
        } // FOR (table)
        return (cache);
    }

    @Override
    public void setEstimationThresholds(EstimationThresholds t) {
        this.thresholds = t;
    }
    
    /**
     * Returns true only if this checker has been disabled.
     */
    @Override
    public boolean shouldIgnoreTransaction(AbstractTransaction ts) {
        if (this.disabled) return (true);

        // NOTE: We actually don't want to check whether we have a transaction estimate here
        // because this is a global flag. There may be times when the txn doesn't 
        // actually need an estimate (e.g., when we're stalled because of 2PC), so we 
        // just want to always return false here!
        
        // We're good to go!
        return (false);
    }
    
    @Override
    public boolean skipConflictAfter() {
        return (true);
    }
    
    /**
     * Check whether the candidate txn conflicts with the distributed txn at the
     * given partition. Whenever a piece of information needed for the fine-grained
     * check is missing (estimator state, estimate, or query list), this method
     * conservatively reports a conflict.
     * @param dtxn the distributed transaction
     * @param candidate the transaction that we want to execute
     * @param partitionId the partition whose query estimates are compared
     * @return true if the two txns are considered to be conflicting
     */
    @Override
    public boolean hasConflictBefore(AbstractTransaction dtxn, LocalTransaction candidate, int partitionId) {
        // If the TableConflictChecker says that there is no conflict, then we know that 
        // we don't need to check anything else.
        if (super.hasConflictBefore(dtxn, candidate, partitionId) == false) {
            if (debug.val)
                LOG.debug(String.format("No table-level conflicts between %s and %s. Safe to execute!",
                          dtxn, candidate));
            return (false);
        }
        
        // Get the estimates for both of the txns
        // If we don't have an estimate, then we have to say that there is a conflict. 
        EstimatorState dtxnState = dtxn.getEstimatorState();
        EstimatorState tsState = candidate.getEstimatorState();
        if (dtxnState == null) {
            if (debug.val)
                LOG.debug(String.format("No %s available for distributed txn %s",
                          EstimatorState.class.getSimpleName(), dtxn));
            return (true);
        }
        else if (tsState == null) {
            if (debug.val)
                LOG.debug(String.format("No %s available for candidate txn %s",
                          EstimatorState.class.getSimpleName(), candidate));
            return (true);
        }
        
        // Get the current TransactionEstimate for the DTXN and the 
        // initial TransactionEstimate for the single-partition txn
        // We need to make sure that both estimates have the list of 
        // queries that the transaction is going to execute
        Estimate dtxnEst = dtxnState.getLastEstimate();
        if (dtxnEst == null) {
            if (debug.val)
                LOG.warn(String.format("Unexpected null %s in the %s for %s",
                         Estimate.class.getSimpleName(), dtxnState.getClass().getSimpleName(), dtxn));
            return (true);
        }
        else if (dtxnEst.hasQueryEstimate(partitionId) == false) {
            if (debug.val)
                LOG.warn(String.format("No query list estimate is available for dtxn %s", dtxn));
            return (true);
        }
        Estimate tsEst = tsState.getInitialEstimate();
        assert(tsEst != null);
        if (tsEst == null) {
            // Only reachable when assertions are disabled. Be conservative
            // instead of failing with a NullPointerException below.
            if (debug.val)
                LOG.warn(String.format("Unexpected null initial %s in the %s for %s",
                         Estimate.class.getSimpleName(), tsState.getClass().getSimpleName(), candidate));
            return (true);
        }
        if (tsEst.hasQueryEstimate(partitionId) == false) {
            if (debug.val)
                LOG.warn(String.format("No query list estimate is available for candidate %s", candidate));
            return (true);
        }
        
        // If both txns are read-only, then we can let our homeboy go
//        boolean readonly0 = dtxnEst.isReadOnlyPartition(this.thresholds, partitionId);
//        boolean readonly1 = tsEst.isReadOnlyPartition(this.thresholds, partitionId);
//        if (readonly0 && readonly1) {
//            if (debug.val)
//                LOG.debug(String.format("%s<->%s are both are read-only. No conflict!", dtxn, ts));
//            return (false);
//        }
        
        List<CountedStatement> queries0 = dtxnEst.getQueryEstimate(partitionId);
        List<CountedStatement> queries1 = tsEst.getQueryEstimate(partitionId);
        
        return (this.canExecute(dtxn, queries0, candidate, queries1) == false);
    }
    
    /**
     * Internal method that checks whether the txns conflict based on their list
     * queries that they're expected to execute in the future.
     * <p>
     * Every pair of (dtxn query, candidate query) is examined. Pairs where both
     * queries are read-only, or that have no ConflictPair, are skipped. A pair
     * blocks the candidate when its ConflictPair is marked as always conflicting,
     * or when none of the comparable primary-key parameter values differ.
     * Missing ParameterSets, ParameterMappings, or StatementCaches are treated
     * conservatively as a conflict.
     * @param ts0 the distributed transaction
     * @param queries0 the queries that ts0 is expected to execute
     * @param ts1 the candidate transaction
     * @param queries1 the queries that ts1 is expected to execute
     * @return true if the candidate can be executed, false if a conflict was found
     */
    protected boolean canExecute(AbstractTransaction ts0, List<CountedStatement> queries0,
                                 AbstractTransaction ts1, List<CountedStatement> queries1) {
        ParameterSet params0 = ts0.getProcedureParameters();
        ParameterSet params1 = ts1.getProcedureParameters();
        
        if (params0 == null) {
            LOG.error(String.format("The ParameterSet for %s is null.", ts0));
            return (false);
        }
        else if (params1 == null) {
            LOG.error(String.format("The ParameterSet for %s is null.", ts1));
            return (false);
        }
        
        if (debug.val)
            LOG.debug(String.format("Comparing expected query execution for dtxn %s [#queries=%d] " +
                      "with candidate txn %s [#queries=%d]",
                      ts0, queries0.size(), ts1, queries1.size()));
        
        // TODO: Rather than checking the values referenced in each ConflictPair
        // individually, we should go through all of them first and just get the 
        // ProcParameter + Offsets that actually matter. Then we can just check the
        // values one after each other.
        
        // Now with ConflictPairs, we need to get the minimum set of ProcParameters that are
        // used as input to the conflicting Statements and then check whether they have the same
        // value. If they do, then we cannot run the candidate txn.
        
        // (1) We only need to examine the READ-WRITE conflicts and WRITE->WRITE
        
        for (int i0 = 0, cnt0 = queries0.size(); i0 < cnt0; i0++) {
            CountedStatement stmt0 = queries0.get(i0);
            StatementCache cache0 = this.stmtCache.get(stmt0.statement);
            if (cache0 == null) {
                // Without the pre-computed cache we cannot tell what this
                // Statement conflicts with, so we have to assume the worst
                LOG.warn(String.format("No %s available for %s in dtxn %s",
                         StatementCache.class.getSimpleName(), stmt0, ts0));
                return (false);
            }
            Map<StmtParameter, SortedSet<ParameterMapping>> mappings0 =
                    this.catalogContext.paramMappings.get(stmt0.statement, stmt0.counter);
            if (mappings0 == null) {
                LOG.warn(String.format("The ParameterMappings for %s in dtxn %s is null?\n%s",
                         stmt0, ts0, StringUtil.join("\n", queries0)));
                return (false);
            }
            boolean readonly0 = stmt0.statement.getReadonly();
            
            for (int i1 = 0, cnt1 = queries1.size(); i1 < cnt1; i1++) {
                CountedStatement stmt1 = queries1.get(i1);
                if (readonly0 && stmt1.statement.getReadonly()) continue;
                
                ConflictPair cp = cache0.conflicts.get(stmt1.statement);
                // If there isn't a ConflictPair, then there isn't a conflict
                if (cp == null) {
                    continue;
                }
                // If the ConflictPair is marked as always conflicting, then
                // we can stop right here
                else if (cp.getAlwaysconflicting()) {
                    if (debug.val) LOG.debug(String.format("%s - Marked as always conflicting", cp.fullName()));
                    return (false);
                }
                
                // Otherwise, at this point we know that we have two queries that both 
                // reference the same table(s). Therefore, we need to evaluate the values 
                // of the primary keys referenced in the queries to see whether they conflict
                StatementCache cache1 = this.stmtCache.get(stmt1.statement);
                if (cache1 == null) {
                    LOG.warn(String.format("No %s available for %s in candidate %s",
                             StatementCache.class.getSimpleName(), stmt1, ts1));
                    return (false);
                }
                Map<StmtParameter, SortedSet<ParameterMapping>> mappings1 =
                        this.catalogContext.paramMappings.get(stmt1.statement, stmt1.counter);
                if (mappings1 == null) {
                    LOG.warn(String.format("The ParameterMappings for %s in candidate %s is null?\n%s",
                             stmt1, ts1, StringUtil.join("\n", queries1)));
                    return (false);
                }
                
                // If all the parameters are equal, than means they are likely to be
                // accessing the same row in the table. That's a conflict!
                if (this.allKnownPkeyValuesEqual(cp, cache0, mappings0, params0, cache1, mappings1, params1)) {
                    if (debug.val)
                        LOG.debug(String.format("%s - All known parameter values are equal", cp.fullName()));
                    return (false);
                }
            } // FOR (stmt1)
        } // FOR (stmt0)
        return (true);
    }
    
    /**
     * Compare the procedure parameter values that feed the primary-key predicates
     * of two conflicting Statements. Columns for which either side is missing a
     * StmtParameter or a ParameterMapping are skipped.
     * @return false as soon as one comparable column has different values,
     *         true otherwise (including when no column could be compared)
     */
    private boolean allKnownPkeyValuesEqual(ConflictPair cp,
                                            StatementCache cache0,
                                            Map<StmtParameter, SortedSet<ParameterMapping>> mappings0,
                                            ParameterSet params0,
                                            StatementCache cache1,
                                            Map<StmtParameter, SortedSet<ParameterMapping>> mappings1,
                                            ParameterSet params1) {
        for (Map.Entry<Column, StmtParameter> e : cache0.colParams.entrySet()) {
            Column col = e.getKey();
            StmtParameter param0 = e.getValue();
            StmtParameter param1 = cache1.colParams.get(col);
            
            // It's ok if we're missing one of the parameters that we need if
            // the values are still not the same. It's only a problem if they're 
            // all the same because we have no idea whether they're 
            // actually the same or not
            if (param0 == null || param1 == null) {
                if (trace.val)
                    LOG.trace(String.format("%s - Missing StmtParameters for %s [param0=%s / param1=%s]",
                              cp.fullName(), col.fullName(), param0, param1));
                continue;
            }
            
            // Similarly, if we don't have a ParameterMapping then there is nothing
            // else we can do. Again, this is ok as long as at least one of the 
            // pkey values are different.
            ParameterMapping pm0 = CollectionUtil.first(mappings0.get(param0));
            ParameterMapping pm1 = CollectionUtil.first(mappings1.get(param1));
            if (pm0 == null) {
                if (trace.val)
                    LOG.trace(String.format("%s - No ParameterMapping for %s",
                              cp.fullName(), param0.fullName()));
                continue;
            }
            else if (pm1 == null) {
                if (trace.val)
                    LOG.trace(String.format("%s - No ParameterMapping for %s",
                              cp.fullName(), param1.fullName()));
                continue;
            }
            
            // If the values are not equal, then we can stop checking the 
            // other columns right away.
            if (this.equalParameters(params0, pm0, params1, pm1) == false) {
                if (trace.val)
                    LOG.trace(String.format("%s - Parameter values are not equal for %s [param0=%s / param1=%s]",
                              cp.fullName(), col.fullName(), param0, param1));
                return (false);
            }
        } // FOR (col)
        return (true);
    }
    
    /**
     * Check whether the two procedure parameter values referenced by the given
     * ParameterMappings are equal. Two null values are considered equal, and
     * a null value is never equal to a non-null value.
     * @param params0 the procedure parameters of the first txn
     * @param pm0 the mapping that selects the value from params0
     * @param params1 the procedure parameters of the second txn
     * @param pm1 the mapping that selects the value from params1
     * @return true if both values are null or if they are equal according to equals()
     */
    protected boolean equalParameters(ParameterSet params0, ParameterMapping pm0,
                                      ParameterSet params1, ParameterMapping pm1) {
        Object val0 = ParametersUtil.getValue(params0, pm0);
        Object val1 = ParametersUtil.getValue(params1, pm1);
        if (val0 == null) {
            return (val1 == null);
        }
        else if (val1 == null) {
            return (false);
        }
        return (val0.equals(val1));
    }
    
    // ----------------------------------------------------------------------------
    // SINGLETON
    // ----------------------------------------------------------------------------
    
    // Must be volatile so that a thread that skips the synchronized block
    // cannot observe a partially constructed instance
    private static volatile MarkovConflictChecker SINGLETON;
    
    /**
     * Get the shared MarkovConflictChecker, creating it on the first invocation.
     * The arguments are only used when the instance is created. Later invocations
     * return the existing instance regardless of what is passed in.
     * @param catalogContext
     * @param t
     * @return the shared instance
     */
    public static MarkovConflictChecker singleton(CatalogContext catalogContext, EstimationThresholds t) {
        if (SINGLETON == null) {
            synchronized (MarkovConflictChecker.class) {
                if (SINGLETON == null) {
                    SINGLETON = new MarkovConflictChecker(catalogContext, t);
                }
            } // SYNCH
        }
        return (SINGLETON);
    }
    
    
}