package edu.brown.hstore.estimators.markov;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.collections15.keyvalue.MultiKey;
import org.apache.log4j.Logger;
import org.voltdb.CatalogContext;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Statement;
import org.voltdb.exceptions.ServerFaultException;
import org.voltdb.utils.EstTime;
import org.voltdb.utils.Pair;

import edu.brown.catalog.CatalogKey;
import edu.brown.graphs.GraphvizExport;
import edu.brown.hstore.Hstoreservice.Status;
import edu.brown.hstore.estimators.EstimatorState;
import edu.brown.hstore.estimators.TransactionEstimator;
import edu.brown.hstore.txns.TransactionUtil;
import edu.brown.interfaces.DebugContext;
import edu.brown.logging.LoggerUtil;
import edu.brown.logging.LoggerUtil.LoggerBoolean;
import edu.brown.markov.MarkovEdge;
import edu.brown.markov.MarkovGraph;
import edu.brown.markov.MarkovGraphTimes;
import edu.brown.markov.MarkovUtil;
import edu.brown.markov.MarkovVertex;
import edu.brown.markov.containers.MarkovGraphsContainer;
import edu.brown.pools.TypedObjectPool;
import edu.brown.pools.TypedPoolableObjectFactory;
import edu.brown.profilers.MarkovEstimatorProfiler;
import edu.brown.profilers.ProfileMeasurement;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.ParameterMangler;
import edu.brown.utils.PartitionEstimator;
import edu.brown.utils.PartitionSet;
import edu.brown.utils.StringUtil;
import edu.brown.workload.QueryTrace;
import edu.brown.workload.TransactionTrace;

/**
 * Markov Model-based Transaction Estimator
 * @author pavlo
 */
public class MarkovEstimator extends TransactionEstimator {
    private static final Logger LOG = Logger.getLogger(MarkovEstimator.class);
    private static final LoggerBoolean debug = new LoggerBoolean();
    private static final LoggerBoolean trace = new LoggerBoolean();
    static {
        LoggerUtil.attachObserver(LOG, debug, trace);
    }

    // ----------------------------------------------------------------------------
    // STATIC DATA MEMBERS
    // ----------------------------------------------------------------------------

    /**
     * The amount of change in visitation of vertices we would tolerate before
     * we need to recompute the graph.
     * TODO (pavlo): Saurya says: Should this be in MarkovGraph?
     */
    private static final double RECOMPUTE_TOLERANCE = (double) 0.5;

    // ----------------------------------------------------------------------------
    // DATA MEMBERS
    // ----------------------------------------------------------------------------

    private final CatalogContext catalogContext;
    private final MarkovGraphsContainer markovs;
    private final MarkovGraphTimes markovTimes = new MarkovGraphTimes();
    private final TypedObjectPool<MarkovPathEstimator> pathEstimatorsPool;
    private final TypedObjectPool<MarkovEstimatorState> statesPool;

    /**
     * We can maintain a cache of the last successful MarkovPathEstimator per MarkovGraph.
     */
    private final Map<MarkovGraph, List<MarkovVertex>> cached_paths =
            new HashMap<MarkovGraph, List<MarkovVertex>>();

    /**
     * For a given vertex, maintain a map to possible future vertices.
     */
    private final Map<MarkovVertex, ConcurrentHashMap<MultiKey<String>, Pair<MarkovEdge, MarkovVertex>>> cache_batchEnd;

    private transient boolean enable_recomputes = false;

    /**
     * If we're using the TransactionEstimator, then we need to convert all
     * primitive array ProcParameters into object arrays...
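     * (Illustrative example: a ProcParameter declared as a primitive long[]
     * is rewrapped as an Object[] of Long values so that every parameter can
     * be handled uniformly downstream.)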
     * ProcedureId -> ParameterMangler
     */
    private final ParameterMangler manglers[];

    private final MarkovEstimatorProfiler profiler;

    // ----------------------------------------------------------------------------
    // CONSTRUCTORS
    // ----------------------------------------------------------------------------

    /**
     * Constructor
     * @param catalogContext
     * @param p_estimator
     * @param markovs
     */
    public MarkovEstimator(CatalogContext catalogContext, PartitionEstimator p_estimator, MarkovGraphsContainer markovs) {
        super(p_estimator);
        this.catalogContext = catalogContext;
        this.markovs = markovs;
        this.cache_batchEnd = new HashMap<MarkovVertex, ConcurrentHashMap<MultiKey<String>, Pair<MarkovEdge, MarkovVertex>>>();

        if (this.markovs != null && this.markovs.getHasher() == null)
            this.markovs.setHasher(this.hasher);

        // Create all of our parameter manglers
        this.manglers = new ParameterMangler[this.catalogContext.procedures.size() + 1];
        for (Procedure catalog_proc : this.catalogContext.procedures) {
            if (catalog_proc.getSystemproc()) continue;
            this.manglers[catalog_proc.getId()] = ParameterMangler.singleton(catalog_proc);
        } // FOR

        if (debug.val) LOG.debug("Creating MarkovPathEstimator Object Pool");
        TypedPoolableObjectFactory<MarkovPathEstimator> m_factory =
                new MarkovPathEstimator.Factory(this.catalogContext, this.p_estimator);
        this.pathEstimatorsPool = new TypedObjectPool<MarkovPathEstimator>(m_factory,
                hstore_conf.site.pool_pathestimators_idle);

        if (debug.val) LOG.debug("Creating MarkovEstimatorState Object Pool");
        TypedPoolableObjectFactory<MarkovEstimatorState> s_factory =
                new MarkovEstimatorState.Factory(this.catalogContext);
        int num_idle = (int) (hstore_conf.site.network_incoming_limit_txns * hstore_conf.site.pool_scale_factor);
        this.statesPool = new TypedObjectPool<MarkovEstimatorState>(s_factory, num_idle);

        if (hstore_conf.site.markov_profiling) {
            this.profiler = new MarkovEstimatorProfiler();
        } else {
            this.profiler = null;
        }
    }
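    // Typical lifecycle, sketched for orientation (the local variable names
    // below are illustrative only; processTransactionTrace() further down
    // drives this same sequence when replaying workload traces):
    //
    //     MarkovEstimator est = new MarkovEstimator(catalogContext, p_estimator, markovs);
    //     MarkovEstimatorState s = (MarkovEstimatorState)est.startTransaction(txnId, proc, procArgs);
    //     if (s != null) {
    //         // Once per query batch that the txn submits:
    //         MarkovEstimate e = est.executeQueries(s, batchStmts, batchPartitions);
    //         est.commit(s);                // or est.abort(s, status)
    //         est.destroyEstimatorState(s); // return the state to the pool
    //     }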
    // ----------------------------------------------------------------------------
    // DATA MEMBER METHODS
    // ----------------------------------------------------------------------------

    public void enableGraphRecomputes() {
        this.enable_recomputes = true;
    }
    public MarkovGraphsContainer getMarkovGraphsContainer() {
        return (this.markovs);
    }
    public MarkovGraphTimes getMarkovGraphTimes() {
        return (this.markovTimes);
    }

    // ----------------------------------------------------------------------------
    // RUNTIME METHODS
    // ----------------------------------------------------------------------------

    @SuppressWarnings("unchecked")
    @Override
    public MarkovEstimatorState startTransactionImpl(Long txn_id, int base_partition, Procedure catalog_proc, Object[] args) {
        long timestamp = -1L;
        if (this.profiler != null) timestamp = ProfileMeasurement.getTime();

        assert (catalog_proc != null);
        ParameterMangler mangler = this.manglers[catalog_proc.getId()];
        if (mangler != null) args = mangler.convert(args);
        long start_time = EstTime.currentTimeMillis();
        if (debug.val)
            LOG.debug(String.format("%s - Starting transaction estimation [partition=%d]",
                      TransactionUtil.formatTxnName(catalog_proc, txn_id), base_partition));

        // If we don't have a graph for this procedure, we should probably just return null.
        // This will be the case for all sysprocs.
        if (this.markovs == null) return (null);
        MarkovGraph markov = this.markovs.getFromParams(txn_id, base_partition, args, catalog_proc);
        if (markov == null) {
            if (debug.val)
                LOG.debug(String.format("%s - No MarkovGraph is available for transaction",
                          TransactionUtil.formatTxnName(catalog_proc, txn_id)));
            if (this.profiler != null) this.profiler.start_time.appendTime(timestamp);
            return (null);
        }

        if (trace.val)
            LOG.trace(String.format("%s - Creating new MarkovEstimatorState",
                      TransactionUtil.formatTxnName(catalog_proc, txn_id)));
        MarkovEstimatorState state = null;
        try {
            state = (MarkovEstimatorState)statesPool.borrowObject();
            assert(state.isInitialized() == false);
            state.init(txn_id, base_partition, markov, args, start_time);
        } catch (Throwable ex) {
            throw new RuntimeException(ex);
        }
        assert(state.isInitialized()) : "Unexpected uninitialized MarkovEstimatorState\n" + state;

        MarkovVertex start = markov.getStartVertex();
        assert(start != null) : "The start vertex is null. This should never happen!";
        MarkovEstimate initialEst = state.createNextEstimate(start, true);
        this.estimatePath(state, initialEst, catalog_proc, args);

        if (debug.val) {
            String txnName = TransactionUtil.formatTxnName(catalog_proc, txn_id);
            LOG.debug(String.format("%s - Initial MarkovEstimate\n%s", txnName, initialEst));
            List<MarkovVertex> path = initialEst.getMarkovPath();
            if (path.isEmpty()) {
                LOG.debug(String.format("%s - Initial path for txn is empty because the graph is new", txnName));
            } else {
                LOG.trace(String.format("%s - Estimated Path [length=%d]\n%s",
                          txnName, path.size(),
                          StringUtil.join("\n----------------------\n", path)));
            }
        }

        // Update EstimatorState.prefetch any time we transition to a MarkovVertex where the
        // underlying Statement catalog object was marked as prefetchable
        // Do we want to put this traversal above?
        if (hstore_conf.site.exec_prefetch_queries) {
            for (MarkovVertex vertex : initialEst.getMarkovPath()) {
                Statement statement = (Statement) vertex.getCatalogItem();
                if (statement.getPrefetchable()) {
                    if (debug.val)
                        LOG.debug(String.format("%s - Checking whether we can prefetch %s on partitions %s",
                                  TransactionUtil.formatTxnName(catalog_proc, txn_id),
                                  statement.fullName(), vertex.getPartitions()));
                    if (vertex.getPartitions().isEmpty() == false && vertex.getPartitions().get() != base_partition) {
                        state.addPrefetchableStatement(vertex.getCountedStatement());
                    }
                }
            } // FOR
        }

        // We want to add the estimate to the state down here after we have initialized
        // everything. This prevents other threads from accessing it before we have
        // initialized it properly.
        state.addInitialEstimate(initialEst);

        if (this.profiler != null) this.profiler.start_time.appendTime(timestamp);
        return (state);
    }

    @SuppressWarnings("unchecked")
    @Override
    public MarkovEstimate executeQueries(EstimatorState s, Statement catalog_stmts[], PartitionSet partitions[]) {
        long timestamp = -1L;
        if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
        MarkovEstimatorState state = (MarkovEstimatorState)s;
        if (debug.val)
            LOG.debug(String.format("Processing %d queries for txn #%d",
                      catalog_stmts.length, state.getTransactionId()));
        int batch_size = catalog_stmts.length;

        // If we get here, then we should definitely have a MarkovGraph
        MarkovGraph markov = state.getMarkovGraph();
        assert(markov != null);

        MarkovVertex current = state.getCurrent();
        PartitionSet touchedPartitions = state.getTouchedPartitions();
        MarkovVertex next_v = null;
        MarkovEdge next_e = null;
        Statement last_stmt = null;
        int stmt_idxs[] = null;

        // We can cache what the path is based on the first and last query in the batch.
        // We only want to do this for batches that are large enough.
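        // A batch end is cached under a key built from the last Statement, the
        // number of times this txn has invoked it, and the current/past
        // PartitionSets of the batch; see getCachedBatchEnd() and
        // addCachedBatchEnd() at the bottom of this class.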
        if (hstore_conf.site.markov_endpoint_caching && batch_size >= hstore_conf.site.markov_batch_caching_min) {
            assert(current != null);
            if (debug.val)
                LOG.debug("Attempting cache look-up for last statement in batch: " + Arrays.toString(catalog_stmts));
            state.cache_last_partitions.clear();
            state.cache_past_partitions.clear();
            PartitionSet last_partitions;
            stmt_idxs = new int[batch_size];
            for (int i = 0; i < batch_size; i++) {
                last_stmt = catalog_stmts[i];
                last_partitions = partitions[i];
                stmt_idxs[i] = state.updateQueryInstanceCount(last_stmt);
                if (i+1 != batch_size) {
                    state.cache_past_partitions.addAll(last_partitions);
                } else {
                    state.cache_last_partitions.addAll(last_partitions);
                }
            } // FOR

            Pair<MarkovEdge, MarkovVertex> pair = this.getCachedBatchEnd(current,
                                                                         last_stmt,
                                                                         stmt_idxs[batch_size-1],
                                                                         state.cache_last_partitions,
                                                                         state.cache_past_partitions);
            if (pair != null) {
                next_e = pair.getFirst();
                assert(next_e != null);
                next_v = pair.getSecond();
                assert(next_v != null);
                if (debug.val)
                    LOG.debug(String.format("Got cached batch end for %s: %s -> %s",
                              markov, current, next_v));

                // Update the counters and other info for the next vertex and edge
                if (this.enable_recomputes) {
                    this.markovTimes.addInstanceTime(next_v, state.getTransactionId(), state.getExecutionTimeOffset());
                }

                // Update the state information
                state.setCurrent(next_v, next_e);
                touchedPartitions.addAll(state.cache_last_partitions);
                touchedPartitions.addAll(state.cache_past_partitions);
            }
        }

        // Roll through the Statements in this batch and move the current vertex
        // for the txn's State handle along the path in the MarkovGraph
        if (next_v == null) {
            for (int i = 0; i < batch_size; i++) {
                int queryCount = (stmt_idxs != null ? stmt_idxs[i] : -1);
                this.consume(state, markov, catalog_stmts[i], partitions[i], queryCount);
                if (stmt_idxs == null) touchedPartitions.addAll(partitions[i]);
            } // FOR

            // Update our cache if we tried and failed before
            if (hstore_conf.site.markov_endpoint_caching && stmt_idxs != null) {
                if (debug.val)
                    LOG.debug(String.format("Updating cache batch end for %s: %s -> %s",
                              markov, current, state.getCurrent()));
                // Pass the partition sets in the same order that getCachedBatchEnd()
                // uses when it builds its look-up key
                this.addCachedBatchEnd(current,
                                       CollectionUtil.last(state.actual_path_edges),
                                       state.getCurrent(),
                                       last_stmt,
                                       stmt_idxs[batch_size-1],
                                       state.cache_last_partitions,
                                       state.cache_past_partitions);
            }
        }

        // 2012-10-17: This is kind of funky because we have to populate the
        // probabilities for the MarkovEstimate here, whereas for the initial estimate
        // we did it inside of the MarkovPathEstimator
        MarkovEstimate estimate = state.createNextEstimate(state.getCurrent(), false);
        assert(estimate != null);
        Procedure catalog_proc = markov.getProcedure();
        Object procArgs[] = state.getProcedureParameters();
        this.estimatePath(state, estimate, catalog_proc, procArgs);

        if (debug.val)
            LOG.debug(String.format("Next MarkovEstimate for txn #%d\n%s",
                      state.getTransactionId(), estimate.toString()));
        assert(estimate.isInitialized()) :
            String.format("Unexpected uninitialized MarkovEstimate for txn #%d\n%s", state.getTransactionId(), estimate);
        assert(estimate.isValid()) :
            String.format("Invalid MarkovEstimate for txn #%d\n%s", state.getTransactionId(), estimate);

        // Once the workload shifts we detect it and trigger a recompute of the
        // graph with the data that we collected from the current workload.
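        // NOTE: shouldRecompute() weighs the estimator's running txn count against
        // RECOMPUTE_TOLERANCE (defined above) to decide whether vertex visitation
        // has drifted enough to justify recalculating the graph's probabilities;
        // the exact heuristic lives in MarkovGraph.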
        if (this.enable_recomputes && markov.shouldRecompute(this.txn_count.get(), RECOMPUTE_TOLERANCE)) {
            markov.calculateProbabilities(catalogContext.getAllPartitionIds());
        }

        // We want to add the estimate to the state down here after we have initialized
        // everything. This prevents other threads from accessing it before we have
        // initialized it properly.
        state.addEstimate(estimate);

        if (this.profiler != null) this.profiler.update_time.appendTime(timestamp);
        return (estimate);
    }

    @Override
    protected void completeTransaction(EstimatorState s, Status status) {
        long timestamp = -1L;
        if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
        MarkovEstimatorState state = (MarkovEstimatorState)s;

        // The transaction for the given txn_id is in limbo, so we just want to remove it
        if (status == Status.ABORT_MISPREDICT) {
            state.getMarkovGraph().incrementMispredictionCount();
            if (this.profiler != null) this.profiler.finish_time.appendTime(timestamp);
            return;
        }

        Long txn_id = state.getTransactionId();
        long end_time = EstTime.currentTimeMillis();
        MarkovGraph markov = state.getMarkovGraph();
        if (debug.val)
            LOG.debug(String.format("Cleaning up state info for txn #%d [status=%s]", txn_id, status));

        // If there were no updates while the transaction was running, then
        // we don't want to try to update the model, because we will end up
        // connecting the START vertex to the COMMIT vertex, which is not correct
        if (state.isUpdatesEnabled()) {
            // We need to update the counter information in our MarkovGraph so that we know
            // that the procedure may transition to the ABORT vertex from wherever it was before
            MarkovVertex current = state.getCurrent();
            assert(current != null) :
                String.format("Missing current vertex for %s\n%s",
                              TransactionUtil.formatTxnName(markov.getProcedure(), txn_id), state);

            // If we don't have the terminal vertex, then we know that we don't care about
            // what this transaction actually did
            MarkovVertex next_v = markov.getFinishVertex(status);
            if (next_v == null) {
                if (this.profiler != null) this.profiler.finish_time.appendTime(timestamp);
                return;
            }

            // If no edge exists to the next vertex, then we need to create one
            MarkovEdge next_e = null;
            synchronized (next_v) {
                next_e = markov.addToEdge(current, next_v);
            } // SYNCH
            state.setCurrent(next_v, next_e); // For post-txn processing...
            // Update counters
            // We want to update the counters for the entire path right here so that
            // nobody gets incomplete numbers if they recompute probabilities
            for (MarkovVertex v : state.actual_path) v.incrementInstanceHits();
            for (MarkovEdge e : state.actual_path_edges) e.incrementInstanceHits();
            if (this.enable_recomputes) {
                this.markovTimes.addInstanceTime(next_v, txn_id, state.getExecutionTimeOffset(end_time));
            }
        }

        // Cache the path for the MarkovGraph if the path was correct for the txn
        if (hstore_conf.site.markov_path_caching &&
                this.cached_paths.containsKey(markov) == false &&
                state.getInitialEstimate().isValid()) {
            MarkovEstimate initialEst = s.getInitialEstimate();
            synchronized (this.cached_paths) {
                if (this.cached_paths.containsKey(markov) == false) {
                    if (debug.val)
                        LOG.debug(String.format("Storing cached path through %s[#%d] that was used by txn #%d",
                                  markov, markov.getGraphId(), txn_id));
                    this.cached_paths.put(markov, initialEst.getMarkovPath());
                }
            } // SYNCH
        } else if (trace.val && hstore_conf.site.markov_path_caching) {
            LOG.trace(String.format("Not caching path through %s[#%d] used by txn #%d [alreadyCached=%s / isValid=%s]",
                      markov, markov.getGraphId(), txn_id,
                      this.cached_paths.containsKey(markov),
                      state.getInitialEstimate().isValid()));
        }
        if (this.profiler != null) this.profiler.finish_time.appendTime(timestamp);
        return;
    }

    @Override
    public void destroyEstimatorState(EstimatorState s) {
        this.statesPool.returnObject((MarkovEstimatorState)s);
    }

    // ----------------------------------------------------------------------------
    // INTERNAL ESTIMATION METHODS
    // ----------------------------------------------------------------------------

    /**
     * Estimate the execution path of the txn based on its current vertex in the graph.
     * The estimate will be stored in the given MarkovEstimate.
     * Note that the path could be empty.
     * @param state The txn's TransactionEstimator state
     * @param est The current MarkovEstimate for the txn
     * @param catalog_proc The Procedure being executed by the txn
     * @param args Procedure arguments (mangled)
     */
    private void estimatePath(MarkovEstimatorState state, MarkovEstimate est, Procedure catalog_proc, Object args[]) {
        long timestamp = -1L;
        assert(state.isInitialized()) : state.hashCode();
        assert(est.isInitialized()) : state.hashCode();
        if (debug.val)
            LOG.debug(String.format("%s - Estimating execution path (%s)",
                      TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                      (est.isInitialEstimate() ? "INITIAL" : "BATCH #" + est.getBatchId())));

        MarkovVertex currentVertex = est.getVertex();
        assert(currentVertex != null);
        if (this.enable_recomputes) {
            this.markovTimes.addInstanceTime(currentVertex, state.getTransactionId(), EstTime.currentTimeMillis());
        }
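        // Two shortcuts are attempted below before paying for a full traversal:
        //  (1) Fast path: if the txn's current vertex already lies on the initial
        //      estimate's path, reuse the truncated remainder of that path.
        //  (2) Cached path: reuse the last known-good path for this MarkovGraph,
        //      but only while its accuracy ratio stays above
        //      site.markov_path_caching_threshold.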
        // TODO: If the current vertex is in the initial estimate's list, then we
        // can just use the truncated list as the estimate, since we know that the
        // path will be the same. We don't need to recalculate everything.
        MarkovGraph markov = state.getMarkovGraph();
        assert(markov != null) :
            String.format("Unexpected null MarkovGraph for %s [hashCode=%d]\n%s",
                          TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                          state.hashCode(), state);
        boolean compute_path = true;
        if (hstore_conf.site.markov_fast_path && currentVertex.isStartVertex() == false) {
            List<MarkovVertex> initialPath = ((MarkovEstimate)state.getInitialEstimate()).getMarkovPath();
            if (initialPath.contains(currentVertex)) {
                if (debug.val)
                    LOG.debug(String.format("%s - Using fast path estimation for %s[#%d]",
                              TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                              markov, markov.getGraphId()));
                if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
                try {
                    MarkovPathEstimator.fastEstimation(est, initialPath, currentVertex);
                    compute_path = false;
                } finally {
                    if (this.profiler != null) this.profiler.fastest_time.appendTime(timestamp);
                }
            }
        }
        // We'll reuse the last MarkovPathEstimator (and its path) if the graph has been accurate for
        // other previous transactions. This prevents us from having to recompute the path every single time,
        // especially for single-partition transactions where the clustered MarkovGraphs are accurate
        else if (hstore_conf.site.markov_path_caching) {
            List<MarkovVertex> cached = this.cached_paths.get(markov);
            if (cached == null) {
                if (debug.val)
                    LOG.debug(String.format("%s - No cached path available for %s[#%d]",
                              TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                              markov, markov.getGraphId()));
            } else if (markov.getAccuracyRatio() < hstore_conf.site.markov_path_caching_threshold) {
                if (debug.val)
                    LOG.debug(String.format("%s - MarkovGraph %s[#%d] accuracy is below caching threshold [%.02f < %.02f]",
                              TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                              markov, markov.getGraphId(),
                              markov.getAccuracyRatio(),
                              hstore_conf.site.markov_path_caching_threshold));
            } else {
                if (debug.val)
                    LOG.debug(String.format("%s - Using cached path for %s[#%d]",
                              TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                              markov, markov.getGraphId()));
                if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
                try {
                    MarkovPathEstimator.fastEstimation(est, cached, currentVertex);
                    compute_path = false;
                } finally {
                    if (this.profiler != null) this.profiler.cachedest_time.appendTime(timestamp);
                }
            }
        }

        // Use the MarkovPathEstimator to estimate a new path for this txn
        if (compute_path) {
            if (debug.val)
                LOG.debug(String.format("%s - Need to compute new path in %s[#%d] using %s",
                          TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId()),
                          markov, markov.getGraphId(),
                          MarkovPathEstimator.class.getSimpleName()));
            MarkovPathEstimator pathEstimator = null;
            try {
                pathEstimator = (MarkovPathEstimator)this.pathEstimatorsPool.borrowObject();
                pathEstimator.init(state.getMarkovGraph(), est, args, state.getBasePartition());
                pathEstimator.setForceTraversal(hstore_conf.site.markov_force_traversal);
                pathEstimator.setLearningEnabled(hstore_conf.site.markov_learning_enable);
            } catch (Throwable ex) {
                String txnName = TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId());
                String msg = "Failed to initialize new MarkovPathEstimator for " + txnName;
                LOG.error(msg, ex);
                throw new RuntimeException(msg, ex);
            }
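            // Walk the graph from the txn's current vertex to fill in the
            // MarkovEstimate's expected path. If the traversal fails, we dump the
            // graph to a Graphviz file so the failure can be inspected offline.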
            if (this.profiler != null) timestamp = ProfileMeasurement.getTime();
            try {
                pathEstimator.traverse(est.getVertex());
            } catch (Throwable ex) {
                try {
                    GraphvizExport<MarkovVertex, MarkovEdge> gv =
                            MarkovUtil.exportGraphviz(markov, true, markov.getPath(pathEstimator.getVisitPath()));
                    LOG.error("GRAPH #" + markov.getGraphId() + " DUMP: " + gv.writeToTempFile(catalog_proc));
                } catch (Exception ex2) {
                    throw new RuntimeException(ex2);
                }
                String msg = "Failed to estimate path for " +
                             TransactionUtil.formatTxnName(catalog_proc, state.getTransactionId());
                LOG.error(msg, ex);
                throw new RuntimeException(msg, ex);
            } finally {
                if (this.profiler != null) this.profiler.fullest_time.appendTime(timestamp);
            }

            // If our path was incomplete or we created new vertices during the traversal,
            // then we should tell the PartitionExecutor that we need updates about this
            // txn so that we can populate the MarkovGraph
            if (hstore_conf.site.markov_learning_enable && est.isInitialEstimate()) {
                Collection<MarkovVertex> createdVertices = pathEstimator.getCreatedVertices();
                MarkovVertex v = CollectionUtil.last(est.getMarkovPath());
                if ((createdVertices != null && createdVertices.isEmpty() == false) ||
                        (v.isQueryVertex() == true || v.isStartVertex())) {
                    if (debug.val)
                        LOG.debug(String.format("Enabling runtime updates for %s " +
                                  "[createdVertices=%s, lastVertex=%s]",
                                  state.getTransactionId(), createdVertices, v));
                    state.shouldAllowUpdates(true);
                }
            }
            this.pathEstimatorsPool.returnObject(pathEstimator);
        }
    }

    /**
     * Figure out the next vertex that the txn will transition to for the given Statement
     * catalog object and the partitions that it will touch when it is executed.
     * If no vertex exists, we will create it and dynamically add it to our MarkovGraph.
     * @param state
     * @param markov
     * @param catalog_stmt
     * @param partitions
     * @param queryCounter
     */
    private MarkovVertex consume(MarkovEstimatorState state,
                                 MarkovGraph markov,
                                 Statement catalog_stmt,
                                 PartitionSet partitions,
                                 int queryCounter) {
        long timestamp = -1L;
        if (this.profiler != null) timestamp = ProfileMeasurement.getTime();

        // Update the number of times that we have executed this query in the txn
        if (queryCounter < 0) queryCounter = state.updateQueryInstanceCount(catalog_stmt);
        assert(markov != null);

        // Examine all of the vertices that are adjacent to our current vertex
        // and see which vertex we are going to move to next
        PartitionSet touchedPartitions = state.getTouchedPartitions();
        MarkovVertex current = state.getCurrent();
        assert(current != null);
        MarkovVertex next_v = null;
        MarkovEdge next_e = null;

        // Synchronize on the single vertex so that it's more fine-grained than the entire graph
        synchronized (current) {
            Collection<MarkovEdge> edges = markov.getOutEdges(current);
            if (edges != null) {
                if (debug.val)
                    LOG.debug(String.format("Examining %d edges from %s for txn #%d",
                              edges.size(), current, state.getTransactionId()));
                for (MarkovEdge e : edges) {
                    MarkovVertex v = markov.getDest(e);
                    if (v.isEqual(catalog_stmt, partitions, touchedPartitions, queryCounter)) {
                        if (debug.val)
                            LOG.debug("Found next vertex " + v + " for Txn #" + state.getTransactionId());
                        next_v = v;
                        next_e = e;
                        break;
                    }
                } // FOR
            }

            // If we fail to find the next vertex, that means we have to dynamically create a new
            // one. The graph is self-managed, so we don't need to worry about whether
            // we need to recompute probabilities.
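            // (The new vertex is keyed on the same attributes that isEqual()
            // matched on above: the Statement, its per-txn invocation counter,
            // and the touched partition sets.)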
            if (next_v == null) {
                next_v = new MarkovVertex(catalog_stmt,
                                          MarkovVertex.Type.QUERY,
                                          queryCounter,
                                          partitions,
                                          touchedPartitions);
                assert(markov.containsVertex(current)) :
                    String.format("%s does not have current vertex %s for %s",
                                  markov, current,
                                  TransactionUtil.formatTxnName(markov.getProcedure(), state.getTransactionId()));
                markov.addVertex(next_v);
                next_e = markov.addToEdge(current, next_v);
                if (debug.val)
                    LOG.debug(String.format("Created new edge from %s to new vertex %s for txn #%d",
                              state.getCurrent(), next_v, state.getTransactionId()));
                // assert(state.getCurrent().getPartitions().size() <= touchedPartitions.size());
            }
        } // SYNCH

        if (current.isStartVertex() && next_v.isCommitVertex()) {
            throw new ServerFaultException("Trying to connect START->COMMIT", state.getTransactionId());
        }

        // Update the counters and other info for the next vertex and edge
        if (this.enable_recomputes) {
            this.markovTimes.addInstanceTime(next_v, state.getTransactionId(), state.getExecutionTimeOffset());
        }

        // Update the state information
        state.setCurrent(next_v, next_e);
        if (debug.val)
            LOG.debug("Updated State Information for txn #" + state.getTransactionId() +
                      (trace.val ? "\n" + state : ""));
        if (this.profiler != null) this.profiler.consume_time.appendTime(timestamp);
        return (next_v);
    }

    // ----------------------------------------------------------------------------
    // HELPER METHODS
    // ----------------------------------------------------------------------------

    public MarkovEstimatorState processTransactionTrace(TransactionTrace txn_trace) throws Exception {
        Long txn_id = txn_trace.getTransactionId();
        if (debug.val) {
            LOG.debug("Processing TransactionTrace #" + txn_id);
            if (trace.val) LOG.trace(txn_trace.debug(this.catalogContext.database));
        }
        MarkovEstimatorState s = (MarkovEstimatorState)this.startTransaction(txn_id,
                                                       txn_trace.getCatalogItem(this.catalogContext.database),
                                                       txn_trace.getParams());
        assert(s != null) : "Null EstimatorState for txn #" + txn_id;

        for (Entry<Integer, List<QueryTrace>> e : txn_trace.getBatches().entrySet()) {
            int batch_size = e.getValue().size();
            if (trace.val) LOG.trace(String.format("Batch #%d: %d traces", e.getKey(), batch_size));

            // Generate the data structures we will need to give to the TransactionEstimator
            Statement catalog_stmts[] = new Statement[batch_size];
            PartitionSet partitions[] = new PartitionSet[batch_size];
            this.populateQueryBatch(e.getValue(), s.getBasePartition(), catalog_stmts, partitions);

            synchronized (s.getMarkovGraph()) {
                this.executeQueries(s, catalog_stmts, partitions);
            } // SYNCH
        } // FOR (batches)

        if (txn_trace.isAborted()) {
            this.abort(s, Status.ABORT_USER);
        } else {
            this.commit(s);
        }
        assert(s.getEstimateCount() == txn_trace.getBatchCount()) :
            String.format("EstimateCount[%d] != BatchCount[%d]", s.getEstimateCount(), txn_trace.getBatchCount());
        assert(s.actual_path.size() == (txn_trace.getQueryCount() + 2)) :
            String.format("Path[%d] != QueryCount[%d]", s.actual_path.size(), txn_trace.getQueryCount());
        return (s);
    }
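    /**
     * Convert a batch of QueryTraces into the parallel arrays of Statement handles
     * and PartitionSets that executeQueries() expects, using the PartitionEstimator
     * to compute the partitions that each query will touch.
     * @return true if every Statement in the batch is read-only
     */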
    private boolean populateQueryBatch(List<QueryTrace> queries,
                                       int base_partition,
                                       Statement catalog_stmts[],
                                       PartitionSet partitions[]) throws Exception {
        int i = 0;
        boolean readOnly = true;
        for (QueryTrace query_trace : queries) {
            assert(query_trace != null);
            catalog_stmts[i] = query_trace.getCatalogItem(catalogContext.database);
            partitions[i] = new PartitionSet();
            this.p_estimator.getAllPartitions(partitions[i], query_trace, base_partition);
            assert(partitions[i].isEmpty() == false) : "No partitions for " + query_trace;
            readOnly = readOnly && catalog_stmts[i].getReadonly();
            i++;
        } // FOR
        return (readOnly);
    }

    protected Pair<MarkovEdge, MarkovVertex> getCachedBatchEnd(MarkovVertex start,
                                                               Statement catalog_stmt,
                                                               int idx,
                                                               PartitionSet partitions,
                                                               PartitionSet past_partitions) {
        Map<MultiKey<String>, Pair<MarkovEdge, MarkovVertex>> m = this.cache_batchEnd.get(start);
        Pair<MarkovEdge, MarkovVertex> found = null;
        if (m != null) {
            MultiKey<String> cache_key = new MultiKey<String>(CatalogKey.createKey(catalog_stmt),
                                                              Integer.toString(idx),
                                                              partitions.toString(),
                                                              past_partitions.toString());
            found = m.get(cache_key);
        }
        return (found);
    }

    protected void addCachedBatchEnd(MarkovVertex start,
                                     MarkovEdge e,
                                     MarkovVertex v,
                                     Statement catalog_stmt,
                                     int idx,
                                     PartitionSet partitions,
                                     PartitionSet past_partitions) {
        ConcurrentHashMap<MultiKey<String>, Pair<MarkovEdge, MarkovVertex>> m = cache_batchEnd.get(start);
        if (m == null) {
            synchronized (this.cache_batchEnd) {
                m = this.cache_batchEnd.get(start);
                if (m == null) {
                    m = new ConcurrentHashMap<MultiKey<String>, Pair<MarkovEdge, MarkovVertex>>();
                    this.cache_batchEnd.put(start, m);
                }
            } // SYNCH
        }
        MultiKey<String> cache_key = new MultiKey<String>(CatalogKey.createKey(catalog_stmt),
                                                          Integer.toString(idx),
                                                          partitions.toString(),
                                                          past_partitions.toString());
        m.putIfAbsent(cache_key, Pair.of(e, v));
    }

    // ----------------------------------------------------------------------------
    // DEBUG METHODS
    // ----------------------------------------------------------------------------

    public class Debug implements DebugContext {
        public TypedObjectPool<MarkovPathEstimator> getPathEstimatorsPool() {
            return (pathEstimatorsPool);
        }
        public MarkovEstimatorProfiler getProfiler() {
            return (profiler);
        }
    } // CLASS

    private MarkovEstimator.Debug cachedDebugContext;
    public MarkovEstimator.Debug getDebugContext() {
        if (cachedDebugContext == null) {
            cachedDebugContext = new MarkovEstimator.Debug();
        }
        return (cachedDebugContext);
    }
}