package edu.brown.mappings; import java.io.File; import java.text.ParseException; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.collections15.map.ListOrderedMap; import org.apache.log4j.Logger; import org.voltdb.VoltType; import org.voltdb.catalog.CatalogType; import org.voltdb.catalog.Database; import org.voltdb.catalog.ProcParameter; import org.voltdb.catalog.Procedure; import org.voltdb.catalog.Statement; import org.voltdb.catalog.StmtParameter; import org.voltdb.types.TimestampType; import org.voltdb.utils.Pair; import org.voltdb.utils.VoltTypeUtil; import edu.brown.catalog.CatalogUtil; import edu.brown.logging.LoggerUtil; import edu.brown.logging.LoggerUtil.LoggerBoolean; import edu.brown.utils.ArgumentsParser; import edu.brown.utils.StringUtil; import edu.brown.workload.AbstractTraceElement; import edu.brown.workload.QueryTrace; import edu.brown.workload.TransactionTrace; public class MappingCalculator { private static final Logger LOG = Logger.getLogger(MappingCalculator.class); private static final LoggerBoolean debug = new LoggerBoolean(); private static final LoggerBoolean trace = new LoggerBoolean(); static { LoggerUtil.attachObserver(LOG, debug, trace); } public static final int QUERY_INSTANCE_MAX = 100; /** * Represents the correlation calculations for a single stored procedure */ protected class ProcedureMappings { private final Procedure catalog_proc; private final Map<Statement, AtomicInteger> query_counters = new TreeMap<Statement, AtomicInteger>(); private final Map<Statement, Map<Integer, QueryInstance>> query_instances = new TreeMap<Statement, Map<Integer,QueryInstance>>(); private final AtomicInteger xact_counter = new AtomicInteger(0); private transient boolean started = false; /** * Constructor * @param catalog_proc */ public ProcedureMappings(Procedure catalog_proc) { super(); this.catalog_proc = catalog_proc; for (Statement catalog_stmt : this.catalog_proc.getStatements()) { this.query_counters.put(catalog_stmt, new AtomicInteger(0)); this.query_instances.put(catalog_stmt, new HashMap<Integer, QueryInstance>()); } // FOR } /** * Marking the start of a new transaction * All query instance counters are reset to zero */ public void start() { assert(!this.started); if (trace.val) LOG.trace("Starting new transaction for " + this.catalog_proc + " and reseting query instance counters"); this.started = true; for (Statement catalog_stmt : this.query_counters.keySet()) { this.query_counters.get(catalog_stmt).set(0); } // FOR this.xact_counter.getAndIncrement(); } /** * Marking the end of a transaction (just for sanity) */ public void finish() { assert(this.started); if (trace.val) LOG.trace("Finished transaction for " + this.catalog_proc); this.started = false; } /** * Calculate the correlations for all underlying counters */ public void calculate() { if (trace.val) LOG.trace("Calculating correlation coefficient for " + this.query_instances.size() + " query instances in " + this.catalog_proc); for (Entry<Statement, Map<Integer, QueryInstance>> e : this.query_instances.entrySet()) { if (trace.val) LOG.trace(CatalogUtil.getDisplayName(e.getKey()) + ": " + e.getValue().size() + " query instances"); for (QueryInstance query_instance : e.getValue().values()) { query_instance.calculate(); } } // FOR if (trace.val) LOG.trace("Completed Calculations for " + this.catalog_proc); } /** * For the current transaction, return the QueryInstance object for this Statement object * @param catalog_stmt * @return */ public QueryInstance getQueryInstance(Statement catalog_stmt) { assert(this.started) : "Must call start() before grabbing query instances"; // Don't allow the counter to go beyond the limit. // We should probably do something more intelligent like merging... int current = this.query_counters.get(catalog_stmt).getAndIncrement(); if (current > QUERY_INSTANCE_MAX) { this.query_counters.get(catalog_stmt).set(QUERY_INSTANCE_MAX); current = QUERY_INSTANCE_MAX; } Map<Integer, QueryInstance> instances = this.query_instances.get(catalog_stmt); QueryInstance ret = instances.get(current); if (ret == null) { ret = new QueryInstance(catalog_stmt, current); instances.put(current, ret); if (trace.val) LOG.trace("Created new QueryInstance for record " + ret); } assert(ret != null); return (ret); } /** * Returns a mapping from QueryInstance objects to another map of StmtParameters to Correlation objects * for those ProcParameters that have a correlation greater than the given threshold. * The set of Correlation objects are sorted in descending order by their coefficients. * @param threshold * @return */ public ParameterMappingsSet getCorrelations(double threshold) { if (trace.val) LOG.trace("Extracting correlations above " + threshold + " for " + this.catalog_proc); ParameterMappingsSet results = new ParameterMappingsSet(); for (Entry<Statement, Map<Integer, QueryInstance>> e : this.query_instances.entrySet()) { if (trace.val) LOG.trace(String.format("%s: %d query instance", CatalogUtil.getDisplayName(e.getKey()), e.getValue().size())); for (QueryInstance query_instance : e.getValue().values()) { results.addAll(query_instance.getParameterMappingsSet(threshold)); } // FOR } // FOR return (results); } @Override public String toString() { String ret = this.catalog_proc.getName() + " Correlations:\n"; String format = "%-22s %d\n"; ret += String.format(format, "# of Statements:", this.query_counters.size()); ret += String.format(format, "# of Transactions:", this.xact_counter.get()); ret += "Query Instance Counters:\n"; for (Statement catalog_stmt : this.query_instances.keySet()) { ret += StringUtil.SPACER + String.format(format, catalog_stmt.getName() + ":", this.query_instances.get(catalog_stmt).size()); } // FOR ret += StringUtil.DOUBLE_LINE + StringUtil.DOUBLE_LINE; for (Statement catalog_stmt : this.query_instances.keySet()) { if (this.query_instances.get(catalog_stmt).isEmpty()) continue; ret += this.debug(catalog_stmt); ret += StringUtil.DOUBLE_LINE; } // FOR ret += StringUtil.DOUBLE_LINE + StringUtil.DOUBLE_LINE; return (ret); } public String debug(Statement catalog_stmt) { String ret = catalog_stmt.getName() + "\n"; String inner_spacer = "|" + StringUtil.SPACER; for (QueryInstance query_instance : this.query_instances.get(catalog_stmt).values()) { ret += query_instance.toString(inner_spacer); } return (ret); } } // END CLASS /** * Represents an instance of a query being executed in a transaction * The index value indicates the number of times this query was executed before in the same transaction */ protected class QueryInstance extends Pair<Statement, Integer> { private final Map<StmtParameter, Map<ProcParameter, ProcParameterCorrelation>> correlations = new ListOrderedMap<StmtParameter, Map<ProcParameter,ProcParameterCorrelation>>(); public QueryInstance(Statement catalog_stmt, Integer index) { super(catalog_stmt, index); Procedure catalog_proc = (Procedure)catalog_stmt.getParent(); for (StmtParameter catalog_stmt_param : CatalogUtil.getSortedCatalogItems(catalog_stmt.getParameters(), "index")) { Map<ProcParameter, ProcParameterCorrelation> c = new ListOrderedMap<ProcParameter, ProcParameterCorrelation>(); for (ProcParameter catalog_proc_param : CatalogUtil.getSortedCatalogItems(catalog_proc.getParameters(), "index")) { c.put(catalog_proc_param, new ProcParameterCorrelation(catalog_proc_param)); } // FOR this.correlations.put(catalog_stmt_param, c); } // FOR } public ProcParameterCorrelation getProcParameterCorrelation(StmtParameter catalog_stmt_param, ProcParameter catalog_proc_param) { return (this.correlations.get(catalog_stmt_param).get(catalog_proc_param)); } public void calculate() { if (trace.val) LOG.trace("Calculating correlation coefficients for " + this.correlations.size() + " StmtParameters in " + this.getFirst()); for (Entry<StmtParameter, Map<ProcParameter, ProcParameterCorrelation>> e : this.correlations.entrySet()) { if (trace.val) LOG.trace(CatalogUtil.getDisplayName(e.getKey()) + ": " + e.getValue().size() + " ProcParameterMappings"); for (ProcParameterCorrelation ppc : e.getValue().values()) { ppc.calculate(); } // FOR } // FOR } /** * Returns a mapping from StmtParameters to ProcParameters that have a correlation greater * than the given threshold. The set of Correlation objects are sorted in descending order * by their coefficients. * @param threshold * @return */ public ParameterMappingsSet getParameterMappingsSet(double threshold) { ParameterMappingsSet results = new ParameterMappingsSet(); if (trace.val) LOG.trace(String.format("Extracting correlations for %d StmtParameters in %s", this.correlations.size(), this.getFirst().fullName())); for (Entry<StmtParameter, Map<ProcParameter, ProcParameterCorrelation>> e : this.correlations.entrySet()) { if (trace.val) LOG.trace(String.format("%s: %d %s", CatalogUtil.getDisplayName(e.getKey()), e.getValue().size(), ProcParameterCorrelation.class.getSimpleName())); // Loop through all of the ProcParameter correlation objects and create new // Correlation objects for any results that we get back from each of them for (ProcParameterCorrelation ppc : e.getValue().values()) { for (Pair<Integer, Double> pair : ppc.getMappings(threshold)) { ParameterMapping c = new ParameterMapping( this.getFirst(), this.getSecond(), e.getKey(), ppc.getProcParameter(), pair.getFirst(), pair.getSecond() ); results.add(c); if (trace.val) LOG.trace("New Correlation: " + c); } // FOR (results) } } // FOR return (results); } @Override public String toString() { return (this.getClass().getSimpleName() + "[" + this.getFirst().getName() + "::#" + this.getSecond() + "]"); } public String toString(String spacer) { String ret = spacer + "+ " + this.toString() + " - [# of Parameters=" + this.correlations.size() + "]\n"; String inner_spacer = spacer + "|" + StringUtil.SPACER; String inner_inner_spacer = inner_spacer + "|" + StringUtil.SPACER; for (StmtParameter catalog_stmt_param : this.correlations.keySet()) { ret += inner_spacer + "+ StmtParameter[Index=" + catalog_stmt_param.getIndex() + "]\n"; for (ProcParameter catalog_proc_param : this.correlations.get(catalog_stmt_param).keySet()) { ret += this.correlations.get(catalog_stmt_param).get(catalog_proc_param).toString(inner_inner_spacer); } // FOR // ret += spacer + StringUtil.SINGLE_LINE; } // FOR return (ret); } } // END CLASS /** * For the given ProcParameter, this class maintains a AbstractCorrelation calculation for a QueryInstance * Provides a wrapper to handle array values */ protected class ProcParameterCorrelation extends TreeMap<Integer, AbstractMapping> { private static final long serialVersionUID = 1L; private final ProcParameter catalog_proc_param; private final boolean is_array; public ProcParameterCorrelation(ProcParameter catalog_param) { super(); this.catalog_proc_param = catalog_param; this.is_array = catalog_param.getIsarray(); } public AbstractMapping getAbstractCorrelation() { return (this.get(0)); } public AbstractMapping getAbstractCorrelation(int index) { assert(index == 0 || (this.is_array && index > 0)); AbstractMapping p = this.get(index); if (p == null) { p = new RatioMapping(); this.put(index, p); } return (p); } public ProcParameter getProcParameter() { return this.catalog_proc_param; } public boolean getIsArray() { return (this.is_array); } public void calculate() { if (trace.val) LOG.trace("Calculating correlation coefficients for " + this.size() + " ProcParameters instances"); for (AbstractMapping p : this.values()) { p.calculate(); } // FOR if (trace.val) LOG.trace(this.toString()); } /** * Returns all of the ProcParameters that have a correlation greater than the given threshold * @param threshold * @return */ public Set<Pair<Integer, Double>> getMappings(double threshold) { Set<Pair<Integer, Double>> ret = new HashSet<Pair<Integer,Double>>(); for (Integer index : this.keySet()) { AbstractMapping p = this.get(index); Double result = p.calculate(); if (result != null && result >= threshold) { ret.add(Pair.of(index, result)); } } // FOR return (ret); } @Override public String toString() { return this.toString(""); } public String toString(String spacer) { StringBuilder sb = new StringBuilder(); sb.append(spacer) .append(this.getClass().getSimpleName()) .append(" [") .append("Index=" + this.catalog_proc_param.getIndex() + ", ") .append("# of Entries=" + this.size()) .append("]"); return (sb.toString()); } public String debug() { String spacer = ""; StringBuilder sb = new StringBuilder(this.toString()); sb.append("\n"); for (Integer index : this.keySet()) { sb.append(spacer).append(StringUtil.SPACER) .append("[" + index + "] " + this.get(index) + "\n"); } // FOR return (sb.toString()); } } // END CLASS private final Database catalog_db; private final Map<Procedure, ProcedureMappings> mappings = new HashMap<Procedure, ProcedureMappings>(); /** * Constructor * @param catalog_db */ public MappingCalculator(Database catalog_db) { this.catalog_db = catalog_db; for (Procedure catalog_proc : this.catalog_db.getProcedures()) { if (catalog_proc.getSystemproc()) continue; this.mappings.put(catalog_proc, new ProcedureMappings(catalog_proc)); } // FOR } public ProcedureMappings getProcedureCorrelations(Procedure catalog_proc) { return (this.mappings.get(catalog_proc)); } /** * Recursively invoke the calculate method for all underlying ProcedureCorrelation objects */ public void calculate() { if (debug.val) LOG.debug(String.format("Calculating correlations for %d %s", this.mappings.size(), ProcedureMappings.class.getSimpleName())); for (ProcedureMappings pm : this.mappings.values()) { if (pm.catalog_proc.getSystemproc()) continue; pm.calculate(); } // FOR if (debug.val) LOG.debug(String.format("Completed calculations for %s", ProcedureMappings.class.getSimpleName())); } /** * Process all transaction records in a WorkloadIterator * @param it * @throws Exception */ public void process(Iterator<AbstractTraceElement<? extends CatalogType>> it) throws Exception { long xact_ctr = 0; while (it.hasNext()) { AbstractTraceElement<? extends CatalogType> element = it.next(); if (element instanceof TransactionTrace) { TransactionTrace xact = (TransactionTrace)element; if (xact_ctr++ % 100 == 0) LOG.info("Processing xact #" + xact_ctr); this.processTransaction(xact); } } // WHILE } /** * Process a single transaction trace * @param xact_trace * @throws Exception */ public void processTransaction(TransactionTrace xact_trace) throws Exception { if (trace.val) LOG.trace("Processing correlations for " + xact_trace); Procedure catalog_proc = xact_trace.getCatalogItem(this.catalog_db); assert(catalog_proc != null); ProcedureMappings correlation = this.mappings.get(catalog_proc); correlation.start(); // Cast all the ProcParameters once in the beginning Number xact_params[][] = new Number[xact_trace.getParams().length][]; for (int i = 0; i < xact_params.length; i++) { ProcParameter catalog_proc_param = catalog_proc.getParameters().get("index", i); assert(catalog_proc_param != null); VoltType proc_param_type = VoltType.get(catalog_proc_param.getType()); try { // Arrays if (catalog_proc_param.getIsarray()) { Object param_arr[] = xact_trace.getParam(i); xact_params[i] = new Number[param_arr.length]; for (int ii = 0; ii < param_arr.length; ii++) { xact_params[i][ii] = this.getParamAsNumber(proc_param_type, param_arr[ii]); } // FOR // Scalars (just store in the first element of the array } else { xact_params[i] = new Number[] { this.getParamAsNumber(proc_param_type, xact_trace.getParam(i)) }; } } catch (Exception ex) { LOG.error("Failed to process " + CatalogUtil.getDisplayName(catalog_proc_param)); throw ex; } } // FOR // Now run through all of the queries and calculate the correlation between StmtParameters and ProcParameters for (QueryTrace query_trace : xact_trace.getQueries()) { Statement catalog_stmt = query_trace.getCatalogItem(this.catalog_db); QueryInstance query_instance = correlation.getQueryInstance(catalog_stmt); Object query_params[] = query_trace.getParams(); // For each of the StmtParameter, update the correlation information for each of the ProcParameters for (int i = 0; i < query_params.length; i++) { StmtParameter catalog_stmt_param = catalog_stmt.getParameters().get(i); assert(catalog_stmt_param != null); VoltType stmt_param_type = VoltType.get(catalog_stmt_param.getJavatype()); assert(stmt_param_type != VoltType.INVALID); Number stmt_param_val = this.getParamAsNumber(stmt_param_type, query_params[i]); for (int ii = 0; ii < xact_params.length; ii++) { ProcParameter catalog_proc_param = catalog_proc.getParameters().get(ii); assert(catalog_proc_param != null) : "Missing ProcParameter in " + catalog_proc + " at index " + ii; VoltType proc_param_type = VoltType.get(catalog_proc_param.getType()); assert(proc_param_type != VoltType.INVALID); ProcParameterCorrelation ppc = query_instance.getProcParameterCorrelation(catalog_stmt_param, catalog_proc_param); for (int iii = 0; iii < xact_params[ii].length; iii++) { ppc.getAbstractCorrelation(iii).addOccurrence(stmt_param_val, xact_params[ii][iii]); } // FOR } // FOR (xact_params) } // FOR (query_params) } // FOR (query_trace) correlation.finish(); } /** * Get a ParameterMappings object * @param threshold * @return */ public ParameterMappingsSet getParameterMappings(double threshold) { ParameterMappingsSet ret = new ParameterMappingsSet(); LOG.debug("Extracting ParameterMappings above threshold " + threshold + " [# of correlations=" + this.mappings.size() + "]"); for (ProcedureMappings pc : this.mappings.values()) { if (pc.catalog_proc.getSystemproc()) continue; ret.addAll(pc.getCorrelations(threshold)); } // FOR return (ret); } /** * Helper function to cast the raw Object of a parameter into the proper Number * This will convert Dates to longs representing their time in milliseconds * @param type * @param raw_value * @return * @throws ParseException */ protected Number getParamAsNumber(VoltType type, Object raw_value) throws ParseException { if (raw_value == null) return (null); assert(type != VoltType.INVALID); Number ret = null; switch (type) { case TIMESTAMP: { Object param_obj = VoltTypeUtil.getObjectFromString(type, raw_value.toString()); ret = ((TimestampType)param_obj).getTime(); break; } case STRING: ret = raw_value.hashCode(); break; case BOOLEAN: ret = ((Boolean)raw_value ? 1 : 0); break; default: { Object param_obj = VoltTypeUtil.getObjectFromString(type, raw_value.toString()); ret = (Number)param_obj; break; } } // SWITCH return (ret); } /** * Tester * @param args */ public static void main(String[] vargs) throws Exception { ArgumentsParser args = ArgumentsParser.load(vargs); args.require( ArgumentsParser.PARAM_CATALOG, ArgumentsParser.PARAM_WORKLOAD, ArgumentsParser.PARAM_MAPPINGS_OUTPUT ); LOG.info("Starting " + MappingCalculator.class.getSimpleName()); if (debug.val) LOG.debug("Workload Procedures Distribution:\n" + args.workload.getProcedureHistogram()); MappingCalculator cc = new MappingCalculator(args.catalog_db); int ctr = 0; for (AbstractTraceElement<?> element : args.workload) { if (element instanceof TransactionTrace) { try { cc.processTransaction((TransactionTrace)element); } catch (Exception ex) { throw new Exception("Failed to process " + element, ex); } ctr++; } } // FOR LOG.info("Finished processing " + ctr + " TransactionTraces. Now calculating correlation coeffcients..."); cc.calculate(); // System.err.println("Dumping out correlations..."); // // for (Procedure catalog_proc : args.catalog_db.getProcedures()) { // if (!catalog_proc.getName().equals("neworder")) continue; // System.err.println(cc.getProcedureCorrelations(catalog_proc)); // } // FOR double threshold = 1.0d; if (args.hasDoubleParam(ArgumentsParser.PARAM_MAPPINGS_THRESHOLD)) { threshold = args.getDoubleParam(ArgumentsParser.PARAM_MAPPINGS_THRESHOLD); } ParameterMappingsSet pc = cc.getParameterMappings(threshold); File output_path = args.getFileParam(ArgumentsParser.PARAM_MAPPINGS_OUTPUT); assert(!pc.isEmpty()); if (debug.val) LOG.debug("DEBUG DUMP:\n" + pc.debug()); pc.save(output_path); LOG.info(String.format("Wrote %s to '%s'", pc.getClass().getSimpleName(), output_path)); } }