package edu.brown.hstore.estimators.markov; import java.io.File; import java.util.ArrayList; import java.util.Collection; import java.util.List; import org.apache.log4j.Logger; import org.voltdb.VoltProcedure; import org.voltdb.benchmark.tpcc.procedures.neworder; import org.voltdb.catalog.ProcParameter; import org.voltdb.catalog.Procedure; import org.voltdb.catalog.Statement; import org.voltdb.types.ExpressionType; import edu.brown.BaseTestCase; import edu.brown.catalog.CatalogUtil; import edu.brown.hstore.estimators.EstimatorUtil; import edu.brown.mappings.ParameterMappingsSet; import edu.brown.markov.MarkovGraph; import edu.brown.markov.MarkovVertex; import edu.brown.markov.containers.MarkovGraphsContainerUtil; import edu.brown.markov.containers.MarkovGraphsContainer; import edu.brown.statistics.Histogram; import edu.brown.statistics.ObjectHistogram; import edu.brown.utils.CollectionUtil; import edu.brown.utils.MathUtil; import edu.brown.utils.PartitionSet; import edu.brown.utils.ProjectType; import edu.brown.utils.StringUtil; import edu.brown.workload.QueryTrace; import edu.brown.workload.TransactionTrace; import edu.brown.workload.Workload; import edu.brown.workload.filters.BasePartitionTxnFilter; import edu.brown.workload.filters.Filter; import edu.brown.workload.filters.NoAbortFilter; import edu.brown.workload.filters.ProcParameterArraySizeFilter; import edu.brown.workload.filters.ProcedureLimitFilter; import edu.brown.workload.filters.ProcedureNameFilter; /** * @author pavlo */ public class TestMarkovPathEstimator extends BaseTestCase { private static final int WORKLOAD_XACT_LIMIT = 100; private static final int BASE_PARTITION = 1; private static final int NUM_PARTITIONS = 10; private static final Class<? extends VoltProcedure> TARGET_PROCEDURE = neworder.class; private static Workload workload; private static MarkovGraphsContainer markovs; private static ParameterMappingsSet mappings; private static TransactionTrace singlep_trace; private static TransactionTrace multip_trace; private static final PartitionSet multip_partitions = new PartitionSet(); private static final List<MarkovVertex> multip_path = new ArrayList<MarkovVertex>(); private MarkovPathEstimator pathEstimator; private MarkovEstimate estimate; private Procedure catalog_proc; private MarkovGraph graph; public void setUp() throws Exception { super.setUp(ProjectType.TPCC); this.addPartitions(NUM_PARTITIONS); this.catalog_proc = this.getProcedure(TARGET_PROCEDURE); if (isFirstSetup()) { File file = this.getParameterMappingsFile(ProjectType.TPCC); mappings = new ParameterMappingsSet(); mappings.load(file, catalogContext.database); // Workload Filter: // (1) Only include TARGET_PROCEDURE traces // (2) Only include traces with 10 orderline items // (3) Only include traces that execute on the BASE_PARTITION // (4) Limit the total number of traces to WORKLOAD_XACT_LIMIT List<ProcParameter> array_params = CatalogUtil.getArrayProcParameters(this.catalog_proc); Filter filter = new ProcedureNameFilter(false) .include(TARGET_PROCEDURE.getSimpleName()) .attach(new NoAbortFilter()) .attach(new ProcParameterArraySizeFilter(array_params.get(0), 10, ExpressionType.COMPARE_EQUAL)) .attach(new BasePartitionTxnFilter(p_estimator, BASE_PARTITION)) .attach(new ProcedureLimitFilter(WORKLOAD_XACT_LIMIT)); file = this.getWorkloadFile(ProjectType.TPCC); workload = new Workload(catalogContext.catalog); ((Workload) workload).load(file, catalogContext.database, filter); // for (TransactionTrace xact : workload.getTransactions()) { // System.err.println(xact.debug(catalogContext.database)); // System.err.println(StringUtil.repeat("+", 100)); // } // Generate MarkovGraphs markovs = MarkovGraphsContainerUtil.createBasePartitionMarkovGraphsContainer(catalogContext, workload, p_estimator); assertNotNull(markovs); // Find a single-partition and multi-partition trace multip_partitions.add(BASE_PARTITION); for (TransactionTrace xact : workload.getTransactions()) { Object ol_supply_w_ids[] = (Object[])xact.getParam(5); assert(ol_supply_w_ids != null); boolean same_partition = true; for (Object i : ol_supply_w_ids) { Integer partition = p_estimator.getHasher().hash(Integer.valueOf(i.toString())); same_partition = same_partition && (partition == BASE_PARTITION); if (same_partition == false && multip_trace == null) { multip_partitions.add(partition); } } // FOR if (same_partition && singlep_trace == null) singlep_trace = xact; if (same_partition == false && multip_trace == null) { multip_trace = xact; multip_path.addAll(markovs.get(BASE_PARTITION, this.catalog_proc).processTransaction(xact, p_estimator)); } if (singlep_trace != null && multip_trace != null) break; } // FOR } assertNotNull(singlep_trace); assertNotNull(multip_trace); assert(multip_partitions.size() > 1); assertFalse(multip_path.isEmpty()); // Setup this.pathEstimator = new MarkovPathEstimator(catalogContext, p_estimator); this.graph = markovs.get(BASE_PARTITION, this.catalog_proc); assertNotNull("No graph exists for " + this.catalog_proc + " on Partition #" + BASE_PARTITION, this.graph); this.estimate = new MarkovEstimate(catalogContext); this.estimate.init(this.graph.getStartVertex(), 0); } /** * testAutoLearning */ public void testAutoLearning() throws Exception { Logger LOG = Logger.getRootLogger(); // Use a blank MarkovGraph and check to see whether the MarkovPathEstimator // can automatically learn what the states and transitions graph = new MarkovGraph(this.catalog_proc); graph.initialize(); pathEstimator.setLearningEnabled(true); pathEstimator.setForceTraversal(true); boolean first = true; boolean found_autolearn = false; for (TransactionTrace tt : workload) { MarkovVertex last_v = graph.getStartVertex(); // We have to inject at least one path through the system first // so that it knows what Statements it should be considering if (first) { assertEquals(estimate.toString(), 0, estimate.getMarkovPath().size()); Histogram<Statement> stmtCounter = new ObjectHistogram<Statement>(); PartitionSet allPartitions = new PartitionSet(); for (QueryTrace qt : tt.getQueries()) { Statement stmt = qt.getCatalogItem(catalogContext.database); int stmtCnt = (int)stmtCounter.get(stmt, 0); PartitionSet partitions = new PartitionSet(); p_estimator.getAllPartitions(partitions, qt, BASE_PARTITION); MarkovVertex next_v = new MarkovVertex(stmt, MarkovVertex.Type.QUERY, stmtCnt, partitions, allPartitions); graph.addVertex(next_v); graph.addToEdge(last_v, next_v); stmtCounter.put(stmt); allPartitions.addAll(partitions); last_v = next_v; } // FOR // Don't forget to connect the last vertex with the COMMIT // and then we can calculate the edge probabilities. assert(last_v != null); graph.addToEdge(last_v, graph.getCommitVertex()); graph.calculateProbabilities(catalogContext.getAllPartitionIds()); } // Then after we do that, we should always be able to get a path // even if the states aren't there. We just need to make sure that // we always have a complete path and at least one of the traces // caused new vertices to be created. else { // LOG.info(StringUtil.repeat("=", 150)); // LOG.info(StringUtil.repeat("=", 150)); // LOG.info(StringUtil.repeat("=", 150)); estimate = new MarkovEstimate(catalogContext); estimate.init(last_v, EstimatorUtil.INITIAL_ESTIMATE_BATCH); pathEstimator.init(graph, estimate, tt.getParams(), BASE_PARTITION); pathEstimator.traverse(last_v); List<MarkovVertex> path = estimate.getMarkovPath(); // System.err.println(StringUtil.join("\n", path)); assertEquals(graph.getStartVertex(), CollectionUtil.first(path)); // If the estimator created new vertices, then that means that we couldn't figure // our complete path. This is because once we create the new vertex we won't // know what the next vertex should be after that because the new vertex // won't have any children. Collection<MarkovVertex> createdVertices = pathEstimator.getCreatedVertices(); if (createdVertices != null) { found_autolearn = true; LOG.info("Automatically created new vertices:\n" + StringUtil.join("\n", createdVertices)); } else { assertEquals(graph.getCommitVertex(), CollectionUtil.last(path)); } pathEstimator.finish(); } // if (first == false) break; first = false; } // FOR assertTrue("No txn needed the estimator to create new vertices", found_autolearn); } /** * testFinish */ public void testFinish() throws Exception { pathEstimator.init(this.graph, this.estimate, singlep_trace.getParams(), BASE_PARTITION); assertTrue(pathEstimator.isInitialized()); pathEstimator.setForceTraversal(true); assertEquals(1.0f, estimate.getConfidenceCoefficient(), MarkovGraph.PROBABILITY_EPSILON); pathEstimator.traverse(this.graph.getStartVertex()); assertTrue(pathEstimator.isInitialized()); pathEstimator.finish(); assertFalse(pathEstimator.isInitialized()); } /** * testMarkovEstimate */ public void testMarkovEstimate() throws Exception { pathEstimator.init(this.graph, this.estimate, singlep_trace.getParams(), BASE_PARTITION); assert(pathEstimator.isInitialized()); pathEstimator.setForceTraversal(true); pathEstimator.traverse(this.graph.getStartVertex()); List<MarkovVertex> visitPath = pathEstimator.getVisitPath(); // System.err.println(StringUtil.columns(StringUtil.join("\n", visitPath), this.estimate.toString())); assertFalse(singlep_trace.isAborted()); assertFalse(visitPath.contains(this.graph.getAbortVertex())); // System.err.println(singlep_trace.debug(catalogContext.database)); // System.err.println("Base Partition = " + p_estimator.getBasePartition(singlep_trace)); // for (QueryTrace qtrace : singlep_trace.getQueries()) { // System.err.println(qtrace.debug(catalogContext.database) + " => " + p_estimator.getAllPartitions(qtrace, BASE_PARTITION)); // } for (int p : catalogContext.getAllPartitionIdArray()) { // assertTrue(estimate.toString(), estimate.isReadOnlyProbabilitySet(p)); assertTrue(estimate.toString(), estimate.isWriteProbabilitySet(p)); assertTrue(estimate.toString(), estimate.isDoneProbabilitySet(p)); if (estimate.getDoneProbability(p) < 0.9f) { assert(estimate.getTouchedCounter(p) > 0) : String.format("TOUCHED[%d]: %d", p, estimate.getTouchedCounter(p)); assert(MathUtil.greaterThan(estimate.getWriteProbability(p), 0.0f, 0.01f)) : String.format("WRITE[%d]: %f", p, estimate.getWriteProbability(p)); } else if (MathUtil.equals(estimate.getDoneProbability(p), 0.01f, 0.03f)) { assertEquals(0, estimate.getTouchedCounter(p)); assertEquals(0.0f, estimate.getWriteProbability(p), MarkovGraph.PROBABILITY_EPSILON); } } // FOR assert(estimate.isAbortProbabilitySet()); // assert(estimate.isSinglePartitionProbabilitySet()); // assert(estimate.getSinglePartitionProbability() < 1.0f); assertTrue(estimate.toString(), estimate.isConfidenceCoefficientSet()); assert(estimate.getConfidenceCoefficient() >= 0f); assert(estimate.getConfidenceCoefficient() <= 1f); } /** * testSinglePartition */ public void testSinglePartition() throws Exception { MarkovVertex start = this.graph.getStartVertex(); MarkovVertex commit = this.graph.getCommitVertex(); MarkovVertex abort = this.graph.getAbortVertex(); pathEstimator.init(this.graph, this.estimate, singlep_trace.getParams(), BASE_PARTITION); pathEstimator.setForceTraversal(true); pathEstimator.traverse(this.graph.getStartVertex()); assertTrue(estimate.isConfidenceCoefficientSet()); float confidence = this.estimate.getConfidenceCoefficient(); // System.err.println("INITIAL PATH:\n" + StringUtil.join("\n", path)); // System.err.println("CONFIDENCE: " + confidence); // System.err.println("DUMPED FILE: " + MarkovUtil.exportGraphviz(this.graph, false, this.graph.getPath(path)).writeToTempFile()); // System.err.println(singlep_trace.debug(catalogContext.database)); // System.err.println(StringUtil.columns(StringUtil.join("\n", path), this.estimate.toString())); ArrayList<MarkovVertex> path = new ArrayList<MarkovVertex>(this.estimate.getMarkovPath()); assertEquals(path, new ArrayList<MarkovVertex>(pathEstimator.getVisitPath())); assertEquals(start, CollectionUtil.first(path)); assertEquals(commit, CollectionUtil.last(path)); assertFalse(path.contains(abort)); assert(confidence > 0.0f); // All of the vertices should only have the base partition in their partition set for (int i = 1, cnt = path.size() - 1; i < cnt; i++) { MarkovVertex v = path.get(i); assertEquals(1, v.getPartitions().size()); assert(v.getPartitions().contains(BASE_PARTITION)); } // FOR // GraphvizExport<Vertex, Edge> gv = MarkovUtil.exportGraphviz(this.graph, true, this.graph.getPath(path)); // FileUtil.writeStringToFile("/tmp/dump.dot", gv.export(this.graph.getProcedure().getName())); } /** * testMultiPartition */ public void testMultiPartition() throws Exception { // System.err.println("MULTI-PARTITION: " + multip_trace); MarkovVertex start = this.graph.getStartVertex(); MarkovVertex commit = this.graph.getCommitVertex(); MarkovVertex abort = this.graph.getAbortVertex(); pathEstimator.init(this.graph, this.estimate, multip_trace.getParams(), BASE_PARTITION); pathEstimator.setForceTraversal(true); pathEstimator.traverse(this.graph.getStartVertex()); // System.err.println("INITIAL PATH:\n" + StringUtil.join("\n", path)); // System.err.println("CONFIDENCE: " + confidence); // System.err.println("DUMPED FILE: " + MarkovUtil.exportGraphviz(this.graph, false, this.graph.getPath(path)).writeToTempFile()); // System.err.println(multip_trace.debug(catalogContext.database)); // System.err.println(StringUtil.columns(StringUtil.join("\n", path), this.estimate.toString())); ArrayList<MarkovVertex> path = new ArrayList<MarkovVertex>(this.estimate.getMarkovPath()); assertEquals(path, new ArrayList<MarkovVertex>(pathEstimator.getVisitPath())); assertEquals(start, CollectionUtil.first(path)); assertEquals(commit, CollectionUtil.last(path)); assertFalse(path.contains(abort)); // All of the vertices should only have the base partition in their partition set PartitionSet touched_partitions = new PartitionSet(); for (MarkovVertex v : path) { touched_partitions.addAll(v.getPartitions()); } // FOR // System.err.println("Expected Partitions: " + multip_partitions); // System.err.println("Touched Partitions: " + touched_partitions); // System.err.println("MULTI-PARTITION PATH: " + path); // this.writeGraphviz(multip_path); // this.writeGraphviz(path); assertEquals(multip_partitions, touched_partitions); } }