package edu.brown.costmodel;

import java.io.File;
import java.lang.reflect.Field;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.voltdb.catalog.CatalogType;
import org.voltdb.catalog.Procedure;

import edu.brown.BaseTestCase;
import edu.brown.benchmark.tm1.procedures.DeleteCallForwarding;
import edu.brown.benchmark.tm1.procedures.GetAccessData;
import edu.brown.benchmark.tm1.procedures.GetSubscriberData;
import edu.brown.benchmark.tm1.procedures.UpdateLocation;
import edu.brown.hstore.HStoreConstants;
import edu.brown.rand.RandomDistribution;
import edu.brown.statistics.Histogram;
import edu.brown.statistics.ObjectHistogram;
import edu.brown.statistics.WorkloadStatistics;
import edu.brown.utils.CollectionUtil;
import edu.brown.utils.ProjectType;
import edu.brown.workload.AbstractTraceElement;
import edu.brown.workload.TransactionTrace;
import edu.brown.workload.Workload;
import edu.brown.workload.filters.Filter;
import edu.brown.workload.filters.ProcedureLimitFilter;
import edu.brown.workload.filters.ProcedureNameFilter;

public class TestTimeIntervalCostModel extends BaseTestCase {

    private static final long WORKLOAD_XACT_LIMIT = 1000;
    private static final String MULTIPARTITION_PROCEDURES[] = {
        DeleteCallForwarding.class.getSimpleName(),
        UpdateLocation.class.getSimpleName(),
    };
    private static final String SINGLEPARTITION_PROCEDURES[] = {
        GetAccessData.class.getSimpleName(),
        GetSubscriberData.class.getSimpleName()
    };
    private static final int NUM_PARTITIONS = 5;
    private static final int NUM_INTERVALS = 10;

    // Reading the workload takes a long time, so we only want to do it once
    private static Workload multip_workload;
    private static Workload singlep_workload;
    private static WorkloadStatistics stats;

    private TimeIntervalCostModel<SingleSitedCostModel> cost_model;
    private final Random rand = new Random();

    @Override
    protected void setUp() throws Exception {
        super.setUp(ProjectType.TM1);
        this.addPartitions(NUM_PARTITIONS);

        // Super hack! Walk back the directories and find out the workload directory
        if (multip_workload == null) {
            File f = this.getWorkloadFile(ProjectType.TM1);

            // All Multi-Partition Txn Workload
            ProcedureNameFilter multi_filter = new ProcedureNameFilter(false);
            multi_filter.include(MULTIPARTITION_PROCEDURES);
            multi_filter.attach(new ProcedureLimitFilter(WORKLOAD_XACT_LIMIT));
            multip_workload = new Workload(catalog);
            ((Workload)multip_workload).load(f, catalog_db, multi_filter);
            assert(multip_workload.getTransactionCount() > 0);

            // All Single-Partition Txn Workload
            ProcedureNameFilter single_filter = new ProcedureNameFilter(false);
            single_filter.include(SINGLEPARTITION_PROCEDURES);
            single_filter.attach(new ProcedureLimitFilter(WORKLOAD_XACT_LIMIT));
            singlep_workload = new Workload(catalog);
            ((Workload)singlep_workload).load(f, catalog_db, single_filter);
            assert(singlep_workload.getTransactionCount() > 0);

            // Workload Statistics
            f = this.getStatsFile(ProjectType.TM1);
            stats = new WorkloadStatistics(catalog_db);
            stats.load(f, catalog_db);
        }
        assertNotNull(multip_workload);
        assertNotNull(singlep_workload);
        assertNotNull(stats);

        this.cost_model = new TimeIntervalCostModel<SingleSitedCostModel>(catalogContext, SingleSitedCostModel.class, NUM_INTERVALS);
    }
    /**
     * testWeightedTxnEstimation
     * Check that repeating the same transaction trace many times yields the same
     * cost estimate as using fewer traces marked with transaction weights.
     */
    public void testWeightedTxnEstimation() throws Exception {
        int num_txns = 20;
        int num_intervals = 5;

        // Make a workload that has the same transaction in it multiple times
        Workload new_workload = new Workload(catalog);
        TransactionTrace multip_txn = CollectionUtil.first(multip_workload);
        Procedure catalog_proc = multip_txn.getCatalogItem(catalog_db);
        for (int i = 0; i < num_txns; i++) {
            TransactionTrace clone = (TransactionTrace)multip_txn.clone();
            clone.setTransactionId(i);
            clone.setTimestamps(new Long((i/5)*1000), new Long((i/5)*1000 + 100));
            // System.err.println(clone.debug(catalog_db) + "\n");
            new_workload.addTransaction(catalog_proc, clone);
        } // FOR
        assertEquals(num_txns, new_workload.getTransactionCount());
        TimeIntervalCostModel<SingleSitedCostModel> orig_costModel = new TimeIntervalCostModel<SingleSitedCostModel>(catalogContext, SingleSitedCostModel.class, num_intervals);
        double cost0 = orig_costModel.estimateWorkloadCost(catalogContext, new_workload);

        // Now make a new workload that has the same multi-partition transaction,
        // but this time each trace appears only once with a transaction weight.
        // We should get back the exact same cost
        // System.err.println("+++++++++++++++++++++++++++++++++++++++++++++");
        new_workload = new Workload(catalog);
        for (int i = 0; i < num_txns/5; i++) {
            TransactionTrace clone = (TransactionTrace)multip_txn.clone();
            clone.setTransactionId(i);
            clone.setTimestamps(new Long((i*5)*1000), new Long((i*5)*1000 + 100));
            clone.setWeight(5);
            // System.err.println(clone.debug(catalog_db) + "\n");
            new_workload.addTransaction(catalog_proc, clone);
        } // FOR
        TimeIntervalCostModel<SingleSitedCostModel> new_costModel = new TimeIntervalCostModel<SingleSitedCostModel>(catalogContext, SingleSitedCostModel.class, num_intervals);
        double cost1 = new_costModel.estimateWorkloadCost(catalogContext, new_workload);
        assert(cost1 > 0);
        assertEquals(cost0, cost1, 0.001);

        // Now make sure the histograms match up
        Map<Field, Histogram<?>> orig_histograms = TestSingleSitedCostModel.getHistograms(orig_costModel);
        assertFalse(orig_histograms.isEmpty());
        Map<Field, Histogram<?>> new_histograms = TestSingleSitedCostModel.getHistograms(new_costModel);
        assertFalse(new_histograms.isEmpty());
        for (Field f : orig_histograms.keySet()) {
            Histogram<?> orig_h = orig_histograms.get(f);
            assertNotNull(orig_h);
            Histogram<?> new_h = new_histograms.get(f);
            assert(orig_h != new_h);
            assertNotNull(new_h);
            assertEquals(orig_h, new_h);
        } // FOR
    }
    /**
     * testEstimateCost
     */
    public void testEstimateCost() throws Exception {
        // For now just check whether we get the same cost back for the same
        // workload... seems simple enough
        double cost0 = cost_model.estimateWorkloadCost(catalogContext, multip_workload);
        double cost1 = cost_model.estimateWorkloadCost(catalogContext, multip_workload);
        assertEquals(cost0, cost1);

        // Then make a new object and make sure that it returns the same cost as well
        AbstractCostModel new_costmodel = new TimeIntervalCostModel<SingleSitedCostModel>(catalogContext, SingleSitedCostModel.class, NUM_INTERVALS);
        cost1 = new_costmodel.estimateWorkloadCost(catalogContext, multip_workload);
        assertEquals(cost0, cost1);
    }

    /**
     * testIntervals
     */
    public void testIntervals() throws Exception {
        this.cost_model.estimateWorkloadCost(catalogContext, multip_workload);
        for (int i = 0; i < NUM_INTERVALS; i++) {
            SingleSitedCostModel sub_model = this.cost_model.getCostModel(i);
            assertNotNull(sub_model);

            // Check Partition Access Histogram
            Histogram<Integer> hist_access = sub_model.getQueryPartitionAccessHistogram();
            assertNotNull(hist_access);
            assertEquals(NUM_PARTITIONS, hist_access.getValueCount());

            // Check Java Execution Histogram
            Histogram<Integer> hist_execute = sub_model.getJavaExecutionHistogram();
            assertNotNull(hist_execute);
            // System.err.println("Interval #" + i + "\n" + hist_execute);
            // assertEquals(1, hist_execute.getValueCount());
        } // FOR
    }
    /**
     * testSinglePartitionedUniformWorkload
     */
    public void testSinglePartitionedUniformWorkload() throws Exception {
        // This workload will only consist of single-partition txns and
        // is evenly spread out across all partitions
        final BitSet txn_for_partition = new BitSet(NUM_PARTITIONS);
        Filter filter = new Filter() {
            @Override
            protected void resetImpl() {
                // Nothing...
            }
            @Override
            protected FilterResult filter(AbstractTraceElement<? extends CatalogType> element) {
                if (element instanceof TransactionTrace) {
                    TransactionTrace xact = (TransactionTrace)element;
                    try {
                        int partition = p_estimator.getBasePartition(xact);
                        if (partition == HStoreConstants.NULL_PARTITION_ID)
                            System.err.println(xact.debug(catalog_db));
                        assert(partition != HStoreConstants.NULL_PARTITION_ID);
                        if (txn_for_partition.get(partition)) {
                            return (FilterResult.SKIP);
                        }
                        txn_for_partition.set(partition);
                    } catch (Exception ex) {
                        ex.printStackTrace();
                        assert(false);
                    }
                }
                return (FilterResult.ALLOW);
            }
            @Override
            public String debugImpl() {
                return null;
            }
        };

        // Estimate the cost and examine the state of the estimation
        double cost = this.cost_model.estimateWorkloadCost(catalogContext, singlep_workload, filter, null);
        // System.err.println(txn_for_partition);
        for (int i = 0; i < NUM_PARTITIONS; i++) {
            assert(txn_for_partition.get(i)) : "No txn in workload for partition #" + i;
        } // FOR
        assert(cost >= 0.0d) : "Invalid cost: " + cost;
        assert(cost <= 2.0d) : "Invalid cost: " + cost;
        // System.err.println("Final Cost: " + cost);
        // System.err.println("Execution: " + this.cost_model.last_execution_cost);
        // System.err.println("Entropy: " + this.cost_model.last_entropy_cost);
    }
    /**
     * testSinglePartitionSkewedWorkload
     */
    public void testSinglePartitionSkewedWorkload() throws Exception {
        // First construct a zipfian-based histogram of partitions and then create a filter that
        // will selectively prune out txns based on the frequencies in the histogram
        Histogram<Integer> h = new ObjectHistogram<Integer>();
        double sigma = 3.5d;
        RandomDistribution.Zipf zipf = new RandomDistribution.Zipf(this.rand, 0, NUM_PARTITIONS, sigma);
        for (int i = 0; i < 100; i++) {
            h.put(zipf.nextInt());
        } // FOR

        final Map<Integer, Double> probs = new HashMap<Integer, Double>();
        for (int i = 0; i < NUM_PARTITIONS; i++) {
            long cnt = h.get(i, 0);
            probs.put(i, cnt / 100.0d);
        } // FOR

        Filter filter = new Filter() {
            @Override
            protected void resetImpl() {
                // Nothing...
            }
            @Override
            protected FilterResult filter(AbstractTraceElement<? extends CatalogType> element) {
                if (element instanceof TransactionTrace) {
                    TransactionTrace xact = (TransactionTrace)element;
                    try {
                        int partition = p_estimator.getBasePartition(xact);
                        if (partition == HStoreConstants.NULL_PARTITION_ID)
                            System.err.println(xact.debug(catalog_db));
                        assert(partition != HStoreConstants.NULL_PARTITION_ID);
                        double next = rand.nextDouble();
                        double prob = probs.get(partition);
                        boolean skip = (next > prob);
                        // System.err.println("Partition=" + partition + ", Prob=" + prob + ", Next=" + next + ", Skip=" + skip);
                        if (skip) return (FilterResult.SKIP);
                    } catch (Exception ex) {
                        ex.printStackTrace();
                        assert(false);
                    }
                }
                return (FilterResult.ALLOW);
            }
            @Override
            public String debugImpl() {
                return null;
            }
        };

        // Estimate the cost and then check whether the cost falls in our expected range.
        // We expect that the entropy portion of the cost should be greater than 0.50
        double cost = this.cost_model.estimateWorkloadCost(catalogContext, singlep_workload, filter, null);
        // System.err.println("Final Cost: " + cost);
        // System.err.println("Execution: " + this.cost_model.last_execution_cost);
        // System.err.println("Entropy: " + this.cost_model.last_entropy_cost);
        assert(cost >= 0.0d) : "Invalid cost: " + cost;
        assert(cost <= 2.0d) : "Invalid cost: " + cost;
        // FIXME assert(this.cost_model.last_entropy_cost > 0.50) : "Invalid Entropy: " + this.cost_model.last_entropy_cost;
    }
    // FIXME 2012-17-10
    // This is broken after changing PartitionSet internals
//    /**
//     * testConsistentCost
//     */
//    public void testConsistentCost() throws Exception {
//        // We want to check that if we run the same workload multiple times that we get
//        // the same cost each time
//        int tries = 4;
//
//        final DesignerHints hints = new DesignerHints();
//        hints.limit_local_time = 1;
//        hints.limit_total_time = 5;
//        hints.enable_costmodel_caching = false;
//        hints.enable_costmodel_java_execution = false;
//        hints.max_memory_per_partition = Long.MAX_VALUE;
//        hints.enable_vertical_partitioning = false;
//        final PartitionPlan initial = PartitionPlan.createFromCatalog(catalog_db);
//
//        // HACK: Enable debug output in BranchAndBoundPartitioner so that it slows down
//        // the traversal. There is a race condition since we were able to speed things up
//        BranchAndBoundPartitioner.LOG.setLevel(Level.DEBUG);
//
//        System.err.println("INITIAL PARTITIONPLAN:\n" + initial);
//
//        Double last_cost = null;
//        while (tries-- > 0) {
//            final Database clone_db = CatalogCloner.cloneDatabase(catalog_db);
//            CatalogContext clone_catalogContext = new CatalogContext(clone_db.getCatalog());
//
//            final TimeIntervalCostModel<SingleSitedCostModel> cm = new TimeIntervalCostModel<SingleSitedCostModel>(clone_db, SingleSitedCostModel.class, NUM_INTERVALS);
////            AbstractCostModel cm = new SingleSitedCostModel(catalog_db, p_estimator);
//            AbstractCostModel cm = this.cost_model;
//
//            double cost0 = cm.estimateWorkloadCost(clone_catalogContext, singlep_workload);
//            assert(cost0 > 0) : "[0] Invalid Cost: " + cost0;
//            if (last_cost != null) {
//                assertEquals("[0] Try #" + tries, cost0, last_cost.doubleValue());
//            }
//
//            DesignerInfo info = new DesignerInfo(clone_catalogContext, singlep_workload);
//            info.setStats(stats);
//            info.setNumIntervals(NUM_INTERVALS);
//            info.setPartitionerClass(BranchAndBoundPartitioner.class);
//            info.setCostModel(cm);
//            info.setMappingsFile(this.getParameterMappingsFile(ProjectType.TM1));
//
//            Designer designer = new Designer(info, hints, info.getArgs());
//            BranchAndBoundPartitioner local_search = (BranchAndBoundPartitioner)designer.getPartitioner();
//            local_search.setUpperBounds(hints, initial, cost0, 12345);
//            assertNotNull(local_search);
//
//            // Now shovel through the Branch&Bound partitioner without actually doing anything
//            // We should then get the exact same PartitionPlan back as we gave it
//            PartitionPlan pplan = null;
//            try {
//                pplan = local_search.generate(hints);
//            } catch (Exception ex) {
//                System.err.println("GRAPH: " + FileUtil.writeStringToTempFile(GraphvizExport.export(local_search.getAcessGraph(), "tm1"), "dot"));
//                throw ex;
//            }
//            assertNotNull(pplan);
//            assertEquals(initial, pplan);
//
//            // Which then means we should get the exact same cost back
//            initial.apply(clone_db);
//            cm.clear(true);
//            double cost1 = cm.estimateWorkloadCost(catalogContext, singlep_workload);
//            assert(cost1 > 0) : "[1] Invalid Cost: " + cost0;
//            assertEquals("[1] Try #" + tries, cost0, cost1);
//
//            last_cost = cost0;
//        } // WHILE
//
//    }
}