package edu.brown.workload;

import java.io.File;
import java.io.FileOutputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import org.apache.commons.collections15.set.ListOrderedSet;
import org.apache.log4j.Logger;
import org.voltdb.catalog.Database;

import edu.brown.catalog.CatalogUtil;
import edu.brown.rand.RandomDistribution;
import edu.brown.statistics.ObjectHistogram;
import edu.brown.utils.ArgumentsParser;
import edu.brown.utils.ProjectType;

/**
 * Workload post-processing tool that rewrites TPC-C "neworder" transaction
 * traces so that their OL_SUPPLY_W_ID parameters (procedure param #5 and the
 * matching per-item query parameters) follow a synthetic cross-warehouse or
 * cross-partition affinity distribution.
 *
 * @author pavlo
 */
public abstract class FixWorkload {
    private static final Logger LOG = Logger.getLogger(FixWorkload.class.getName());

    /** Probability (%) that a neworder txn is made to touch a remote warehouse/partition. */
    private static final int OL_SUPPLY_REMOTE = 50;
    /** Probability (%) that an individual order-line item uses the txn's remote warehouse. */
    private static final int OL_SUPPLY_REMOTE_ITEM = 50;

    /**
     * Assigns every warehouse seen in the workload a Zipf distribution anchored
     * at some *other* randomly chosen warehouse, then rewrites the workload's
     * OL_SUPPLY_W_ID parameters according to those distributions.
     *
     * @param args parsed command-line arguments (provides the workload)
     * @param sigma skew parameter for the Zipf distributions
     * @throws Exception if the per-warehouse histograms cannot be written out
     */
    public static void addZipfianAffinity(ArgumentsParser args, double sigma) throws Exception {
        // Figure out how many warehouses we have.
        // neworder param #0 is W_ID (stored as a Long in the trace).
        ListOrderedSet<Integer> warehouses = new ListOrderedSet<Integer>();
        for (AbstractTraceElement<?> element : args.workload) {
            if (element instanceof TransactionTrace) {
                TransactionTrace xact = (TransactionTrace) element;
                if (!xact.getCatalogItemName().equals("neworder")) continue;
                warehouses.add(((Long) xact.getParam(0)).intValue());
            }
        } // FOR

        // Create a synthetic affinity between different warehouses: each warehouse
        // gets a Zipf over [other_id, other_id + num_warehouses) where other_id is
        // a different randomly selected warehouse.
        Map<Integer, RandomDistribution.Zipf> distributions = new HashMap<Integer, RandomDistribution.Zipf>();
        Random rand = new Random();
        int num_warehouses = warehouses.size();
        for (Integer w_id : warehouses) {
            int other_id = w_id;
            // Re-roll until we land on a warehouse other than ourselves
            while (other_id == w_id) other_id = warehouses.get(rand.nextInt(num_warehouses));
            distributions.put(w_id, new RandomDistribution.Zipf(rand, other_id, other_id + num_warehouses, sigma));
        } // FOR

        FixWorkload.updateWorkloadWarehouses(args, distributions);
        return;
    }

    /**
     * Pairs up contiguous partitions and gives each member of a pair a flat
     * distribution over all partitions, then rewrites the workload accordingly.
     * (The Zipf-based pairing variant is preserved below, commented out.)
     *
     * @param args parsed command-line arguments (provides catalog + workload)
     * @throws Exception propagated from the workload rewrite
     */
    public static void addPairedAffinity(ArgumentsParser args) throws Exception {
        // Put contiguous partition pairs together
        int num_partitions = CatalogUtil.getNumberOfPartitions(args.catalog_db);
        Map<Integer, RandomDistribution.DiscreteRNG> distributions = new HashMap<Integer, RandomDistribution.DiscreteRNG>();
        Random rand = new Random();
        Integer last_partition = null;
        for (int i = 0; i < num_partitions; i++) {
            if (last_partition != null) {
                distributions.put(i, new RandomDistribution.Flat(rand, 0, num_partitions));
                distributions.put(last_partition, new RandomDistribution.Flat(rand, 0, num_partitions));
                //distributions.put(i, new RandomDistribution.Zipf(rand, last_partition, last_partition + num_partitions, 1.1));
                //distributions.put(last_partition, new RandomDistribution.Zipf(rand, i, i + num_partitions, 1.1));
                last_partition = null;
            } else {
                last_partition = i;
            }
        } // FOR
        FixWorkload.updateWorkload(args, distributions);
        return;
    }

    /**
     * Rewrites the OL_SUPPLY_W_ID for the i-th order-line item inside the txn's
     * individual query traces. The i-th occurrence of each query type corresponds
     * to the i-th item, so we count occurrences as we scan.
     *
     * This logic was previously duplicated verbatim in updateWorkload() and
     * updateWorkloadWarehouses(); it is factored out so the two cannot drift apart.
     *
     * @param xact the neworder transaction trace being rewritten
     * @param item_idx zero-based index of the order-line item
     * @param new_supply_id the replacement OL_SUPPLY_W_ID value
     */
    private static void updateSupplyQueryParams(TransactionTrace xact, int item_idx, Object new_supply_id) {
        int getStockInfo_cnt = 0;
        int updateStock_cnt = 0;
        int createOrderLine_cnt = 0;
        for (QueryTrace query : xact.getQueries()) {
            if (query.getCatalogItemName().startsWith("getStockInfo")) {
                if (getStockInfo_cnt++ == item_idx) query.setParam(1, new_supply_id);
            } else if (query.getCatalogItemName().equals("updateStock")) {
                if (updateStock_cnt++ == item_idx) query.setParam(5, new_supply_id);
            } else if (query.getCatalogItemName().equals("createOrderLine")) {
                if (createOrderLine_cnt++ == item_idx) query.setParam(5, new_supply_id);
            }
        } // FOR
    }

    /**
     * Rewrites OL_SUPPLY_W_ID parameters using *partition-keyed* distributions:
     * for each item there is an OL_SUPPLY_REMOTE % chance of redirecting it to a
     * warehouse belonging to the partition drawn from the txn's distribution.
     *
     * @param args parsed command-line arguments (provides hasher + workload)
     * @param distributions partition id -> RNG over target partition ids
     * @throws IllegalStateException if a txn's home partition has no distribution
     */
    private static void updateWorkload(ArgumentsParser args, Map<Integer, ? extends RandomDistribution.DiscreteRNG> distributions) {
        // Fix OL_SUPPLY_ID
        // Lame: Generate a mapping of partition ids to warehouses, so that we can
        // do a quick reverse lookup
        Map<Integer, ObjectHistogram> histograms = new HashMap<Integer, ObjectHistogram>();
        SortedMap<Integer, ListOrderedSet<Integer>> partition_warehouse_xref = new TreeMap<Integer, ListOrderedSet<Integer>>();
        int num_partitions = args.hasher.getNumPartitions();
        System.out.println("Num of Partitions: " + num_partitions);
        for (int i = 0; i < num_partitions; i++) {
            partition_warehouse_xref.put(i, new ListOrderedSet<Integer>());
            histograms.put(i, new ObjectHistogram());
        }
        for (AbstractTraceElement<?> element : args.workload) {
            if (element instanceof TransactionTrace) {
                TransactionTrace xact = (TransactionTrace) element;
                if (!xact.getCatalogItemName().equals("neworder")) continue;
                int w_id = ((Long) xact.getParam(0)).intValue();
                int part_id = args.hasher.hash(w_id);
                partition_warehouse_xref.get(part_id).add(w_id);
            }
        } // FOR
        StringBuilder buffer = new StringBuilder();
        buffer.append("Partition ID -> Warehouse ID\n");
        for (Integer partition_id : partition_warehouse_xref.keySet()) {
            buffer.append(partition_id).append(": ")
                  .append(partition_warehouse_xref.get(partition_id).toString()).append("\n");
        }
        LOG.info(buffer);

        int changed_cnt = 0;
        int total = 0;
        Random rand = new Random();
        for (AbstractTraceElement<?> element : args.workload) {
            if (element instanceof TransactionTrace) {
                TransactionTrace xact = (TransactionTrace) element;
                if (!xact.getCatalogItemName().equals("neworder")) continue;
                // neworder param #5 is the array of OL_SUPPLY_W_IDs (one per item)
                int param_idx = 5;
                total++;
                Object ol_supply_ids[] = xact.getParam(param_idx);
                int orig_w_id = ((Long) xact.getParam(0)).intValue();
                int orig_part_id = args.hasher.hash(orig_w_id);

                RandomDistribution.DiscreteRNG rng = distributions.get(orig_part_id);
                if (rng == null) {
                    LOG.error("Original Partition Id: " + orig_part_id);
                    LOG.error(distributions);
                    // Previously System.exit(0) -- which reported *success* on a
                    // fatal error. Fail loudly instead.
                    throw new IllegalStateException("No affinity distribution for partition #" + orig_part_id);
                }
                boolean changed = false;
                for (int i = 0; i < ol_supply_ids.length; i++) {
                    if (rand.nextInt(100) <= OL_SUPPLY_REMOTE) {
                        Integer new_part_id = rng.nextInt();
                        // Wrap out-of-range draws back into [0, num_partitions)
                        if (new_part_id >= num_partitions) new_part_id = (new_part_id % num_partitions);
                        histograms.get(orig_part_id).put(new_part_id);

                        // For the newly selected partition id, randomly select a
                        // warehouse that will map back to this partition
                        ListOrderedSet<Integer> w_ids = partition_warehouse_xref.get(new_part_id);
                        assert (w_ids != null) : "Missing warehouse ids for partition " + new_part_id;
                        assert (!w_ids.isEmpty());
                        // NOTE(review): this branch stores a Long while the else
                        // branch stores an int (boxed Integer) -- confirm downstream
                        // consumers tolerate mixed element types.
                        ol_supply_ids[i] = w_ids.get(rand.nextInt(w_ids.size())).longValue();
                        changed = true;
                        updateSupplyQueryParams(xact, i, ol_supply_ids[i]);
                    } else {
                        // If we're not changing it, at least make sure it's the
                        // same value as the procedure param
                        ol_supply_ids[i] = orig_w_id;
                        histograms.get(orig_part_id).put(orig_part_id);
                    }
                } // FOR
                xact.setParam(param_idx, ol_supply_ids);
                if (changed) changed_cnt++;
            }
        } // FOR
        LOG.info("Updated " + changed_cnt + "/" + total + " transactions");

        buffer = new StringBuilder();
        buffer.append("Partition Histograms:\n");
        for (Entry<Integer, ObjectHistogram> entry : histograms.entrySet()) {
            Integer partition_id = entry.getKey();
            buffer.append("Partition: " + partition_id + " [" + distributions.get(partition_id).getMin() + "]\n");
            buffer.append(entry.getValue()).append("\n");
        } // FOR
        LOG.info(buffer.toString());
        return;
    }

    /**
     * Rewrites OL_SUPPLY_W_ID parameters using *warehouse-keyed* distributions:
     * with OL_SUPPLY_REMOTE % probability a txn picks a single remote warehouse,
     * and each of its items then independently moves there with
     * OL_SUPPLY_REMOTE_ITEM % probability.
     *
     * @param args parsed command-line arguments (provides the workload)
     * @param distributions warehouse id -> RNG over target warehouse ids
     * @throws Exception if a per-warehouse histogram cannot be saved
     * @throws IllegalStateException if a txn's home warehouse has no distribution
     */
    private static void updateWorkloadWarehouses(ArgumentsParser args, Map<Integer, ? extends RandomDistribution.DiscreteRNG> distributions) throws Exception {
        // Fix OL_SUPPLY_ID
        // One histogram per home warehouse, recording which warehouses it was
        // redirected to
        Map<Integer, ObjectHistogram> histograms = new HashMap<Integer, ObjectHistogram>();
        Set<Integer> warehouse_ids = new HashSet<Integer>();
        for (AbstractTraceElement<?> element : args.workload) {
            if (element instanceof TransactionTrace) {
                TransactionTrace xact = (TransactionTrace) element;
                if (!xact.getCatalogItemName().equals("neworder")) continue;
                int w_id = ((Long) xact.getParam(0)).intValue();
                warehouse_ids.add(w_id);
                if (!histograms.containsKey(w_id)) {
                    histograms.put(w_id, new ObjectHistogram());
                }
            }
        } // FOR
        int num_warehouses = warehouse_ids.size();
        System.out.println("Num of Warehouses: " + num_warehouses);
        // Sanity ceiling inherited from the original tool -- TODO confirm the limit
        assert (num_warehouses <= 40);

        int changed_cnt = 0;
        int total = 0;
        Random rand = new Random();
        for (AbstractTraceElement<?> element : args.workload) {
            if (element instanceof TransactionTrace) {
                TransactionTrace xact = (TransactionTrace) element;
                if (!xact.getCatalogItemName().equals("neworder")) continue;
                int param_idx = 5;
                total++;
                Object ol_supply_ids[] = xact.getParam(param_idx);
                int orig_w_id = ((Long) xact.getParam(0)).intValue();

                RandomDistribution.DiscreteRNG rng = distributions.get(orig_w_id);
                if (rng == null) {
                    LOG.error("Original Warehouse Id: " + orig_w_id);
                    LOG.error(distributions);
                    // Previously System.exit(0) -- which reported *success* on a
                    // fatal error. Fail loudly instead.
                    throw new IllegalStateException("No affinity distribution for warehouse #" + orig_w_id);
                }

                // Let there be some probability that we are going to go to another
                // warehouse to grab our data. For now we'll have all the remote
                // items be in the same warehouse
                Integer remote_w_id = null;
                int updated_items = 0;
                if (rand.nextInt(100) <= OL_SUPPLY_REMOTE) {
                    remote_w_id = rng.nextInt();
                    // NOTE(review): the "+ 1" suggests 1-based warehouse ids here,
                    // unlike the 0-based partition wrap in updateWorkload() -- confirm
                    if (remote_w_id >= num_warehouses) remote_w_id = (remote_w_id % num_warehouses) + 1;
                    histograms.get(orig_w_id).put(remote_w_id);
                } else {
                    histograms.get(orig_w_id).put(orig_w_id);
                }
                for (int i = 0; i < ol_supply_ids.length; i++) {
                    if (remote_w_id != null && rand.nextInt(100) <= OL_SUPPLY_REMOTE_ITEM) {
                        ol_supply_ids[i] = remote_w_id;
                        updated_items++;
                        updateSupplyQueryParams(xact, i, ol_supply_ids[i]);
                    } else {
                        // If we're not changing it, at least make sure it's the
                        // same value as the procedure param
                        ol_supply_ids[i] = orig_w_id;
                    }
                } // FOR
                xact.setParam(param_idx, ol_supply_ids);
                if (updated_items > 0) changed_cnt++;
            }
        } // FOR
        LOG.info("Updated " + changed_cnt + "/" + total + " transactions");

        StringBuilder buffer = new StringBuilder();
        buffer.append("Warehouse Histograms:\n");
        for (Integer w_id : histograms.keySet()) {
            ObjectHistogram hist = histograms.get(w_id);
            buffer.append("Partition: " + w_id + " [" + distributions.get(w_id).getMin() + "]\n");
            buffer.append(hist).append("\n");
            // NOTE(review): assumes the "histograms/" directory already exists --
            // verify or create it before running
            hist.save(new File("histograms/" + w_id + ".hist"));
        } // FOR
        LOG.info(buffer.toString());
        return;
    }

//    private static final void fixTM1(Workload workload) throws Exception {
//        // Fix UpdateSubscriberData
//        long fix_ctr = 0;
//        for (TransactionTrace txn_trace : workload.getTransactions()) {
//            if (!txn_trace.getCatalogItemName().equals("UpdateSubscriberData")) continue;
//
//            Object params[] = txn_trace.getParams();
//            Object temp = params[0];
//            txn_trace.setParam(0, params[1]);
//            txn_trace.setParam(1, temp);
//            fix_ctr++;
//        }
//        LOG.info("Fixed " + fix_ctr + " UpdateSubscriberData txns!");
//        return;
//    }

    /**
     * Re-serializes every transaction trace in the workload to the given path.
     *
     * @param catalog_db catalog the traces are written against
     * @param workload the workload to serialize
     * @param output_path destination file path
     * @throws Exception if the file cannot be opened or a trace fails to write
     */
    private static final void updateTraceIds(Database catalog_db, Workload workload, String output_path) throws Exception {
        // try-with-resources guarantees the stream is closed even on error
        // (the original leaked the FileOutputStream)
        try (FileOutputStream output = new FileOutputStream(output_path)) {
            for (TransactionTrace txn_trace : workload.getTransactions()) {
                Workload.writeTransactionToStream(catalog_db, txn_trace, output);
            } // FOR
        }
    }

    /**
     * Command-line entry point: loads the workload, rewrites the trace ids to the
     * output path, and (optionally, currently disabled) applies a project-specific
     * affinity fix.
     *
     * @param vargs command-line arguments (catalog, workload, workload output)
     */
    public static void main(String[] vargs) throws Exception {
        ArgumentsParser args = ArgumentsParser.load(vargs);
        args.require(
                ArgumentsParser.PARAM_CATALOG,
                ArgumentsParser.PARAM_WORKLOAD,
                ArgumentsParser.PARAM_WORKLOAD_OUTPUT);
        String output_path = args.getParam(ArgumentsParser.PARAM_WORKLOAD_OUTPUT);
        assert (output_path != null);
        // Referenced only by the commented-out fix dispatch below
        ProjectType type = args.catalog_type;

        updateTraceIds(args.catalog_db, args.workload, output_path);

        // Fix the workload!
//        if (type.equals(ProjectType.TPCC)) {
//            if (args.getOptParamCount() == 3) {
//                String id_suffix = args.getOptParam(0);
//                int min_id = args.getIntOptParam(1);
//                int max_id = args.getIntOptParam(2);
//                LOG.info("Expanding partitioning parameters using '" + id_suffix + "'");
//                FixWorkload.expandPartitionParameters(args, id_suffix, min_id, max_id);
//            } else {
//                double sigma = Double.valueOf(args.getOptParam(0));
//                LOG.info("Adding zipfian partition affinity to workload with sigma=" + sigma);
//                FixWorkload.addZipfianAffinity(args, sigma);
//                //FixWorkload.addPairedAffinity(args);
//            }
//        } else if (type.equals(ProjectType.TM1)) {
//            // Expand TM1
////            int start_id = Collections.max(args.workload.element_ids) + 1;
////            FixWorkload.duplicateWorkload(args, start_id);
//            fixTM1(args.workload);
//        }

        // We need to write this things somewhere now...
        //System.out.println(args.workload.debug(args.catalog_db));
        //args.workload.save(output_path, args.catalog_db);
        LOG.info("Wrote updated workload to '" + output_path + "'");
        return;
    }
}