package edu.brown.designer.partitioners; import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import org.apache.commons.collections15.CollectionUtils; import org.apache.commons.collections15.map.ListOrderedMap; import org.apache.commons.collections15.set.ListOrderedSet; import org.apache.log4j.Logger; import org.voltdb.catalog.Column; import org.voltdb.catalog.Database; import org.voltdb.catalog.MaterializedViewInfo; import org.voltdb.catalog.Procedure; import org.voltdb.catalog.Statement; import org.voltdb.catalog.Table; import org.voltdb.planner.VerticalPartitionPlanner; import org.voltdb.types.QueryType; import edu.brown.catalog.CatalogUtil; import edu.brown.catalog.special.MultiColumn; import edu.brown.catalog.special.VerticalPartitionColumn; import edu.brown.designer.MemoryEstimator; import edu.brown.logging.LoggerUtil; import edu.brown.logging.LoggerUtil.LoggerBoolean; import edu.brown.plannodes.PlanNodeUtil; import edu.brown.statistics.TableStatistics; import edu.brown.statistics.WorkloadStatistics; import edu.brown.utils.StringUtil; public abstract class VerticalPartitionerUtil { private static final Logger LOG = Logger.getLogger(VerticalPartitionerUtil.class); private static final LoggerBoolean debug = new LoggerBoolean(); private static final LoggerBoolean trace = new LoggerBoolean(); static { LoggerUtil.attachObserver(LOG, debug, trace); } /** * @param vp_col * @param stats * @return */ public static TableStatistics computeTableStatistics(VerticalPartitionColumn vp_col, WorkloadStatistics stats) { MaterializedViewInfo catalog_view = vp_col.createMaterializedView(); Table view_tbl = catalog_view.getDest(); assert (view_tbl != null) : "Destination table for " + catalog_view + " is null?"; TableStatistics tbl_stats = stats.getTableStatistics(view_tbl); if (tbl_stats == null) { tbl_stats = new TableStatistics(view_tbl); stats.addTableStatistics(view_tbl, tbl_stats); Table orig_tbl = catalog_view.getParent(); TableStatistics orig_tbl_stats = stats.getTableStatistics(orig_tbl); assert (orig_tbl_stats != null) : "Missing TableStatistics " + orig_tbl; tbl_stats.readonly = true; tbl_stats.tuple_count_total = orig_tbl_stats.tuple_count_total; long tuple_size = MemoryEstimator.estimateTupleSize(view_tbl); tbl_stats.tuple_size_avg = tuple_size; tbl_stats.tuple_size_max = tuple_size; tbl_stats.tuple_size_min = tuple_size; tbl_stats.tuple_size_total = tbl_stats.tuple_count_total * tuple_size; if (debug.val) LOG.debug("Added TableStatistics for vertical partition replica table " + view_tbl); } return (tbl_stats); } /** * Generate all of the potential VerticalPartitionColumn candidates based on * the given horizontal partition column. Each VerticalPartitionColumn * candidate will contain the optimized queries that we compute with the * VerticalPartitionPlanner. * * @param stats * @param catalog_tbl * @return * @throws Exception */ public static Collection<VerticalPartitionColumn> generateCandidates(final Column partition_col, final WorkloadStatistics stats) throws Exception { final Table catalog_tbl = partition_col.getParent(); final Database catalog_db = catalog_tbl.getParent(); final Set<VerticalPartitionColumn> candidates = new ListOrderedSet<VerticalPartitionColumn>(); // If the horizontal partition column is null, then there can't be any // vertical partition columns if (partition_col.getNullable()) { if (debug.val) LOG.warn("The horizontal partition column " + partition_col.fullName() + " is nullable. Skipping candidate generation"); return (candidates); } // Get all the read-only columns for this table Collection<Column> readOnlyColumns = CatalogUtil.getReadOnlyColumns(catalog_tbl, true); // For the given Column object, figure out what are the potential // vertical partitioning candidates // if we assume that the Table is partitioned on that Column if (debug.val) { LOG.debug(String.format("Generating VerticalPartitionColumn candidates based on using %s as the horizontal partitioning attribute", partition_col.fullName())); LOG.trace(catalog_tbl + " Read-Only Columns: " + CatalogUtil.debug(readOnlyColumns)); } for (Procedure catalog_proc : CatalogUtil.getReferencingProcedures(catalog_tbl)) { // Look for a query on this table that does not use the target // column in the predicate // But does return it in its output for (Statement catalog_stmt : catalog_proc.getStatements()) { // We can only look at SELECT statements because we have know // way to know the correspondence // between the candidate partitioning column and our target // column if (catalog_stmt.getQuerytype() != QueryType.SELECT.getValue()) continue; Collection<Column> output_cols = PlanNodeUtil.getOutputColumnsForStatement(catalog_stmt); if (partition_col instanceof MultiColumn) { if (output_cols.containsAll((MultiColumn) partition_col) == false) continue; } else if (output_cols.contains(partition_col) == false) continue; // Skip if this thing is just dumping out all columns if (output_cols.size() == catalog_tbl.getColumns().size()) continue; // We only support single-table queries now Collection<Table> stmt_tables = CatalogUtil.getReferencedTables(catalog_stmt); if (stmt_tables.size() > 1) continue; // The referenced columns are the columns that are used in the // predicate and order bys Collection<Column> stmt_cols = CollectionUtils.union(CatalogUtil.getReferencedColumns(catalog_stmt), CatalogUtil.getOrderByColumns(catalog_stmt)); if (stmt_cols.contains(partition_col)) continue; // Vertical Partition Columns Set<Column> all_cols = new TreeSet<Column>(); all_cols.addAll(stmt_cols); if (partition_col instanceof MultiColumn) { all_cols.addAll(((MultiColumn) partition_col).getAttributes()); } else { all_cols.add(partition_col); } // Include any read-only output columns for (Column col : output_cols) { if (readOnlyColumns.contains(col)) all_cols.add(col); } // FOR if (partition_col instanceof MultiColumn) { MultiColumn mc = (MultiColumn) partition_col; if (mc.size() == all_cols.size()) { boolean foundAll = true; for (Column col : mc) { foundAll = all_cols.contains(col) && foundAll; } // FOR if (foundAll) continue; // assert(foundAll) : mc + "\n" + all_cols; } } if (all_cols.size() > 1) { MultiColumn vp_col = MultiColumn.get(all_cols.toArray(new Column[all_cols.size()])); assert (partition_col.equals(vp_col) == false) : vp_col; VerticalPartitionColumn vpc = VerticalPartitionColumn.get(partition_col, vp_col); assert (vpc != null) : String.format("Failed to get VerticalPartition column for <%s, %s>", partition_col, vp_col); candidates.add(vpc); if (debug.val) { Map<String, Object> m = new ListOrderedMap<String, Object>(); m.put("Output Columns", output_cols); m.put("Predicate Columns", stmt_cols); m.put("Horizontal Partitioning", partition_col.fullName()); m.put("Vertical Partitioning", vp_col.fullName()); LOG.debug("Vertical Partition Candidate: " + catalog_stmt.fullName() + "\n" + StringUtil.formatMaps(m)); } } } // FOR (stmt) } // FOR (proc) if (debug.val && candidates.size() > 0) LOG.debug("Computing vertical partition query plans for " + candidates.size() + " candidates"); Set<VerticalPartitionColumn> final_candidates = new HashSet<VerticalPartitionColumn>(); for (VerticalPartitionColumn vpc : candidates) { // Make sure our WorkloadStatistics have something for this // MaterializedViewInfo if (stats != null) VerticalPartitionerUtil.computeTableStatistics(vpc, stats); if (vpc.hasOptimizedQueries()) { if (debug.val) LOG.debug("Skipping candidate that already has optimized queries\n" + vpc.toString()); final_candidates.add(vpc); } else if (generateOptimizedQueries(catalog_db, vpc)) { final_candidates.add(vpc); } } // FOR return (final_candidates); } /** * @param catalog_db * @param c * @return */ public static boolean generateOptimizedQueries(Database catalog_db, VerticalPartitionColumn c) { boolean ret = false; MaterializedViewInfo catalog_view = CatalogUtil.getVerticalPartition((Table) c.getParent()); if (catalog_view == null) catalog_view = c.createMaterializedView(); assert (catalog_view != null); assert (catalog_view.getGroupbycols().isEmpty() == false) : String.format("Missing columns for VerticalPartition view %s\n%s", catalog_view.fullName(), c); List<String> columnNames = c.getVerticalPartitionColumnNames(); VerticalPartitionPlanner vp_planner = new VerticalPartitionPlanner(catalog_db, catalog_view); Map<Statement, Statement> optimized = null; try { optimized = vp_planner.generateOptimizedStatements(); } catch (Exception ex) { throw new RuntimeException("Failed to generate optimized query plans:\n" + c, ex); } if (optimized != null) { c.addOptimizedQueries(optimized); ret = true; if (debug.val) LOG.debug(String.format("Generated %d optimized query plans using %s's vertical partition: %s", optimized.size(), c.getParent().getName(), columnNames)); } else if (c.hasOptimizedQueries()) { ret = true; if (debug.val) LOG.debug(String.format("Using existing %d optimized query plans using %s's vertical partition", c.getOptimizedQueries().size(), c.getParent().getName())); } else if (debug.val) { LOG.warn("No optimized queries were generated for " + c.fullName()); } return (ret); } }