/*************************************************************************** * Copyright (C) 2012 by H-Store Project * * Brown University * * Massachusetts Institute of Technology * * Yale University * * * * http://hstore.cs.brown.edu/ * * * * Permission is hereby granted, free of charge, to any person obtaining * * a copy of this software and associated documentation files (the * * "Software"), to deal in the Software without restriction, including * * without limitation the rights to use, copy, modify, merge, publish, * * distribute, sublicense, and/or sell copies of the Software, and to * * permit persons to whom the Software is furnished to do so, subject to * * the following conditions: * * * * The above copyright notice and this permission notice shall be * * included in all copies or substantial portions of the Software. * * * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR * * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * * OTHER DEALINGS IN THE SOFTWARE. * ***************************************************************************/ package edu.brown.utils; import java.lang.reflect.Array; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import org.apache.commons.pool.BasePoolableObjectFactory; import org.apache.log4j.Logger; import org.voltdb.CatalogContext; import org.voltdb.StoredProcedureInvocation; import org.voltdb.VoltTableRow; import org.voltdb.VoltType; import org.voltdb.catalog.CatalogMap; import org.voltdb.catalog.CatalogType; import org.voltdb.catalog.Column; import org.voltdb.catalog.ConstantValue; import org.voltdb.catalog.PlanFragment; import org.voltdb.catalog.ProcParameter; import org.voltdb.catalog.Procedure; import org.voltdb.catalog.Statement; import org.voltdb.catalog.StmtParameter; import org.voltdb.catalog.Table; import org.voltdb.plannodes.AbstractPlanNode; import org.voltdb.types.ExpressionType; import org.voltdb.types.QueryType; import org.voltdb.utils.Pair; import org.voltdb.utils.VoltTypeUtil; import edu.brown.catalog.CatalogKey; import edu.brown.catalog.CatalogPair; import edu.brown.catalog.CatalogUtil; import edu.brown.catalog.special.MultiColumn; import edu.brown.catalog.special.MultiProcParameter; import edu.brown.catalog.special.NullProcParameter; import edu.brown.catalog.special.RandomProcParameter; import edu.brown.catalog.special.VerticalPartitionColumn; import edu.brown.hashing.AbstractHasher; import edu.brown.hashing.DefaultHasher; import edu.brown.hstore.HStoreConstants; import edu.brown.logging.LoggerUtil; import edu.brown.logging.LoggerUtil.LoggerBoolean; import edu.brown.plannodes.PlanNodeUtil; import edu.brown.pools.FastObjectPool; import edu.brown.workload.QueryTrace; import edu.brown.workload.TransactionTrace; /** * This class is used to calculate what partitions various operations will need to execute on. * For example, this is used to figure out what partitions each query invocation will need to * touch at runtime. * <B>NOTE:</B> These are deterministic calculations. 
We call it an "estimator" because * we can get the partitions touched by an operation without actually running a txn * or executing a query. * @author pavlo */ public class PartitionEstimator { private static final Logger LOG = Logger.getLogger(PartitionEstimator.class); private static final LoggerBoolean debug = new LoggerBoolean(); private static final LoggerBoolean trace = new LoggerBoolean(); static { LoggerUtil.attachObserver(LOG, debug, trace); } // ---------------------------------------------------------------------------- // DATA MEMBERS // ---------------------------------------------------------------------------- private CatalogContext catalogContext; private final AbstractHasher hasher; private PartitionSet all_partitions = new PartitionSet(); private int num_partitions; private final Map<Procedure, ProcParameter> cache_procPartitionParameters = new HashMap<Procedure, ProcParameter>(); private final Map<Table, Column> cache_tablePartitionColumns = new HashMap<Table, Column>(); /** * Statement -> StmtParameter Offsets */ private final Map<Statement, int[]> cache_stmtPartitionParameters = new HashMap<Statement, int[]>(); /** * PlanFragment Key -> CacheEntry(Column Key -> StmtParameter Indexes) */ private final Map<String, CacheEntry> cache_fragmentEntries = new HashMap<String, CacheEntry>(); /** * Statement Key -> CacheEntry(Column Key -> StmtParam Indexes) */ private final Map<String, CacheEntry> cache_statementEntries = new HashMap<String, CacheEntry>(); /** * Table Key -> All cache entries for Statements that reference Table */ private final Map<String, Set<CacheEntry>> table_cache_xref = new HashMap<String, Set<CacheEntry>>(); /** * CacheEntry ColumnKey -> Parameter List * The parameters could be either StmtParameters or ConstantValues */ private final class CacheEntry { private final QueryType query_type; private boolean contains_or = false; private final Map<Column, List<Pair<ExpressionType, CatalogType>>> predicates = new HashMap<Column, List<Pair<ExpressionType,CatalogType>>>(); private final Collection<String> table_keys = new HashSet<String>(); private final Collection<String> broadcast_tables = new HashSet<String>(); /** * The array of Table objects for the table_keys. * This is just a fast cache so that we don't have to use an iterable and * so that we can re-use the PartitionEstimator's cache when the catalog changes.
*/ private transient Table tables[]; /** * Whether the table in the tables array is replicated */ private transient boolean is_replicated[]; private transient boolean is_array[]; // parameters private transient boolean is_valid = true; private transient boolean cache_valid = false; public CacheEntry(QueryType query_type) { this.query_type = query_type; } /** * * @param key * @param param * @param expType * @param catalog_tbl */ public void put(Column key, CatalogType param, ExpressionType expType, Table catalog_tbl) { assert(param instanceof StmtParameter || param instanceof ConstantValue); List<Pair<ExpressionType, CatalogType>> params = this.predicates.get(key); if (params == null) { params = new ArrayList<Pair<ExpressionType, CatalogType>>(); this.predicates.put(key, params); } params.add(Pair.of(expType, param)); this.table_keys.add(CatalogKey.createKey(catalog_tbl)); } public void markContainsOR(boolean flag) { this.contains_or = flag; } public boolean isMarkedContainsOR() { return (this.contains_or); } /** * The catalog object for this CacheEntry references a table without any * predicates on columns, so we need to mark it as having to always be * broadcast (unless it is replicated) * @param catalog_tbls */ public void markAsBroadcast(Table...catalog_tbls) { for (Table catalog_tbl : catalog_tbls) { String table_key = CatalogKey.createKey(catalog_tbl); this.table_keys.add(table_key); this.broadcast_tables.add(table_key); } // FOR } public void markAsBroadcast(Collection<Table> catalog_tbls) { for (Table catalog_tbl : catalog_tbls) { this.markAsBroadcast(catalog_tbl); } } public boolean hasBroadcast() { return (this.broadcast_tables.isEmpty() == false); } /** * Get all of the tables referenced in this CacheEntry * @return */ public Table[] getTables() { if (this.cache_valid == false) { // We have to update the cache set if we don't have all of the // entries we need or the catalog has changed synchronized (this) { if (this.cache_valid == false) { if (trace.val) LOG.trace("Generating list of tables used by cache entry"); this.tables = new Table[this.table_keys.size()]; this.is_replicated = new boolean[this.tables.length]; int i = 0; for (String table_key : this.table_keys) { Table catalog_tbl = CatalogKey.getFromKey(catalogContext.database, table_key, Table.class); this.tables[i] = catalog_tbl; this.is_replicated[i++] = catalog_tbl.getIsreplicated(); } // FOR } this.cache_valid = true; } // SYNCH } return (this.tables); } public boolean hasTable(Table catalog_tbl) { return (this.table_keys.contains(CatalogKey.createKey(catalog_tbl))); } public void setValid() { this.is_valid = true; } public boolean isValid() { return (this.is_valid); } @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("[IsValid=" + this.is_valid + ", ") .append("Tables=" + this.table_keys + ", ") .append("Broadcast=" + this.broadcast_tables + ", ") .append("Predicates=" + this.predicates + "]"); return (sb.toString()); } }; // END CLASS /** * PartitionSet pool used by calculatePartitionsForCache */ private final FastObjectPool<PartitionSet> partitionSetPool = new FastObjectPool<PartitionSet>(new BasePoolableObjectFactory() { @Override public Object makeObject() throws Exception { return (new PartitionSet()); } public void passivateObject(Object obj) throws Exception { ((PartitionSet)obj).clear(); }; }, 100); /** * PartitionSet[4] pool used by calculatePartitionsForCache. * This really is only needed for MultiColumn partitioning columns.
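 * Note: each pooled array holds four PartitionSets, but the cross-product logic in
 * calculatePartitionsForCache() only ever combines the first two sets, so in effect
 * this pool assumes two-column MultiColumns (see the XXX question below).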
*/ private final FastObjectPool<PartitionSet[]> mcPartitionSetPool = new FastObjectPool<PartitionSet[]>(new BasePoolableObjectFactory() { @Override public Object makeObject() throws Exception { // XXX: Why is this hardcoded at 4 elements? return (new PartitionSet[] { new PartitionSet(), new PartitionSet(), new PartitionSet(), new PartitionSet() }); } public void passivateObject(Object obj) throws Exception { PartitionSet sets[] = (PartitionSet[])obj; for (PartitionSet s : sets) s.clear(); }; }, 1000); // ---------------------------------------------------------------------------- // CONSTRUCTORS // ---------------------------------------------------------------------------- /** * Convenience constructor that uses DefaultHasher */ public PartitionEstimator(CatalogContext catalogContext) { this(catalogContext, new DefaultHasher(catalogContext, catalogContext.numberOfPartitions)); } /** * Constructor * * @param catalogContext * @param hasher */ public PartitionEstimator(CatalogContext catalogContext, AbstractHasher hasher) { this.catalogContext = catalogContext; this.hasher = hasher; this.initCatalog(catalogContext); if (trace.val) LOG.trace("Created a new PartitionEstimator with a " + hasher.getClass() + " hasher!"); } // ---------------------------------------------------------------------------- // BASE DATA MEMBERS METHODS // ---------------------------------------------------------------------------- /** * Return the current CatalogContext used in this instance * @return */ public CatalogContext getCatalogContext() { return (this.catalogContext); } /** * Return the hasher used in this instance * @return */ public AbstractHasher getHasher() { return (this.hasher); } /** * Initialize a new catalog for this PartitionEstimator * @param newCatalogContext */ public void initCatalog(CatalogContext newCatalogContext) { // Check whether any of our cached partition columns have changed, // in which case we need to invalidate our cache entries this.catalogContext = newCatalogContext; this.hasher.init(catalogContext); this.clear(); this.buildCatalogCache(); } private synchronized void buildCatalogCache() { for (Procedure catalog_proc : this.catalogContext.database.getProcedures()) { if (catalog_proc.getParameters().size() > 0) { ProcParameter catalog_param = null; int param_idx = catalog_proc.getPartitionparameter(); if (param_idx == NullProcParameter.PARAM_IDX || catalog_proc.getParameters().isEmpty()) { catalog_param = NullProcParameter.singleton(catalog_proc); } else if (param_idx == RandomProcParameter.PARAM_IDX) { catalog_param = RandomProcParameter.singleton(catalog_proc); } else { catalog_param = catalog_proc.getParameters().get(param_idx); } this.cache_procPartitionParameters.put(catalog_proc, catalog_param); if (debug.val) LOG.debug(catalog_proc + " ProcParameter Cache: " + (catalog_param != null ?
catalog_param.fullName() : catalog_param)); } } // FOR for (Table catalog_tbl : this.catalogContext.database.getTables()) { if (catalog_tbl.getSystable()) continue; Column catalog_col = catalog_tbl.getPartitioncolumn(); // Use the underlying partitioning column for views if (catalog_tbl.getMaterializer() != null) { catalog_col = catalog_tbl.getMaterializer().getPartitioncolumn(); } if (catalog_col instanceof VerticalPartitionColumn) { catalog_col = ((VerticalPartitionColumn) catalog_col).getHorizontalColumn(); assert ((catalog_col instanceof VerticalPartitionColumn) == false) : catalog_col; } this.cache_tablePartitionColumns.put(catalog_tbl, catalog_col); if (debug.val) LOG.debug(String.format("%s Partition Column Cache: %s", catalog_tbl.getName(), catalog_col)); } // FOR for (CacheEntry entry : this.cache_fragmentEntries.values()) { entry.cache_valid = false; } for (CacheEntry entry : this.cache_statementEntries.values()) { entry.cache_valid = false; } // Generate a list of all the partition ids, so that we can quickly // add them to the output when estimating later on if (this.all_partitions.size() != this.hasher.getNumPartitions()) { this.all_partitions = this.catalogContext.getAllPartitionIds(); this.num_partitions = this.all_partitions.size(); assert (this.hasher.getNumPartitions() == this.num_partitions); if (debug.val) LOG.debug(String.format("Initialized PartitionEstimator with %d partitions using the %s hasher", this.num_partitions, this.hasher.getClass().getSimpleName())); } } /** * Completely clear the PartitionEstimator's internal cache. This should only * really be used for testing */ public void clear() { this.cache_procPartitionParameters.clear(); this.cache_tablePartitionColumns.clear(); this.cache_fragmentEntries.clear(); this.cache_statementEntries.clear(); this.cache_stmtPartitionParameters.clear(); } // ---------------------------------------------------------------------------- // INTERNAL CACHE METHODS // ---------------------------------------------------------------------------- /** * This is the method that actually picks the Statement apart and figures * out where the columns and parameters are used together. This is probably the most * important method in the entire code base, so tread lightly in here... * * @param catalog_stmt * @throws Exception */ private synchronized void generateCache(final Statement catalog_stmt) throws Exception { // Check whether we already have a CacheEntry for the Statement that we // can reuse String stmt_key = CatalogKey.createKey(catalog_stmt); QueryType stmt_type = QueryType.get(catalog_stmt.getQuerytype()); PartitionEstimator.CacheEntry stmt_cache = this.cache_statementEntries.get(stmt_key); if (stmt_cache == null) { stmt_cache = new PartitionEstimator.CacheEntry(stmt_type); } else { // assert(stmt_cache.isValid()) : // "Unexpected invalid cache entry for " + // CatalogUtil.getDisplayName(catalog_stmt); stmt_cache.setValid(); } Collection<Table> stmt_tables = CatalogUtil.getReferencedTables(catalog_stmt); if (debug.val) LOG.debug("Generating partitioning cache for " + catalog_stmt); // IMPORTANT: Work through the fragments in reverse so that we go from // the bottom of the tree up. // We are assuming that we can get the StmtParameter->Column mapping that we need // from either the multi-partition plan or the single-partition plan and that the mapping // will be the same in both cases.
Therefore, we don't need to differentiate whether we // are picking apart one or the other, nor do we need to switch to a different cache entry // for the Statement if we realize that we are going to be single-partition or not. // We have to go through all of the fragments because we don't know which set // the system will be calling at runtime. CatalogMap<?> fragment_sets[] = new CatalogMap<?>[] { catalog_stmt.getFragments(), catalog_stmt.getMs_fragments(), }; for (int i = 0; i < fragment_sets.length; i++) { if (fragment_sets[i] == null || fragment_sets[i].isEmpty()) continue; @SuppressWarnings("unchecked") CatalogMap<PlanFragment> fragments = (CatalogMap<PlanFragment>) fragment_sets[i]; boolean singlesited = (i == 0); if (trace.val) LOG.trace("Analyzing " + fragments.size() + " " + (singlesited ? "single" : "multi") + "-sited fragments for " + catalog_stmt.fullName()); // Examine each fragment and pick apart how the tables are referenced // The order doesn't matter here for (PlanFragment catalog_frag : fragments) { // Again, always check whether we already have a CacheEntry for // the PlanFragment that we can reuse String frag_key = CatalogKey.createKey(catalog_frag); PartitionEstimator.CacheEntry frag_cache = this.cache_fragmentEntries.get(frag_key); if (frag_cache == null) { frag_cache = new PartitionEstimator.CacheEntry(stmt_type); } else { // Reuse the existing entry and mark it as valid again, just like // the Statement-level cache entry above frag_cache.setValid(); } AbstractPlanNode root = PlanNodeUtil.getPlanNodeTreeForPlanFragment(catalog_frag); Collection<Table> frag_tables = CatalogUtil.getReferencedTablesForTree(catalogContext.database, root); // Table tables_arr[] = new Table[frag_tables.size()]; // tables_arr = frag_tables.toArray(tables_arr); // assert (tables_arr.length == frag_tables.size()); if (trace.val) LOG.trace("Analyzing " + catalog_frag.fullName()); // Check whether the predicate expression in this PlanFragment contains an OR // We need to know this if we get hit with Multi-Column Partitioning // XXX: Why does this matter?? Collection<ExpressionType> exp_types = PlanNodeUtil.getScanExpressionTypes(root); if (exp_types.contains(ExpressionType.CONJUNCTION_OR)) { if (debug.val) LOG.warn(String.format("%s contains %s. Cannot be used with multi-column partitioning", catalog_frag.fullName(), ExpressionType.CONJUNCTION_OR)); stmt_cache.markContainsOR(true); frag_cache.markContainsOR(true); } // If there are no tables, then we need to double-check that the "non-transactional" // flag is set for the fragment. This means that this fragment does not operate directly // on a persistent table in the database. We'll add an entry in the cache using our // special "no tables" flag. This means that the fragment needs to be executed locally.
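// (A hypothetical example of such a fragment: one that only merges or limits the
// output of its child fragments, and therefore never touches a base table itself.)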
if (frag_tables.isEmpty()) { String msg = catalog_frag.fullName() + " does not reference any tables"; if (!catalog_frag.getNontransactional()) { LOG.warn(catalog_stmt.fullName() + "\n" + PlanNodeUtil.debug(PlanNodeUtil.getRootPlanNodeForStatement(catalog_stmt, false))); for (PlanFragment f : fragments) { LOG.warn("singlePartition=" + singlesited + " - " + f.fullName() + "\n" + PlanNodeUtil.debug(PlanNodeUtil.getPlanNodeTreeForPlanFragment(f))); } throw new Exception(msg + " but the non-transactional flag is not set"); } if (trace.val) LOG.trace(msg); } if (trace.val && frag_tables.isEmpty() == false) LOG.trace("Fragment Tables: " + frag_tables); // We only need to find where the partition column is referenced // If it's not in there, then this query has to be broadcast to all nodes // Note that we pass all the tables that are part of the fragment, since // we need to be able to handle joins PredicatePairs predicates = CatalogUtil.extractFragmentPredicates(catalog_frag, false, frag_tables); assert (predicates != null); Map<Column, Set<Column>> column_joins = new TreeMap<Column, Set<Column>>(); if (trace.val) LOG.trace("Extracted PredicatePairs for " + frag_tables + ":\n" + predicates.debug()); // ------------------------------- // If there are no columns, then this fragment is doing a full table scan // ------------------------------- if (predicates.isEmpty() && frag_tables.size() > 0) { if (trace.val) LOG.trace("No columns accessed in " + catalog_frag + " despite reading " + frag_tables.size() + " tables"); stmt_cache.markAsBroadcast(frag_tables); frag_cache.markAsBroadcast(frag_tables); } // ------------------------------- // Fragment references the columns for our tables. Pick them apart! // ------------------------------- else { // First go through all the entries and add any mappings from // Columns to StmtParameters to our stmt_cache for (CatalogPair pair : predicates) { if (trace.val) LOG.trace(String.format("Examining extracted %s: %s", pair.getClass().getSimpleName(), pair)); // Column = Column if (pair.getFirst() instanceof Column && pair.getSecond() instanceof Column) { Column col0 = (Column) pair.getFirst(); Column col1 = (Column) pair.getSecond(); // If this table is a view, then we need to check whether // we have to point the column down to the origin column if (col0.getMatviewsource() != null) { col0 = col0.getMatviewsource(); } if (col1.getMatviewsource() != null) { col1 = col1.getMatviewsource(); } if (!pair.getComparisonExp().equals(ExpressionType.COMPARE_EQUAL)) { if (debug.val) LOG.warn(String.format("Unsupported non-equality join in %s: %s", catalog_stmt.fullName(), pair)); } else { if (!column_joins.containsKey(col0)) column_joins.put(col0, new TreeSet<Column>()); if (!column_joins.containsKey(col1)) column_joins.put(col1, new TreeSet<Column>()); column_joins.get(col0).add(col1); column_joins.get(col1).add(col0); } continue; } // Look for predicates with StmtParameters or ConstantValues for (Table catalog_tbl : frag_tables) { Column catalog_col = null; CatalogType catalog_param = null; // *********************************** DEBUG *********************************** if (trace.val) { LOG.trace("Current Table: " + catalog_tbl.hashCode()); if (pair.getFirst() != null) { LOG.trace("entry.getFirst().getParent(): " + (pair.getFirst().getParent() != null ?
pair.getFirst().getParent().hashCode() : pair.getFirst() + " parent is null?")); if (pair.getFirst().getParent() instanceof Table) { Table parent = pair.getFirst().getParent(); if (parent.getName().equals(catalog_tbl.getName())) { assert(parent.equals(catalog_tbl)) : "Mismatch on " + parent.getName() + "???"; } } } else { LOG.trace("entry.getFirst(): " + null); } if (pair.getSecond() != null) { LOG.trace("entry.getSecond().getParent(): " + (pair.getSecond().getParent() != null ? pair.getSecond().getParent().hashCode() : pair.getSecond() + " parent is null?")); } else { LOG.trace("entry.getSecond(): " + null); } } // *********************************** DEBUG *********************************** // Column = (StmtParameter or ConstantValue) if (pair.getFirst().getParent() != null && pair.getFirst().getParent().equals(catalog_tbl) && (pair.getSecond() instanceof StmtParameter || pair.getSecond() instanceof ConstantValue) ) { catalog_col = (Column) pair.getFirst(); catalog_param = pair.getSecond(); } // (StmtParameter or ConstantValue) = Column else if (pair.getSecond().getParent() != null && pair.getSecond().getParent().equals(catalog_tbl) && (pair.getFirst() instanceof StmtParameter || pair.getFirst() instanceof ConstantValue)) { catalog_col = (Column) pair.getSecond(); catalog_param = pair.getFirst(); } if (catalog_col != null && catalog_param != null) { // If this table is a view, then we need to check whether // we have to point the column down to the origin column if (catalog_col.getMatviewsource() != null) { if (debug.val) LOG.debug("Found View Column: " + catalog_col.fullName() + " -> " + catalog_col.getMatviewsource().fullName()); catalog_col = catalog_col.getMatviewsource(); } if (trace.val) LOG.trace(String.format("[%s] Adding cache entry for %s: %s -> %s", CatalogUtil.getDisplayName(catalog_tbl), CatalogUtil.getDisplayName(catalog_frag), CatalogUtil.getDisplayName(catalog_col), CatalogUtil.getDisplayName(catalog_param))); stmt_cache.put(catalog_col, catalog_param, pair.getComparisonExp(), catalog_tbl); frag_cache.put(catalog_col, catalog_param, pair.getComparisonExp(), catalog_tbl); } } // FOR (tables) if (trace.val) LOG.trace("-------------------"); } // FOR (entry) // We now have to take a second pass through the column mappings // This will pick-up those columns that are joined together where one of them // is also referenced with an input parameter. 
So we will map the input // parameter to the second column as well PartitionEstimator.populateColumnJoinSets(column_joins); for (Column catalog_col : column_joins.keySet()) { // We have to examine the ColumnSet and // look for any reference to this column if (trace.val) LOG.trace("Trying to find all references to " + CatalogUtil.getDisplayName(catalog_col)); for (Column other_col : column_joins.get(catalog_col)) { // IMPORTANT: If the other entry is a column from another table and we don't // have a reference in stmt_cache for ourselves, then we can look to see if // this column was used against a StmtParameter somewhere else in the Statement. // If this is the case, then we can substitute that parameter in its place if (stmt_cache.predicates.containsKey(catalog_col)) { for (Pair<ExpressionType, CatalogType> pair : stmt_cache.predicates.get(catalog_col)) { if (trace.val) LOG.trace(String.format("Linking %s to predicate %s because of %s", other_col.fullName(), pair, catalog_col.fullName())); ExpressionType expType = pair.getFirst(); CatalogType param = pair.getSecond(); stmt_cache.put(other_col, param, expType, (Table)other_col.getParent()); frag_cache.put(other_col, param, expType, (Table)other_col.getParent()); } // FOR (StmtParameter.Index) } } // FOR (Column) } // FOR (Column) } if (trace.val) LOG.trace(frag_cache.toString()); // Loop through all of our tables and make sure that there is an entry in the PlanFragment CacheEntry // If there isn't, then that means there was no predicate on the table and therefore the PlanFragment // must be broadcast to all partitions (unless it is replicated) for (Table catalog_tbl : frag_tables) { if (!frag_cache.hasTable(catalog_tbl)) { if (trace.val) LOG.trace(String.format("No column predicate for %s. Marking as broadcast for %s: %s", catalog_tbl.fullName(), catalog_frag.fullName(), frag_cache.getTables())); frag_cache.markAsBroadcast(catalog_tbl); stmt_cache.markAsBroadcast(catalog_tbl); } } // FOR // Store the Fragment cache and update the Table xref mapping this.cache_fragmentEntries.put(frag_key, frag_cache); this.addTableCacheXref(frag_cache, frag_tables); } // FOR (fragment) // Then for updates we need to look to see whether they are updating an attribute // that they are partitioned on. If so, then it gets dicey because we need to // know the value...
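// For example (illustrative SQL, not from any catalog): an UPDATE such as
//    UPDATE T SET PART_COL = ? WHERE ID = ?
// writes a new value into the partitioning column, so below we record the
// column -> parameter mappings that the SET clause gives us.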
if (stmt_type == QueryType.UPDATE) { List<Table> tables = new ArrayList<Table>(); PredicatePairs update_cset = new PredicatePairs(); for (Table catalog_tbl : CatalogUtil.getReferencedTables(catalog_stmt)) { update_cset.clear(); tables.clear(); tables.add(catalog_tbl); AbstractPlanNode root_node = PlanNodeUtil.getRootPlanNodeForStatement(catalog_stmt, true); CatalogUtil.extractUpdatePredicates(catalog_stmt, catalogContext.database, update_cset, root_node, true, tables); boolean found = false; for (CatalogPair pair : update_cset) { Column catalog_col = null; CatalogType catalog_param = null; // For now we only care about look-ups using StmtParameters or ConstantValues if (pair.getFirst() instanceof StmtParameter || pair.getFirst() instanceof ConstantValue) { catalog_col = (Column) pair.getSecond(); catalog_param = pair.getFirst(); } else if (pair.getSecond() instanceof StmtParameter || pair.getSecond() instanceof ConstantValue) { catalog_col = (Column) pair.getFirst(); catalog_param = pair.getSecond(); } else { if (trace.val) LOG.trace(String.format("Skipping entry %s when examining the update information for %s", pair, catalog_tbl)); continue; } assert (catalog_col != null); assert (catalog_param != null); stmt_cache.put(catalog_col, catalog_param, pair.getComparisonExp(), catalog_tbl); found = true; } // FOR if (trace.val && found) LOG.trace("UpdatePlanNode in " + catalog_stmt.fullName() + " modifies " + catalog_tbl); } // FOR } // IF (UPDATE) } // FOR (single-partition vs multi-partition) // Add the Statement cache entry and update the Table xref map this.cache_statementEntries.put(stmt_key, stmt_cache); this.addTableCacheXref(stmt_cache, stmt_tables); } /** * Update the cache entry xref mapping for a set of tables * * @param entry * @param tables */ private void addTableCacheXref(CacheEntry entry, Collection<Table> tables) { for (Table catalog_tbl : tables) { String table_key = CatalogKey.createKey(catalog_tbl); if (!this.table_cache_xref.containsKey(table_key)) { this.table_cache_xref.put(table_key, new HashSet<CacheEntry>()); } this.table_cache_xref.get(table_key).add(entry); } // FOR } // ---------------------------------------------------------------------------- // TABLE ROW METHODS // ---------------------------------------------------------------------------- /** * Return the partition for the given VoltTableRow * * @param catalog_tbl * @param row * @return * @throws Exception */ public int getTableRowPartition(final Table catalog_tbl, final VoltTableRow row) throws Exception { assert (!catalog_tbl.getIsreplicated()) : "Trying to partition replicated table: " + catalog_tbl; if (debug.val) LOG.debug("Calculating partition for VoltTableRow from " + catalog_tbl); int partition = -1; Column catalog_col = this.cache_tablePartitionColumns.get(catalog_tbl); assert (catalog_col != null) : "Null partition column: " + catalog_tbl; assert ((catalog_col instanceof VerticalPartitionColumn) == false) : "Invalid partitioning column: " + catalog_col.fullName(); // Multi-Column Partitioning if (catalog_col instanceof MultiColumn) { MultiColumn mc = (MultiColumn) catalog_col; if (debug.val) LOG.debug(catalog_tbl.getName() + " MultiColumn: " + mc); Object values[] = new Object[mc.size()]; for (int i = 0; i < values.length; i++) { Column inner = mc.get(i); VoltType type = VoltType.get(inner.getType()); values[i] = row.get(inner.getIndex(), type); } // FOR partition = this.hasher.multiValueHash(values); // Single-Column Partitioning } else { VoltType type = VoltType.get(catalog_col.getType()); Object value
= row.get(catalog_col.getIndex(), type); partition = this.hasher.hash(value, catalog_col); if (debug.val) LOG.debug(String.format("%s SingleColumn: Value=%s / Partition=%d", catalog_col.fullName(), value, partition)); } assert (partition >= 0) : "Invalid partition for " + catalog_tbl; return (partition); } // ---------------------------------------------------------------------------- // BASE PARTITION METHODS // ---------------------------------------------------------------------------- /** * Returns the target partition for a StoredProcedureInvocation instance * @param invocation * @return * @throws Exception */ public int getBasePartition(StoredProcedureInvocation invocation) throws Exception { Procedure catalog_proc = this.catalogContext.database.getProcedures().get(invocation.getProcName()); if (catalog_proc == null) { catalog_proc = this.catalogContext.database.getProcedures().getIgnoreCase(invocation.getProcName()); } assert(catalog_proc != null) : "Invalid procedure name '" + invocation.getProcName() + "'"; return (this.getBasePartition(catalog_proc, invocation.getParams().toArray(), false)); } /** * Returns the target partition for a stored procedure + parameters * @param catalog_proc * @param params * @return * @throws Exception */ public int getBasePartition(final Procedure catalog_proc, final Object params[]) throws Exception { return (this.getBasePartition(catalog_proc, params, false)); } /** * Return the target partition for a TransactionTrace * @param txn_trace * @return * @throws Exception */ public int getBasePartition(final TransactionTrace txn_trace) throws Exception { if (debug.val) LOG.debug("Calculating base partition for " + txn_trace.toString()); return (this.getBasePartition(txn_trace.getCatalogItem(this.catalogContext.database), txn_trace.getParams(), true)); } /** * Main method for calculating the base partition for a stored procedure * * @param catalog_proc * @param params * @param force * @return * @throws Exception */ public int getBasePartition(final Procedure catalog_proc, final Object params[], boolean force) throws Exception { assert(catalog_proc != null); assert(params != null); // assert(catalog_proc.getParameters().size() == params.length) : // String.format("Invalid number of ProcParameters for %s: %d != %d", // catalog_proc, catalog_proc.getParameters().size(), params.length); ProcParameter catalog_param = this.cache_procPartitionParameters.get(catalog_proc); if (catalog_param == null) { if (force) { int idx = catalog_proc.getPartitionparameter(); if (idx == NullProcParameter.PARAM_IDX || catalog_proc.getParameters().isEmpty()) { catalog_param = NullProcParameter.singleton(catalog_proc); } else if (idx == RandomProcParameter.PARAM_IDX) { catalog_param = RandomProcParameter.singleton(catalog_proc); } else { catalog_param = catalog_proc.getParameters().get(idx); } this.cache_procPartitionParameters.put(catalog_proc, catalog_param); if (debug.val) LOG.debug("Added cached " + catalog_param + " for " + catalog_proc); } else { if (debug.val) LOG.debug(catalog_proc + " has no parameters. No base partition for you!"); return (HStoreConstants.NULL_PARTITION_ID); } } if (force == false && (catalog_param == null || catalog_param instanceof NullProcParameter)) { if (debug.val) LOG.debug(catalog_proc + " does not have a pre-defined partition parameter. No base partition!"); return (HStoreConstants.NULL_PARTITION_ID); // } else if (!force && !catalog_proc.getSinglepartition()) { // if (debug.val) LOG.debug(catalog_proc + // " is not marked as single-partitioned. Executing as multi-partition"); // return (null); } int partition = HStoreConstants.NULL_PARTITION_ID; boolean is_array = catalog_param.getIsarray(); // Special Case: RandomProcParameter if (catalog_param instanceof RandomProcParameter) { partition = RandomProcParameter.rand.nextInt(this.num_partitions); } // Special Case: MultiProcParameter else if (catalog_param instanceof MultiProcParameter) { MultiProcParameter mpp = (MultiProcParameter) catalog_param; if (debug.val) LOG.debug(catalog_proc.getName() + " MultiProcParameter: " + mpp); int hashes[] = new int[mpp.size()]; for (int i = 0; i < hashes.length; i++) { int mpp_param_idx = mpp.get(i).getIndex(); assert (mpp_param_idx >= 0) : "Invalid Partitioning MultiProcParameter #" + mpp_param_idx; assert (mpp_param_idx < params.length) : CatalogUtil.getDisplayName(mpp) + " < " + params.length; int hash = this.calculatePartition(catalog_proc, params[mpp_param_idx], is_array); hashes[i] = (hash == HStoreConstants.NULL_PARTITION_ID ? 0 : hash); if (debug.val) LOG.debug(mpp.get(i) + " value[" + params[mpp_param_idx] + "] => hash[" + hashes[i] + "]"); } // FOR partition = this.hasher.multiValueHash(hashes); if (debug.val) LOG.debug(Arrays.toString(hashes) + " => " + partition); } // Single ProcParameter else { if (debug.val) LOG.debug("Calculating base partition using " + catalog_param.fullName() + ": " + params[catalog_param.getIndex()]); assert(catalog_param.getIndex() >= 0) : "Invalid parameter offset " + catalog_param.fullName(); partition = this.calculatePartition(catalog_proc, params[catalog_param.getIndex()], is_array); } return (partition); } // ---------------------------------------------------------------------------- // DETAILED PARTITION METHODS // ---------------------------------------------------------------------------- /** * Populate the given set with all of the partition ids that all of the QueryTraces * in this TransactionTrace will touch based on the current catalog. * Note that this estimate will also include the base partition where the txn's control * code will execute. * @param partitions * @param xact * @throws Exception */ public void getAllPartitions(final PartitionSet partitions, final TransactionTrace xact) throws Exception { Procedure catalog_proc = xact.getCatalogItem(this.catalogContext.database); int base_partition = this.getBasePartition(catalog_proc, xact.getParams(), true); partitions.add(base_partition); for (QueryTrace query : xact.getQueries()) { this.getAllPartitions(partitions, query.getCatalogItem(this.catalogContext.database), query.getParams(), base_partition); } // FOR } /** * Populate the given set with all of the partition ids that this QueryTrace * will touch based on the current catalog. * @param partitions * @param query * @param base_partition * @throws Exception */ public void getAllPartitions(final PartitionSet partitions, final QueryTrace query, final int base_partition) throws Exception { Statement catalog_stmt = query.getCatalogItem(this.catalogContext.database); this.getAllPartitions(partitions, catalog_stmt, query.getParams(), base_partition); } /** * Populate the given set with all of the partition ids that this Statement with the * given parameters will touch based on the current catalog.
* @param all_partitions * @param catalog_stmt * @param params * @param base_partition * @throws Exception */ public void getAllPartitions(final PartitionSet all_partitions, final Statement catalog_stmt, final Object params[], final int base_partition) throws Exception { // Note that we will use the single-sited fragments (if available) since they will be // faster for us to figure out which partitions have the data that this statement needs CatalogMap<PlanFragment> fragments = (catalog_stmt.getHas_singlesited() ? catalog_stmt.getFragments() : catalog_stmt.getMs_fragments()); this.getAllFragmentPartitions(null, all_partitions, fragments.values(), params, base_partition); } // ---------------------------------------------------------------------------- // STATEMENT PARTITION METHODS // ---------------------------------------------------------------------------- /** * Return the table -> partitions mapping for the given QueryTrace object * * @param query * @param base_partition * @return * @throws Exception */ public Map<String, PartitionSet> getTablePartitions(final QueryTrace query, final int base_partition) throws Exception { Statement catalog_stmt = query.getCatalogItem(this.catalogContext.database); return (this.getTablePartitions(catalog_stmt, query.getParams(), base_partition)); } /** * Return all of the partitions per table for the given Statement object * * @param catalog_stmt * @param params * @param base_partition * @return * @throws Exception */ public Map<String, PartitionSet> getTablePartitions(final Statement catalog_stmt, final Object params[], final int base_partition) throws Exception { Map<String, PartitionSet> all_partitions = new HashMap<String, PartitionSet>(); CatalogMap<PlanFragment> fragments = (catalog_stmt.getHas_singlesited() ? catalog_stmt.getFragments() : catalog_stmt.getMs_fragments()); for (PlanFragment catalog_frag : fragments) { try { Map<String, PartitionSet> frag_partitions = new HashMap<String, PartitionSet>(); this.calculatePartitionsForFragment(frag_partitions, null, catalog_frag, params, base_partition); for (String table_key : frag_partitions.keySet()) { if (!all_partitions.containsKey(table_key)) { all_partitions.put(table_key, frag_partitions.get(table_key)); } else { all_partitions.get(table_key).addAll(frag_partitions.get(table_key)); } } // FOR } catch (Throwable ex) { throw new Exception("Failed to calculate table partitions for " + catalog_frag.fullName(), ex); } } // FOR return (all_partitions); } // ---------------------------------------------------------------------------- // FRAGMENT PARTITION METHODS // ---------------------------------------------------------------------------- /** * Return the set of StmtParameter offsets that can be used to figure out * what partitions the Statement invocation will touch. This is used to * quickly figure out whether that invocation is single-partition or not. If * this Statement will always be multi-partition, or if the tables it * references use a MultiColumn partitioning attribute, then the return * set will be null. This is at a coarse-grained level. You still need to * use the other PartitionEstimator methods to figure out where to send * PlanFragments.
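 * <p>A hedged usage sketch (variable names are illustrative): callers can hash the
 * parameter values at the returned offsets to guess the touched partitions:</p>
 * <pre>{@code
 * int[] offsets = estimator.getStatementEstimationParameters(catalog_stmt);
 * if (offsets != null) {
 *     // each offset points at a StmtParameter that is equality-compared
 *     // against some table's partitioning column
 *     for (int offset : offsets) {
 *         int partition = estimator.getHasher().hash(params[offset]);
 *     }
 * }
 * }</pre>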
* * @param catalog_stmt * @return */ public int[] getStatementEstimationParameters(final Statement catalog_stmt) { if (debug.val) LOG.debug("Retrieving estimation parameter offsets for " + catalog_stmt.fullName()); int[] all_param_idxs = this.cache_stmtPartitionParameters.get(catalog_stmt); if (all_param_idxs == null) { List<Integer> param_idxs = new ArrayList<Integer>(); // Assume single-partition if (catalog_stmt.getHas_singlesited() == false) { if (debug.val) LOG.warn("There is no single-partition query plan for " + catalog_stmt.fullName()); return (null); } for (PlanFragment catalog_frag : catalog_stmt.getFragments().values()) { PartitionEstimator.CacheEntry cache_entry = null; try { cache_entry = this.getFragmentCacheEntry(catalog_frag); } catch (Exception ex) { throw new RuntimeException("Failed to retrieve CacheEntry for " + catalog_frag.fullName(), ex); } // If this PlanFragment has a broadcast, then this statement // can't be used for fast look-ups if (cache_entry.hasBroadcast()) { if (debug.val) LOG.warn(String.format("%s contains an operation that must be broadcast. " + "Cannot be used for fast look-ups", catalog_frag.fullName())); return (null); } for (Table catalog_tbl : cache_entry.getTables()) { if (catalog_tbl.getMaterializer() != null) { catalog_tbl = catalog_tbl.getMaterializer(); } Column partition_col = catalog_tbl.getPartitioncolumn(); if (partition_col instanceof MultiColumn) { if (debug.val) LOG.warn(String.format("%s references %s, which is partitioned on %s. " + "Cannot be used for fast look-ups", catalog_frag.fullName(), catalog_tbl.getName(), partition_col.fullName())); return (null); } else if (partition_col != null && cache_entry.predicates.containsKey(partition_col)) { for (Pair<ExpressionType, CatalogType> pair : cache_entry.predicates.get(partition_col)) { if (pair.getFirst() == ExpressionType.COMPARE_EQUAL && pair.getSecond() instanceof StmtParameter) { param_idxs.add(((StmtParameter)pair.getSecond()).getIndex()); } } // FOR } } // FOR if (param_idxs.isEmpty() == false) all_param_idxs = CollectionUtil.toIntArray(param_idxs); } // FOR this.cache_stmtPartitionParameters.put(catalog_stmt, all_param_idxs); } return (all_param_idxs); } /** * @param frag_partitions * @param fragments * @param params * @param base_partition * @return * @throws Exception */ public Map<PlanFragment, PartitionSet> getAllFragmentPartitions(final Map<PlanFragment, PartitionSet> frag_partitions, final PlanFragment fragments[], final Object params[], final int base_partition) throws Exception { this.getAllFragmentPartitions(frag_partitions, null, fragments, params, base_partition); return (frag_partitions); } /** * Populate a mapping from PlanFragments to PartitionSets. * <B>NOTE:</B> This is the one to use at runtime in the BatchPlanner because it doesn't * allocate any new Collections!
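 * <p>A minimal sketch of the intended runtime pattern (names are illustrative):
 * the caller allocates both collections once and reuses them for every batch:</p>
 * <pre>{@code
 * Map<PlanFragment, PartitionSet> frag_partitions = new HashMap<PlanFragment, PartitionSet>();
 * PartitionSet all_partitions = new PartitionSet();
 * estimator.getAllFragmentPartitions(frag_partitions, all_partitions,
 *                                    fragments, params, base_partition);
 * }</pre>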
* * @param frag_partitions * @param frag_all_partitions * @param fragments * @param params * @param base_partition * @return * @throws Exception */ public void getAllFragmentPartitions(final Map<PlanFragment, PartitionSet> frag_partitions, final PartitionSet frag_all_partitions, final PlanFragment fragments[], final Object params[], final int base_partition) throws Exception { // Loop through this Statement's plan fragments and get the partitions for (PlanFragment catalog_frag : fragments) { PartitionSet partitions = null; // If we have a FragPartition map, then use an entry from that if (frag_partitions != null) { partitions = frag_partitions.get(catalog_frag); if (partitions == null) { partitions = new PartitionSet(); frag_partitions.put(catalog_frag, partitions); } else { partitions.clear(); } // Otherwise use our AllPartitions set } else { partitions = frag_all_partitions; } assert(partitions != null); this.calculatePartitionsForFragment(null, partitions, catalog_frag, params, base_partition); // If there were no partitions, then the PlanFragment needs to be // executed on the base partition // Because these are the PlanFragments that aggregate the results together // XXX: Not sure if this is right, but it's 5:30pm on a snowy night // so it's good enough for me... if (partitions.isEmpty()) partitions.add(base_partition); if (frag_partitions != null && frag_all_partitions != null) frag_all_partitions.addAll(partitions); } // FOR } /** * Return the list of partitions that this fragment needs to be sent to based * on the parameters * * @param catalog_frag * @param params * @param base_partition * @return * @throws Exception */ public PartitionSet getPartitions(final PartitionSet partitions, final PlanFragment catalog_frag, final Object params[], final int base_partition) throws Exception { this.calculatePartitionsForFragment(null, partitions, catalog_frag, params, base_partition); return (partitions); } // ---------------------------------------------------------------------------- // INTERNAL CALCULATION METHODS // ---------------------------------------------------------------------------- /** * @param catalog_frag * @param params * @param base_partition * @return * @throws Exception */ private void calculatePartitionsForFragment(final Map<String, PartitionSet> entry_partitions, final PartitionSet all_partitions, final PlanFragment catalog_frag, final Object params[], final int base_partition) throws Exception { if (trace.val) LOG.trace("Estimating partitions for PlanFragment #" + catalog_frag.fullName()); PartitionEstimator.CacheEntry cache_entry = this.getFragmentCacheEntry(catalog_frag); this.calculatePartitionsForCache(cache_entry, params, base_partition, entry_partitions, all_partitions); if (debug.val) { if (entry_partitions != null) LOG.debug(String.format("%s Table Partitions: %s", catalog_frag.fullName(), entry_partitions)); if (all_partitions != null) LOG.debug(String.format("%s All Partitions: %s", catalog_frag.fullName(), all_partitions)); } return; } private PartitionEstimator.CacheEntry getFragmentCacheEntry(PlanFragment catalog_frag) throws Exception { String frag_key = CatalogKey.createKey(catalog_frag); // Check whether we have generated the cache entries for this Statement // The CacheEntry object just tells us what input parameter to use for // hashing to figure out where we need to go for each table.
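// This is a double-checked look-up: we re-read the map inside the synchronized
// block so that a thread that lost the race does not regenerate the cache a
// second time.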
PartitionEstimator.CacheEntry cache_entry = this.cache_fragmentEntries.get(frag_key); if (cache_entry == null) { synchronized (this) { cache_entry = this.cache_fragmentEntries.get(frag_key); if (cache_entry == null) { Statement catalog_stmt = (Statement) catalog_frag.getParent(); this.generateCache(catalog_stmt); cache_entry = this.cache_fragmentEntries.get(frag_key); } } // SYNCHRONIZED } assert (cache_entry != null) : "Failed to retrieve CacheEntry for " + catalog_frag.fullName(); return (cache_entry); } /** * This is the most important method here! This is where we actually calculate what * partitions the given element is going to touch. Given a target CacheEntry, we'll * look at each table that it accesses and then use the parameter mapping offset to find * the values that correspond to the table's partitioning columns. * * This method can either update the PartitionSets for each individual table that * the target accesses or a global PartitionSet. Both of these parameters are optional. * * @param target * @param params * @param base_partition * @param entry_table_partitions * @param entry_all_partitions * @throws Exception */ private void calculatePartitionsForCache(final CacheEntry target, final Object params[], final int base_partition, final Map<String, PartitionSet> entry_table_partitions, final PartitionSet entry_all_partitions) throws Exception { // Hash the input parameters to determine what partitions we're headed to QueryType stmt_type = target.query_type; // Update cache if (target.is_array == null) { target.is_array = new boolean[params.length]; for (int i = 0; i < target.is_array.length; i++) { target.is_array[i] = ClassUtil.isArray(params[i]); } // FOR } final PartitionSet table_partitions = this.partitionSetPool.borrowObject(); assert(table_partitions != null); // Go through each table referenced in this CacheEntry and look up the parameters that the // partitioning columns are referenced against to determine what partitions we need to go to // IMPORTANT: If there are no tables (meaning it's some PlanFragment that combines data output // from other PlanFragments), then we won't return anything because it is up to the caller // to figure out where to send this PlanFragment (it may be at the coordinator) Table tables[] = target.getTables(); if (trace.val) { Map<String, Object> m = new LinkedHashMap<String, Object>(); m.put("CacheEntry", target.toString()); m.put("Tables", tables); m.put("Params", Arrays.toString(params)); m.put("Base Partition", base_partition); LOG.trace("Calculating partitions for " + target.query_type + "\n" + StringUtil.formatMaps(m)); } for (int table_idx = 0; table_idx < target.is_replicated.length; table_idx++) { final Table catalog_tbl = tables[table_idx]; // REPLICATED TABLE if (target.is_replicated[table_idx]) { switch (stmt_type) { // If this table is replicated and this query is a scan, // then we're in the clear and there's nothing else we need to do here // for the current table (but we still need to check the other guys). case SELECT: if (trace.val) LOG.trace("Cache entry " + target + " will execute on the local partition"); if (base_partition != HStoreConstants.NULL_PARTITION_ID) table_partitions.add(base_partition); break; // Conversely, if it's replicated but we're performing an update or // a delete, then we know it's not single-sited. The modification has // to be broadcast to all partitions.
case INSERT: case UPDATE: case DELETE: if (trace.val) LOG.trace("Cache entry " + target + " must be broadcast to all partitions"); table_partitions.addAll(this.all_partitions); break; // BUSTED default: assert (false) : "Unexpected query type: " + stmt_type; } // SWITCH } // NON-REPLICATED TABLE else { // We need to calculate the partition value based on this table's partitioning column Column catalog_col = cache_tablePartitionColumns.get(catalog_tbl); if (trace.val) LOG.trace("Partitioning Column: " + (catalog_col != null ? catalog_col.fullName() : catalog_col)); // MULTI-COLUMN PARTITIONING // Strap on your seatbelts, we're going in!!! if (catalog_col instanceof MultiColumn) { // HACK: All multi-column look-ups on queries with an OR // must be broadcast if (target.isMarkedContainsOR()) { if (debug.val) LOG.warn("Trying to use multi-column partitioning [" + catalog_col.fullName() + "] on query that contains an 'OR': " + target); table_partitions.addAll(this.all_partitions); } else { MultiColumn mc = (MultiColumn) catalog_col; PartitionSet mc_partitions[] = this.mcPartitionSetPool.borrowObject(); if (trace.val) LOG.trace("Calculating partitions for multi-column partitioning column: " + mc); boolean is_valid = true; for (int i = 0, mc_cnt = mc.size(); i < mc_cnt; i++) { Column mc_column = mc.get(i); // assert(cache_entry.get(mc_column_key) != null) : // "Null CacheEntry: " + mc_column_key; if (target.predicates.containsKey(mc_column)) { this.calculatePartitions(mc_partitions[i], params, target.is_array, target.predicates.get(mc_column), mc_column); } // If we don't have partition values for both keys, // then it has to be a broadcast if (mc_partitions[i].isEmpty()) { if (debug.val) LOG.warn(String.format("No partitions for %s from %s. " + "Cache entry %s must be broadcast to all partitions", mc_column.fullName(), mc.fullName(), target)); table_partitions.addAll(this.all_partitions); is_valid = false; break; } if (trace.val) LOG.trace(CatalogUtil.getDisplayName(mc_column) + ": " + mc_partitions[i]); } // FOR // Now if we're here, then we have partitions for both // of the columns and we're legit // We therefore just need to take the cross product of // the two sets and hash them together if (is_valid) { for (int part0 : mc_partitions[0]) { for (int part1 : mc_partitions[1]) { int partition = this.hasher.multiValueHash(part0, part1); table_partitions.add(partition); if (trace.val) LOG.trace(String.format("MultiColumn Partitions[%d, %d] => %d", part0, part1, partition)); } // FOR } // FOR } this.mcPartitionSetPool.returnObject(mc_partitions); } } // SINGLE COLUMN PARTITIONING else { List<Pair<ExpressionType, CatalogType>> predicates = target.predicates.get(catalog_col); if (trace.val) LOG.trace("Param Indexes: " + predicates); // Important: If there is no entry for this partitioning // column, then we have to broadcast this fragment if (predicates == null || predicates.isEmpty()) { if (debug.val) LOG.debug(String.format("No parameter mapping for %s.
Fragment must be broadcast to all partitions", CatalogUtil.getDisplayName(catalog_col))); table_partitions.addAll(this.all_partitions); // If there is nothing special, just shove off and have // this method figure things out for us } else { if (trace.val) LOG.trace("Calculating partitions normally for " + target); this.calculatePartitions(table_partitions, params, target.is_array, predicates, catalog_col); } } } // ELSE assert (table_partitions.size() <= this.num_partitions); if (entry_table_partitions != null) { String table_key = CatalogKey.createKey(catalog_tbl); PartitionSet table_p = entry_table_partitions.get(table_key); if (table_p == null) { entry_table_partitions.put(table_key, new PartitionSet(table_partitions)); } else { table_p.clear(); table_p.addAll(table_partitions); } } if (entry_all_partitions != null) { entry_all_partitions.addAll(table_partitions); } // OPTIMIZATION: If we aren't calculating the individual partitions for each table // separately (i.e., we are calculating the "global" partitions needed for the cache entry), // then we can check whether we are already touching all partitions. If so, then that means // there are no more partitions to add to the set and therefore we can stop here. if (entry_table_partitions == null && entry_all_partitions.size() == this.num_partitions) break; } // FOR this.partitionSetPool.returnObject(table_partitions); return; } /** * Calculate the partitions touched for the given column * * @param partitions * @param params * @param predicates * @param catalog_col */ private void calculatePartitions(final PartitionSet partitions, final Object params[], final boolean is_array[], final List<Pair<ExpressionType, CatalogType>> predicates, final Column catalog_col) throws Exception { // Note that we have to go through all of the mappings from the partitioning column // to parameters. This can occur when the partitioning column is referenced multiple times // This allows us to handle complex WHERE clauses and what not. for (Pair<ExpressionType, CatalogType> pair : predicates) { ExpressionType expType = pair.getFirst(); CatalogType param = pair.getSecond(); // HACK HACK HACK // If this is not an equality comparison, then it has to go to all partitions. // If we ever want to support smarter range partitioning, then // we will need to move the logic that examines the expression type into // the hasher code. if (expType != ExpressionType.COMPARE_EQUAL) { partitions.addAll(this.all_partitions); break; } // STATEMENT PARAMETER // This is the common case if (param instanceof StmtParameter) { int param_idx = ((StmtParameter)param).getIndex(); // IMPORTANT: Check if the parameter is an array. If it is, then we // have to loop through and get the hash of all of the values if (is_array[param_idx]) { int num_elements = Array.getLength(params[param_idx]); if (trace.val) LOG.trace(String.format("%s is an array. 
Calculating multiple partitions", param)); for (int i = 0; i < num_elements; i++) { Object value = Array.get(params[param_idx], i); int partition_id = this.hasher.hash(value, catalog_col); if (trace.val) LOG.trace(String.format("%s HASHING PARAM ARRAY[%d][%d]: %s -> %d", catalog_col.fullName(), param_idx, i, value, partition_id)); partitions.add(partition_id); } // FOR } // Primitive Value else { int partition_id = this.hasher.hash(params[param_idx], catalog_col); if (trace.val) LOG.trace(String.format("%s HASHING PARAM [%d]: %s -> %d", catalog_col.fullName(), param_idx, params[param_idx], partition_id)); partitions.add(partition_id); } } // CONSTANT VALUE // This is more rare else if (param instanceof ConstantValue) { ConstantValue const_param = (ConstantValue)param; VoltType vtype = VoltType.get(const_param.getType()); Object const_value = VoltTypeUtil.getObjectFromString(vtype, const_param.getValue()); int partition_id = this.hasher.hash(const_value); partitions.add(partition_id); } // BUSTED! else { throw new RuntimeException("Unexpected parameter type: " + param.fullName()); } } // FOR return; } /** * Return the partition touched for a given procedure's parameter value. * If the given parameter is an array, then we will just use the first element. * @param catalog_proc * @param partition_param_val * @param is_array Whether the value is an array. * @return * @throws Exception */ private int calculatePartition(final Procedure catalog_proc, Object param_val, final boolean is_array) throws Exception { // If the parameter is an array, then just use the first value if (is_array) { int num_elements = Array.getLength(param_val); if (num_elements == 0) { if (debug.val) LOG.warn("Empty partitioning parameter array for " + catalog_proc); return (HStoreConstants.NULL_PARTITION_ID); } else { param_val = Array.get(param_val, 0); } } else if (param_val == null) { if (debug.val) LOG.warn("Null ProcParameter value: " + catalog_proc); return (HStoreConstants.NULL_PARTITION_ID); } return (this.hasher.hash(param_val, catalog_proc)); } // ---------------------------------------------------------------------------- // UTILITY METHODS // ---------------------------------------------------------------------------- /** * Debug output */ @Override public String toString() { String ret = ""; for (Procedure catalog_proc : this.catalogContext.database.getProcedures()) { StringBuilder sb = new StringBuilder(); boolean has_entries = false; sb.append(CatalogUtil.getDisplayName(catalog_proc)).append(":\n"); for (Statement catalog_stmt : catalog_proc.getStatements()) { String stmt_key = CatalogKey.createKey(catalog_stmt); CacheEntry stmt_cache = this.cache_statementEntries.get(stmt_key); if (stmt_cache == null) continue; has_entries = true; sb.append(" " + catalog_stmt.getName() + ": ").append(stmt_cache).append("\n"); for (PlanFragment catalog_frag : CatalogUtil.getAllPlanFragments(catalog_stmt)) { String frag_key = CatalogKey.createKey(catalog_frag); CacheEntry frag_cache = this.cache_fragmentEntries.get(frag_key); if (frag_cache == null) continue; sb.append(" PlanFragment[" + catalog_frag.getName() + "]: ").append(frag_cache).append("\n"); } } // FOR if (has_entries) ret += sb.toString() + StringUtil.SINGLE_LINE; } // FOR return (ret); } /** * For each Column key in the given map, recursively populate their sets to contain * the Cartesian product of all the other Columns' sets. 
* @param column_joins */ protected static void populateColumnJoinSets(final Map<Column, Set<Column>> column_joins) { int orig_size = 0; for (Collection<Column> cols : column_joins.values()) { orig_size += cols.size(); } // First we have to take the Cartesian product of all mapped joins for (Column c0 : column_joins.keySet()) { // For each column that c0 is joined with, add a reference to c0 for // all the columns that the other column references for (Column c1 : column_joins.get(c0)) { assert (!c1.equals(c0)); for (Column c2 : column_joins.get(c1)) { if (!c0.equals(c2)) column_joins.get(c2).add(c0); } // FOR } // FOR } // FOR int new_size = 0; for (Collection<Column> cols : column_joins.values()) { new_size += cols.size(); } if (new_size != orig_size) populateColumnJoinSets(column_joins); } /** * Pre-load the cache entries for all Statements */ public void preload() { assert (this.catalogContext != null); for (Procedure catalog_proc : this.catalogContext.database.getProcedures()) { for (Statement catalog_stmt : catalog_proc.getStatements()) { try { this.generateCache(catalog_stmt); this.getStatementEstimationParameters(catalog_stmt); } catch (Exception ex) { LOG.fatal("Failed to generate cache for " + catalog_stmt.fullName(), ex); System.exit(1); } } // FOR } // FOR for (CacheEntry entry : this.cache_fragmentEntries.values()) { entry.getTables(); } for (CacheEntry entry : this.cache_statementEntries.values()) { entry.getTables(); } } }
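/* ---------------------------------------------------------------------------
 * Usage sketch (illustrative only; the variable names below are assumptions,
 * not part of this class). A typical lifecycle builds the estimator from a
 * CatalogContext, warms its caches, and then asks for partitions at runtime:
 *
 *   PartitionEstimator estimator = new PartitionEstimator(catalogContext);
 *   estimator.preload();
 *   int base_partition = estimator.getBasePartition(catalog_proc, params);
 *   PartitionSet partitions = new PartitionSet();
 *   estimator.getAllPartitions(partitions, catalog_stmt, params, base_partition);
 * ------------------------------------------------------------------------- */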