package edu.brown.statistics;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.collections15.map.ListOrderedMap;
import org.apache.log4j.Logger;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Table;
import org.voltdb.types.ExpressionType;
import edu.brown.designer.DependencyGraph;
import edu.brown.designer.DesignerEdge;
import edu.brown.designer.DesignerVertex;
import edu.brown.designer.MemoryEstimator;
import edu.brown.graphs.VertexTreeWalker;
import edu.brown.graphs.VertexTreeWalker.TraverseOrder;
import edu.brown.utils.ArgumentsParser;
import edu.brown.utils.ClassUtil;
import edu.brown.utils.ProjectType;
import edu.brown.utils.StringUtil;
import edu.uci.ics.jung.graph.util.EdgeType;
/**
* @author pavlo
*/
public abstract class AbstractTableStatisticsGenerator {
/** Logger shared by this base class and its subclasses. */
protected static final Logger LOG = Logger.getLogger(AbstractTableStatisticsGenerator.class);
/** Catalog database handed to the constructor; used when building the DependencyGraph. */
protected final Database catalog_db;
/** Benchmark project type handed to the constructor. */
protected final ProjectType project_type;
/** Scale factor (asserted > 0); generate() divides non-fixed tuple counts by this value. */
protected final double scale_factor;
/**
 * Registered profiles keyed by their table; populated only through
 * addTableProfile(). Backed by a ListOrderedMap (presumably so that
 * iteration follows registration order - confirm against the library docs).
 */
private final Map<Table, TableProfile> table_profiles = new ListOrderedMap<Table, TableProfile>();
/**
 * DependencyOperation
 * <p>
 * Immutable record of one arithmetic step that ties a table's tuple count to
 * the tuple count of a parent table: the operator to apply and the factor by
 * which the parent's count is multiplied before it is applied.
 */
private static class DependencyOperation {
    /** Parent table whose tuple count feeds this operation */
    private final Table catalog_tbl;
    /** Operator used to combine the parent's contribution with the running count */
    private final ExpressionType type;
    /** Multiplier applied to the parent's tuple count */
    private final double scale_factor;

    /**
     * @param catalog_tbl  the parent table
     * @param type         the operator to apply
     * @param scale_factor multiplier for the parent's tuple count
     */
    public DependencyOperation(Table catalog_tbl, ExpressionType type, double scale_factor) {
        this.scale_factor = scale_factor;
        this.type = type;
        this.catalog_tbl = catalog_tbl;
    }

    /**
     * Renders the operation as {@code TYPE (TABLE_NAME * SCALE_FACTOR)} for
     * debug logging.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(this.type);
        text.append(" (");
        text.append(this.catalog_tbl.getName());
        text.append(" * ");
        text.append(this.scale_factor);
        text.append(")");
        return text.toString();
    }
} // END CLASS
/**
 * TableProfile
 * <p>
 * Describes how the tuple count of one table is derived: a base tuple count,
 * a flag saying whether the count is exempt from the generator's scale
 * factor, and an ordered list of dependencies on other tables' counts.
 */
protected static class TableProfile {
    /** Table this profile describes */
    private final Table catalog_tbl;
    /**
     * Base tuple count. generate() unboxes this value, so a null count will
     * fail there with a NullPointerException.
     */
    private final Long tuple_count;
    /** When true, generate() does not divide the count by the generator's scale factor */
    private final boolean is_fixed;
    /** Operations applied, in insertion order, on top of the base tuple count */
    private final List<DependencyOperation> dependencies = new ArrayList<DependencyOperation>();

    /**
     * Default Constructor
     *
     * @param catalog_tbl the table being profiled
     * @param is_fixed    true if the tuple count must not be scaled
     * @param tuple_count base number of tuples for the table
     */
    public TableProfile(Table catalog_tbl, boolean is_fixed, Long tuple_count) {
        this.catalog_tbl = catalog_tbl;
        this.is_fixed = is_fixed;
        this.tuple_count = tuple_count;
    }

    /**
     * Convenience Constructor that looks the table up by name and uses a
     * base tuple count of 1.
     *
     * @param catalog_db catalog database containing the table
     * @param table_name name of the table being profiled
     * @param is_fixed   true if the tuple count must not be scaled
     * @throws IllegalArgumentException if the catalog has no table with that name
     */
    public TableProfile(Database catalog_db, String table_name, boolean is_fixed) {
        this(lookupTable(catalog_db, table_name), is_fixed, 1l);
    }

    /**
     * Convenience Constructor that looks the table up by name.
     *
     * @param catalog_db  catalog database containing the table
     * @param table_name  name of the table being profiled
     * @param is_fixed    true if the tuple count must not be scaled
     * @param tuple_count base number of tuples for the table
     * @throws IllegalArgumentException if the catalog has no table with that name
     */
    public TableProfile(Database catalog_db, String table_name, boolean is_fixed, long tuple_count) {
        this(lookupTable(catalog_db, table_name), is_fixed, tuple_count);
    }

    /**
     * Resolves a table name against the catalog, failing immediately when the
     * lookup yields null. Without this check a misspelled name would create a
     * profile/dependency on a null table and only blow up much later inside
     * the graph construction or statistics generation.
     *
     * @param catalog_db catalog database to search
     * @param table_name name of the table to find
     * @return the matching table, never null
     * @throws IllegalArgumentException if the lookup returns null
     */
    private static Table lookupTable(Database catalog_db, String table_name) {
        Table catalog_tbl = catalog_db.getTables().get(table_name);
        if (catalog_tbl == null) {
            throw new IllegalArgumentException("Unknown table '" + table_name + "' in catalog database");
        }
        return (catalog_tbl);
    }

    /**
     * Shared implementation of the add*Dependency methods.
     *
     * @param catalog_db        catalog database containing the parent table
     * @param parent_table_name name of the table this profile depends on
     * @param type              operator used to combine the parent's contribution
     * @param scale_factor      multiplier applied to the parent's tuple count
     * @throws IllegalArgumentException if the parent table does not exist
     */
    private void addDependency(Database catalog_db, String parent_table_name, ExpressionType type, double scale_factor) {
        Table parent_tbl = lookupTable(catalog_db, parent_table_name);
        assert (!this.catalog_tbl.equals(parent_tbl)) : "Trying to make table " + this.catalog_tbl + " depend on itself";
        this.dependencies.add(new DependencyOperation(parent_tbl, type, scale_factor));
    }

    /**
     * Adds a dependency between this table and another table where
     * TABLE_COUNT = TABLE_COUNT + (PARENT_TABLE_COUNT * SCALE_FACTOR)
     *
     * @param catalog_db        catalog database containing the parent table
     * @param parent_table_name name of the table this profile depends on
     * @param scale_factor      multiplier applied to the parent's tuple count
     * @throws IllegalArgumentException if the parent table does not exist
     */
    public void addAdditionDependency(Database catalog_db, String parent_table_name, double scale_factor) {
        this.addDependency(catalog_db, parent_table_name, ExpressionType.OPERATOR_PLUS, scale_factor);
    }

    /**
     * Adds a dependency between this table and another table where
     * TABLE_COUNT = TABLE_COUNT * (PARENT_TABLE_COUNT * SCALE_FACTOR)
     *
     * @param catalog_db        catalog database containing the parent table
     * @param parent_table_name name of the table this profile depends on
     * @param scale_factor      multiplier applied to the parent's tuple count
     * @throws IllegalArgumentException if the parent table does not exist
     */
    public void addMultiplicativeDependency(Database catalog_db, String parent_table_name, double scale_factor) {
        this.addDependency(catalog_db, parent_table_name, ExpressionType.OPERATOR_MULTIPLY, scale_factor);
    }

    /**
     * @return true if at least one dependency has been added to this profile
     */
    public boolean hasDependencies() {
        return (!this.dependencies.isEmpty());
    }

    /**
     * Return the set of tables this TableProfile is dependent on
     *
     * @return an unmodifiable set holding the parent table of every dependency
     */
    public Set<Table> getDependentTables() {
        Set<Table> tables = new HashSet<Table>();
        for (DependencyOperation d : this.dependencies) {
            tables.add(d.catalog_tbl);
        } // FOR
        return (Collections.unmodifiableSet(tables));
    }
}
/**
 * Stores the generator's configuration and then asks the concrete subclass
 * to register its TableProfiles.
 *
 * @param catalog_db   catalog database the profiles refer to
 * @param project_type benchmark project this generator belongs to
 * @param scale_factor scale factor for non-fixed tables; asserted to be
 *                     greater than zero
 */
public AbstractTableStatisticsGenerator(Database catalog_db, ProjectType project_type, double scale_factor) {
    this.scale_factor = scale_factor;
    this.project_type = project_type;
    this.catalog_db = catalog_db;
    assert (scale_factor > 0);
    // The subclass hook runs as part of construction, i.e. before the
    // subclass's own constructor body has executed.
    this.createProfiles();
}
/**
 * All child classes must implement this method, which populates the
 * generator with TableProfiles. It is invoked from this class's
 * constructor.
 */
public abstract void createProfiles();
/**
 * Registers a profile under the table it describes. Registering two
 * profiles for the same table trips an assertion.
 *
 * @param profile the profile to register
 */
public void addTableProfile(TableProfile profile) {
    final Table tbl = profile.catalog_tbl;
    assert (this.table_profiles.containsKey(tbl) == false) : "Duplicate TableProfile for " + tbl;
    LOG.debug("Adding table profile for " + tbl);
    this.table_profiles.put(tbl, profile);
}
/**
 * Builds a DependencyGraph from the registered TableProfiles. Every profiled
 * table becomes a vertex, and a directed edge runs from table A to table B
 * whenever B's tuple count depends on A's tuple count.
 *
 * @return the populated dependency graph
 */
private DependencyGraph generateDependencyGraph() {
    final DependencyGraph graph = new DependencyGraph(this.catalog_db);

    // Pass 1: every profiled table gets a vertex, so that the edge pass
    // below can look up both endpoints.
    for (Table tbl : this.table_profiles.keySet()) {
        graph.addVertex(new DesignerVertex(tbl));
    }

    // Pass 2: one directed edge parent -> child per dependency
    for (Entry<Table, TableProfile> entry : this.table_profiles.entrySet()) {
        final Table child_tbl = entry.getKey();
        final TableProfile child_profile = entry.getValue();
        final DesignerVertex child_vertex = graph.getVertex(child_tbl);
        for (Table parent_tbl : child_profile.getDependentTables()) {
            boolean added = graph.addEdge(new DesignerEdge(graph), graph.getVertex(parent_tbl), child_vertex, EdgeType.DIRECTED);
            assert (added) : "Failed to add edge from " + parent_tbl + " to " + child_tbl;
        }
    }
    return (graph);
}
/**
 * Produces a TableStatistics record for every registered TableProfile.
 * <p>
 * Tables without dependencies are handled first, straight from their
 * profile. The remaining tables are filled in by walking the dependency
 * graph from each root, applying every DependencyOperation to the table's
 * base tuple count using the already-computed statistics of the parent.
 * In both cases the count is divided by the generator's scale factor unless
 * the profile is marked as fixed. Each table, and a final total, is logged
 * at INFO level.
 *
 * @return map from each profiled table to its generated statistics
 * @throws Exception declared as part of this method's signature
 */
public Map<Table, TableStatistics> generate() throws Exception {
    LOG.info("Generating TableStatistics for " + this.table_profiles.size() + " tables with scale factor " + this.scale_factor);

    // Log row layout: table name, tuple count, total size in GB
    final String row_format = "%-30s %-15d [%.2fGB]";
    final double bytes_per_gb = 1073741824d;

    // The dependency graph drives the order in which dependent tables are computed
    final DependencyGraph dgraph = this.generateDependencyGraph();
    assert (dgraph.getVertexCount() == this.table_profiles.size());

    final Map<Table, TableStatistics> stats = new HashMap<Table, TableStatistics>();

    // Phase 1: tables whose tuple count does not depend on any other table
    for (Entry<Table, TableProfile> entry : this.table_profiles.entrySet()) {
        final TableProfile profile = entry.getValue();
        if (!profile.hasDependencies()) {
            final Table tbl = entry.getKey();
            LOG.debug("Generating FIXED TableStatistics for " + tbl);

            // Nothing to derive here: take the profile's count and, unless
            // the table is fixed, divide it by the scale factor.
            final double divisor = (profile.is_fixed ? 1.0 : this.scale_factor);
            TableStatistics ts = new TableStatistics(tbl);
            ts.tuple_count_total = Math.round(profile.tuple_count / divisor);
            ts.tuple_size_max = ts.tuple_size_min = ts.tuple_size_avg = MemoryEstimator.estimateTupleSize(tbl);
            ts.tuple_size_total = ts.tuple_size_avg * ts.tuple_count_total;
            stats.put(tbl, ts);
            LOG.info(String.format(row_format, tbl.getName(), ts.tuple_count_total, ts.tuple_size_total / bytes_per_gb));
        }
    }

    // Phase 2: walk the graph from every root and compute the dependent tables
    for (DesignerVertex root : dgraph.getRoots()) {
        new VertexTreeWalker<DesignerVertex, DesignerEdge>(dgraph, TraverseOrder.LONGEST_PATH) {
            // A vertex whose table already has statistics counts as visited
            protected boolean hasVisited(DesignerVertex element) {
                return (super.hasVisited(element) || stats.containsKey(element.getCatalogItem()));
            }

            protected void callback(DesignerVertex element) {
                final Table tbl = element.getCatalogItem();
                if (stats.containsKey(tbl)) {
                    return;
                }
                final TableProfile profile = table_profiles.get(tbl);
                final TableStatistics ts = new TableStatistics(tbl);

                // Start from the profile's base count and fold in each dependency
                ts.tuple_count_total = profile.tuple_count;
                if (profile.hasDependencies()) {
                    LOG.debug("Calculating tuple count for " + tbl.getName() + " using " + profile.dependencies.size() + " dependencies");
                    for (DependencyOperation op : profile.dependencies) {
                        LOG.debug(tbl.getName() + " => " + ts.tuple_count_total + " " + op);
                        final TableStatistics parent_ts = stats.get(op.catalog_tbl);
                        assert (parent_ts != null) : "Missing parent stats '" + op.catalog_tbl + "' for '" + tbl + "'";
                        final long parent_tuples = Math.round(parent_ts.tuple_count_total * op.scale_factor);
                        switch (op.type) {
                            case OPERATOR_MULTIPLY:
                                ts.tuple_count_total *= parent_tuples;
                                break;
                            case OPERATOR_PLUS:
                                ts.tuple_count_total += parent_tuples;
                                break;
                            default:
                                assert (false) : "Unexpected DependencyOperation type " + op.type;
                        } // SWITCH
                    }
                }

                // Scale (unless fixed) and derive the size figures
                final double divisor = (profile.is_fixed ? 1.0 : scale_factor);
                ts.tuple_count_total = Math.round(ts.tuple_count_total / divisor);
                ts.tuple_size_max = ts.tuple_size_min = ts.tuple_size_avg = MemoryEstimator.estimateTupleSize(tbl);
                ts.tuple_size_total = ts.tuple_size_avg * ts.tuple_count_total;
                stats.put(tbl, ts);
                LOG.info(String.format(row_format, tbl.getName(), ts.tuple_count_total, ts.tuple_size_total / bytes_per_gb));
            }
        }.traverse(root);
    }

    // Phase 3: make sure every profile produced statistics, and total them up
    long total_tuples = 0;
    long total_size = 0;
    for (Table tbl : this.table_profiles.keySet()) {
        final TableStatistics ts = stats.get(tbl);
        assert (ts != null) : "Failed to create TableStatistics for " + tbl;
        total_tuples += ts.tuple_count_total;
        total_size += ts.tuple_size_total;
    }
    LOG.info(StringUtil.repeat("-", 60));
    LOG.info(String.format(row_format, "TOTAL SIZE", total_tuples, total_size / bytes_per_gb));
    return (stats);
}
/**
 * Generates the table statistics and applies them to the given
 * WorkloadStatistics object.
 *
 * @param stats the workload statistics that receive the generated table
 *              statistics
 * @throws Exception propagated from generate() or from applying the result
 */
public void apply(WorkloadStatistics stats) throws Exception {
    final Map<Table, TableStatistics> generated = this.generate();
    assert (generated != null);
    stats.apply(generated);
}
/**
 * Instantiates the TableStatisticsGenerator that belongs to the given
 * ProjectType. The class name is built from the project's package name and
 * benchmark prefix, and the class is constructed through ClassUtil with a
 * (Database, double) constructor signature.
 *
 * @param catalog_db   catalog database passed to the generator's constructor
 * @param ptype        project type that determines which class is loaded
 * @param scale_factor scale factor passed to the generator's constructor
 * @return the newly created generator (asserted to be non-null)
 */
public static AbstractTableStatisticsGenerator factory(Database catalog_db, ProjectType ptype, double scale_factor) {
    final String class_name = String.format("%s.%sTableStatisticsGenerator", ptype.getPackageName(), ptype.getBenchmarkPrefix());
    final Object[] ctor_args = new Object[] { catalog_db, scale_factor };
    final Class<?>[] ctor_types = new Class<?>[] { Database.class, double.class };
    AbstractTableStatisticsGenerator instance = (AbstractTableStatisticsGenerator) ClassUtil.newInstance(class_name, ctor_args, ctor_types);
    assert (instance != null);
    return (instance);
}
/**
 * Command-line entry point: builds the generator for the catalog's project
 * type, generates the table statistics, applies them to a fresh
 * WorkloadStatistics object and saves that object to the output file.
 * Requires the catalog type, stats scale factor and stats output
 * parameters.
 *
 * @param vargs raw command-line arguments
 * @throws Exception if argument parsing, generation or saving fails
 */
public static void main(String[] vargs) throws Exception {
    final ArgumentsParser args = ArgumentsParser.load(vargs);
    args.require(ArgumentsParser.PARAM_CATALOG_TYPE, ArgumentsParser.PARAM_STATS_SCALE_FACTOR, ArgumentsParser.PARAM_STATS_OUTPUT);

    final double scale = args.getDoubleParam(ArgumentsParser.PARAM_STATS_SCALE_FACTOR);
    final File output_file = args.getFileParam(ArgumentsParser.PARAM_STATS_OUTPUT);

    final AbstractTableStatisticsGenerator gen = factory(args.catalog_db, args.catalog_type, scale);
    final Map<Table, TableStatistics> generated = gen.generate();
    assert (generated != null);

    final WorkloadStatistics workload_stats = new WorkloadStatistics(args.catalog_db);
    workload_stats.apply(generated);
    workload_stats.save(output_file);
}
}