package com.tesora.dve.tools.aitemplatebuilder; /* * #%L * Tesora Inc. * Database Virtualization Engine * %% * Copyright (C) 2011 - 2014 Tesora Inc. * %% * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * #L% */ import java.io.PrintStream; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import org.apache.commons.lang.BooleanUtils; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Level; import org.apache.log4j.Logger; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import com.tesora.dve.common.MathUtils; import com.tesora.dve.common.MultiMap; import com.tesora.dve.exceptions.PEException; import com.tesora.dve.sql.schema.Column; import com.tesora.dve.sql.schema.PEColumn; import com.tesora.dve.sql.schema.QualifiedName; import com.tesora.dve.sql.template.TemplateBuilder; import com.tesora.dve.sql.template.jaxb.ModelType; import com.tesora.dve.sql.template.jaxb.TableTemplateType; import com.tesora.dve.sql.template.jaxb.Template; import com.tesora.dve.sql.util.Pair; import com.tesora.dve.tools.CLIBuilder; import com.tesora.dve.tools.CLIBuilder.ColorStringBuilder; import com.tesora.dve.tools.CLIBuilder.ConsoleColor; import com.tesora.dve.tools.aitemplatebuilder.CorpusStats.JoinStats; import com.tesora.dve.tools.aitemplatebuilder.CorpusStats.Relationship; import com.tesora.dve.tools.aitemplatebuilder.CorpusStats.Relationship.RelationshipSpecification; import com.tesora.dve.tools.aitemplatebuilder.CorpusStats.StatementType; import com.tesora.dve.tools.aitemplatebuilder.CorpusStats.TableSizeComparator; import com.tesora.dve.tools.aitemplatebuilder.CorpusStats.TableStats; import com.tesora.dve.tools.aitemplatebuilder.CorpusStats.TableStats.ForeignRelationship; import com.tesora.dve.tools.aitemplatebuilder.CorpusStats.TableStats.TableColumn; import com.tesora.dve.tools.aitemplatebuilder.FuzzyLinguisticVariable.FlvName; import com.tesora.dve.tools.aitemplatebuilder.FuzzyTableDistributionModel.Variables; public final class AiTemplateBuilder { private static final Set<TemplateModelItem> AVAILABLE_MODELS = ImmutableSet.<TemplateModelItem> of( Broadcast.SINGLETON_TEMPLATE_ITEM, Random.SINGLETON_TEMPLATE_ITEM, Range.SINGLETON_TEMPLATE_ITEM); public static TemplateModelItem getModelForName(final String name) throws PEException { for (final TemplateModelItem model : AVAILABLE_MODELS) { if (model.getTemplateItemName().equalsIgnoreCase(name)) { return model; } } throw new PEException("Invalid distribution model '" + String.valueOf(name) + "' specified"); } public static enum MessageSeverity { INFO(ConsoleColor.DEFAULT, Level.INFO), ALERT(ConsoleColor.BLUE, Level.INFO), WARNING(ConsoleColor.YELLOW, Level.WARN), SEVERE(ConsoleColor.RED, Level.ERROR); private final ConsoleColor textColor; private final Level log4jLevel; private MessageSeverity(final ConsoleColor textColor, final Level log4jLevel) { this.textColor = textColor; this.log4jLevel = log4jLevel; } public ConsoleColor getColor() { return this.textColor; } public Level getLogLevel() { return this.log4jLevel; } } public static enum ScoreBonus { AI_RANGE_COLUMN("Range backed by an AI column", 0.15f, true), INTEGRAL_RANGE_COLUMN("Range backed by an integral column", 0.05f, true), UPDATED_RANGE_COLUMN("Update on the distribution vector", -0.10f, false), UNSAFE_RANGE_COLUMN("Column not safe for ranging on", -1.0f, true); private final String description; private final float bonus; private final boolean perRangeOnly; private ScoreBonus(final String description, final float bonus, final boolean perRangeOnly) { this.description = description; this.bonus = bonus; this.perRangeOnly = perRangeOnly; } public String print(final TableColumn column) { return this.description.concat(": ").concat(column.getQualifiedName()); } public float getBonusFactor() { return this.bonus; } /** * Some bonuses are per whole range only. * * The per-range bonuses generally get applied on the range score only * once (e.g. there can be several AI columns involved in a single * range, but the bonus gets applied only once). * * Others apply to individual relationships/joins within a range. */ public boolean isPerRangeOnly() { return this.perRangeOnly; } /** * A negative bonus is a penalty. */ public boolean isNegative() { return this.bonus < 0.0; } @Override public String toString() { return this.description; } } private static enum RedistCause { ORDER_BY("Sort on a non-broadcast table") { @Override protected String causingObjectToString(final Object object) { return "[F:" + String.valueOf(object) + "x]"; } }, UNIQUE_UPDATE("Update on a unique key") { @SuppressWarnings("unchecked") @Override protected String causingObjectToString(final Object object) { final Map.Entry<TableColumn, Long> entry = (Map.Entry<TableColumn, Long>) object; return "[" + String.valueOf(entry.getKey()) + ": U:" + String.valueOf(entry.getValue()) + "x]"; } }, DV_UPDATE("Update on the distribution vector") { @SuppressWarnings("unchecked") @Override protected String causingObjectToString(final Object object) { final Map.Entry<TableColumn, Long> entry = (Map.Entry<TableColumn, Long>) object; return "[" + String.valueOf(entry.getKey()) + ": U:" + String.valueOf(entry.getValue()) + "x]"; } }, NON_COLLOCATED_JOIN("Non-collocated join") { @Override protected String causingObjectToString(final Object object) { return String.valueOf(object); } }; protected final String description; private RedistCause(final String description) { this.description = description; } public String print(final Object object) { return this.description.concat(": ").concat(causingObjectToString(object)); } protected abstract String causingObjectToString(final Object cause); @Override public String toString() { return this.description; } } public static final String LINE_SEPARATOR = System.getProperty("line.separator"); public static final String LINE_INDENT = "\t"; public static final int NUMBER_DISPLAY_PRECISION = 2; private static final Logger logger = Logger.getLogger(AiTemplateBuilder.class); private static final double LOWER_CORPUS_COVERAGE_THRESHOLD_PC = 40.0; private static final double UPPER_CORPUS_COVERAGE_THRESHOLD_PC = 60.0; private static final double MOSTLY_WRITTEN_THRESHOLD_SCORE = 60.0; private static int TABLE_NAME_MIN_PREFIX_LENGTH = 4; private static String TABLE_NAME_WILDCARD = ".*"; private static final class Ranges { private static final class RangeSizeSorter implements Comparator<CommonRange> { @Override public int compare(CommonRange a, CommonRange b) { return a.getSize() - b.getSize(); } } private static final RangeSizeSorter RANGE_SIZE_SORTER = new RangeSizeSorter(); private final List<CommonRange> ranges = new ArrayList<CommonRange>(); private final boolean isSafeMode; private final boolean isRowWidthWeightingEnabled; final List<Long> frequencies = new ArrayList<Long>(); final List<Long> cardinalities = new ArrayList<Long>(); public Ranges(final boolean isSafeMode, final boolean isRowWidthWeightingEnabled) { this.isSafeMode = isSafeMode; this.isRowWidthWeightingEnabled = isRowWidthWeightingEnabled; } public void add(final CommonRange range) { this.ranges.add(range); } public boolean isEmpty() { return this.frequencies.isEmpty(); } public List<Long> getPredictedFutureJoinCardinalities() { return Collections.unmodifiableList(this.cardinalities); } public List<Long> getJoinFrequencies() { return Collections.unmodifiableList(this.frequencies); } public void generateAllPossibleCombinations() { flattenJoins(); diversify(); } /** * OUTER JOINs can be merged with compatible INNER JOINs provided that * they are not joins to Broadcast that trigger redistribution - those * have to be handled separately. */ private void flattenJoins() { for (final CommonRange range : this.ranges) { range.flattenJoins(); } } /** * For all overlapping ranges add their versions without the overlaps. */ private void diversify() { final List<CommonRange> diversifiedRanges = new ArrayList<CommonRange>(); for (final CommonRange range : this.ranges) { for (final CommonRange overlappingRange : getOverlapping(this.ranges, range)) { if (overlappingRange != range) { if (range.getSize() > 1) { final CommonRange copy = new CommonRange(range); for (final TableStats table : overlappingRange.getTables()) { copy.remove(table); } if (copy.getSize() > 0) { diversifiedRanges.add(copy); } } } } } this.ranges.addAll(diversifiedRanges); } private void addJoin(final Relationship join) throws PEException { if (join.isRangeCompatible()) { this.frequencies.add(join.getFrequency()); this.cardinalities.add(join.getPredictedFutureSize(this.isRowWidthWeightingEnabled)); final List<CommonRange> affectedRanges = addJoinToSuitableRanges(join); if (affectedRanges.isEmpty()) { final CommonRange range = new CommonRange(join, this.isSafeMode); /* * Add suitable joins from other ranges. */ for (final CommonRange other : this.ranges) { other.mergeInto(range); } this.ranges.add(range); } else { /* * Two-way merge all affected ranges with their overlaps. */ final Set<CommonRange> uniqueMergedRanges = new HashSet<CommonRange>(); for (final CommonRange range : affectedRanges) { uniqueMergedRanges.addAll(doTwoWayMergeOnOverlappingRanges(range)); } /* * Check for and remove completely overlapping ranges. */ this.ranges.removeAll(findAllSubranges(new ArrayList<CommonRange>(uniqueMergedRanges))); } } } private List<CommonRange> doTwoWayMergeOnOverlappingRanges(final CommonRange range) throws PEException { final List<CommonRange> overlappingRanges = getOverlapping(this.ranges, range); for (final CommonRange overlappingRange : overlappingRanges) { if (overlappingRange != range) { range.mergeInto(overlappingRange); overlappingRange.mergeInto(range); } } return overlappingRanges; } private Set<CommonRange> findAllSubranges(final List<CommonRange> ranges) { final int numRanges = ranges.size(); final Set<CommonRange> subranges = new HashSet<CommonRange>(numRanges); for (int i = 0; i < (numRanges - 1); ++i) { /* Keep the merged ranges sorted by size. */ Collections.sort(ranges, RANGE_SIZE_SORTER); final CommonRange range = ranges.get(i); for (int j = i + 1; j < numRanges; ++j) { final CommonRange next = ranges.get(j); if (range.isSubrangeOf(next)) { subranges.add(range); break; } } } return subranges; } private List<CommonRange> addJoinToSuitableRanges(final Relationship join) throws PEException { final List<CommonRange> affectedRanges = new ArrayList<CommonRange>(); for (final CommonRange range : this.ranges) { if (range.isSuitableFor(join)) { range.addJoin(join); affectedRanges.add(range); } } return affectedRanges; } private void evaluate(final Set<Long> uniqueJoinFrequency, final SortedSet<Long> sortedJoinCardinalities) { for (final CommonRange range : this.ranges) { range.evaluate(uniqueJoinFrequency, sortedJoinCardinalities, this.isRowWidthWeightingEnabled); } } private List<CommonRange> getSortedCommonRanges() { final List<CommonRange> sortedRanges = new ArrayList<CommonRange>(this.ranges); Collections.sort(sortedRanges, FuzzyLinguisticVariable.getScoreComparator()); return sortedRanges; } private static List<CommonRange> getOverlapping(final List<CommonRange> ranges, final CommonRange range) { final List<CommonRange> overlappingRanges = new ArrayList<CommonRange>(); for (final CommonRange item : ranges) { if (item.hasIntersectionWith(range)) { overlappingRanges.add(item); } } return overlappingRanges; } } public static List<Template> buildAllBroadcastTemplates(final List<String> databases) { return buildSingleModelTemplates(databases, Broadcast.SINGLETON_TEMPLATE_ITEM); } public static List<Template> buildAllRandomTemplates(final List<String> databases) { return buildSingleModelTemplates(databases, Random.SINGLETON_TEMPLATE_ITEM); } private static List<Template> buildSingleModelTemplates(final List<String> databases, final TemplateItem distributionModel) { final List<Template> templates = new ArrayList<Template>(); for (final String database : databases) { templates.add(buildSingleModelTemplate(database, distributionModel)); } return templates; } private static Template buildSingleModelTemplate(final String databaseName, final TemplateItem distributionModel) { final TemplateBuilder builder = new TemplateBuilder(databaseName); builder.withTable(TABLE_NAME_WILDCARD, distributionModel.getTemplateItemName()); return builder.toTemplate(); } private static String getWelcomeMessage(final List<String> databases, final Long broadcastCardinalityCutoff, final boolean followForeignKeys, final boolean isSafeMode) { final StringBuilder welcomeMessage = new StringBuilder(); welcomeMessage.append("Generating templates for '" + StringUtils.join(databases, "', '") + "'."); welcomeMessage.append(" Broadcast cardinality cutoff: ").append((broadcastCardinalityCutoff != null) ? broadcastCardinalityCutoff : "automatic"); welcomeMessage.append(" Following FKs: ").append(followForeignKeys); welcomeMessage.append(" Safe mode: ").append(isSafeMode); welcomeMessage.append("..."); return welcomeMessage.toString(); } private static String getTableNameAndDistributionModel(final TableStats table) { final StringBuilder value = new StringBuilder(); value.append(table).append(": ").append(table.getTableDistributionModel()); if (table.hasDistributionModelFreezed()) { value.append(" (").append("user defined").append(")"); } return value.toString(); } /** * Get bonus % for a given column. * Bonus AI columns which are generally safe and ideal for the Range * distribution. * Also slightly bonus integral types which are often good candidates for * Ranging. * * In the "Safe Mode" only AI columns get non-zero score. */ public static Set<ScoreBonus> getBonusesForColumn(final TableColumn column, final boolean isSafeMode) { if (isSafeMode) { if (!isAutoIncrement(column)) { return Collections.singleton(ScoreBonus.UNSAFE_RANGE_COLUMN); } } else { final Set<ScoreBonus> bonuses = new LinkedHashSet<ScoreBonus>(); if (isAutoIncrement(column)) { bonuses.add(ScoreBonus.AI_RANGE_COLUMN); } else if (column.getType().isIntegralType()) { bonuses.add(ScoreBonus.INTEGRAL_RANGE_COLUMN); } if (column.getUpdateCount() > 0) { bonuses.add(ScoreBonus.UPDATED_RANGE_COLUMN); } return bonuses; } return Collections.EMPTY_SET; } public static boolean isAutoIncrement(final TableColumn column) { final Column<?> columnInstance = column.getColumnInstance(); return ((columnInstance instanceof PEColumn) && ((PEColumn) columnInstance).isAutoIncrement()); } public static boolean hasAutoIncrement(final Set<TableColumn> columns) { for (final TableColumn column : columns) { if (isAutoIncrement(column)) { return true; } } return false; } public static boolean hasRangeCompatible(final Set<TableColumn> columns) { for (final TableColumn column : columns) { if (column.getType().isAcceptableRangeType()) { return true; } } return false; } public static boolean isRangeToRangeRelationship(final Relationship relationship) { final TemplateItem lhsModel = relationship.getLHS().getTableDistributionModel(); final TemplateItem rhsModel = relationship.getRHS().getTableDistributionModel(); return ((lhsModel instanceof Range) && (rhsModel instanceof Range)); } /** * Although collocated, OUTER JOINs to Broadcast require redistribution * if the Broadcast table is the first table in the join. */ public static boolean isJoinToBroadcastAndRequiresRedist(final Relationship join) { final RelationshipSpecification type = join.getType(); if (type.isOuterJoin()) { final TemplateItem lhsModel = join.getLHS().getTableDistributionModel(); final TemplateItem rhsModel = join.getRHS().getTableDistributionModel(); if (type.isLeftOuterJoin()) { return ((lhsModel instanceof Broadcast) && (rhsModel instanceof Range)); } else if (type.isRightOuterJoin()) { return ((rhsModel instanceof Broadcast) && (lhsModel instanceof Range)); } } return false; } public static MultiMap<RedistCause, Object> getRedistOperations(final TableStats table, final Set<JoinStats> joins, final Set<? extends TemplateRangeItem> availableRanges) { final MultiMap<RedistCause, Object> operations = new MultiMap<RedistCause, Object>(); final TemplateModelItem model = table.getTableDistributionModel(); if (!model.isBroadcast()) { final TemplateRangeItem range = findRangeForTable(availableRanges, table); final Set<TableColumn> dv = (range != null) ? range.getRangeColumnsFor(table) : Collections.EMPTY_SET; if (table.hasStatements(StatementType.ORDERBY)) { operations.put(RedistCause.ORDER_BY, table.getStatementCounts(StatementType.ORDERBY)); } if (table.hasStatements(StatementType.UPDATE)) { final Map<TableColumn, Long> updateColumns = table.getUpdateColumns(); for (final Entry<TableColumn, Long> entry : updateColumns.entrySet()) { final TableColumn column = entry.getKey(); if (column.isPrimary() || column.isUnique()) { operations.put(RedistCause.UNIQUE_UPDATE, entry); } else if (!dv.isEmpty() && dv.contains(column)) { operations.put(RedistCause.DV_UPDATE, entry); } } } if (table.hasStatements(StatementType.JOIN)) { for (final JoinStats join : CorpusStats.findJoinsForTable(joins, table)) { if ((model instanceof Random) || (isRangeToRangeRelationship(join) || isJoinToBroadcastAndRequiresRedist(join))) { if (dv.isEmpty() || (table.equals(join.getLHS()) && !dv.equals(join.getLeftColumns())) || (table.equals(join.getRHS()) && !dv.equals(join.getRightColumns()))) { operations.put(RedistCause.NON_COLLOCATED_JOIN, join); } } } } } return operations; } public static Set<ForeignRelationship> getNonCollocatedFks(final TableStats table, final Set<? extends TemplateRangeItem> availableRanges) { final Set<ForeignRelationship> nonCollocatedRelationships = new LinkedHashSet<ForeignRelationship>(); for (final ForeignRelationship relationship : table.getForwardRelationships()) { final TableStats lhs = relationship.getLHS(); final TableStats rhs = relationship.getRHS(); if (rhs.getTableDistributionModel().isBroadcast()) { continue; // Range -> Broadcast and Broadcast -> Broadcast are always collocated. } else if (lhs.getTableDistributionModel().isBroadcast()) { nonCollocatedRelationships.add(relationship); continue; // Broadcast -> Range cannot be collocated. } final TemplateRangeItem leftRange = findRangeForTable(availableRanges, lhs); final TemplateRangeItem rightRange = findRangeForTable(availableRanges, rhs); if ((leftRange == null) || (rightRange == null)) { nonCollocatedRelationships.add(relationship); // Tables not in a single range. continue; } final Set<TableColumn> leftColumns = leftRange.getRangeColumnsFor(lhs); final Set<TableColumn> rightColumns = rightRange.getRangeColumnsFor(rhs); if (!leftRange.equals(rightRange) || !leftColumns.equals(relationship.getLeftColumns()) || !rightColumns.equals(relationship.getRightColumns())) { nonCollocatedRelationships.add(relationship); } } return nonCollocatedRelationships; } /** * Frequent writes to a broadcast table without granular locking support may * lead to excessive table locking within XA transactions. */ public static boolean hasExcessiveBroadcastLocking(final TableStats table, final boolean avoidAllWriteBroadcasting) { return (table.getTableDistributionModel().isBroadcast() && hasExcessiveLocking(table, avoidAllWriteBroadcasting)); } public static boolean hasExcessiveLocking(final TableStats table, final boolean avoidAllWriteBroadcasting) { return isUsingWrites(table, avoidAllWriteBroadcasting) && isMostlyWritten(table); } public static boolean isUsingWrites(final TableStats table, final boolean avoidAllWriteBroadcasting) { return (avoidAllWriteBroadcasting || !table.supportsRowLocking()); } public static boolean isMostlyWritten(final TableStats table) { if (table.hasStatements() && (table.getWritePercentage() > 0.0)) { // Make a copy of the table's current distribution model. final Broadcast asBroadcast = new Broadcast((FuzzyTableDistributionModel) table.getTableDistributionModel()); // Disable unwanted scoring parameters. asBroadcast.setWeightOnRule(Variables.SORTS_FLV_NAME, 0.0); asBroadcast.setWeightOnRule(Variables.CARDINALITY_FLV_NAME, 0.0); asBroadcast.evaluate(); return (asBroadcast.getScore() < MOSTLY_WRITTEN_THRESHOLD_SCORE); } return false; } public static boolean isFkCompatibleJoin(final JoinStats join) { /* * If this is a join to Broadcast verify that it can be collocated * with FK relationships pointing into this table (@see PE-1504). */ if (isJoinToBroadcastAndRequiresRedist(join)) { final RelationshipSpecification type = join.getType(); final TableStats rangeSide = (type.isLeftOuterJoin()) ? join.getRHS() : join.getLHS(); final Set<Set<TableColumn>> targetColumns = rangeSide.getUniqueTargetColumnGroups(); if (!targetColumns.isEmpty()) { final Set<TableColumn> rangeSideJoinColumns = (type.isLeftOuterJoin()) ? join.getRightColumns() : join.getLeftColumns(); return (join.isFkCompatible() && targetColumns.contains(rangeSideJoinColumns)); } } return join.isFkCompatible(); } private static TemplateRangeItem findRangeForTable(final Set<? extends TemplateRangeItem> ranges, final TableStats table) { for (final TemplateRangeItem range : ranges) { if (range.contains(table)) { return range; } } return null; } private static boolean hasRangeForTable(final Set<? extends TemplateRangeItem> ranges, final TableStats table) { return (findRangeForTable(ranges, table) != null); } private final CorpusStats schemaStats; private final Template base; private final PrintStream outputStream; private final Collection<TableStats> tableStatistics; private final Set<JoinStats> joinStatistics; private boolean enableWildcards; private boolean isVerbose; private boolean enableFksAsJoins; private boolean enableIdentTuples; private boolean enableUsingSorts; private boolean enableUsingWrites; private TemplateModelItem fallbackModel; public AiTemplateBuilder(final CorpusStats schemaStats, final Template base, final TemplateModelItem fallbackModel, final PrintStream outputStream) throws PEException { if ((schemaStats == null) || (fallbackModel == null) || (outputStream == null)) { throw new IllegalArgumentException(); } this.schemaStats = schemaStats; this.base = base; this.fallbackModel = fallbackModel; this.outputStream = outputStream; this.tableStatistics = schemaStats.getStatistics(); this.joinStatistics = schemaStats.getJoinsStatistics(); if (this.tableStatistics.isEmpty()) { throw new PEException("The schema contains no tables."); } } public void setWildcardsEnabled(final boolean enableWildcards) { this.enableWildcards = enableWildcards; } public void setVerbose(final boolean setVerbose) { this.isVerbose = setVerbose; } public void setForeignKeysAsJoins(final boolean enableFksAsJoins) { this.enableFksAsJoins = enableFksAsJoins; } public void setUseIdentTuples(final boolean enableIdentTuples) { this.enableIdentTuples = enableIdentTuples; } public void setUseSorts(final boolean enableUsingSorts) { this.enableUsingSorts = enableUsingSorts; } public void setUseWrites(final boolean enableUsingWrites) { this.enableUsingWrites = enableUsingWrites; } public void setFallbackModel(final TemplateModelItem model) { this.fallbackModel = model; } public List<Template> buildBroadcastCutoffTemplates(final List<String> databases, final long broadcastCardinalityCutoff, boolean isRowWidthWeightingEnabled) throws PEException { final List<Template> templates = new ArrayList<Template>(); for (final String database : databases) { templates.add(getTemplate(database, this.tableStatistics, broadcastCardinalityCutoff, isRowWidthWeightingEnabled)); } return templates; } public List<Template> buildTemplates(final List<String> databases, final Long broadcastCardinalityCutoff, final boolean followForeignKeys, final boolean isSafeMode, final boolean isRowWidthWeightingEnabled) throws Exception { log(getWelcomeMessage(databases, broadcastCardinalityCutoff, followForeignKeys, isSafeMode)); runPrePassAnalysis(databases, followForeignKeys, isSafeMode); identifyCandidateModels(broadcastCardinalityCutoff, isRowWidthWeightingEnabled); final Set<? extends TemplateRangeItem> ranges = identifyBestRanges(this.tableStatistics, this.joinStatistics, followForeignKeys, isSafeMode, isRowWidthWeightingEnabled); runPostPassAnalysis(this.tableStatistics, this.joinStatistics, ranges, followForeignKeys); final List<Template> templates = new ArrayList<Template>(); for (final String database : databases) { templates.add(getTemplate(database, this.tableStatistics, ranges)); } return templates; } private void identifyCandidateModels(final Long broadcastCardinalityCutoff, final boolean isRowWidthWeightingEnabled) throws Exception { log("Identifying candidate distribution models..."); if (broadcastCardinalityCutoff != null) { identifyCandidateModels(this.tableStatistics, broadcastCardinalityCutoff, isRowWidthWeightingEnabled); } else { identifyCandidateModels(this.tableStatistics, isRowWidthWeightingEnabled); } } private TemplateModelItem findBaseModel(final TableStats table) throws PEException { final TableTemplateType item = findBaseTemplateItem(table); if (item != null) { return getModelForName(item.getModel().value()); } return null; } /** * Check if the relationship is compatible with all user-specified ranges * (if any). */ private boolean isBaseRangeCompatible(final Relationship relationship) throws PEException { final Set<TableColumn> leftColumns = findBaseRangeColumns(relationship.getLHS()); final Set<TableColumn> rightColumns = findBaseRangeColumns(relationship.getRHS()); return (((leftColumns == null) || leftColumns.equals(relationship.getLeftColumns())) && ((rightColumns == null) || rightColumns.equals(relationship.getRightColumns()))); } private Set<TableColumn> findBaseRangeColumns(final TableStats table) throws PEException { final TableTemplateType item = findBaseTemplateItem(table); if (isRangeTableItem(item)) { final Set<String> dv = new LinkedHashSet<String>(item.getColumn()); return table.getColumns(dv); } return null; } private Set<CommonRange> getBaseRanges(final boolean isSafeMode) throws PEException { final Map<String, UserDefinedCommonRange> baseRanges = new LinkedHashMap<String, UserDefinedCommonRange>(); if (this.base != null) { for (final TableTemplateType tableItem : this.base.getTabletemplate()) { if (isRangeTableItem(tableItem)) { final TableStats table = this.schemaStats.findTable(new QualifiedName(tableItem.getMatch())); if (table != null) { final List<String> userDefinedDv = tableItem.getColumn(); if ((userDefinedDv != null) && !userDefinedDv.isEmpty()) { final String rangeName = tableItem.getRange(); UserDefinedCommonRange range = baseRanges.get(rangeName); if (range == null) { range = new UserDefinedCommonRange(rangeName, isSafeMode); baseRanges.put(rangeName, range); } final Set<String> dv = new LinkedHashSet<String>(tableItem.getColumn()); range.addUserDefinedDistribution(table, dv); } } } } } return new LinkedHashSet<CommonRange>(baseRanges.values()); } private static boolean isRangeTableItem(final TableTemplateType item) { return ((item != null) && item.getModel().equals(ModelType.RANGE)); } private TableTemplateType findBaseTemplateItem(final TableStats table) { if (this.base != null) { for (final TableTemplateType tableItem : this.base.getTabletemplate()) { final String name = tableItem.getMatch(); if (name.equals(table.getFullTableName())) { return tableItem; } } } return null; } private Template getTemplate(final String databaseName, final Collection<TableStats> tables, final long broadcastCardinalityCutoff, final boolean isRowWidthWeightingEnabled) throws PEException { for (final TableStats table : tables) { if (databaseName.equals(table.getSchemaName())) { setCardinalityBasedDistributionModel(table, broadcastCardinalityCutoff, Broadcast.SINGLETON_TEMPLATE_ITEM, Random.SINGLETON_TEMPLATE_ITEM, isRowWidthWeightingEnabled); } } // Ignore the safe mode - use whatever specified by the user. return getTemplate(databaseName, tables, this.getBaseRanges(false)); } /** * Set table distribution model based on its predicted cardinality. */ private void setCardinalityBasedDistributionModel(final TableStats table, final long broadcastCardinalityCutoff, final TemplateModelItem smallTableModel, final TemplateModelItem largeTableModel, final boolean isRowWidthWeightingEnabled) throws PEException { final TemplateModelItem baseModel = findBaseModel(table); if (baseModel != null) { table.setTableDistributionModel(baseModel); table.setDistributionModelFreezed(true); } else { if (table.getPredictedFutureSize(isRowWidthWeightingEnabled) > broadcastCardinalityCutoff) { table.setTableDistributionModel(largeTableModel); } else { table.setTableDistributionModel(smallTableModel); } } logTableDistributionModel(table, MessageSeverity.ALERT); } private Template getTemplate(final String databaseName, final Collection<TableStats> tables, final Set<? extends TemplateRangeItem> ranges) throws PEException { log("Building a template for '" + databaseName + "'..."); final SortedSet<TableStats> databaseTables = new TreeSet<TableStats>(); final SortedSet<TemplateRangeItem> databaseRanges = new TreeSet<TemplateRangeItem>( new Comparator<TemplateRangeItem>() { @Override public int compare(TemplateRangeItem a, TemplateRangeItem b) { return a.getTemplateItemName().compareTo(b.getTemplateItemName()); } }); for (final TableStats table : tables) { if (databaseName.equals(table.getSchemaName())) { final TemplateItem distributionModel = table.getTableDistributionModel(); databaseTables.add(table); if (distributionModel instanceof Range) { databaseRanges.add(findRangeForTable(ranges, table)); } } } final String commonNamePrefix = getCommonTableNamePrefix(databaseTables); final TemplateBuilder builder = new TemplateBuilder(databaseName); /* Append range declarations. */ for (final TemplateRangeItem range : databaseRanges) { final String rangeName = removeTableNamePrefix(range.getTemplateItemName(), commonNamePrefix); builder.withRequirement(builder.toCreateRangeStatement(rangeName, "#sg#", range.getUniqueColumnTypes())); } /* Append table items. */ for (final TableStats table : databaseTables) { final String tableName = replaceTableNamePrefix(table.getTableName(), commonNamePrefix); final TemplateItem distributionModel = table.getTableDistributionModel(); if (distributionModel instanceof Range) { final TemplateRangeItem tableRange = findRangeForTable(databaseRanges, table); final String rangeName = removeTableNamePrefix(tableRange.getTemplateItemName(), commonNamePrefix); final String[] rangeColumnNames = getColumnNames(tableRange.getRangeColumnsFor(table)).toArray(new String[] {}); builder.withRangeTable(tableName, rangeName, rangeColumnNames); } else { builder.withTable(tableName, distributionModel.getTemplateItemName()); } } return builder.toTemplate(); } private void runPrePassAnalysis(final List<String> databases, final boolean followForeignKeys, final boolean isSafeMode) { log("Performing pre-pass analysis..."); if (followForeignKeys && isSafeMode) { log("Template is generated with both <fk> and <safe> modes ON. This is a very constrained configuration which is likely to cause excessive broadcasting. Consider relaxing one of the constraints.", MessageSeverity.SEVERE); } for (final String database : databases) { checkFkRelationships(database, followForeignKeys); } checkCorpusCoverage(); } private void checkFkRelationships(final String database, final boolean followForeignKeys) { final Pair<Integer, Integer> numRelationshipsAndActions = getNumOfFkRelationshipsIn(database); final int numFkRelationships = numRelationshipsAndActions.getFirst(); final int numFkRelationshipsWithActions = numRelationshipsAndActions.getSecond(); if (followForeignKeys && (numFkRelationships == 0)) { log("There are no FK relationships in the schema '" + database + "'. The the <fk> flag is not necessary.", MessageSeverity.ALERT); } else if (!followForeignKeys && (numFkRelationships > 0)) { final MessageSeverity severity = (numFkRelationshipsWithActions > 0) ? MessageSeverity.SEVERE : MessageSeverity.WARNING; log("There are FK relationships in the schema '" + database + "'. Running the analysis without the <fk> flag ON is likely to produce invalid (non-collocated) templates.", severity); } if ((numFkRelationships > 0) && (numFkRelationshipsWithActions == 0)) { log("The FK relationships in the schema '" + database + "' do not have referential actions and may potentially be ignored.", MessageSeverity.ALERT); } } /** * Return the total number of FK relationships and the number of those with * referential actions found in the schema. */ private Pair<Integer, Integer> getNumOfFkRelationshipsIn(final String database) { int numFkRelationships = 0; int numFkRelationshipsWithActions = 0; for (final TableStats table : this.tableStatistics) { if (database.equals(table.getSchemaName())) { final Set<ForeignRelationship> tableFkRelationships = table.getForwardRelationships(); numFkRelationships += tableFkRelationships.size(); for (final ForeignRelationship relationship : tableFkRelationships) { if (relationship.hasReferentialActions()) { ++numFkRelationshipsWithActions; } } } } return new Pair<Integer, Integer>(numFkRelationships, numFkRelationshipsWithActions); } private void checkCorpusCoverage() { final double pcTablesWithStatements = MathUtils.round(this.schemaStats.getCorpusCoverage(), NUMBER_DISPLAY_PRECISION); MessageSeverity severity = MessageSeverity.INFO; if (pcTablesWithStatements < LOWER_CORPUS_COVERAGE_THRESHOLD_PC) { severity = MessageSeverity.SEVERE; } else if (pcTablesWithStatements < UPPER_CORPUS_COVERAGE_THRESHOLD_PC) { severity = MessageSeverity.WARNING; } log("Corpus coverage of " + this.schemaStats + " is " + pcTablesWithStatements + "%.", severity); } private void identifyCandidateModels(final Collection<TableStats> tables, final boolean isRowWidthWeightingEnabled) throws Exception { final SortedSet<Long> sortedCardinalities = new TreeSet<Long>(); final SortedSet<Long> uniqueOperationFrequencies = new TreeSet<Long>(); for (final TableStats table : tables) { sortedCardinalities.add(table.getPredictedFutureSize(isRowWidthWeightingEnabled)); uniqueOperationFrequencies.add(table.getWriteStatementCount()); } for (final TableStats table : tables) { final TemplateModelItem baseModel = findBaseModel(table); if (baseModel != null) { table.setTableDistributionModel(baseModel); table.setDistributionModelFreezed(true); logTableDistributionModel(table, MessageSeverity.ALERT); } else { final double sortsWeight = BooleanUtils.toInteger(this.enableUsingSorts); final double writesWeight = BooleanUtils.toInteger(isUsingWrites(table, this.enableUsingWrites)); final ImmutableMap<FlvName, Double> ruleWeights = ImmutableMap.<FlvName, Double> of( FuzzyTableDistributionModel.Variables.SORTS_FLV_NAME, sortsWeight, FuzzyTableDistributionModel.Variables.WRITES_FLV_NAME, writesWeight ); final List<FuzzyTableDistributionModel> modelsSortedByScore = FuzzyLinguisticVariable .evaluateDistributionModels(ruleWeights, new Broadcast(table, uniqueOperationFrequencies, sortedCardinalities, isRowWidthWeightingEnabled), new Range(table, uniqueOperationFrequencies, sortedCardinalities, isRowWidthWeightingEnabled)); table.setTableDistributionModel(Collections.max(modelsSortedByScore, FuzzyLinguisticVariable.getScoreComparator())); log(table.toString().concat(": ").concat(StringUtils.join(modelsSortedByScore, ", "))); } } } private void identifyCandidateModels(final Collection<TableStats> tables, final long broadcastCardinalityCutoff, final boolean isRowWidthWeightingEnabled) throws Exception { for (final TableStats table : tables) { setCardinalityBasedDistributionModel(table, broadcastCardinalityCutoff, Broadcast.SINGLETON_TEMPLATE_ITEM, Range.SINGLETON_TEMPLATE_ITEM, isRowWidthWeightingEnabled); } } /* * Try to identify the best ranges based on the following filter order. * 1. JOIN * 2. WHERE * 3. GROUP BY */ private Set<? extends TemplateRangeItem> identifyBestRanges( final Collection<TableStats> tables, final Set<JoinStats> joins, boolean followForeignKeys, final boolean isSafeMode, final boolean isRowWidthWeightingEnabled) throws PEException { log("Identifying distribution ranges..."); log("Processing join statistics..."); final Ranges rangeToRangeRanges = new Ranges(isSafeMode, isRowWidthWeightingEnabled); /* Preload user defined ranges. */ for (final CommonRange range : getBaseRanges(isSafeMode)) { rangeToRangeRanges.add(range); } Set<TemplateRangeItem> topRangeToRangeTopRanges = null; if (followForeignKeys) { /* Handle special foreign relationship cases. */ resolveForeignCollocationConflicts(tables, isRowWidthWeightingEnabled); /* Add joins compatible with the FK relationships. */ for (final JoinStats join : joins) { if (isFkCompatibleJoin(join)) { addRangeToRangeRelationship(join, rangeToRangeRanges); } } rangeToRangeRanges.generateAllPossibleCombinations(); /* Add FK relationships. */ for (final TableStats table : tables) { for (final ForeignRelationship relationship : table.getForwardRelationships()) { addRangeToRangeRelationship(relationship, rangeToRangeRanges); } } topRangeToRangeTopRanges = getTopRanges(rangeToRangeRanges); /* All Range tables with FK and without a range -> Broadcast. */ for (final TableStats table : tables) { final FuzzyTableDistributionModel tableModel = (FuzzyTableDistributionModel) table.getTableDistributionModel(); if ((tableModel instanceof Range) && !table.getBackwardRelationships().isEmpty() && !hasRangeForTable(topRangeToRangeTopRanges, table)) { table.setTableDistributionModel(new Broadcast(tableModel)); } } } else { for (final JoinStats join : joins) { addRangeToRangeRelationship(join, rangeToRangeRanges); } if (this.enableFksAsJoins) { /* Add FK relationships. */ for (final TableStats table : tables) { for (final ForeignRelationship relationship : table.getForwardRelationships()) { addRangeToRangeRelationship(relationship, rangeToRangeRanges); } } } rangeToRangeRanges.generateAllPossibleCombinations(); topRangeToRangeTopRanges = getTopRanges(rangeToRangeRanges); } log("Processing orphaned range tables..."); for (final TableStats table : tables) { final TemplateModelItem model = table.getTableDistributionModel(); /* * This Range candidate has no range. * Either its proposed range was not identified as optimal * or it is only being joined to Broadcast tables. */ if ((model instanceof Range) && !hasRangeForTable(topRangeToRangeTopRanges, table)) { final ColorStringBuilder logMessage = new ColorStringBuilder(); logMessage.append(getTableNameAndDistributionModel(table)).append(" ("); /* * Try to range on OUTER JOIN columns if to a Broadcast * table and would require redistribution. */ TemplateRangeItem newRange = PrivateRange.fromOuterJoinColumns(table, joins, topRangeToRangeTopRanges, isSafeMode); if (newRange != null) { topRangeToRangeTopRanges.add(newRange); logMessage.append("ranged on OUTER JOIN column(s): ").append(toStringOfDelimitedColumnNames(newRange.getRangeColumnsFor(table))); } final boolean hasRedistOperations = !getRedistOperations(table, joins, topRangeToRangeTopRanges).isEmpty(); final boolean mayCauseExcessiveLocking = hasExcessiveLocking(table, this.enableUsingWrites); if (!this.fallbackModel.isBroadcast() || !hasRedistOperations || mayCauseExcessiveLocking) { if (this.fallbackModel.isBroadcast()) { logMessage.append("fallback model override: ", MessageSeverity.ALERT.getColor()); if (!hasRedistOperations) { logMessage.append(" no redistribution required", MessageSeverity.ALERT.getColor()); } else if (mayCauseExcessiveLocking) { logMessage.append(" broadcasting may lead to reduced concurrency", MessageSeverity.ALERT.getColor()); } logMessage.append(" -> "); } /* * Try to Range on identity columns instead. */ if (newRange == null) { if (table.hasIdentColumns() || table.hasGroupByColumns()) { newRange = PrivateRange.fromWhereColumns(table, topRangeToRangeTopRanges, isSafeMode, this.enableIdentTuples); if (newRange == null) { newRange = PrivateRange.fromGroupByColumns(table, topRangeToRangeTopRanges, isSafeMode); } if (newRange != null) { topRangeToRangeTopRanges.add(newRange); logMessage.append("constant filter column ").append(toStringOfDelimitedColumnNames(newRange.getRangeColumnsFor(table))) .append(" found"); } } } /* * Look for AI columns and similar columns used in existing * ranges. */ if (newRange == null) { newRange = PrivateRange.fromAllColumns(table, topRangeToRangeTopRanges, isSafeMode); if (newRange != null) { final Set<TableColumn> columnSet = newRange .getRangeColumnsFor(table); if (hasAutoIncrement(columnSet)) { topRangeToRangeTopRanges.add(newRange); logMessage.append("AI range column ").append(toStringOfDelimitedColumnNames(newRange.getRangeColumnsFor(table))) .append(" found"); } else { boolean found = false; for (final TemplateRangeItem range : topRangeToRangeTopRanges) { if (range.hasCommonColumn(columnSet)) { topRangeToRangeTopRanges.add(newRange); found = true; logMessage.append("ranged on foreign range column(s): ", MessageSeverity.WARNING.getColor()).append( toStringOfDelimitedColumnNames(newRange.getRangeColumnsFor(table)), MessageSeverity.WARNING.getColor()); break; } } if (!found) { newRange = null; } } } } } /* * Fall back. * Frozen Range distribution model at this stage means we have to make the table Random. */ if (newRange == null) { table.setTableDistributionModel((!table.hasDistributionModelFreezed() && this.fallbackModel.isBroadcast() && !mayCauseExcessiveLocking) ? Broadcast.SINGLETON_TEMPLATE_ITEM : Random.SINGLETON_TEMPLATE_ITEM); logMessage.append("no suitable range columns found", MessageSeverity.WARNING.getColor()); } log(logMessage.append(")").toString()); } } return topRangeToRangeTopRanges; } private void runPostPassAnalysis( final Collection<TableStats> tables, final Set<JoinStats> joins, Set<? extends TemplateRangeItem> ranges, boolean followForeignKeys) throws PEException { log("Performing post-pass analysis..."); for (final TableStats table : tables) { final Set<ForeignRelationship> nonCollocatedFks = getNonCollocatedFks(table, ranges); if (!nonCollocatedFks.isEmpty()) { final Set<ForeignRelationship> userOverriden = new LinkedHashSet<ForeignRelationship>(nonCollocatedFks.size()); for (final ForeignRelationship fk : nonCollocatedFks) { final TableStats lhs = fk.getLHS(); final TableStats rhs = fk.getRHS(); if (lhs.hasDistributionModelFreezed() || rhs.hasDistributionModelFreezed()) { userOverriden.add(fk); } } MessageSeverity severity = MessageSeverity.WARNING; if (followForeignKeys) { final Set<ForeignRelationship> nonOverriden = Sets.difference(nonCollocatedFks, userOverriden); if (!nonOverriden.isEmpty()) { throw new PEException("Failed to collocate all foreign keys: " + StringUtils.join(nonOverriden, ", ")); } severity = MessageSeverity.SEVERE; log("The custom base template you specified breaks the collocation rules on the following FK relationships.", severity); } else { log("The following FKs are not collocated.", severity); log("You may need to re-run the analysis with <fk> mode ON.", severity); } for (final ForeignRelationship fk : nonCollocatedFks) { log(fk.toString(), severity, 1); } } if (hasExcessiveBroadcastLocking(table, this.enableUsingWrites)) { log("Broadcasting a frequently written table " + table.toString() + " may lead to reduced concurrency.", MessageSeverity.WARNING); } final MultiMap<RedistCause, Object> operations = getRedistOperations(table, joins, ranges); if (!operations.isEmpty()) { final StringBuilder message = new StringBuilder(); message.append("The following operations on table ").append(table); if (table.hasDistributionModelFreezed()) { message.append(" distributed based on a user-defined model ").append(table.getTableDistributionModel()); } message.append(" still require redistribution."); log(message.toString(), MessageSeverity.WARNING); for (final RedistCause cause : operations.keySet()) { for (final Object operation : operations.get(cause)) { log(cause.print(operation), MessageSeverity.WARNING, 1); } } } } } /** * Joins to Broadcast tables will always be collocated. * INNER JOINs to Broadcast tables should not trigger redistribution. * OUTER JOINs with the first table being Broadcast tables will trigger * redistribution of the second (Range) table unless it is ranged on the * joined column. This is to prevent getting duplicate rows from the first * (Broadcast) table. */ private void addRangeToRangeRelationship(final Relationship relationship, final Ranges rangeToRangeRanges) throws PEException { if ((isRangeToRangeRelationship(relationship) || isJoinToBroadcastAndRequiresRedist(relationship)) && isBaseRangeCompatible(relationship)) { rangeToRangeRanges.addJoin(relationship); } } private Set<TemplateRangeItem> getTopRanges(final Ranges ranges) { if (!ranges.isEmpty()) { final SortedSet<Long> sortedJoinCardinalities = new TreeSet<Long>(ranges.getPredictedFutureJoinCardinalities()); final Set<Long> uniqueJoinFrequencies = new HashSet<Long>(ranges.getJoinFrequencies()); ranges.evaluate(uniqueJoinFrequencies, sortedJoinCardinalities); } /* * Retain only the highest score ranges, * removing the lower score alternatives. */ final List<CommonRange> sortedRanges = ranges.getSortedCommonRanges(); final Set<TemplateRangeItem> topRanges = new TreeSet<TemplateRangeItem>(new Comparator<TemplateRangeItem>() { @Override public int compare(TemplateRangeItem a, TemplateRangeItem b) { return a.getTemplateItemName().compareTo(b.getTemplateItemName()); } }); while (!sortedRanges.isEmpty()) { final CommonRange top = Collections.max(sortedRanges, FuzzyLinguisticVariable.getScoreComparator()); if ((top.getScore() > 0.0) || (top instanceof UserDefinedCommonRange)) { topRanges.add(top); final ColorStringBuilder progress = new ColorStringBuilder(); progress.append("Overlapping common ranges: ").append(LINE_SEPARATOR); final List<CommonRange> overlappingRanges = Ranges.getOverlapping(sortedRanges, top); sortedRanges.removeAll(overlappingRanges); for (final CommonRange groupRange : overlappingRanges) { progress.append(groupRange); progress.append(LINE_SEPARATOR); } log(progress.toString()); } else { /* * All ranges have zero score. * Most likely because we are in the safe mode * considering only auto-increment columns. * Move on and try to find private ranges for the tables * based on other (ident) columns or fall back to Random. */ sortedRanges.clear(); } } return topRanges; } /** * Handles collocation special cases. * * 1. A table is being referenced on a single column group. * * a) The table is referenced by two or more unique column groups from a * single table. * The solution which preserves collocation is to make the * table and all the tables it points at Broadcast. * * b) The table has unique foreign and target column groups. In other words, * the table is being pointed at and points on two or more unique column * groups. * The only solution is making the pointed tables * and all their descendants Broadcast. * * 2. A table is being referenced on two or more unique column groups. The * only solution in this case is making the table and all the tables it * points at Broadcast. * * * NOTE: Basic one-to-one collocation cases: * * a) Range -> Broadcast: always collocated * b) Range -> Range: collocated only if in the same range. * c) Broadcast -> Range: make the referenced table Broadcast (a), or * colocate the two tables on the same range (b). * * NOTE: Same rules hold for self-referencing relationships (table with a * foreign key into itself). */ private void resolveForeignCollocationConflicts(final Collection<TableStats> tables, final boolean isRowWidthWeightingEnabled) throws PEException { log("Resolving FK collocation..."); /* * Make sure there are no Broadcast -> Range relationships (c) by making * both tables Range or Broadcast if cannot range (user defined). */ for (final TableStats table : tables) { if (table.getTableDistributionModel() instanceof Range) { for (final TableStats childTable : table.getReferencingForeignTables()) { if (childTable.getTableDistributionModel() instanceof Broadcast) { if (!childTable.hasDistributionModelFreezed()) { final Set<TableStats> affectedTables = makeBackwardTableTreeRange(childTable); log("FK forced range: range table '" + table.getFullTableName() + "' is referenced by a broadcast table '" + childTable.getFullTableName() + "'. Had to range '" + affectedTables.size() + "' table(s).", MessageSeverity.ALERT); } else { final Set<TableStats> forcedBroadcastTables = makeForwardTableTreeBroadcast(childTable); log("FK forced broadcast: Could not range table '" + childTable.getFullTableName() + "' (user defined). Had to broadcast '" + forcedBroadcastTables.size() + "' table(s) with total size of '" + CorpusStats.computeTotalSizeKb(forcedBroadcastTables) + "KB'", MessageSeverity.WARNING); forcedBroadcastTables.addAll(forcedBroadcastTables); } } } } } /* * Now, we should have only Range -> Broadcast (a) and Range -> Range * (b) relationships. */ final SortedSet<TableStats> forcedBroadcastTables = new TreeSet<TableStats>( Collections.reverseOrder(new TableSizeComparator(isRowWidthWeightingEnabled))); /* Resolve the special cases. */ for (final TableStats table : tables) { /* * Here we handle only the Range -> Range (b) case. * Range -> Broadcast (a) is always collocated and we never change * the parent table's model to anything other than Broadcast. */ if (!table.hasDistributionModelFreezed() && (table.getTableDistributionModel() instanceof Range)) { final Set<Set<TableColumn>> uniqueTargetColumnGroups = table .getUniqueTargetColumnGroups(); if (uniqueTargetColumnGroups.size() == 1) { // Case (1) /* Case (1a) */ final Set<ForeignRelationship> backwardRelationships = table.getBackwardRelationships(); for (@SuppressWarnings("unused") final Set<TableColumn> targetColumnGroup : uniqueTargetColumnGroups) { final Set<TableStats> visitedReferencingTables = new HashSet<TableStats>(); for (final ForeignRelationship relationship : backwardRelationships) { final TableStats targetTable = relationship.getRHS(); if (!visitedReferencingTables.add(targetTable)) { final Set<TableStats> affectedTables = makeForwardTableTreeBroadcast(table); log("FK forced broadcast: table '" + table.getFullTableName() + "' referenced by '" + targetTable.getFullTableName() + "' on two or more unique column groups. Had to broadcast '" + affectedTables.size() + "' table(s) with total size of '" + CorpusStats.computeTotalSizeKb(affectedTables) + "KB'", MessageSeverity.WARNING); forcedBroadcastTables.addAll(affectedTables); break; } } } if (!(table.getTableDistributionModel() instanceof Range)) { continue; // The case already resolved from above. } /* Case (1b) */ final Set<ForeignRelationship> forwardRelationships = table.getForwardRelationships(); final Set<TableStats> affectedTables = new LinkedHashSet<TableStats>(); for (final Set<TableColumn> targetColumnGroup : uniqueTargetColumnGroups) { for (final ForeignRelationship relationship : forwardRelationships) { if (!targetColumnGroup.equals(relationship.getForeignColumns())) { final TableStats targetTable = relationship.getRHS(); affectedTables.addAll(makeForwardTableTreeBroadcast(targetTable)); } } } if (!affectedTables.isEmpty()) { log("FK forced broadcast: table '" + table.getFullTableName() + "' has unique foreign and target column groups. Had to broadcast '" + affectedTables.size() + "' table(s) with total size of '" + CorpusStats.computeTotalSizeKb(affectedTables) + "KB'", MessageSeverity.WARNING); forcedBroadcastTables.addAll(affectedTables); } } else if (uniqueTargetColumnGroups.size() > 1) { // Case (2) final Set<TableStats> affectedTables = makeForwardTableTreeBroadcast(table); log("FK forced broadcast: table '" + table.getFullTableName() + "' referenced on two or more unique column groups. Had to broadcast '" + affectedTables.size() + "' table(s) with total size of '" + CorpusStats.computeTotalSizeKb(affectedTables) + "KB'", MessageSeverity.WARNING); forcedBroadcastTables.addAll(affectedTables); } } } /* Print out broadcasted tables. */ log("The following tables were forced broadcast:", MessageSeverity.WARNING); for (final TableStats table : forcedBroadcastTables) { log(table.toString(), MessageSeverity.WARNING, 1); } } private Set<TableStats> makeBackwardTableTreeRange(final TableStats root) throws PEException { final Set<TableStats> traversed = new LinkedHashSet<TableStats>(); makeBackwardTableTreeRange(root, traversed); return traversed; } private void makeBackwardTableTreeRange(final TableStats root, final Set<TableStats> traversedNodes) throws PEException { traversedNodes.add(root); for (final TableStats table : root.getReferencingForeignTables()) { if (!(table.getTableDistributionModel() instanceof Range)) { traversedNodes.add(table); /* Change model first, then recurse to avoid cycles. */ table.setTableDistributionModel(new Range((FuzzyTableDistributionModel) table.getTableDistributionModel())); makeBackwardTableTreeRange(table, traversedNodes); } } root.setTableDistributionModel(new Range((FuzzyTableDistributionModel) root.getTableDistributionModel())); } private Set<TableStats> makeForwardTableTreeBroadcast(final TableStats root) throws PEException { final Set<TableStats> traversed = new LinkedHashSet<TableStats>(); makeForwardTableTreeBroadcast(root, traversed); return traversed; } private void makeForwardTableTreeBroadcast(final TableStats root, final Set<TableStats> traversedNodes) throws PEException { traversedNodes.add(root); for (final TableStats table : root.getReferencedForeignTables()) { if (!(table.getTableDistributionModel() instanceof Broadcast)) { traversedNodes.add(table); /* Change model first, then recurse to avoid cycles. */ table.setTableDistributionModel(new Broadcast((FuzzyTableDistributionModel) table.getTableDistributionModel())); makeForwardTableTreeBroadcast(table, traversedNodes); } } if (!root.hasDistributionModelFreezed() || root.getTableDistributionModel().isBroadcast()) { root.setTableDistributionModel(new Broadcast((FuzzyTableDistributionModel) root.getTableDistributionModel())); } else { // We cannot safely collocate due to user defined model constraints. // There is likely a non-resolvable cycle in the base template models. // Skip this table and proceed like normal with whatever the user gave us. log("Could not broadcast table '" + root.getFullTableName() + "' (user defined). The resultant template may not be collocated.", MessageSeverity.SEVERE); } } private String toStringOfDelimitedColumnNames(final Set<TableColumn> columns) { return "'" + StringUtils.join(getColumnNames(columns), "', '") + "'"; } private List<String> getColumnNames(final Set<TableColumn> columns) { final List<String> names = new ArrayList<String>(); for (final TableColumn column : columns) { names.add(column.getName().getUnquotedName().get()); } return names; } private String getCommonTableNamePrefix(final SortedSet<TableStats> tables) { final String firstName = tables.first().getTableName(); final String lastName = tables.last().getTableName(); final int minLength = Math.min(firstName.length(), lastName.length()); for (int i = 0; i < minLength; ++i) { if ((i > (TABLE_NAME_MIN_PREFIX_LENGTH - 1)) && (firstName.charAt(i) != lastName.charAt(i))) { return firstName.substring(0, i); } } return firstName.substring(0, minLength); } private String removeTableNamePrefix(final String value, final String prefix) { return replaceTableNamePrefix(value, prefix, ""); } private String replaceTableNamePrefix(final String value, final String prefix) { return replaceTableNamePrefix(value, prefix, TABLE_NAME_WILDCARD); } private String replaceTableNamePrefix(final String value, final String prefix, final String replacement) { // Exclude prefixes shorter than 3 chars. if (this.enableWildcards && !value.equals(prefix) && (prefix.length() > TABLE_NAME_MIN_PREFIX_LENGTH)) { return value.replaceFirst(prefix, replacement); } return value; } private void logTableDistributionModel(final TableStats table, final MessageSeverity severity) { log(getTableNameAndDistributionModel(table), severity); } private void log(final String message) { log(message, MessageSeverity.INFO); } private void log(final String message, final MessageSeverity severity) { log(message, severity, 0); } private void log(final String message, final MessageSeverity severity, final int numIndents) { logger.log(severity.getLogLevel(), message); if (this.isVerbose && (this.outputStream != null)) { CLIBuilder.printInColor(StringUtils.repeat(LINE_INDENT, numIndents).concat(severity.toString()).concat(": ").concat(message), severity.getColor(), this.outputStream); } } }