/* * Copyright (c) 2011-2015 EPFL DATA Laboratory * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE) * * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ch.epfl.data.squall.ewh.examples; import java.util.Arrays; import java.util.List; import java.util.Map; import org.apache.log4j.Logger; import ch.epfl.data.squall.components.Component; import ch.epfl.data.squall.components.DataSourceComponent; import ch.epfl.data.squall.components.theta.ThetaJoinComponentFactory; import ch.epfl.data.squall.ewh.components.DummyComponent; import ch.epfl.data.squall.expressions.ColumnReference; import ch.epfl.data.squall.expressions.Multiplication; import ch.epfl.data.squall.expressions.ValueExpression; import ch.epfl.data.squall.expressions.ValueSpecification; import ch.epfl.data.squall.operators.PrintOperator; import ch.epfl.data.squall.operators.ProjectOperator; import ch.epfl.data.squall.predicates.ComparisonPredicate; import ch.epfl.data.squall.query_plans.QueryBuilder; import ch.epfl.data.squall.query_plans.QueryPlan; import ch.epfl.data.squall.query_plans.ThetaQueryPlansParameters; import ch.epfl.data.squall.types.DateIntegerType; import ch.epfl.data.squall.types.IntegerType; import ch.epfl.data.squall.types.NumericType; import ch.epfl.data.squall.types.StringType; import ch.epfl.data.squall.types.Type; import ch.epfl.data.squall.utilities.MyUtilities; import ch.epfl.data.squall.utilities.SystemParameters; import ch.epfl.data.squall.utilities.SystemParameters.HistogramType; public class ThetaEWHBandOrdersOrderkeyCustkeyJoin extends QueryPlan { private static Logger LOG = Logger .getLogger(ThetaEWHBandOrdersOrderkeyCustkeyJoin.class); private QueryBuilder _queryBuilder = new QueryBuilder(); private static final Type<String> _stringConv = new StringType(); private static final IntegerType _ic = new IntegerType(); private DateIntegerType _dic = new DateIntegerType(); // Bicb public ThetaEWHBandOrdersOrderkeyCustkeyJoin(String dataPath, String extension, Map conf) { // ORDERS * ORDERS on orderkey equi // I = 2 * 15M = 30M; O = // Variability is [0, 10] * skew // baseline z1: MBucket 102s with 8 joiners doing nothing, EWH uses only // 3-4 joiners due to large candidate no-output-sample rounded matrix // cells // B: z1 + firstProject / 10: Output is around 120M tuples. From 283s to // 235s (bsp-i). Should compare with 1B as this is output-dominated // C(based on A): z2 + secondProject * 10: Output is around 11M tuples. // From 140s to 128s (bsp-i). // D(based on A): z3 + secondProject * 10: Output is around 11M tuples. // From 153s to 147s (bsp-i). // BEST: E(based on A): z1 + secondProject * 10, comparisonValue = 2: // Output is around 18M tuples. From 152s to 131s (bsp-i), great // result!!!. // in // orders_orderkey_custkey_band/orderkey_custkey_band_16j_z1_abs2_project10 // A: z1 + secondProject * 10: Output is around 11M tuples. From 120s to // 105s (bsp-i), great result! // F(based on A): z0 + secondProject * 10, comparisonValue = 2: Output // is around 18M tuples. From 134s to 122s (bsp-i). // creates materialized relations boolean printSelected = MyUtilities.isPrintFilteredLast(conf); String matName1 = "n_bbosc_1"; String matName2 = "n_bbosc_2"; PrintOperator print1 = printSelected ? new PrintOperator(matName1 + extension, conf) : null; PrintOperator print2 = printSelected ? new PrintOperator(matName2 + extension, conf) : null; // read from materialized relations boolean isMaterialized = SystemParameters.isExisting(conf, "DIP_MATERIALIZED") && SystemParameters.getBoolean(conf, "DIP_MATERIALIZED"); boolean isOkcanSampling = SystemParameters.isExisting(conf, "DIP_SAMPLING") && SystemParameters.getBoolean(conf, "DIP_SAMPLING"); boolean isEWHSampling = SystemParameters.isExisting(conf, "DIP_EWH_SAMPLING") && SystemParameters.getBoolean(conf, "DIP_EWH_SAMPLING"); boolean isEWHD2Histogram = SystemParameters.getBooleanIfExist(conf, HistogramType.D2_COMB_HIST.genConfEntryName()); boolean isEWHS1Histogram = SystemParameters.getBooleanIfExist(conf, HistogramType.S1_RES_HIST.genConfEntryName()); boolean isSrcHistogram = isEWHD2Histogram || isEWHS1Histogram; Component relationOrders1, relationOrders2; // Project on shipdate , receiptdate, commitdate, shipInstruct, quantity ColumnReference col1 = new ColumnReference(_ic, 0); ColumnReference col2 = new ColumnReference(_ic, 2); ColumnReference col3 = new ColumnReference(_ic, 3); ColumnReference col4 = new ColumnReference(_ic, 4); ColumnReference col5 = new ColumnReference(_ic, 5); // A ColumnReference j1 = new ColumnReference(_ic, 0); ValueExpression j2 = new Multiplication( new ValueSpecification(_ic, 10), new ColumnReference(_ic, 1)); // B /* * ValueExpression j1 = new Division( new ColumnReference(_ic, 0), new * ValueSpecification(_ic, 10) ); ColumnReference j2 = new * ColumnReference(_ic, 1); */ ProjectOperator projectionLineitem1 = new ProjectOperator(col1, col2, col3, col4, col5, j1); ProjectOperator projectionLineitem2 = new ProjectOperator(col1, col2, col3, col4, col5, j2); // ProjectOperator projectionLineitem1 = new ProjectOperator(new int[] // {0, 2, 3, 4, 5, 0}); // ProjectOperator projectionLineitem2 = new ProjectOperator(new int[] // {0, 2, 3, 4, 5, 1}); final List<Integer> hashLineitem = Arrays.asList(5); if (!isMaterialized) { relationOrders1 = new DataSourceComponent("ORDERS1", dataPath + "orders" + extension).add(print1) .add(projectionLineitem1).setOutputPartKey(hashLineitem); _queryBuilder.add(relationOrders1); relationOrders2 = new DataSourceComponent("ORDERS2", dataPath + "orders" + extension).add(print2) .add(projectionLineitem2).setOutputPartKey(hashLineitem); _queryBuilder.add(relationOrders2); } else { // WATCH OUT ON PROJECTIONS AFTER MATERIALIZATIONS relationOrders1 = new DataSourceComponent("ORDERS1", dataPath + matName1 + extension).add(projectionLineitem1) .setOutputPartKey(hashLineitem); _queryBuilder.add(relationOrders1); relationOrders2 = new DataSourceComponent("LINEITEM2", dataPath + matName2 + extension).add(projectionLineitem2) .setOutputPartKey(hashLineitem); _queryBuilder.add(relationOrders2); } NumericType keyType = _ic; int comparisonValue = 2; if (SystemParameters.isExisting(conf, "COMPARISON_VALUE")) { comparisonValue = SystemParameters.getInt(conf, "COMPARISON_VALUE"); LOG.info("ComparisonValue read from the config file: " + comparisonValue); } ComparisonPredicate comparison = new ComparisonPredicate( ComparisonPredicate.SYM_BAND_WITH_BOUNDS_OP, comparisonValue, keyType); int firstKeyProject = 5; int secondKeyProject = 5; /* * ValueExpression ve1 = new ColumnReference(keyType, firstKeyProject); * ValueExpression ve2 = new Multiplication( new * ValueSpecification(keyType, secondMult), new ColumnReference(keyType, * secondKeyProject)); ProjectOperator project1 = new * ProjectOperator(ve1); ProjectOperator project2 = new * ProjectOperator(ve2); */ if (printSelected) { relationOrders1.setPrintOut(false); relationOrders2.setPrintOut(false); } else if (isSrcHistogram) { _queryBuilder = MyUtilities.addSrcHistogram(relationOrders1, firstKeyProject, relationOrders2, secondKeyProject, keyType, comparison, isEWHD2Histogram, isEWHS1Histogram, conf); } else if (isOkcanSampling) { _queryBuilder = MyUtilities.addOkcanSampler(relationOrders1, relationOrders2, firstKeyProject, secondKeyProject, _queryBuilder, keyType, comparison, conf); } else if (isEWHSampling) { _queryBuilder = MyUtilities.addEWHSampler(relationOrders1, relationOrders2, firstKeyProject, secondKeyProject, _queryBuilder, keyType, comparison, conf); } else { final int Theta_JoinType = ThetaQueryPlansParameters .getThetaJoinType(conf); final ColumnReference colO1 = new ColumnReference(keyType, firstKeyProject); final ColumnReference colO2 = new ColumnReference(keyType, secondKeyProject); ComparisonPredicate pred5 = new ComparisonPredicate( ComparisonPredicate.NONGREATER_OP, colO1, colO2, comparisonValue, ComparisonPredicate.BPLUSTREE); // AggregateCountOperator agg = new AggregateCountOperator(conf); Component lastJoiner = ThetaJoinComponentFactory .createThetaJoinOperator(Theta_JoinType, relationOrders1, relationOrders2, _queryBuilder) .setJoinPredicate(pred5) .setContentSensitiveThetaJoinWrapper(keyType); // .addOperator(agg) // lastJoiner.setPrintOut(false); DummyComponent dummy = new DummyComponent(lastJoiner, "DUMMY"); _queryBuilder.add(dummy); } } @Override public QueryBuilder getQueryPlan() { return _queryBuilder; } }