/*
 * Copyright (c) 2011-2015 EPFL DATA Laboratory
 * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
 *
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ch.epfl.data.squall.ewh.examples;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

import ch.epfl.data.squall.components.Component;
import ch.epfl.data.squall.components.DataSourceComponent;
import ch.epfl.data.squall.components.theta.ThetaJoinComponentFactory;
import ch.epfl.data.squall.ewh.components.DummyComponent;
import ch.epfl.data.squall.expressions.ColumnReference;
import ch.epfl.data.squall.expressions.LongPhone;
import ch.epfl.data.squall.expressions.ValueExpression;
import ch.epfl.data.squall.operators.PrintOperator;
import ch.epfl.data.squall.operators.ProjectOperator;
import ch.epfl.data.squall.predicates.ComparisonPredicate;
import ch.epfl.data.squall.query_plans.QueryBuilder;
import ch.epfl.data.squall.query_plans.QueryPlan;
import ch.epfl.data.squall.query_plans.ThetaQueryPlansParameters;
import ch.epfl.data.squall.types.LongType;
import ch.epfl.data.squall.types.NumericType;
import ch.epfl.data.squall.types.StringType;
import ch.epfl.data.squall.types.Type;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.utilities.SystemParameters;
import ch.epfl.data.squall.utilities.SystemParameters.HistogramType;

/**
 * Query plan that reads the customer relation twice ("CUSTOMER1" and
 * "CUSTOMER2") and equi-joins the two copies on a key derived from column 4
 * through {@link LongPhone}.
 *
 * <p>
 * Depending on the configuration, the plan ends in exactly one of: printing
 * the filtered sources, a source histogram, an Okcan sampler, an EWH sampler,
 * or the theta join itself followed by a {@link DummyComponent}.
 *
 * <p>
 * Original note: a candidate for new Eocd for the new Linux cluster.
 */
public class ThetaEWHCustomerJoin extends QueryPlan {

    private QueryBuilder _queryBuilder = new QueryBuilder();

    private static final Type<String> _stringConv = new StringType();
    private static final LongType _lc = new LongType();

    /**
     * Assembles the plan. Original note on the candidate keys: phone and
     * acctbal.
     *
     * @param dataPath
     *            prefix prepended to every input file name
     * @param extension
     *            suffix appended to every input/output file name
     * @param conf
     *            configuration map consulted for the DIP_* switches, the
     *            histogram entries and the theta join type
     */
    public ThetaEWHCustomerJoin(String dataPath, String extension, Map conf) {
        // creates materialized relations
        final boolean printFiltered = MyUtilities.isPrintFilteredLast(conf);
        final String materialized1 = "cphone_1";
        final String materialized2 = "cphone_2";
        PrintOperator printer1 = null;
        if (printFiltered) {
            printer1 = new PrintOperator(materialized1 + extension, conf);
        }
        PrintOperator printer2 = null;
        if (printFiltered) {
            printer2 = new PrintOperator(materialized2 + extension, conf);
        }

        // read from materialized relations
        final boolean readMaterialized = flagEnabled(conf, "DIP_MATERIALIZED");
        final boolean okcanSampling = flagEnabled(conf, "DIP_SAMPLING");
        final boolean ewhSampling = flagEnabled(conf, "DIP_EWH_SAMPLING");
        final boolean d2Histogram = SystemParameters.getBooleanIfExist(conf,
                HistogramType.D2_COMB_HIST.genConfEntryName());
        final boolean s1Histogram = SystemParameters.getBooleanIfExist(conf,
                HistogramType.S1_RES_HIST.genConfEntryName());
        final boolean srcHistogram = d2Histogram || s1Histogram;

        // Project on phone(key), custkey and name.
        //
        // Notes kept from earlier experiments with the key expression:
        //
        // with z1:
        // - LongPhone(4, 6): MB works perfectly - not enough output skew
        // - LongPhone(4, 5): too large output + MB works perfectly - not
        // enough output skew
        // - DoubleToInt(5) (acctbal): too large output
        // - DoubleToInt(5) (acctbal) with selectivity 1 MKSEGMENT: too large
        // output; that variant used
        // SelectOperator(ComparisonPredicate(EQUAL_OP,
        // ColumnReference(_stringConv, 6),
        // ValueSpecification(_stringConv, "BUILDING"))) on the first source
        // and the same predicate with "MACHINERY" on the second source
        //
        // with z2:
        // - LongPhone(4, 7): MBucket on 10G faster than 1Bucket - no output
        // skew
        // - LongPhone(4, 6): MBucket on 80G slower only 50% than 1Bucket -
        // too large output (7291M)
        ValueExpression phoneKey = new LongPhone(4); // output = input
        ValueExpression custKey = new ColumnReference(_stringConv, 0);
        ValueExpression custName = new ColumnReference(_stringConv, 1);
        ProjectOperator projection = new ProjectOperator(phoneKey, custKey,
                custName);

        final List<Integer> partitionKey = Arrays.asList(0);

        // Raw input gets the print operator chained in front of the
        // projection; materialized input is only projected.
        final boolean attachPrinter = !readMaterialized;
        final String file1 = readMaterialized ? materialized1 : "customer";
        final String file2 = readMaterialized ? materialized2 : "customer";

        final Component customer1 = customerSource("CUSTOMER1", dataPath
                + file1 + extension, printer1, attachPrinter, projection,
                partitionKey);
        _queryBuilder.add(customer1);

        final Component customer2 = customerSource("CUSTOMER2", dataPath
                + file2 + extension, printer2, attachPrinter, projection,
                partitionKey);
        _queryBuilder.add(customer2);

        final NumericType keyType = _lc;
        final ComparisonPredicate equality = new ComparisonPredicate(
                ComparisonPredicate.EQUAL_OP);
        final int leftKeyIndex = 0;
        final int rightKeyIndex = 0;

        // Exactly one of the following tails is appended to the plan.
        if (printFiltered) {
            customer1.setPrintOut(false);
            customer2.setPrintOut(false);
            return;
        }
        if (srcHistogram) {
            _queryBuilder = MyUtilities.addSrcHistogram(customer1,
                    leftKeyIndex, customer2, rightKeyIndex, keyType, equality,
                    d2Histogram, s1Histogram, conf);
            return;
        }
        if (okcanSampling) {
            _queryBuilder = MyUtilities.addOkcanSampler(customer1, customer2,
                    leftKeyIndex, rightKeyIndex, _queryBuilder, keyType,
                    equality, conf);
            return;
        }
        if (ewhSampling) {
            _queryBuilder = MyUtilities.addEWHSampler(customer1, customer2,
                    leftKeyIndex, rightKeyIndex, _queryBuilder, keyType,
                    equality, conf);
            return;
        }
        addThetaJoin(customer1, customer2, leftKeyIndex, rightKeyIndex,
                keyType, conf);
    }

    /**
     * Returns true only when {@code key} exists in {@code conf} and its
     * boolean value is true.
     */
    private static boolean flagEnabled(Map conf, String key) {
        return SystemParameters.isExisting(conf, key)
                && SystemParameters.getBoolean(conf, key);
    }

    /**
     * Creates one customer data source, optionally chaining the print operator
     * in front of the projection, and sets its output partitioning key.
     */
    private static Component customerSource(String componentName,
            String inputPath, PrintOperator printer, boolean attachPrinter,
            ProjectOperator projection, List<Integer> partitionKey) {
        if (attachPrinter) {
            // printer may be null here; it is handed over exactly as received
            return new DataSourceComponent(componentName, inputPath)
                    .add(printer).add(projection)
                    .setOutputPartKey(partitionKey);
        }
        return new DataSourceComponent(componentName, inputPath).add(
                projection).setOutputPartKey(partitionKey);
    }

    /**
     * Appends the equality theta join of the two sources, wrapped in a
     * "DUMMY" component, to the query builder.
     */
    private void addThetaJoin(Component left, Component right,
            int leftKeyIndex, int rightKeyIndex, NumericType keyType, Map conf) {
        final int thetaJoinType = ThetaQueryPlansParameters
                .getThetaJoinType(conf);
        final ColumnReference leftKey = new ColumnReference(keyType,
                leftKeyIndex);
        final ColumnReference rightKey = new ColumnReference(keyType,
                rightKeyIndex);
        final ComparisonPredicate joinPredicate = new ComparisonPredicate(
                ComparisonPredicate.EQUAL_OP, leftKey, rightKey);

        // An AggregateCountOperator on the joiner and
        // lastJoiner.setPrintOut(false) were tried here and are disabled.
        Component joiner = ThetaJoinComponentFactory
                .createThetaJoinOperator(thetaJoinType, left, right,
                        _queryBuilder).setJoinPredicate(joinPredicate)
                .setContentSensitiveThetaJoinWrapper(keyType);

        DummyComponent dummy = new DummyComponent(joiner, "DUMMY");
        _queryBuilder.add(dummy);
    }

    /** Returns the query builder assembled by the constructor. */
    @Override
    public QueryBuilder getQueryPlan() {
        return _queryBuilder;
    }
}