/*
* Copyright (c) 2011-2015 EPFL DATA Laboratory
* Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
*
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.epfl.data.squall.ewh.examples;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import ch.epfl.data.squall.components.Component;
import ch.epfl.data.squall.components.DataSourceComponent;
import ch.epfl.data.squall.components.theta.ThetaJoinComponentFactory;
import ch.epfl.data.squall.ewh.components.DummyComponent;
import ch.epfl.data.squall.expressions.ColumnReference;
import ch.epfl.data.squall.expressions.ValueSpecification;
import ch.epfl.data.squall.operators.PrintOperator;
import ch.epfl.data.squall.operators.ProjectOperator;
import ch.epfl.data.squall.operators.SelectOperator;
import ch.epfl.data.squall.predicates.ComparisonPredicate;
import ch.epfl.data.squall.query_plans.QueryBuilder;
import ch.epfl.data.squall.query_plans.QueryPlan;
import ch.epfl.data.squall.query_plans.ThetaQueryPlansParameters;
import ch.epfl.data.squall.types.DateIntegerType;
import ch.epfl.data.squall.types.IntegerType;
import ch.epfl.data.squall.types.NumericType;
import ch.epfl.data.squall.types.StringType;
import ch.epfl.data.squall.types.Type;
import ch.epfl.data.squall.utilities.MyUtilities;
import ch.epfl.data.squall.utilities.SystemParameters;
import ch.epfl.data.squall.utilities.SystemParameters.HistogramType;
//Eocd
/**
 * EWH example query plan: an equi-join between two differently filtered
 * copies of the ORDERS relation.
 *
 * <p>
 * Both inputs are projected to the input columns {0, 2, 3, 4, 5, 1}, so the
 * join key (input column 1; the customer key according to the class name and
 * the TPC-H ORDERS layout) ends up at position 5 of every projected tuple.
 *
 * <p>
 * Depending on the configuration, the constructor either only materializes
 * the filtered inputs, attaches a source histogram, attaches an Okcan or EWH
 * sampler, or builds the actual theta-join followed by a
 * {@link DummyComponent}.
 */
public class ThetaEWHEquiOrdersCustkeyCustkeyJoin extends QueryPlan {
    // Component names of the two join inputs; identical in the materialized
    // and the non-materialized branch.
    // NOTE(review): the materialized branch used to call the second input
    // "LINEITEM2". Component names are presumably used as configuration keys
    // elsewhere - confirm that no configuration relies on the old name.
    private static final String ORDERS1_NAME = "ORDERS1";
    private static final String ORDERS2_NAME = "ORDERS2";

    // Input column compared against the priority strings in the selections.
    private static final int PRIORITY_COLUMN = 5;

    // Position of the join key inside the projected tuples.
    private static final int PROJECTED_KEY_POSITION = 5;

    // Not final: the histogram/sampler helpers return the builder to use.
    private QueryBuilder _queryBuilder = new QueryBuilder();
    private static final Type<String> _stringConv = new StringType();
    private static final IntegerType _ic = new IntegerType();
    // Currently unused; only needed by the date-based selection variant
    // described in the constructor notes.
    private DateIntegerType _dic = new DateIntegerType();

    /**
     * Builds the query plan.
     *
     * @param dataPath
     *            prefix prepended to every input file name
     * @param extension
     *            suffix appended to every input file name
     * @param conf
     *            configuration map; consulted for DIP_MATERIALIZED,
     *            DIP_SAMPLING, DIP_EWH_SAMPLING, the histogram entries and
     *            the theta-join type
     */
    public ThetaEWHEquiOrdersCustkeyCustkeyJoin(String dataPath,
            String extension, Map conf) {
        // ORDERS * ORDERS equi-join on the key projected to position 5
        // (input column 1)
        // I = 2 * 15M = 30M; O = 15M * 10 = 150M
        // Variability is [0, 10] * skew and is too small
        //
        // Experiment notes:
        // baseline (z4 offset =0) takes forever to execute because of
        // humongous output; there is output skew
        // baseline + offset=500 has 115M output, but Okcan does not have
        // output skew
        // baseline + offset=1 has 115M output, but Okcan does not have output
        // skew
        // baseline with z1 takes forever to execute due to humongous output
        // baseline + offset=1 (?) takes forever to execute due to humongous
        // output
        // baseline + offset=1 + select disjoint has no output skew (30M
        // output tuples in total)
        // baseline + offset=1 + selectKey too small output + no output skew
        // baseline + offset=1 + selectFirstOnly 45M output with no output
        // skew
        // baseline + offset=1 + selectFirstOnly) urgent 60M with no output
        // skew
        // baseline + z1 + select disjoint humongous output, a little bit of
        // skew
        // baseline + z1 + select disjoint (6m + 3m): humongous output, there
        // is skew
        // baseline + z1 + select date (4 and > 19960103, 1): (1.2m, 3m, 2m):
        // small output
        // baseline + z1 + select date (< 19960101, 1-2): (8m, 6m, 5m): no
        // output skew
        // BEST baseline + z1 + select disjoint (3m + 3m (4, 1)): 240M output,
        // there is output skew
        // (we are 5% better than 1Bucket and several times better than
        // MBucket!)
        // orders_custkey_custkey_equi/orders_custkey_self_equi_16j_off0_z1_disjoint
        // GOOD baseline + z1 + select date (4 and < 19960103, 1): (1.9m, 3m,
        // > 215m): there is good output skew
        // GOOD baseline + z1 + select date (4 and > 19960101, 1): (1.2m, 3m,
        // seems similar to before
        // baseline + uniform + select disjoint (3m + 3m (4, 1)): 10M output,
        // there is no (very little) output skew
        // baseline + z1 + select date (4 and < 19960101, 1): (1.7m, 3m, 5m):
        // no output skew
        // (we are 2 seconds better than MBucket and much better (several
        // times) than 1Bucket!)

        // When requested, the filtered inputs are written out as
        // materialized relations under these names.
        final boolean printSelected = MyUtilities.isPrintFilteredLast(conf);
        final String matName1 = "bosc_1";
        final String matName2 = "bosc_2";
        final PrintOperator print1 = printSelected ? new PrintOperator(
                matName1 + extension, conf) : null;
        final PrintOperator print2 = printSelected ? new PrintOperator(
                matName2 + extension, conf) : null;

        // Whether to read the previously materialized relations instead of
        // filtering "orders" again.
        final boolean isMaterialized = SystemParameters.isExisting(conf,
                "DIP_MATERIALIZED")
                && SystemParameters.getBoolean(conf, "DIP_MATERIALIZED");
        final boolean isOkcanSampling = SystemParameters.isExisting(conf,
                "DIP_SAMPLING")
                && SystemParameters.getBoolean(conf, "DIP_SAMPLING");
        final boolean isEWHSampling = SystemParameters.isExisting(conf,
                "DIP_EWH_SAMPLING")
                && SystemParameters.getBoolean(conf, "DIP_EWH_SAMPLING");
        final boolean isEWHD2Histogram = SystemParameters.getBooleanIfExist(
                conf, HistogramType.D2_COMB_HIST.genConfEntryName());
        final boolean isEWHS1Histogram = SystemParameters.getBooleanIfExist(
                conf, HistogramType.S1_RES_HIST.genConfEntryName());
        final boolean isSrcHistogram = isEWHD2Histogram || isEWHS1Histogram;

        // Keep input columns 0, 2, 3, 4, 5 and move input column 1 (the join
        // key) to the last position.
        final ProjectOperator projectionOrders = new ProjectOperator(
                new int[] { 0, 2, 3, 4, 5, 1 });
        final List<Integer> hashJoinKey = Arrays.asList(5);

        final Component relationOrders1;
        final Component relationOrders2;
        if (!isMaterialized) {
            // ORDERDATE NO: startdate - enddate - 151, but for z4 mostly are
            // 1996-01-02
            // STARTDATE = 1992-01-01 CURRENTDATE = 1995-06-17 ENDDATE =
            // 1998-12-31
            //
            // Active ORDERS1 selection ("3m"): column 5 == "4-NOT SPECIFIED".
            // Other ORDERS1 selections that were tried:
            // - date: column 4 (as DateIntegerType, _dic) < 19960101,
            // optionally AND-ed with the active selection
            // - "6M": column 5 == "4-NOT SPECIFIED" OR column 5 == "5-LOW"
            // (the disabled code built the OR but passed only the first
            // predicate to the SelectOperator)
            // - "sel first urgent 9m": column 5 == "1-URGENT" OR "2-HIGH" OR
            // "3-MEDIUM"
            final SelectOperator selectionOrders1 = selectPriorityEquals("4-NOT SPECIFIED");
            relationOrders1 = new DataSourceComponent(ORDERS1_NAME, dataPath
                    + "orders" + extension).add(selectionOrders1).add(print1)
                    .add(projectionOrders).setOutputPartKey(hashJoinKey);
            _queryBuilder.add(relationOrders1);

            // Active ORDERS2 selection ("3M"): column 5 == "1-URGENT".
            // Other ORDERS2 selections that were tried:
            // - "selectKey 8.5M": column 0 (as IntegerType, _ic) < 8500000
            // - "disjoint 9M": column 5 == "1-URGENT" OR "2-HIGH" OR
            // "3-MEDIUM" (the disabled code built the OR but passed only the
            // first predicate to the SelectOperator)
            // - column 5 == "1-URGENT" OR column 5 == "2-HIGH"
            final SelectOperator selectionOrders2 = selectPriorityEquals("1-URGENT");
            relationOrders2 = new DataSourceComponent(ORDERS2_NAME, dataPath
                    + "orders" + extension).add(selectionOrders2).add(print2)
                    .add(projectionOrders).setOutputPartKey(hashJoinKey);
            _queryBuilder.add(relationOrders2);
        } else {
            // The materialized relations are already filtered; only the
            // projection and the partitioning key are applied.
            relationOrders1 = new DataSourceComponent(ORDERS1_NAME, dataPath
                    + matName1 + extension).add(projectionOrders)
                    .setOutputPartKey(hashJoinKey);
            _queryBuilder.add(relationOrders1);

            relationOrders2 = new DataSourceComponent(ORDERS2_NAME, dataPath
                    + matName2 + extension).add(projectionOrders)
                    .setOutputPartKey(hashJoinKey);
            _queryBuilder.add(relationOrders2);
        }

        // int keyOffset = 1;
        final NumericType keyType = _ic;
        final ComparisonPredicate comparison = new ComparisonPredicate(
                ComparisonPredicate.EQUAL_OP);
        final int firstKeyProject = PROJECTED_KEY_POSITION;
        final int secondKeyProject = PROJECTED_KEY_POSITION;

        if (printSelected) {
            // Only materialize the filtered inputs; no join is built.
            relationOrders1.setPrintOut(false);
            relationOrders2.setPrintOut(false);
        } else if (isSrcHistogram) {
            _queryBuilder = MyUtilities.addSrcHistogram(relationOrders1,
                    firstKeyProject, relationOrders2, secondKeyProject,
                    keyType, comparison, isEWHD2Histogram, isEWHS1Histogram,
                    conf);
        } else if (isOkcanSampling) {
            _queryBuilder = MyUtilities.addOkcanSampler(relationOrders1,
                    relationOrders2, firstKeyProject, secondKeyProject,
                    _queryBuilder, keyType, comparison, conf);
        } else if (isEWHSampling) {
            _queryBuilder = MyUtilities.addEWHSampler(relationOrders1,
                    relationOrders2, firstKeyProject, secondKeyProject,
                    _queryBuilder, keyType, comparison, conf);
        } else {
            final int thetaJoinType = ThetaQueryPlansParameters
                    .getThetaJoinType(conf);
            final ColumnReference colO1 = new ColumnReference(keyType,
                    firstKeyProject);
            final ColumnReference colO2 = new ColumnReference(keyType,
                    secondKeyProject);
            // Addition expr2 = new Addition(colO2, new
            // ValueSpecification(_ic, keyOffset));
            final ComparisonPredicate joinPredicate = new ComparisonPredicate(
                    ComparisonPredicate.EQUAL_OP, colO1, colO2);

            // AggregateCountOperator agg = new AggregateCountOperator(conf);
            Component lastJoiner = ThetaJoinComponentFactory
                    .createThetaJoinOperator(thetaJoinType, relationOrders1,
                            relationOrders2, _queryBuilder)
                    .setJoinPredicate(joinPredicate)
                    .setContentSensitiveThetaJoinWrapper(keyType);
            // .addOperator(agg)
            // lastJoiner.setPrintOut(false);

            DummyComponent dummy = new DummyComponent(lastJoiner, "DUMMY");
            _queryBuilder.add(dummy);
        }
    }

    /**
     * Creates a selection that keeps the tuples whose input column
     * {@link #PRIORITY_COLUMN} equals the given string.
     *
     * @param priority
     *            the string the column is compared with
     * @return the selection operator wrapping the equality predicate
     */
    private static SelectOperator selectPriorityEquals(String priority) {
        ComparisonPredicate equalsPriority = new ComparisonPredicate(
                ComparisonPredicate.EQUAL_OP, new ColumnReference(
                        _stringConv, PRIORITY_COLUMN),
                new ValueSpecification(_stringConv, priority));
        return new SelectOperator(equalsPriority);
    }

    /**
     * Returns the query builder assembled by the constructor.
     *
     * @return the builder holding all components of this plan
     */
    @Override
    public QueryBuilder getQueryPlan() {
        return _queryBuilder;
    }
}