/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to you under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.calcite.adapter.tpcds; import org.apache.calcite.jdbc.CalciteConnection; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelTraitDef; import org.apache.calcite.prepare.Prepare; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.runtime.Hook; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.test.CalciteAssert; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Program; import org.apache.calcite.tools.Programs; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.Bug; import org.apache.calcite.util.Holder; import org.apache.calcite.util.Pair; import com.google.common.base.Function; import net.hydromatic.tpcds.query.Query; import org.junit.Ignore; import org.junit.Test; import java.util.List; import java.util.Random; /** Unit test for {@link org.apache.calcite.adapter.tpcds.TpcdsSchema}. * * <p>Only runs if {@code -Dcalcite.test.slow} is specified on the * command-line. * (See {@link org.apache.calcite.test.CalciteAssert#ENABLE_SLOW}.)</p> */ public class TpcdsTest { private static Function<Pair<List<Prepare.Materialization>, Holder<Program>>, Void> handler(final boolean bushy, final int minJoinCount) { return new Function<Pair<List<Prepare.Materialization>, Holder<Program>>, Void>() { public Void apply( Pair<List<Prepare.Materialization>, Holder<Program>> pair) { pair.right.set( Programs.sequence( Programs.heuristicJoinOrder(Programs.RULE_SET, bushy, minJoinCount), Programs.CALC_PROGRAM)); return null; } }; } private static String schema(String name, String scaleFactor) { return " {\n" + " type: 'custom',\n" + " name: '" + name + "',\n" + " factory: 'org.apache.calcite.adapter.tpcds.TpcdsSchemaFactory',\n" + " operand: {\n" + " columnPrefix: true,\n" + " scale: " + scaleFactor + "\n" + " }\n" + " }"; } public static final String TPCDS_MODEL = "{\n" + " version: '1.0',\n" + " defaultSchema: 'TPCDS',\n" + " schemas: [\n" + schema("TPCDS", "1.0") + ",\n" + schema("TPCDS_01", "0.01") + ",\n" + schema("TPCDS_5", "5.0") + "\n" + " ]\n" + "}"; private CalciteAssert.AssertThat with() { return CalciteAssert.model(TPCDS_MODEL) .enable(CalciteAssert.ENABLE_SLOW); } @Test public void testCallCenter() { with() .query("select * from tpcds.call_center") .returnsUnordered(); } @Ignore("add tests like this that count each table") @Test public void testLineItem() { with() .query("select * from tpcds.lineitem") .returnsCount(6001215); } /** Tests the customer table with scale factor 5. */ @Ignore("add tests like this that count each table") @Test public void testCustomer5() { with() .query("select * from tpcds_5.customer") .returnsCount(750000); } @Test public void testQuery01() { checkQuery(1).runs(); } @Test public void testQuery17Plan() { //noinspection unchecked checkQuery(17) .withHook(Hook.PROGRAM, handler(true, 2)) .explainMatches("including all attributes ", CalciteAssert.checkMaskedResultContains("" + "EnumerableCalcRel(expr#0..11=[{inputs}], expr#12=[/($t5, $t4)], expr#13=[/($t8, $t7)], expr#14=[/($t11, $t10)], proj#0..5=[{exprs}], STORE_SALES_QUANTITYCOV=[$t12], AS_STORE_RETURNS_QUANTITYCOUNT=[$t6], AS_STORE_RETURNS_QUANTITYAVE=[$t7], AS_STORE_RETURNS_QUANTITYSTDEV=[$t8], STORE_RETURNS_QUANTITYCOV=[$t13], CATALOG_SALES_QUANTITYCOUNT=[$t9], CATALOG_SALES_QUANTITYAVE=[$t10], CATALOG_SALES_QUANTITYSTDEV=[$t14], CATALOG_SALES_QUANTITYCOV=[$t14]): rowcount = 5.434029018852197E26, cumulative cost = {1.618185849567114E30 rows, 1.2672155671963324E30 cpu, 0.0 io}\n" + " EnumerableSortRel(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC]): rowcount = 5.434029018852197E26, cumulative cost = {1.6176424466652288E30 rows, 1.2509134801397759E30 cpu, 0.0 io}\n" + " EnumerableCalcRel(expr#0..12=[{inputs}], expr#13=[/($t4, $t5)], expr#14=[CAST($t13):JavaType(class java.lang.Integer)], expr#15=[*($t4, $t4)], expr#16=[/($t15, $t5)], expr#17=[-($t6, $t16)], expr#18=[1], expr#19=[=($t5, $t18)], expr#20=[null], expr#21=[-($t5, $t18)], expr#22=[CASE($t19, $t20, $t21)], expr#23=[/($t17, $t22)], expr#24=[0.5], expr#25=[POWER($t23, $t24)], expr#26=[CAST($t25):JavaType(class java.lang.Integer)], expr#27=[/($t8, $t7)], expr#28=[CAST($t27):JavaType(class java.lang.Integer)], expr#29=[*($t8, $t8)], expr#30=[/($t29, $t7)], expr#31=[-($t9, $t30)], expr#32=[=($t7, $t18)], expr#33=[-($t7, $t18)], expr#34=[CASE($t32, $t20, $t33)], expr#35=[/($t31, $t34)], expr#36=[POWER($t35, $t24)], expr#37=[CAST($t36):JavaType(class java.lang.Integer)], expr#38=[/($t11, $t10)], expr#39=[CAST($t38):JavaType(class java.lang.Integer)], expr#40=[*($t11, $t11)], expr#41=[/($t40, $t10)], expr#42=[-($t12, $t41)], expr#43=[=($t10, $t18)], expr#44=[-($t10, $t18)], expr#45=[CASE($t43, $t20, $t44)], expr#46=[/($t42, $t45)], expr#47=[POWER($t46, $t24)], expr#48=[CAST($t47):JavaType(class java.lang.Integer)], proj#0..3=[{exprs}], STORE_SALES_QUANTITYAVE=[$t14], STORE_SALES_QUANTITYSTDEV=[$t26], AS_STORE_RETURNS_QUANTITYCOUNT=[$t7], AS_STORE_RETURNS_QUANTITYAVE=[$t28], AS_STORE_RETURNS_QUANTITYSTDEV=[$t37], CATALOG_SALES_QUANTITYCOUNT=[$t10], CATALOG_SALES_QUANTITYAVE=[$t39], $f11=[$t48]): rowcount = 5.434029018852197E26, cumulative cost = {1.1954863841615548E28 rows, 1.2503700772378907E30 cpu, 0.0 io}\n" + " EnumerableAggregateRel(group=[{0, 1, 2}], STORE_SALES_QUANTITYCOUNT=[COUNT()], agg#1=[SUM($3)], agg#2=[COUNT($3)], agg#3=[SUM($6)], AS_STORE_RETURNS_QUANTITYCOUNT=[COUNT($4)], agg#5=[SUM($4)], agg#6=[SUM($7)], CATALOG_SALES_QUANTITYCOUNT=[COUNT($5)], agg#8=[SUM($5)], agg#9=[SUM($8)]): rowcount = 5.434029018852197E26, cumulative cost = {1.1411460939730328E28 rows, 1.2172225002228922E30 cpu, 0.0 io}\n" + " EnumerableCalcRel(expr#0..211=[{inputs}], expr#212=[*($t89, $t89)], expr#213=[*($t140, $t140)], expr#214=[*($t196, $t196)], I_ITEM_ID=[$t58], I_ITEM_DESC=[$t61], S_STATE=[$t24], SS_QUANTITY=[$t89], SR_RETURN_QUANTITY=[$t140], CS_QUANTITY=[$t196], $f6=[$t212], $f7=[$t213], $f8=[$t214]): rowcount = 5.434029018852197E27, cumulative cost = {1.0868058037845108E28 rows, 1.2172225002228922E30 cpu, 0.0 io}\n" + " EnumerableJoinRel(condition=[AND(=($82, $133), =($81, $132), =($88, $139))], joinType=[inner]): rowcount = 5.434029018852197E27, cumulative cost = {5.434029018992911E27 rows, 1.8579845E7 cpu, 0.0 io}\n" + " EnumerableJoinRel(condition=[=($0, $86)], joinType=[inner]): rowcount = 2.3008402586892598E13, cumulative cost = {4.8588854672853766E13 rows, 7354409.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, STORE]]): rowcount = 12.0, cumulative cost = {12.0 rows, 13.0 cpu, 0.0 io}\n" + " EnumerableJoinRel(condition=[=($0, $50)], joinType=[inner]): rowcount = 1.2782445881607E13, cumulative cost = {1.279800620431234E13 rows, 7354396.0 cpu, 0.0 io}\n" + " EnumerableCalcRel(expr#0..27=[{inputs}], expr#28=[CAST($t15):VARCHAR(6) CHARACTER SET \"ISO-8859-1\" COLLATE \"ISO-8859-1$en_US$primary\"], expr#29=['1998Q1'], expr#30=[=($t28, $t29)], proj#0..27=[{exprs}], $condition=[$t30]): rowcount = 10957.35, cumulative cost = {84006.35 rows, 4455990.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 0.0 io}\n" + " EnumerableJoinRel(condition=[=($0, $24)], joinType=[inner]): rowcount = 7.7770908E9, cumulative cost = {7.783045975286664E9 rows, 2898406.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, ITEM]]): rowcount = 18000.0, cumulative cost = {18000.0 rows, 18001.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, STORE_SALES]]): rowcount = 2880404.0, cumulative cost = {2880404.0 rows, 2880405.0 cpu, 0.0 io}\n" + " EnumerableJoinRel(condition=[AND(=($31, $79), =($30, $91))], joinType=[inner]): rowcount = 6.9978029381741304E16, cumulative cost = {6.9978054204658736E16 rows, 1.1225436E7 cpu, 0.0 io}\n" + " EnumerableJoinRel(condition=[=($0, $28)], joinType=[inner]): rowcount = 7.87597881975E8, cumulative cost = {7.884434222216867E8 rows, 5035701.0 cpu, 0.0 io}\n" + " EnumerableCalcRel(expr#0..27=[{inputs}], expr#28=['1998Q1'], expr#29=[=($t15, $t28)], expr#30=['1998Q2'], expr#31=[=($t15, $t30)], expr#32=['1998Q3'], expr#33=[=($t15, $t32)], expr#34=[OR($t29, $t31, $t33)], proj#0..27=[{exprs}], $condition=[$t34]): rowcount = 18262.25, cumulative cost = {91311.25 rows, 4748186.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, STORE_RETURNS]]): rowcount = 287514.0, cumulative cost = {287514.0 rows, 287515.0 cpu, 0.0 io}\n" + " EnumerableJoinRel(condition=[=($0, $28)], joinType=[inner]): rowcount = 3.94888649445E9, cumulative cost = {3.9520401026966867E9 rows, 6189735.0 cpu, 0.0 io}\n" + " EnumerableCalcRel(expr#0..27=[{inputs}], expr#28=['1998Q1'], expr#29=[=($t15, $t28)], expr#30=['1998Q2'], expr#31=[=($t15, $t30)], expr#32=['1998Q3'], expr#33=[=($t15, $t32)], expr#34=[OR($t29, $t31, $t33)], proj#0..27=[{exprs}], $condition=[$t34]): rowcount = 18262.25, cumulative cost = {91311.25 rows, 4748186.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, DATE_DIM]]): rowcount = 73049.0, cumulative cost = {73049.0 rows, 73050.0 cpu, 0.0 io}\n" + " EnumerableTableScan(table=[[TPCDS, CATALOG_SALES]]): rowcount = 1441548.0, cumulative cost = {1441548.0 rows, 1441549.0 cpu, 0.0 io}\n")); } @Test public void testQuery27() { checkQuery(27).runs(); } @Test public void testQuery58() { checkQuery(58).explainContains("PLAN").runs(); } @Ignore("takes too long to optimize") @Test public void testQuery72() { checkQuery(72).runs(); } @Ignore("work in progress") @Test public void testQuery72Plan() { checkQuery(72) .withHook(Hook.PROGRAM, handler(true, 2)) .planContains("xx"); } @Test public void testQuery95() { checkQuery(95) .withHook(Hook.PROGRAM, handler(false, 6)) .runs(); } private CalciteAssert.AssertQuery checkQuery(int i) { final Query query = Query.of(i); String sql = query.sql(new Random(0)); switch (i) { case 58: if (Bug.upgrade("new TPC-DS generator")) { // Work around bug: Support '<DATE> = <character literal>'. sql = sql.replace(" = '", " = DATE '"); } else { // Until TPC-DS generator can handle date(...). sql = sql.replace("'date([YEAR]+\"-01-01\",[YEAR]+\"-07-24\",sales)'", "DATE '1998-08-18'"); } break; case 72: // Work around CALCITE-304: Support '<DATE> + <INTEGER>'. sql = sql.replace("+ 5", "+ interval '5' day"); break; case 95: sql = sql.replace("60 days", "interval '60' day"); sql = sql.replace("d_date between '", "d_date between date '"); break; } return with() .query(sql.replaceAll("tpcds\\.", "tpcds_01.")); } public Frameworks.ConfigBuilder config() throws Exception { final Holder<SchemaPlus> root = Holder.of(null); CalciteAssert.model(TPCDS_MODEL) .doWithConnection( new Function<CalciteConnection, Object>() { public Object apply(CalciteConnection input) { root.set(input.getRootSchema().getSubSchema("TPCDS")); return null; } }); return Frameworks.newConfigBuilder() .parserConfig(SqlParser.Config.DEFAULT) .defaultSchema(root.get()) .traitDefs((List<RelTraitDef>) null) .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); } /** * Builder query 27 using {@link RelBuilder}. * * <blockquote><pre> * select i_item_id, * s_state, grouping(s_state) g_state, * avg(ss_quantity) agg1, * avg(ss_list_price) agg2, * avg(ss_coupon_amt) agg3, * avg(ss_sales_price) agg4 * from store_sales, customer_demographics, date_dim, store, item * where ss_sold_date_sk = d_date_sk and * ss_item_sk = i_item_sk and * ss_store_sk = s_store_sk and * ss_cdemo_sk = cd_demo_sk and * cd_gender = 'dist(gender, 1, 1)' and * cd_marital_status = 'dist(marital_status, 1, 1)' and * cd_education_status = 'dist(education, 1, 1)' and * d_year = 1998 and * s_state in ('distmember(fips_county,[STATENUMBER.1], 3)', * 'distmember(fips_county,[STATENUMBER.2], 3)', * 'distmember(fips_county,[STATENUMBER.3], 3)', * 'distmember(fips_county,[STATENUMBER.4], 3)', * 'distmember(fips_county,[STATENUMBER.5], 3)', * 'distmember(fips_county,[STATENUMBER.6], 3)') * group by rollup (i_item_id, s_state) * order by i_item_id * ,s_state * LIMIT 100 * </pre></blockquote> */ @Test public void testQuery27Builder() throws Exception { final RelBuilder builder = RelBuilder.create(config().build()); final RelNode root = builder.scan("STORE_SALES") .scan("CUSTOMER_DEMOGRAPHICS") .scan("DATE_DIM") .scan("STORE") .scan("ITEM") .join(JoinRelType.INNER) .join(JoinRelType.INNER) .join(JoinRelType.INNER) .join(JoinRelType.INNER) .filter( builder.equals(builder.field("SS_SOLD_DATE_SK"), builder.field("D_DATE_SK")), builder.equals(builder.field("SS_ITEM_SK"), builder.field("I_ITEM_SK")), builder.equals(builder.field("SS_STORE_SK"), builder.field("S_STORE_SK")), builder.equals(builder.field("SS_CDEMO_SK"), builder.field("CD_DEMO_SK")), builder.equals(builder.field("CD_GENDER"), builder.literal("M")), builder.equals(builder.field("CD_MARITAL_STATUS"), builder.literal("S")), builder.equals(builder.field("CD_EDUCATION_STATUS"), builder.literal("HIGH SCHOOL")), builder.equals(builder.field("D_YEAR"), builder.literal(1998)), builder.call(SqlStdOperatorTable.IN, builder.field("S_STATE"), builder.call(SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR, builder.literal("CA"), builder.literal("OR"), builder.literal("WA"), builder.literal("TX"), builder.literal("OK"), builder.literal("MD")))) .aggregate(builder.groupKey("I_ITEM_ID", "S_STATE"), builder.avg(false, "AGG1", builder.field("SS_QUANTITY")), builder.avg(false, "AGG2", builder.field("SS_LIST_PRICE")), builder.avg(false, "AGG3", builder.field("SS_COUPON_AMT")), builder.avg(false, "AGG4", builder.field("SS_SALES_PRICE"))) .sortLimit(0, 100, builder.field("I_ITEM_ID"), builder.field("S_STATE")) .build(); System.out.println(RelOptUtil.toString(root)); } } // End TpcdsTest.java