/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.test.recordJobs.relational;
import org.apache.log4j.Logger;
import eu.stratosphere.api.common.Plan;
import eu.stratosphere.api.common.Program;
import eu.stratosphere.api.common.ProgramDescription;
import eu.stratosphere.api.java.record.operators.FileDataSink;
import eu.stratosphere.api.java.record.operators.FileDataSource;
import eu.stratosphere.api.java.record.operators.JoinOperator;
import eu.stratosphere.api.java.record.operators.MapOperator;
import eu.stratosphere.api.java.record.operators.ReduceOperator;
import eu.stratosphere.test.recordJobs.relational.query9Util.AmountAggregate;
import eu.stratosphere.test.recordJobs.relational.query9Util.FilteredPartsJoin;
import eu.stratosphere.test.recordJobs.relational.query9Util.IntPair;
import eu.stratosphere.test.recordJobs.relational.query9Util.LineItemMap;
import eu.stratosphere.test.recordJobs.relational.query9Util.OrderMap;
import eu.stratosphere.test.recordJobs.relational.query9Util.OrderedPartsJoin;
import eu.stratosphere.test.recordJobs.relational.query9Util.PartFilter;
import eu.stratosphere.test.recordJobs.relational.query9Util.PartJoin;
import eu.stratosphere.test.recordJobs.relational.query9Util.PartListJoin;
import eu.stratosphere.test.recordJobs.relational.query9Util.PartsuppMap;
import eu.stratosphere.test.recordJobs.relational.query9Util.StringIntPair;
import eu.stratosphere.test.recordJobs.relational.query9Util.StringIntPairStringDataOutFormat;
import eu.stratosphere.test.recordJobs.relational.query9Util.SupplierMap;
import eu.stratosphere.test.recordJobs.relational.query9Util.SuppliersJoin;
import eu.stratosphere.test.recordJobs.util.IntTupleDataInFormat;
import eu.stratosphere.types.IntValue;
/**
* Quote from the TPC-H homepage:
* "The TPC-H is a decision support benchmark on relational data.
* Its documentation and the data generator (DBGEN) can be found
* on http://www.tpc.org/tpch/ .This implementation is tested with
* the DB2 data format."
* This PACT program implements the query 9 of the TPC-H benchmark:
*
* <pre>
* select nation, o_year, sum(amount) as sum_profit
* from (
* select n_name as nation, extract(year from o_orderdate) as o_year,
* l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
* from part, supplier, lineitem, partsupp, orders, nation
* where
* s_suppkey = l_suppkey and ps_suppkey = l_suppkey and ps_partkey = l_partkey
* and p_partkey = l_partkey and o_orderkey = l_orderkey and s_nationkey = n_nationkey
* and p_name like '%[COLOR]%'
* ) as profit
* group by nation, o_year
* order by nation, o_year desc;
* </pre>
*
* Plan:<br>
* Match "part" and "partsupp" on "partkey" -> "parts" with (partkey, suppkey) as key
* Match "orders" and "lineitem" on "orderkey" -> "ordered_parts" with (partkey, suppkey) as key
* Match "parts" and "ordered_parts" on (partkey, suppkey) -> "filtered_parts" with "suppkey" as key
* Match "supplier" and "nation" on "nationkey" -> "suppliers" with "suppkey" as key
* Match "filtered_parts" and "suppliers" on" suppkey" -> "partlist" with (nation, o_year) as key
* Group "partlist" by (nation, o_year), calculate sum(amount)
*
* <b>Attention:</b> The "order by" part is not implemented!
*
*/
@SuppressWarnings("serial")
public class TPCHQuery9 implements Program, ProgramDescription {
public final String ARGUMENTS = "dop partInputPath partSuppInputPath ordersInputPath lineItemInputPath supplierInputPath nationInputPath outputPath";
private static Logger LOGGER = Logger.getLogger(TPCHQuery9.class);
private int degreeOfParallelism = 1;
private String partInputPath, partSuppInputPath, ordersInputPath, lineItemInputPath, supplierInputPath,
nationInputPath;
private String outputPath;
@Override
public Plan getPlan(String... args) throws IllegalArgumentException {
if (args.length != 8)
{
LOGGER.warn("number of arguments do not match!");
this.degreeOfParallelism = 1;
this.partInputPath = "";
this.partSuppInputPath = "";
this.ordersInputPath = "";
this.lineItemInputPath = "";
this.supplierInputPath = "";
this.nationInputPath = "";
this.outputPath = "";
}else
{
this.degreeOfParallelism = Integer.parseInt(args[0]);
this.partInputPath = args[1];
this.partSuppInputPath = args[2];
this.ordersInputPath = args[3];
this.lineItemInputPath = args[4];
this.supplierInputPath = args[5];
this.nationInputPath = args[6];
this.outputPath = args[7];
}
/* Create the 6 data sources: */
/* part: (partkey | name, mfgr, brand, type, size, container, retailprice, comment) */
FileDataSource partInput = new FileDataSource(
new IntTupleDataInFormat(), this.partInputPath, "\"part\" source");
//partInput.setOutputContract(UniqueKey.class);
// partInput.getCompilerHints().setAvgNumValuesPerKey(1);
/* partsupp: (partkey | suppkey, availqty, supplycost, comment) */
FileDataSource partSuppInput = new FileDataSource(
new IntTupleDataInFormat(), this.partSuppInputPath, "\"partsupp\" source");
/* orders: (orderkey | custkey, orderstatus, totalprice, orderdate, orderpriority, clerk, shippriority, comment) */
FileDataSource ordersInput = new FileDataSource(
new IntTupleDataInFormat(), this.ordersInputPath, "\"orders\" source");
//ordersInput.setOutputContract(UniqueKey.class);
// ordersInput.getCompilerHints().setAvgNumValuesPerKey(1);
/* lineitem: (orderkey | partkey, suppkey, linenumber, quantity, extendedprice, discount, tax, ...) */
FileDataSource lineItemInput = new FileDataSource(
new IntTupleDataInFormat(), this.lineItemInputPath, "\"lineitem\" source");
/* supplier: (suppkey | name, address, nationkey, phone, acctbal, comment) */
FileDataSource supplierInput = new FileDataSource(
new IntTupleDataInFormat(), this.supplierInputPath, "\"supplier\" source");
//supplierInput.setOutputContract(UniqueKey.class);
// supplierInput.getCompilerHints().setAvgNumValuesPerKey(1);
/* nation: (nationkey | name, regionkey, comment) */
FileDataSource nationInput = new FileDataSource(
new IntTupleDataInFormat(), this.nationInputPath, "\"nation\" source");
//nationInput.setOutputContract(UniqueKey.class);
// nationInput.getCompilerHints().setAvgNumValuesPerKey(1);
/* Filter on part's name, project values to NULL: */
MapOperator filterPart = MapOperator.builder(PartFilter.class)
.name("filterParts")
.build();
/* Map to change the key element of partsupp, project value to (supplycost, suppkey): */
MapOperator mapPartsupp = MapOperator.builder(PartsuppMap.class)
.name("mapPartsupp")
.build();
/* Map to extract the year from order: */
MapOperator mapOrder = MapOperator.builder(OrderMap.class)
.name("mapOrder")
.build();
/* Project value to (partkey, suppkey, quantity, price = extendedprice*(1-discount)): */
MapOperator mapLineItem = MapOperator.builder(LineItemMap.class)
.name("proj.Partsupp")
.build();
/* - change the key of supplier to nationkey, project value to suppkey */
MapOperator mapSupplier = MapOperator.builder(SupplierMap.class)
.name("proj.Partsupp")
.build();
/* Equijoin on partkey of part and partsupp: */
JoinOperator partsJoin = JoinOperator.builder(PartJoin.class, IntValue.class, 0, 0)
.name("partsJoin")
.build();
/* Equijoin on orderkey of orders and lineitem: */
JoinOperator orderedPartsJoin =
JoinOperator.builder(OrderedPartsJoin.class, IntValue.class, 0, 0)
.name("orderedPartsJoin")
.build();
/* Equijoin on nationkey of supplier and nation: */
JoinOperator suppliersJoin =
JoinOperator.builder(SuppliersJoin.class, IntValue.class, 0, 0)
.name("suppliersJoin")
.build();
/* Equijoin on (partkey,suppkey) of parts and orderedParts: */
JoinOperator filteredPartsJoin =
JoinOperator.builder(FilteredPartsJoin.class, IntPair.class, 0, 0)
.name("filteredPartsJoin")
.build();
/* Equijoin on suppkey of filteredParts and suppliers: */
JoinOperator partListJoin =
JoinOperator.builder(PartListJoin.class, IntValue.class , 0, 0)
.name("partlistJoin")
.build();
/* Aggregate sum(amount) by (nation,year): */
ReduceOperator sumAmountAggregate =
ReduceOperator.builder(AmountAggregate.class, StringIntPair.class, 0)
.name("groupyBy")
.build();
/* Connect input filters: */
filterPart.setInput(partInput);
mapPartsupp.setInput(partSuppInput);
mapOrder.setInput(ordersInput);
mapLineItem.setInput(lineItemInput);
mapSupplier.setInput(supplierInput);
/* Connect equijoins: */
partsJoin.setFirstInput(filterPart);
partsJoin.setSecondInput(mapPartsupp);
orderedPartsJoin.setFirstInput(mapOrder);
orderedPartsJoin.setSecondInput(mapLineItem);
suppliersJoin.setFirstInput(mapSupplier);
suppliersJoin.setSecondInput(nationInput);
filteredPartsJoin.setFirstInput(partsJoin);
filteredPartsJoin.setSecondInput(orderedPartsJoin);
partListJoin.setFirstInput(filteredPartsJoin);
partListJoin.setSecondInput(suppliersJoin);
/* Connect aggregate: */
sumAmountAggregate.setInput(partListJoin);
/* Connect sink: */
FileDataSink result = new FileDataSink(new StringIntPairStringDataOutFormat(), this.outputPath, "Results sink");
result.setInput(sumAmountAggregate);
Plan p = new Plan(result, "TPC-H query 9");
p.setDefaultParallelism(this.degreeOfParallelism);
return p;
}
@Override
public String getDescription() {
return "TPC-H query 9, parameters: " + this.ARGUMENTS;
}
}