/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.test.recordJobs.relational; import java.io.IOException; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.util.Iterator; import eu.stratosphere.api.common.Plan; import eu.stratosphere.api.common.Program; import eu.stratosphere.api.common.ProgramDescription; import eu.stratosphere.api.java.record.operators.FileDataSink; import eu.stratosphere.api.java.record.operators.FileDataSource; import eu.stratosphere.api.java.record.functions.JoinFunction; import eu.stratosphere.api.java.record.functions.MapFunction; import eu.stratosphere.api.java.record.functions.ReduceFunction; import eu.stratosphere.api.java.record.io.FileOutputFormat; import eu.stratosphere.api.java.record.operators.JoinOperator; import eu.stratosphere.api.java.record.operators.MapOperator; import eu.stratosphere.api.java.record.operators.ReduceOperator; import eu.stratosphere.test.recordJobs.util.IntTupleDataInFormat; import eu.stratosphere.test.recordJobs.util.Tuple; import eu.stratosphere.types.DoubleValue; import eu.stratosphere.types.IntValue; import eu.stratosphere.types.Record; import eu.stratosphere.types.StringValue; import eu.stratosphere.util.Collector; @SuppressWarnings("serial") public class TPCHQuery10 implements Program, ProgramDescription { // -------------------------------------------------------------------------------------------- // Local Filters and Projections // -------------------------------------------------------------------------------------------- /** * Forwards (0 = orderkey, 1 = custkey). */ public static class FilterO extends MapFunction { private static final int YEAR_FILTER = 1990; private final IntValue custKey = new IntValue(); @Override public void map(Record record, Collector<Record> out) throws Exception { Tuple t = record.getField(1, Tuple.class); if (Integer.parseInt(t.getStringValueAt(4).substring(0, 4)) > FilterO.YEAR_FILTER) { // project this.custKey.setValue((int) t.getLongValueAt(1)); record.setField(1, this.custKey); out.collect(record); } } } /** * Forwards (0 = lineitem, 1 = tuple (extendedprice, discount) ) */ public static class FilterLI extends MapFunction { private final Tuple tuple = new Tuple(); @Override public void map(Record record, Collector<Record> out) throws Exception { Tuple t = record.getField(1, this.tuple); if (t.getStringValueAt(8).equals("R")) { t.project(0x60); // l_extendedprice, l_discount record.setField(1, t); out.collect(record); } } } /** * Returns (0 = custkey, 1 = custName, 2 = NULL, 3 = balance, 4 = nationkey, 5 = address, 6 = phone, 7 = comment) */ public static class ProjectC extends MapFunction { private final Tuple tuple = new Tuple(); private final StringValue custName = new StringValue(); private final StringValue balance = new StringValue(); private final IntValue nationKey = new IntValue(); private final StringValue address = new StringValue(); private final StringValue phone = new StringValue(); private final StringValue comment = new StringValue(); @Override public void map(Record record, Collector<Record> out) throws Exception { final Tuple t = record.getField(1, this.tuple); this.custName.setValue(t.getStringValueAt(1)); this.address.setValue(t.getStringValueAt(2)); this.nationKey.setValue((int) t.getLongValueAt(3)); this.phone.setValue(t.getStringValueAt(4)); this.balance.setValue(t.getStringValueAt(5)); this.comment.setValue(t.getStringValueAt(7)); record.setField(1, this.custName); record.setField(3, this.balance); record.setField(4, this.nationKey); record.setField(5, this.address); record.setField(6, this.phone); record.setField(7, this.comment); out.collect(record); } } /** * Returns (0 = nationkey, 1 = nation_name) */ public static class ProjectN extends MapFunction { private final Tuple tuple = new Tuple(); private final StringValue nationName = new StringValue(); @Override public void map(Record record, Collector<Record> out) throws Exception { final Tuple t = record.getField(1, this.tuple); this.nationName.setValue(t.getStringValueAt(1)); record.setField(1, this.nationName); out.collect(record); } } // -------------------------------------------------------------------------------------------- // Joins // -------------------------------------------------------------------------------------------- /** * Returns (0 = custKey, 1 = tuple (extendedprice, discount) ) */ public static class JoinOL extends JoinFunction { @Override public void join(Record order, Record lineitem, Collector<Record> out) throws Exception { lineitem.setField(0, order.getField(1, IntValue.class)); out.collect(lineitem); } } /** * Returns (0 = custkey, 1 = custName, 2 = extPrice * (1-discount), 3 = balance, 4 = nationkey, 5 = address, 6 = phone, 7 = comment) */ public static class JoinCOL extends JoinFunction { private final DoubleValue d = new DoubleValue(); @Override public void join(Record custRecord, Record olRecord, Collector<Record> out) throws Exception { final Tuple t = olRecord.getField(1, Tuple.class); final double extPrice = Double.parseDouble(t.getStringValueAt(0)); final double discount = Double.parseDouble(t.getStringValueAt(1)); this.d.setValue(extPrice * (1 - discount)); custRecord.setField(2, this.d); out.collect(custRecord); } } /** * Returns (0 = custkey, 1 = custName, 2 = extPrice * (1-discount), 3 = balance, 4 = nationName, 5 = address, 6 = phone, 7 = comment) */ public static class JoinNCOL extends JoinFunction { @Override public void join(Record colRecord, Record nation, Collector<Record> out) throws Exception { colRecord.setField(4, nation.getField(1, StringValue.class)); out.collect(colRecord); } } @ReduceOperator.Combinable public static class Sum extends ReduceFunction { private final DoubleValue d = new DoubleValue(); @Override public void reduce(Iterator<Record> records, Collector<Record> out) throws Exception { Record record = null; double sum = 0; while (records.hasNext()) { record = records.next(); sum += record.getField(2, DoubleValue.class).getValue(); } this.d.setValue(sum); record.setField(2, this.d); out.collect(record); } @Override public void combine(Iterator<Record> records, Collector<Record> out) throws Exception { reduce(records,out); } } public static class TupleOutputFormat extends FileOutputFormat { private static final long serialVersionUID = 1L; private final DecimalFormat formatter; private final StringBuilder buffer = new StringBuilder(); public TupleOutputFormat() { DecimalFormatSymbols decimalFormatSymbol = new DecimalFormatSymbols(); decimalFormatSymbol.setDecimalSeparator('.'); this.formatter = new DecimalFormat("#.####"); this.formatter.setDecimalFormatSymbols(decimalFormatSymbol); } @Override public void writeRecord(Record record) throws IOException { this.buffer.setLength(0); this.buffer.append(record.getField(0, IntValue.class).toString()).append('|'); this.buffer.append(record.getField(1, StringValue.class).toString()).append('|'); this.buffer.append(this.formatter.format(record.getField(2, DoubleValue.class).getValue())).append('|'); this.buffer.append(record.getField(3, StringValue.class).toString()).append('|'); this.buffer.append(record.getField(4, StringValue.class).toString()).append('|'); this.buffer.append(record.getField(5, StringValue.class).toString()).append('|'); this.buffer.append(record.getField(6, StringValue.class).toString()).append('|'); this.buffer.append(record.getField(7, StringValue.class).toString()).append('|'); this.buffer.append('\n'); final byte[] bytes = this.buffer.toString().getBytes(); this.stream.write(bytes); } } /* * (non-Javadoc) * * @see eu.stratosphere.pact.common.plan.PlanAssemblerDescription#getDescription() */ @Override public String getDescription() { return "TPC-H Query 10"; } /* * (non-Javadoc) * * @see eu.stratosphere.pact.common.plan.PlanAssembler#getPlan(java.lang.String[]) */ @Override public Plan getPlan(String... args) throws IllegalArgumentException { final String ordersPath; final String lineitemsPath; final String customersPath; final String nationsPath; final String resultPath; final int degreeOfParallelism; if (args.length < 6) { throw new IllegalArgumentException("Invalid number of parameters"); } else { degreeOfParallelism = Integer.parseInt(args[0]); ordersPath = args[1]; lineitemsPath = args[2]; customersPath = args[3]; nationsPath = args[4]; resultPath = args[5]; } FileDataSource orders = new FileDataSource(new IntTupleDataInFormat(), ordersPath, "Orders"); // orders.setOutputContract(UniqueKey.class); // orders.getCompilerHints().setAvgNumValuesPerKey(1); FileDataSource lineitems = new FileDataSource(new IntTupleDataInFormat(), lineitemsPath, "LineItems"); // lineitems.getCompilerHints().setAvgNumValuesPerKey(4); FileDataSource customers = new FileDataSource(new IntTupleDataInFormat(), customersPath, "Customers"); FileDataSource nations = new FileDataSource(new IntTupleDataInFormat(), nationsPath, "Nations"); MapOperator mapO = MapOperator.builder(FilterO.class) .name("FilterO") .build(); MapOperator mapLi = MapOperator.builder(FilterLI.class) .name("FilterLi") .build(); MapOperator projectC = MapOperator.builder(ProjectC.class) .name("ProjectC") .build(); MapOperator projectN = MapOperator.builder(ProjectN.class) .name("ProjectN") .build(); JoinOperator joinOL = JoinOperator.builder(JoinOL.class, IntValue.class, 0, 0) .name("JoinOL") .build(); JoinOperator joinCOL = JoinOperator.builder(JoinCOL.class, IntValue.class, 0, 0) .name("JoinCOL") .build(); JoinOperator joinNCOL = JoinOperator.builder(JoinNCOL.class, IntValue.class, 4, 0) .name("JoinNCOL") .build(); ReduceOperator reduce = ReduceOperator.builder(Sum.class) .keyField(IntValue.class, 0) .keyField(StringValue.class, 1) .keyField(StringValue.class, 3) .keyField(StringValue.class, 4) .keyField(StringValue.class, 5) .keyField(StringValue.class, 6) .keyField(StringValue.class, 7) .name("Reduce") .build(); FileDataSink result = new FileDataSink(new TupleOutputFormat(), resultPath, "Output"); result.setInput(reduce); reduce.setInput(joinNCOL); joinNCOL.setFirstInput(joinCOL); joinNCOL.setSecondInput(projectN); joinCOL.setFirstInput(projectC); joinCOL.setSecondInput(joinOL); joinOL.setFirstInput(mapO); joinOL.setSecondInput(mapLi); projectC.setInput(customers); projectN.setInput(nations); mapLi.setInput(lineitems); mapO.setInput(orders); // return the PACT plan Plan p = new Plan(result, "TPCH Q10"); p.setDefaultParallelism(degreeOfParallelism); return p; } }