/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.test.recordJobs.relational; import java.util.Iterator; import eu.stratosphere.api.common.Plan; import eu.stratosphere.api.common.Program; import eu.stratosphere.api.java.record.operators.FileDataSink; import eu.stratosphere.api.java.record.operators.FileDataSource; import eu.stratosphere.api.java.record.functions.JoinFunction; import eu.stratosphere.api.java.record.functions.ReduceFunction; import eu.stratosphere.api.java.record.functions.FunctionAnnotation.ConstantFieldsExcept; import eu.stratosphere.api.java.record.functions.FunctionAnnotation.ConstantFieldsFirstExcept; import eu.stratosphere.api.java.record.io.CsvInputFormat; import eu.stratosphere.api.java.record.io.CsvOutputFormat; import eu.stratosphere.api.java.record.operators.JoinOperator; import eu.stratosphere.api.java.record.operators.ReduceOperator; import eu.stratosphere.types.IntValue; import eu.stratosphere.types.Record; import eu.stratosphere.util.Collector; public class MergeOnlyJoin implements Program { private static final long serialVersionUID = 1L; @ConstantFieldsFirstExcept(2) public static class JoinInputs extends JoinFunction { private static final long serialVersionUID = 1L; @Override public void join(Record input1, Record input2, Collector<Record> out) { input1.setField(2, input2.getField(1, IntValue.class)); out.collect(input1); } } @ConstantFieldsExcept({}) public static class DummyReduce extends ReduceFunction { private static final long serialVersionUID = 1L; @Override public void reduce(Iterator<Record> values, Collector<Record> out) { while (values.hasNext()) { out.collect(values.next()); } } } @Override public Plan getPlan(final String... args) { // parse program parameters int numSubtasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String input1Path = (args.length > 1 ? args[1] : ""); String input2Path = (args.length > 2 ? args[2] : ""); String output = (args.length > 3 ? args[3] : ""); int numSubtasksInput2 = (args.length > 4 ? Integer.parseInt(args[4]) : 1); // create DataSourceContract for Orders input @SuppressWarnings("unchecked") CsvInputFormat format1 = new CsvInputFormat('|', IntValue.class, IntValue.class); FileDataSource input1 = new FileDataSource(format1, input1Path, "Input 1"); ReduceOperator aggInput1 = ReduceOperator.builder(DummyReduce.class, IntValue.class, 0) .input(input1) .name("AggOrders") .build(); // create DataSourceContract for Orders input @SuppressWarnings("unchecked") CsvInputFormat format2 = new CsvInputFormat('|', IntValue.class, IntValue.class); FileDataSource input2 = new FileDataSource(format2, input2Path, "Input 2"); input2.setDegreeOfParallelism(numSubtasksInput2); ReduceOperator aggInput2 = ReduceOperator.builder(DummyReduce.class, IntValue.class, 0) .input(input2) .name("AggLines") .build(); aggInput2.setDegreeOfParallelism(numSubtasksInput2); // create JoinOperator for joining Orders and LineItems JoinOperator joinLiO = JoinOperator.builder(JoinInputs.class, IntValue.class, 0, 0) .input1(aggInput1) .input2(aggInput2) .name("JoinLiO") .build(); // create DataSinkContract for writing the result FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, joinLiO, "Output"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n') .fieldDelimiter('|') .lenient(true) .field(IntValue.class, 0) .field(IntValue.class, 1) .field(IntValue.class, 2); // assemble the PACT plan Plan plan = new Plan(result, "Merge Only Join"); plan.setDefaultParallelism(numSubtasks); return plan; } }