/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.test.iterative; import java.io.BufferedReader; import java.io.Serializable; import eu.stratosphere.api.common.Plan; import eu.stratosphere.api.java.record.operators.DeltaIteration; import eu.stratosphere.api.java.record.operators.FileDataSink; import eu.stratosphere.api.java.record.operators.FileDataSource; import eu.stratosphere.api.java.record.functions.FunctionAnnotation.ConstantFieldsSecondExcept; import eu.stratosphere.api.java.record.functions.JoinFunction; import eu.stratosphere.api.java.record.io.CsvInputFormat; import eu.stratosphere.api.java.record.io.CsvOutputFormat; import eu.stratosphere.api.java.record.operators.JoinOperator; import eu.stratosphere.api.java.record.operators.MapOperator; import eu.stratosphere.api.java.record.operators.ReduceOperator; import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.DuplicateLongMap; import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.MinimumComponentIDReduce; import eu.stratosphere.test.recordJobs.graph.WorksetConnectedComponents.NeighborWithComponentIDJoin; import eu.stratosphere.test.testdata.ConnectedComponentsData; import eu.stratosphere.test.util.RecordAPITestBase; import eu.stratosphere.types.LongValue; import eu.stratosphere.types.Record; import eu.stratosphere.util.Collector; /** * Tests a bug that prevented that the solution set can be on both sides of the match/cogroup function. */ public class ConnectedComponentsWithSolutionSetFirstITCase extends RecordAPITestBase { private static final long SEED = 0xBADC0FFEEBEEFL; private static final int NUM_VERTICES = 1000; private static final int NUM_EDGES = 10000; protected String verticesPath; protected String edgesPath; protected String resultPath; @Override protected void preSubmit() throws Exception { verticesPath = createTempFile("vertices.txt", ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES)); edgesPath = createTempFile("edges.txt", ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED)); resultPath = getTempFilePath("results"); } @Override protected Plan getTestJob() { return getPlanForWorksetConnectedComponentsWithSolutionSetAsFirstInput(4, verticesPath, edgesPath, resultPath, 100); } @Override protected void postSubmit() throws Exception { for (BufferedReader reader : getResultReader(resultPath)) { ConnectedComponentsData.checkOddEvenResult(reader); } } // -------------------------------------------------------------------------------------------- // Classes and methods for the test program // -------------------------------------------------------------------------------------------- @ConstantFieldsSecondExcept({}) public static final class UpdateComponentIdMatchMirrored extends JoinFunction implements Serializable { private static final long serialVersionUID = 1L; @Override public void join(Record currentVertexWithComponent, Record newVertexWithComponent, Collector<Record> out){ long candidateComponentID = newVertexWithComponent.getField(1, LongValue.class).getValue(); long currentComponentID = currentVertexWithComponent.getField(1, LongValue.class).getValue(); if (candidateComponentID < currentComponentID) { out.collect(newVertexWithComponent); } } } @SuppressWarnings("unchecked") private static Plan getPlanForWorksetConnectedComponentsWithSolutionSetAsFirstInput( int numSubTasks, String verticesInput, String edgeInput, String output, int maxIterations) { // data source for initial vertices FileDataSource initialVertices = new FileDataSource(new CsvInputFormat(' ', LongValue.class), verticesInput, "Vertices"); MapOperator verticesWithId = MapOperator.builder(DuplicateLongMap.class).input(initialVertices).name("Assign Vertex Ids").build(); DeltaIteration iteration = new DeltaIteration(0, "Connected Components Iteration"); iteration.setInitialSolutionSet(verticesWithId); iteration.setInitialWorkset(verticesWithId); iteration.setMaximumNumberOfIterations(maxIterations); // create DataSourceContract for the edges FileDataSource edges = new FileDataSource(new CsvInputFormat(' ', LongValue.class, LongValue.class), edgeInput, "Edges"); // create CrossOperator for distance computation JoinOperator joinWithNeighbors = JoinOperator.builder(new NeighborWithComponentIDJoin(), LongValue.class, 0, 0) .input1(iteration.getWorkset()) .input2(edges) .name("Join Candidate Id With Neighbor") .build(); // create ReduceOperator for finding the nearest cluster centers ReduceOperator minCandidateId = ReduceOperator.builder(new MinimumComponentIDReduce(), LongValue.class, 0) .input(joinWithNeighbors) .name("Find Minimum Candidate Id") .build(); // create CrossOperator for distance computation JoinOperator updateComponentId = JoinOperator.builder(new UpdateComponentIdMatchMirrored(), LongValue.class, 0, 0) .input1(iteration.getSolutionSet()) .input2(minCandidateId) .name("Update Component Id") .build(); iteration.setNextWorkset(updateComponentId); iteration.setSolutionSetDelta(updateComponentId); // create DataSinkContract for writing the new cluster positions FileDataSink result = new FileDataSink(new CsvOutputFormat(), output, iteration, "Result"); CsvOutputFormat.configureRecordFormat(result) .recordDelimiter('\n') .fieldDelimiter(' ') .field(LongValue.class, 0) .field(LongValue.class, 1); // return the PACT plan Plan plan = new Plan(result, "Workset Connected Components"); plan.setDefaultParallelism(numSubTasks); return plan; } }