/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.cascade;
import java.io.IOException;
import cascading.ClusterTestCase;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.operation.Identity;
import cascading.operation.regex.RegexSplitter;
import cascading.operation.text.FieldJoiner;
import cascading.pipe.CoGroup;
import cascading.pipe.Each;
import cascading.pipe.Pipe;
import cascading.scheme.SequenceFile;
import cascading.scheme.TextLine;
import cascading.tap.Dfs;
import cascading.tap.Tap;
import cascading.tuple.Fields;
/**
 * Cluster test that wires five flows into a single {@link Cascade}.
 * <p/>
 * Two chains ("first1" -> "second1" and "first2" -> "second2") are built from the same
 * input file and have no data dependency on each other. A final flow CoGroups the sinks
 * of both chains. The flows are linked only through their shared taps: the sink of one
 * flow is handed to the next flow as its source.
 */
public class ParallelCascadeTest extends ClusterTestCase
  {
  /** Input file read by every first-stage flow. */
  String inputFile = "build/test/data/ips.20.txt";

  /** Base directory for every sink created by this test; note that it ends with a separator. */
  String outputPath = "build/test/output/parallelcascade/";

  public ParallelCascadeTest()
    {
    // NOTE(review): the meaning of the boolean is defined by ClusterTestCase, which is not
    // visible here -- presumably it enables cluster mode; confirm against the base class.
    super( "parallel cascade tests", true );
    }

  /**
   * Builds the sink location for the given leaf name.
   * <p/>
   * {@link #outputPath} already ends with a separator, so the name is appended directly;
   * inserting another "/" would yield a doubled separator in the path.
   *
   * @param name directory name below {@link #outputPath}
   * @return the full sink path
   */
  private String sinkPath( String name )
    {
    return outputPath + name;
    }

  /**
   * Creates a first-stage flow that reads {@link #inputFile} as text lines, feeds the
   * "line" field through {@link Identity} declared as "ip", and writes the "ip" field to a
   * sequence file sink named after the flow.
   *
   * @param name pipe name, also used as the sink directory name
   * @return the connected flow
   */
  private Flow firstFlow( String name )
    {
    Tap source = new Dfs( new TextLine( new Fields( "offset", "line" ) ), inputFile );
    Tap sink = new Dfs( new SequenceFile( new Fields( "ip" ) ), sinkPath( name ), true );

    Pipe pipe = new Pipe( name );

    pipe = new Each( pipe, new Fields( "line" ), new Identity( new Fields( "ip" ) ), new Fields( "ip" ) );

    return new FlowConnector( getProperties() ).connect( source, sink, pipe );
    }

  /**
   * Creates a second-stage flow that splits its input on the regex "\\." into four fields,
   * joins them again with "-" into a field named "mangled", and writes that field to a
   * sequence file sink named after the flow.
   *
   * @param name   pipe name, also used as the sink directory name
   * @param source tap to read from; the test passes the sink of a first-stage flow
   * @return the connected flow
   */
  private Flow secondFlow( String name, Tap source )
    {
    Tap sink = new Dfs( new SequenceFile( new Fields( "mangled" ) ), sinkPath( name ), true );

    Pipe pipe = new Pipe( name );

    pipe = new Each( pipe, new RegexSplitter( new Fields( "first", "second", "third", "fourth" ), "\\." ) );
    pipe = new Each( pipe, new FieldJoiner( new Fields( "mangled" ), "-" ) );

    return new FlowConnector( getProperties() ).connect( source, sink, pipe );
    }

  /**
   * Creates the final flow, which CoGroups the two given taps on field position 0 of each
   * side, declares two result fields, and writes the result as text lines.
   *
   * @param lhs tap bound to the "lhs" pipe
   * @param rhs tap bound to the "rhs" pipe
   * @return the connected flow
   */
  private Flow thirdFlow( Tap lhs, Tap rhs )
    {
    Pipe lhsPipe = new Pipe( "lhs" );
    Pipe rhsPipe = new Pipe( "rhs" );

    Pipe pipe = new CoGroup( lhsPipe, new Fields( 0 ), rhsPipe, new Fields( 0 ), Fields.size( 2 ) );

    // The sink directory is called "fourth" although this is the third stage; the name is
    // kept as-is so the output location does not move.
    Tap sink = new Dfs( new TextLine(), sinkPath( "fourth" ), true );

    return new FlowConnector( getProperties() ).connect( Cascades.tapsMap( Pipe.pipes( lhsPipe, rhsPipe ), Tap.taps( lhs, rhs ) ), sink, pipe );
    }

  /**
   * Builds both two-stage chains plus the joining flow, connects them into one cascade,
   * runs it, and validates the final flow with an expected length of 28.
   *
   * @throws IOException declared for the file copy and validation helpers
   */
  public void testCascade() throws IOException
    {
    copyFromLocal( inputFile );

    // Each second-stage flow reads the sink tap of its first-stage flow.
    Flow first1 = firstFlow( "first1" );
    Flow second1 = secondFlow( "second1", first1.getSink() );

    Flow first2 = firstFlow( "first2" );
    Flow second2 = secondFlow( "second2", first2.getSink() );

    Flow third = thirdFlow( second1.getSink(), second2.getSink() );

    Cascade cascade = new CascadeConnector().connect( first1, second1, first2, second2, third );

    cascade.start();
    cascade.complete();

    // NOTE(review): validateLength is inherited and not visible here -- presumably it asserts
    // the number of tuples in the flow's sink; confirm against the base class.
    validateLength( third, 28 );
    }
  }