/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading; import java.io.File; import cascading.flow.Flow; import cascading.flow.FlowConnector; import cascading.operation.Insert; import cascading.operation.regex.RegexSplitter; import cascading.pipe.Each; import cascading.pipe.Every; import cascading.pipe.GroupBy; import cascading.pipe.Pipe; import cascading.scheme.TextLine; import cascading.tap.Hfs; import cascading.tap.Tap; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntryIterator; public class BufferPipesTest extends ClusterTestCase { String inputFileApache = "build/test/data/apache.10.txt"; String inputFileIps = "build/test/data/ips.20.txt"; String inputFileNums20 = "build/test/data/nums.20.txt"; String inputFileNums10 = "build/test/data/nums.10.txt"; String inputFileCritics = "build/test/data/critics.txt"; String inputFileUpper = "build/test/data/upper.txt"; String inputFileLower = "build/test/data/lower.txt"; String inputFileLowerOffset = "build/test/data/lower-offset.txt"; String inputFileJoined = "build/test/data/lower+upper.txt"; String inputFileLhs = "build/test/data/lhs.txt"; String inputFileRhs = "build/test/data/rhs.txt"; String inputFileCross = "build/test/data/lhs+rhs-cross.txt"; String outputPath = "build/test/output/buffer/"; public BufferPipesTest() { super( "buffer pipes", false ); // no need for clustering } public void testSimpleBuffer() throws Exception { if( !new File( inputFileLhs ).exists() ) fail( "data file not found" ); copyFromLocal( inputFileLhs ); Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileLhs ); Tap sink = new Hfs( new TextLine(), outputPath + "/simple", true ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower" ), "\\s" ) ); pipe = new GroupBy( pipe, new Fields( "num" ) ); pipe = new Every( pipe, new TestBuffer( new Fields( "next" ), 2, true, true, "next" ) ); pipe = new Each( pipe, new Insert( new Fields( "final" ), "final" ), Fields.ALL ); Flow flow = new FlowConnector( getProperties() ).connect( source, sink, pipe ); // flow.writeDOT( "unknownselect.dot" ); flow.complete(); validateLength( flow, 23, null ); TupleEntryIterator iterator = flow.openSink(); assertEquals( "not equal: tuple.get(1)", "1\tnull\tnext\tfinal", iterator.next().get( 1 ) ); assertEquals( "not equal: tuple.get(1)", "1\ta\tnext\tfinal", iterator.next().get( 1 ) ); assertEquals( "not equal: tuple.get(1)", "1\tb\tnext\tfinal", iterator.next().get( 1 ) ); assertEquals( "not equal: tuple.get(1)", "1\tc\tnext\tfinal", iterator.next().get( 1 ) ); assertEquals( "not equal: tuple.get(1)", "1\tnull\tnext\tfinal", iterator.next().get( 1 ) ); iterator.close(); } public void testSimpleBuffer2() throws Exception { if( !new File( inputFileLhs ).exists() ) fail( "data file not found" ); copyFromLocal( inputFileLhs ); Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileLhs ); Tap sink = new Hfs( new TextLine(), outputPath + "/simple2", true ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower" ), "\\s" ) ); pipe = new GroupBy( pipe, new Fields( "num" ) ); pipe = new Every( pipe, new Fields( "lower" ), new TestBuffer( new Fields( "next" ), 1, true, "next" ), Fields.RESULTS ); pipe = new Each( pipe, new Insert( new Fields( "final" ), "final" ), Fields.ALL ); Flow flow = new FlowConnector( getProperties() ).connect( source, sink, pipe ); // flow.writeDOT( "unknownselect.dot" ); flow.complete(); validateLength( flow, 18, null ); TupleEntryIterator iterator = flow.openSink(); Comparable line = iterator.next().get( 1 ); assertEquals( "not equal: tuple.get(1)", "next\tfinal", line ); line = iterator.next().get( 1 ); assertEquals( "not equal: tuple.get(1)", "next\tfinal", line ); line = iterator.next().get( 1 ); assertEquals( "not equal: tuple.get(1)", "next\tfinal", line ); iterator.close(); } public void testSimpleBuffer3() throws Exception { if( !new File( inputFileJoined ).exists() ) fail( "data file not found" ); copyFromLocal( inputFileJoined ); Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileJoined ); Tap sink = new Hfs( new TextLine(), outputPath + "/simple3", true ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower", "upper" ), "\\s" ) ); pipe = new GroupBy( pipe, new Fields( "num" ) ); pipe = new Every( pipe, new TestBuffer( new Fields( "new" ), new Tuple( "new" ) ), new Fields( "new", "lower", "upper" ) ); Flow flow = new FlowConnector( getProperties() ).connect( source, sink, pipe ); // flow.writeDOT( "unknownselect.dot" ); flow.complete(); validateLength( flow, 5, null ); TupleEntryIterator iterator = flow.openSink(); Comparable line = iterator.next().get( 1 ); assertEquals( "not equal: tuple.get(1)", "new\ta\tA", line ); line = iterator.next().get( 1 ); assertEquals( "not equal: tuple.get(1)", "new\tb\tB", line ); line = iterator.next().get( 1 ); assertEquals( "not equal: tuple.get(1)", "new\tc\tC", line ); iterator.close(); } }