/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.function; import java.io.File; import java.io.IOException; import java.util.Iterator; import cascading.CascadingTestCase; import cascading.flow.Flow; import cascading.flow.FlowConnector; import cascading.operation.Function; import cascading.operation.Insert; import cascading.operation.function.SetValue; import cascading.operation.regex.RegexFilter; import cascading.operation.regex.RegexSplitter; import cascading.operation.text.FieldFormatter; import cascading.pipe.Each; import cascading.pipe.GroupBy; import cascading.pipe.Pipe; import cascading.pipe.assembly.AggregateBy; import cascading.pipe.assembly.CountBy; import cascading.pipe.assembly.SumBy; import cascading.scheme.TextLine; import cascading.tap.Lfs; import cascading.tap.Tap; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; import cascading.tuple.TupleEntryIterator; import cascading.tuple.TupleListCollector; /** * */ public class FunctionTest extends CascadingTestCase { String inputFileApache = "build/test/data/apache.200.txt"; String inputFileUpper = "build/test/data/upper.txt"; String outputPath = "build/test/output/function/"; public FunctionTest() { super( "function tests" ); } public void testInsert() throws IOException { if( !new File( inputFileApache ).exists() ) fail( "data file not found" ); Tap source = new Lfs( new TextLine(), inputFileApache ); Tap sink = new Lfs( new TextLine(), outputPath + "insert", true ); Pipe pipe = new Pipe( "apache" ); pipe = new Each( pipe, new Insert( new Fields( "A", "B" ), "a", "b" ) ); pipe = new GroupBy( pipe, new Fields( "A" ) ); Flow flow = new FlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 200 ); TupleEntryIterator iterator = flow.openSink(); assertEquals( "not equal: tuple.get(1)", "a\tb", iterator.next().get( 1 ) ); assertEquals( "not equal: tuple.get(1)", "a\tb", iterator.next().get( 1 ) ); } public void testFieldFormatter() throws IOException { if( !new File( inputFileUpper ).exists() ) fail( "data file not found" ); Tap source = new Lfs( new TextLine(), inputFileUpper ); Tap sink = new Lfs( new TextLine(), outputPath + "formatter", true ); Pipe pipe = new Pipe( "formatter" ); pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( new Fields( "a", "b" ), "\\s" ) ); pipe = new Each( pipe, new FieldFormatter( new Fields( "result" ), "%s and %s" ) ); Flow flow = new FlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 5 ); TupleEntryIterator iterator = flow.openSink(); assertEquals( "not equal: tuple.get(1)", "1 and A", iterator.next().get( 1 ) ); assertEquals( "not equal: tuple.get(1)", "2 and B", iterator.next().get( 1 ) ); } public void testSetValue() throws IOException { if( !new File( inputFileUpper ).exists() ) fail( "data file not found" ); Tap source = new Lfs( new TextLine(), inputFileUpper ); Tap sink = new Lfs( new TextLine(), outputPath + "setvalue", true ); Pipe pipe = new Pipe( "setvalue" ); pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( new Fields( "num", "char" ), "\\s" ) ); pipe = new Each( pipe, new SetValue( new Fields( "result" ), new RegexFilter( "[A-C]" ) ) ); Flow flow = new FlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 5 ); TupleEntryIterator iterator = flow.openSink(); assertEquals( "not equal: tuple.get(1)", "true", iterator.next().get( 1 ) ); assertEquals( "not equal: tuple.get(1)", "true", iterator.next().get( 1 ) ); assertEquals( "not equal: tuple.get(1)", "true", iterator.next().get( 1 ) ); assertEquals( "not equal: tuple.get(1)", "false", iterator.next().get( 1 ) ); assertEquals( "not equal: tuple.get(1)", "false", iterator.next().get( 1 ) ); } public void testPartialCounts() { Function function = new AggregateBy.CompositeFunction( new Fields( "value" ), Fields.ALL, new CountBy.CountPartials( new Fields( "count" ) ), 2 ); Fields incoming = new Fields( "value" ); TupleEntry[] tuples = new TupleEntry[]{ new TupleEntry( incoming, new Tuple( "a" ) ), new TupleEntry( incoming, new Tuple( "a" ) ), new TupleEntry( incoming, new Tuple( "b" ) ), new TupleEntry( incoming, new Tuple( "b" ) ), new TupleEntry( incoming, new Tuple( "c" ) ), new TupleEntry( incoming, new Tuple( "c" ) ), new TupleEntry( incoming, new Tuple( "a" ) ), new TupleEntry( incoming, new Tuple( "a" ) ), new TupleEntry( incoming, new Tuple( "d" ) ), new TupleEntry( incoming, new Tuple( "d" ) ), }; Tuple[] expected = new Tuple[]{ new Tuple( "a", 2L ), new Tuple( "b", 2L ), new Tuple( "c", 2L ), new Tuple( "a", 2L ), new Tuple( "d", 2L ), }; TupleListCollector collector = invokeFunction( function, tuples, new Fields( "value", "count" ) ); Iterator<Tuple> iterator = collector.iterator(); int count = 0; while( iterator.hasNext() ) assertEquals( expected[ count++ ], iterator.next() ); } public void testPartialSums() { Function function = new AggregateBy.CompositeFunction( new Fields( "key" ), new Fields( "value" ), new SumBy.SumPartials( new Fields( "sum" ), float.class ), 2 ); Fields incoming = new Fields( "key", "value" ); TupleEntry[] tuples = new TupleEntry[]{ new TupleEntry( incoming, new Tuple( "a", 1 ) ), new TupleEntry( incoming, new Tuple( "a", 1 ) ), new TupleEntry( incoming, new Tuple( "b", 1 ) ), new TupleEntry( incoming, new Tuple( "b", 1 ) ), new TupleEntry( incoming, new Tuple( "c", 1 ) ), new TupleEntry( incoming, new Tuple( "c", 1 ) ), new TupleEntry( incoming, new Tuple( "a", 1 ) ), new TupleEntry( incoming, new Tuple( "a", 1 ) ), new TupleEntry( incoming, new Tuple( "d", 1 ) ), new TupleEntry( incoming, new Tuple( "d", 1 ) ), }; Tuple[] expected = new Tuple[]{ new Tuple( "a", 2F ), new Tuple( "b", 2F ), new Tuple( "c", 2F ), new Tuple( "a", 2F ), new Tuple( "d", 2F ), }; TupleListCollector collector = invokeFunction( function, tuples, new Fields( "key", "sum" ) ); Iterator<Tuple> iterator = collector.iterator(); int count = 0; while( iterator.hasNext() ) assertEquals( expected[ count++ ], iterator.next() ); } }