/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.operation.regex;
import java.util.Iterator;
import cascading.CascadingTestCase;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleListCollector;
/**
 * Exercises the regular expression operations of this package ({@link RegexSplitter},
 * {@link RegexSplitGenerator}, {@link RegexReplace}, {@link RegexParser} and
 * {@link RegexFilter}) by invoking each one against a small hand-built {@link Tuple}
 * and checking the values that come back.
 */
public class RegexesTest extends CascadingTestCase
{
  public RegexesTest()
  {
    super( "regex test" );
  }

  @Override
  protected void setUp() throws Exception
  {
    super.setUp();
  }

  /**
   * Asserts that the given collector holds exactly one tuple and returns that tuple.
   *
   * @param collector the results captured by {@code invokeFunction}
   * @return the single collected tuple
   */
  private Tuple onlyTuple( TupleListCollector collector )
  {
    assertEquals( "wrong size", 1, collector.size() );

    Iterator<Tuple> results = collector.iterator();

    return results.next();
  }

  /** A tab pattern applied to "foo&lt;TAB&gt;bar" must yield one tuple holding "foo" and "bar". */
  public void testSplitter()
  {
    RegexSplitter splitter = new RegexSplitter( "\t" );
    Tuple arguments = new Tuple( "foo\tbar" );

    Tuple result = onlyTuple( invokeFunction( splitter, arguments, Fields.UNKNOWN ) );

    assertEquals( "not equal: tuple.get(0)", "foo", result.get( 0 ) );
    assertEquals( "not equal: tuple.get(1)", "bar", result.get( 1 ) );
  }

  /** A whitespace pattern applied to "foo&lt;TAB&gt; bar" must yield two tuples, "foo" then "bar". */
  public void testSplitterGenerator()
  {
    RegexSplitGenerator generator = new RegexSplitGenerator( new Fields( "word" ), "\\s+" );
    Tuple arguments = new Tuple( "foo\t bar" );
    Fields resultFields = new Fields( "field" );

    TupleListCollector collector = invokeFunction( generator, arguments, resultFields );

    assertEquals( "wrong size", 2, collector.size() );

    // each split value arrives as its own single-value tuple, in input order
    Iterator<Tuple> results = collector.iterator();

    assertEquals( "not equal: iterator.next().get(0)", "foo", results.next().get( 0 ) );
    assertEquals( "not equal: iterator.next().get(0)", "bar", results.next().get( 0 ) );
  }

  /** Replacing the whitespace run in "foo&lt;TAB&gt; bar" with "-" must yield "foo-bar". */
  public void testReplace()
  {
    RegexReplace replace = new RegexReplace( new Fields( "words" ), "\\s+", "-", true );
    Tuple arguments = new Tuple( "foo\t bar" );

    Tuple result = onlyTuple( invokeFunction( replace, arguments, Fields.UNKNOWN ) );

    assertEquals( "not equal: tuple.get(0)", "foo-bar", result.get( 0 ) );
  }

  /** Two declared fields with explicit groups {1, 2} must receive the two captured words. */
  public void testParserDeclared()
  {
    RegexParser parser = new RegexParser( new Fields( "lhs", "rhs" ), "(\\S+)\\s+(\\S+)", new int[]{1, 2} );
    Tuple arguments = new Tuple( "foo\tbar" );

    Tuple result = onlyTuple( invokeFunction( parser, arguments, Fields.size( 2 ) ) );

    assertEquals( "not equal: tuple.get(0)", "foo", result.get( 0 ) );
    assertEquals( "not equal: tuple.get(1)", "bar", result.get( 1 ) );
  }

  /** Same as {@link #testParserDeclared()} but without passing the group indexes. */
  public void testParserDeclared2()
  {
    RegexParser parser = new RegexParser( new Fields( "lhs", "rhs" ), "(\\S+)\\s+(\\S+)" );
    Tuple arguments = new Tuple( "foo\tbar" );

    Tuple result = onlyTuple( invokeFunction( parser, arguments, Fields.size( 2 ) ) );

    assertEquals( "not equal: tuple.get(0)", "foo", result.get( 0 ) );
    assertEquals( "not equal: tuple.get(1)", "bar", result.get( 1 ) );
  }

  /** One declared field and one capturing group, no group indexes: only "foo" is returned. */
  public void testParserDeclared3()
  {
    RegexParser parser = new RegexParser( new Fields( "lhs" ), "(\\S+)\\s+\\S+" );
    Tuple arguments = new Tuple( "foo\tbar" );

    Tuple result = onlyTuple( invokeFunction( parser, arguments, Fields.size( 1 ) ) );

    assertEquals( "wrong tuple size", 1, result.size() );
    assertEquals( "not equal: tuple.get(0)", "foo", result.get( 0 ) );
  }

  /** One declared field and a pattern with no capturing group: the whole input is returned. */
  public void testParserDeclared4()
  {
    RegexParser parser = new RegexParser( new Fields( "lhs" ), "\\S+\\s+\\S+" );
    Tuple arguments = new Tuple( "foo\tbar" );

    Tuple result = onlyTuple( invokeFunction( parser, arguments, Fields.size( 1 ) ) );

    assertEquals( "wrong tuple size", 1, result.size() );
    assertEquals( "not equal: tuple.get(0)", "foo\tbar", result.get( 0 ) );
  }

  /**
   * Contributed by gicode.
   * <p/>
   * A pattern that only covers the start of the input must still return its single group, "z123".
   */
  public void testParserDeclared5()
  {
    RegexParser parser = new RegexParser( new Fields( "bar" ), "^GET /foo\\?bar=([^\\&]+)&" );
    Tuple arguments = new Tuple( "GET /foo?bar=z123&baz=2" );

    Tuple result = onlyTuple( invokeFunction( parser, arguments, Fields.size( 1 ) ) );

    assertEquals( "wrong tuple size", 1, result.size() );
    assertEquals( "not equal: tuple.get(0)", "z123", result.get( 0 ) );
  }

  /** Same as {@link #testParserDeclared3()} but with the group index {1} given explicitly. */
  public void testParserDeclared6()
  {
    RegexParser parser = new RegexParser( new Fields( "lhs" ), "(\\S+)\\s+\\S+", new int[]{1} );
    Tuple arguments = new Tuple( "foo\tbar" );

    Tuple result = onlyTuple( invokeFunction( parser, arguments, Fields.size( 1 ) ) );

    assertEquals( "wrong tuple size", 1, result.size() );
    assertEquals( "not equal: tuple.get(0)", "foo", result.get( 0 ) );
  }

  /** {@link Fields#UNKNOWN} declared explicitly, groups {1, 2}: both words are returned. */
  public void testParserUnknown()
  {
    RegexParser parser = new RegexParser( Fields.UNKNOWN, "(\\S+)\\s+(\\S+)", new int[]{1, 2} );
    Tuple arguments = new Tuple( "foo\tbar" );

    Tuple result = onlyTuple( invokeFunction( parser, arguments, Fields.UNKNOWN ) );

    assertEquals( "not equal: tuple.get(0)", "foo", result.get( 0 ) );
    assertEquals( "not equal: tuple.get(1)", "bar", result.get( 1 ) );
  }

  /** No field declaration at all, groups {1, 2}: both words are returned. */
  public void testParserUnknown2()
  {
    RegexParser parser = new RegexParser( "(\\S+)\\s+(\\S+)", new int[]{1, 2} );
    Tuple arguments = new Tuple( "foo\tbar" );

    Tuple result = onlyTuple( invokeFunction( parser, arguments, Fields.UNKNOWN ) );

    assertEquals( "not equal: tuple.get(0)", "foo", result.get( 0 ) );
    assertEquals( "not equal: tuple.get(1)", "bar", result.get( 1 ) );
  }

  /** Neither a field declaration nor group indexes: both words are still returned. */
  public void testParserUnknown3()
  {
    RegexParser parser = new RegexParser( "(\\S+)\\s+(\\S+)" );
    Tuple arguments = new Tuple( "foo\tbar" );

    Tuple result = onlyTuple( invokeFunction( parser, arguments, Fields.UNKNOWN ) );

    assertEquals( "not equal: tuple.get(0)", "foo", result.get( 0 ) );
    assertEquals( "not equal: tuple.get(1)", "bar", result.get( 1 ) );
  }

  /**
   * A two-value tuple ("foo", "bar") filtered with the pattern "foo&lt;TAB&gt;bar" must not be
   * flagged for removal.
   */
  public void testFilter()
  {
    Tuple arguments = new Tuple( "foo", "bar" );
    // NOTE(review): presumably the filter matches against the values joined by a tab - confirm
    // against RegexFilter.
    RegexFilter filter = new RegexFilter( "foo\tbar" );

    boolean isRemove = invokeFilter( filter, arguments );

    // the message is reported on failure, i.e. when the tuple WAS flagged for removal
    assertTrue( "tuple was removed", !isRemove );
  }
}