/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading. If not, see <http://www.gnu.org/licenses/>.
 */

package cascading.scheme;

import java.io.File;

import cascading.ClusterTestCase;
import cascading.cascade.Cascade;
import cascading.cascade.CascadeConnector;
import cascading.flow.Flow;
import cascading.flow.FlowConnector;
import cascading.operation.expression.ExpressionFunction;
import cascading.pipe.Each;
import cascading.pipe.Pipe;
import cascading.tap.Hfs;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

/** Tests writing and reading Hadoop Writable key/value pairs through the {@link WritableSequenceFile} scheme. */
public class WritableSequenceFileTest extends ClusterTestCase
  {
  String inputFileApache = "build/test/data/apache.10.txt";

  String outputPath = "build/test/output/writablesequence/";

  public WritableSequenceFileTest()
    {
    super( "use tap collector tests", true );
    }

  public void testWritable() throws Exception
    {
    if( !new File( inputFileApache ).exists() )
      fail( "data file not found" );

    copyFromLocal( inputFileApache );

    Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache );

    Pipe pipe = new Pipe( "keyvalue" );

    // convert the "offset" and "line" fields into Hadoop Writable types
    pipe = new Each( pipe, new Fields( "offset" ), new ExpressionFunction( Fields.ARGS, "new org.apache.hadoop.io.LongWritable($0)", long.class ), Fields.REPLACE );
    pipe = new Each( pipe, new Fields( "line" ), new ExpressionFunction( Fields.ARGS, "new org.apache.hadoop.io.Text($0)", String.class ), Fields.REPLACE );

    // sinks for key+value, key-only, and value-only sequence files
    Tap tapKeyValue = new Hfs( new WritableSequenceFile( new Fields( "offset", "line" ), LongWritable.class, Text.class ), outputPath + "/keyvalue", true );
    Tap tapKey = new Hfs( new WritableSequenceFile( new Fields( "offset" ), LongWritable.class, null ), outputPath + "/key", true );
    Tap tapValue = new Hfs( new WritableSequenceFile( new Fields( "line" ), Text.class ), outputPath + "/value", true );

    // the key and value flows read back the key+value sequence file written by the first flow
    Flow flowKeyValue = new FlowConnector( getProperties() ).connect( source, tapKeyValue, pipe );
    Flow flowKey = new FlowConnector( getProperties() ).connect( tapKeyValue, tapKey, new Pipe( "key" ) );
    Flow flowValue = new FlowConnector( getProperties() ).connect( tapKeyValue, tapValue, new Pipe( "value" ) );

    Cascade cascade = new CascadeConnector().connect( "keyvalues", flowKeyValue, flowKey, flowValue );

    cascade.complete();

    validateLength( flowKeyValue, 10, 2 );
    validateLength( flowKey, 10, 1 );
    validateLength( flowValue, 10, 1 );
    }
  }