/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.tuple.hadoop;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import cascading.CascadingTestCase;
import cascading.tuple.Tuple;
import cascading.tuple.TupleInputStream;
import cascading.tuple.TupleOutputStream;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.serializer.WritableSerialization;
import org.apache.hadoop.mapred.JobConf;
/**
 * Round-trip test for {@link TupleSerialization}.
 * <p/>
 * Writes a batch of tuples containing primitive values, a nested {@link Tuple} and Hadoop Writable values to a
 * local file through a {@link TupleOutputStream}, then reads them back through a {@link TupleInputStream} and
 * checks the element types and values of every tuple.
 */
public class HadoopSerializationTest extends CascadingTestCase
  {
  /** Number of tuples written and read back. The value is arbitrary, but the expected file size depends on it. */
  private static final int NUM_TUPLES = 501;

  /** Directory, relative to the working directory, in which the serialized tuple file is created. */
  String outputPath = "build/test/output/tuples/serialization";

  public HadoopSerializationTest()
    {
    super( "serialization tuple test" );
    }

  /**
   * Serializes {@link #NUM_TUPLES} tuples to a file, verifies the resulting file size, then deserializes the file
   * and verifies each tuple.
   *
   * @throws IOException if the output file cannot be written or read
   */
  public void testInputOutputSerialization() throws IOException
    {
    long time = System.currentTimeMillis();

    JobConf jobConf = new JobConf();

    jobConf.set( "io.serializations", TestSerialization.class.getName() + "," + WritableSerialization.class.getName() ); // disable/replace WritableSerialization class
    jobConf.set( "cascading.serialization.tokens", "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName() ); // not using Text, just testing parsing

    TupleSerialization tupleSerialization = new TupleSerialization( jobConf );

    File dir = new File( outputPath );

    dir.mkdirs();

    // mkdirs() returns false when the directory already exists, so check the resulting state instead
    assertTrue( "unable to create output directory: " + dir, dir.isDirectory() );

    File file = new File( dir, "/test.bytes" );

    TupleOutputStream output = new TupleOutputStream( new FileOutputStream( file, false ), tupleSerialization.getElementWriter() );

    try
      {
      for( int i = 0; i < NUM_TUPLES; i++ )
        {
        String aString = "string number " + i;
        double random = Math.random();

        // charsets are given explicitly so the serialized size does not depend on the platform default
        Tuple inner = new Tuple( "inner tuple", new BytesWritable( "some string".getBytes( "UTF-8" ) ) );
        BytesWritable indexBytes = new BytesWritable( Integer.toString( i ).getBytes( "UTF-8" ) );

        output.writeTuple( new Tuple( i, aString, random, new TestText( aString ), inner, indexBytes, new BooleanWritable( false ) ) );
        }
      }
    finally
      {
      // always release the file handle; closing also flushes, which the size check below relies on
      output.close();
      }

    assertEquals( "wrong size", 89967L, file.length() ); // just makes sure the file size doesnt change from expected

    TupleInputStream input = new TupleInputStream( new FileInputStream( file ), tupleSerialization.getElementReader() );

    try
      {
      int previous = -1;

      for( int i = 0; i < NUM_TUPLES; i++ )
        {
        Tuple tuple = input.readTuple();
        int value = tuple.getInteger( 0 );

        // the first element is the write index, so consecutive tuples must differ by exactly one
        assertEquals( "wrong diff", 1, value - previous );

        assertTrue( "wrong type", tuple.get( 3 ) instanceof TestText );
        assertTrue( "wrong type", tuple.get( 4 ) instanceof Tuple );
        assertTrue( "wrong type", tuple.get( 5 ) instanceof BytesWritable );

        // getBytes() exposes the backing buffer, which may be longer than the data; getLength() is the valid size
        BytesWritable writable = (BytesWritable) tuple.get( 5 );
        String string = new String( writable.getBytes(), 0, writable.getLength(), "UTF-8" );

        assertEquals( "wrong value", i, Integer.parseInt( string ) );
        assertTrue( "wrong type", tuple.get( 6 ) instanceof BooleanWritable );

        previous = value;
        }
      }
    finally
      {
      input.close();
      }

    System.out.println( "time = " + ( System.currentTimeMillis() - time ) );
    }
  }