/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.scheme;
import java.beans.ConstructorProperties;
import java.io.IOException;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
/**
* Class WritableSequenceFile is a sub-class of {@link SequenceFile} that reads and writes keys and/or values of the
* given {@code keyType} and {@code valueType} classes, instead of {@link Tuple} instances used by default in SequenceFile.
* <p/>
* This Class is a convenience for those who need to read/write specific types from existing sequence files without
* them being wrapped in a Tuple instance.
* <p/>
* Note that due to the nature of sequence files, only one type can be stored in the key position and one in the value
* position, though the two positions can hold different types (LongWritable, Text).
* <p/>
* If keyType is null, valueType must not be null, and vice versa, assuming you only wish to store a single value.
* <p/>
* {@link NullWritable} is used as the empty type for either a null keyType or valueType.
*/
public class WritableSequenceFile extends SequenceFile
  {
  /** Writable class stored in the key position, or null when only values are read/written. */
  protected Class<? extends Writable> keyType;
  /** Writable class stored in the value position, or null when only keys are read/written. */
  protected Class<? extends Writable> valueType;

  /**
   * Constructor WritableSequenceFile creates a new WritableSequenceFile instance that only reads/writes
   * the value position (the key type is left null).
   *
   * @param fields    of type Fields, must declare exactly one field
   * @param valueType of type Class<? extends Writable>, may not be null
   * @throws IllegalArgumentException if valueType is null or fields does not declare exactly one field
   */
  @ConstructorProperties({"fields", "valueType"})
  public WritableSequenceFile( Fields fields, Class<? extends Writable> valueType )
    {
    this( fields, null, valueType );
    }

  /**
   * Constructor WritableSequenceFile creates a new WritableSequenceFile instance.
   * <p>
   * At most one of keyType and valueType may be null. When one of them is null, fields must declare exactly
   * one field; when both are given, fields must declare exactly two fields (key first, value second).
   *
   * @param fields    of type Fields
   * @param keyType   of type Class<? extends Writable>, null if only values are read/written
   * @param valueType of type Class<? extends Writable>, null if only keys are read/written
   * @throws IllegalArgumentException if both types are null, or the number of declared fields does not
   *                                  match the number of non-null types
   */
  @ConstructorProperties({"fields", "keyType", "valueType"})
  public WritableSequenceFile( Fields fields, Class<? extends Writable> keyType, Class<? extends Writable> valueType )
    {
    super( fields );
    this.keyType = keyType;
    this.valueType = valueType;

    if( keyType == null && valueType == null )
      throw new IllegalArgumentException( "both keyType and valueType may not be null" );

    // a null keyType means only the value position is used, and vice versa
    if( keyType == null && fields.size() != 1 )
      throw new IllegalArgumentException( "fields must declare exactly one field when only reading/writing 'values' from a sequence file" );
    else if( valueType == null && fields.size() != 1 )
      throw new IllegalArgumentException( "fields must declare exactly one field when only reading/writing 'keys' from a sequence file" );
    else if( keyType != null && valueType != null && fields.size() != 2 )
      throw new IllegalArgumentException( "fields must declare exactly two fields when reading/writing both 'keys' and 'values' from a sequence file" );
    }

  /**
   * Delegates to the super-class and then sets the output key and value classes on the given conf.
   * {@link NullWritable} is set for whichever of keyType / valueType is null.
   *
   * @param tap  of type Tap
   * @param conf of type JobConf
   */
  @Override
  public void sinkInit( Tap tap, JobConf conf )
    {
    super.sinkInit( tap, conf );

    if( keyType != null )
      conf.setOutputKeyClass( keyType );
    else
      conf.setOutputKeyClass( NullWritable.class );

    if( valueType != null )
      conf.setOutputValueClass( valueType );
    else
      conf.setOutputValueClass( NullWritable.class );
    }

  /**
   * Wraps the given key and/or value in a new {@link Tuple}.
   *
   * @param key   of type Object
   * @param value of type Object
   * @return a Tuple holding only the value if keyType is null, only the key if valueType is null,
   *         otherwise the key followed by the value
   */
  @Override
  public Tuple source( Object key, Object value )
    {
    if( keyType == null )
      return new Tuple( value );

    if( valueType == null )
      return new Tuple( key );

    return new Tuple( key, value );
    }

  /**
   * Reads the key and/or value objects from the given tupleEntry using the sink fields and passes them to
   * the outputCollector. The position whose type is null is filled with the {@link NullWritable} instance.
   *
   * @param tupleEntry      of type TupleEntry
   * @param outputCollector of type OutputCollector
   * @throws IOException declared for the call to the outputCollector
   */
  @Override
  public void sink( TupleEntry tupleEntry, OutputCollector outputCollector ) throws IOException
    {
    Object keyValue = NullWritable.get();
    Object valueValue = NullWritable.get();

    if( keyType == null )
      {
      valueValue = tupleEntry.getObject( getSinkFields() );
      }
    else if( valueType == null )
      {
      keyValue = tupleEntry.getObject( getSinkFields() );
      }
    else
      {
      // first sink field is the key, second is the value
      keyValue = tupleEntry.getObject( getSinkFields().get( 0 ) );
      valueValue = tupleEntry.getObject( getSinkFields().get( 1 ) );
      }

    outputCollector.collect( keyValue, valueValue );
    }

  /**
   * Two WritableSequenceFile instances are equal when the super-class considers them equal and both their
   * keyType and valueType match (null matching only null).
   *
   * @param object of type Object
   * @return true if the given object is equal to this instance
   */
  @Override
  public boolean equals( Object object )
    {
    if( this == object )
      return true;

    if( !( object instanceof WritableSequenceFile ) )
      return false;

    if( !super.equals( object ) )
      return false;

    WritableSequenceFile that = (WritableSequenceFile) object;

    if( keyType != null ? !keyType.equals( that.keyType ) : that.keyType != null )
      return false;

    if( valueType != null ? !valueType.equals( that.valueType ) : that.valueType != null )
      return false;

    return true;
    }

  /**
   * Combines the super-class hash code with the hash codes of keyType and valueType (0 for a null type).
   *
   * @return the hash code of this instance
   */
  @Override
  public int hashCode()
    {
    int result = super.hashCode();

    result = 31 * result + ( keyType != null ? keyType.hashCode() : 0 );
    result = 31 * result + ( valueType != null ? valueType.hashCode() : 0 );

    return result;
    }
  }