/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.scheme; import java.beans.ConstructorProperties; import java.io.IOException; import cascading.tap.Tap; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; /** * Class WritableSequenceFile is a sub-class of {@link SequenceFile} that reads and writes values of the given * {@code writableType} {@code Class}, instead of {@link Tuple} instances used by default in SequenceFile. * <p/> * This Class is a convenience for those who need to read/write specific types from existing sequence files without * them being wrapped in a Tuple instance. * <p/> * Note due to the nature of sequence files, only one type can be stored in the key and value positions, they they can be * uniquely different types (LongWritable, Text). * <p/> * If keyType is null, valueType must not be null, and vice versa, assuming you only wish to store a single value. * <p/> * {@link NullWritable} is used as the empty type for either a null keyType or valueType. */ public class WritableSequenceFile extends SequenceFile { protected Class<? extends Writable> keyType; protected Class<? extends Writable> valueType; /** * Constructor WritableSequenceFile creates a new WritableSequenceFile instance. * * @param fields of type Fields * @param valueType of type Class<? extends Writable>, may not be null */ @ConstructorProperties({"fields", "valueType"}) public WritableSequenceFile( Fields fields, Class<? extends Writable> valueType ) { this( fields, null, valueType ); } /** * Constructor WritableSequenceFile creates a new WritableSequenceFile instance. * * @param fields of type Fields * @param keyType of type Class<? extends Writable> * @param valueType of type Class<? extends Writable> */ @ConstructorProperties({"fields", "keyType", "valueType"}) public WritableSequenceFile( Fields fields, Class<? extends Writable> keyType, Class<? extends Writable> valueType ) { super( fields ); this.keyType = keyType; this.valueType = valueType; if( keyType == null && valueType == null ) throw new IllegalArgumentException( "both keyType and valueType may not be null" ); if( keyType == null && fields.size() != 1 ) throw new IllegalArgumentException( "fields must declare exactly one field when only reading/writing 'keys' from a sequence file" ); else if( valueType == null && fields.size() != 1 ) throw new IllegalArgumentException( "fields must declare exactly one field when only reading/writing 'values' from a sequence file" ); else if( keyType != null && valueType != null && fields.size() != 2 ) throw new IllegalArgumentException( "fields must declare exactly two fields when only reading/writing 'keys' and 'values' from a sequence file" ); } @Override public void sinkInit( Tap tap, JobConf conf ) { super.sinkInit( tap, conf ); if( keyType != null ) conf.setOutputKeyClass( keyType ); else conf.setOutputKeyClass( NullWritable.class ); if( valueType != null ) conf.setOutputValueClass( valueType ); else conf.setOutputValueClass( NullWritable.class ); } @Override public Tuple source( Object key, Object value ) { if( keyType == null ) return new Tuple( value ); if( valueType == null ) return new Tuple( key ); return new Tuple( key, value ); } @Override public void sink( TupleEntry tupleEntry, OutputCollector outputCollector ) throws IOException { Object keyValue = NullWritable.get(); Object valueValue = NullWritable.get(); if( keyType == null ) { valueValue = tupleEntry.getObject( getSinkFields() ); } else if( valueType == null ) { keyValue = tupleEntry.getObject( getSinkFields() ); } else { keyValue = tupleEntry.getObject( getSinkFields().get( 0 ) ); valueValue = tupleEntry.getObject( getSinkFields().get( 1 ) ); } outputCollector.collect( keyValue, valueValue ); } @Override public boolean equals( Object object ) { if( this == object ) return true; if( !( object instanceof WritableSequenceFile ) ) return false; if( !super.equals( object ) ) return false; WritableSequenceFile that = (WritableSequenceFile) object; if( keyType != null ? !keyType.equals( that.keyType ) : that.keyType != null ) return false; if( valueType != null ? !valueType.equals( that.valueType ) : that.valueType != null ) return false; return true; } @Override public int hashCode() { int result = super.hashCode(); result = 31 * result + ( keyType != null ? keyType.hashCode() : 0 ); result = 31 * result + ( valueType != null ? valueType.hashCode() : 0 ); return result; } }