/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.tap; import java.beans.ConstructorProperties; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import cascading.scheme.Scheme; import cascading.scheme.SequenceFile; import cascading.tap.hadoop.MultiInputFormat; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; import cascading.tuple.TupleEntryCollector; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.log4j.Logger; /** * Class MultiSinkTap is both a {@link CompositeTap} and {@link SinkTap} that can write to multiple child {@link Tap} instances simultaneously. * <p/> * It is the counterpart to {@link MultiSourceTap}. */ public class MultiSinkTap extends SinkTap implements CompositeTap { /** Field LOG */ private static final Logger LOG = Logger.getLogger( MultiSinkTap.class ); /** Field taps */ private Tap[] taps; /** Field tempPath */ private String tempPath = "__multisink_placeholder" + Integer.toString( (int) ( System.currentTimeMillis() * Math.random() ) ); /** Field childConfigs */ private List<Map<String, String>> childConfigs; private class MultiSinkCollector extends TupleEntryCollector implements OutputCollector { OutputCollector[] collectors; public MultiSinkCollector( JobConf conf, Tap... taps ) throws IOException { collectors = new OutputCollector[taps.length]; conf = new JobConf( conf ); JobConf[] jobConfs = MultiInputFormat.getJobConfs( conf, childConfigs ); for( int i = 0; i < taps.length; i++ ) { Tap tap = taps[ i ]; LOG.info( "opening for write: " + tap.toString() ); collectors[ i ] = (OutputCollector) tap.openForWrite( jobConfs[ i ] ); } } protected void collect( Tuple tuple ) { throw new UnsupportedOperationException( "collect should never be called on MultiSinkCollector" ); } public void collect( Object key, Object value ) throws IOException { for( OutputCollector collector : collectors ) collector.collect( key, value ); } @Override public void close() { super.close(); try { for( OutputCollector collector : collectors ) { try { ( (TupleEntryCollector) collector ).close(); } catch( Exception exception ) { LOG.warn( "exception closing TupleEntryCollector", exception ); } } } finally { collectors = null; } } } /** * Constructor MultiSinkTap creates a new MultiSinkTap instance. * * @param taps of type Tap... */ @ConstructorProperties({"taps"}) public MultiSinkTap( Tap... taps ) { this.taps = taps; } protected Tap[] getTaps() { return taps; } @Override public Tap[] getChildTaps() { return Arrays.copyOf( taps, taps.length ); } @Override public boolean isWriteDirect() { return true; } @Override public Path getPath() { return new Path( tempPath ); } @Override public TupleEntryCollector openForWrite( JobConf conf ) throws IOException { return new MultiSinkCollector( conf, getTaps() ); } @Override public void sinkInit( JobConf conf ) throws IOException { childConfigs = new ArrayList<Map<String, String>>(); for( int i = 0; i < getTaps().length; i++ ) { Tap tap = getTaps()[ i ]; JobConf jobConf = new JobConf( conf ); tap.sinkInit( jobConf ); childConfigs.add( MultiInputFormat.getConfig( conf, jobConf ) ); } } @Override public boolean makeDirs( JobConf conf ) throws IOException { for( Tap tap : getTaps() ) { if( !tap.makeDirs( conf ) ) return false; } return true; } @Override public boolean deletePath( JobConf conf ) throws IOException { for( Tap tap : getTaps() ) { if( !tap.deletePath( conf ) ) return false; } return true; } @Override public boolean pathExists( JobConf conf ) throws IOException { for( Tap tap : getTaps() ) { if( !tap.pathExists( conf ) ) return false; } return true; } @Override public long getPathModified( JobConf conf ) throws IOException { long modified = getTaps()[ 0 ].getPathModified( conf ); for( int i = 1; i < getTaps().length; i++ ) modified = Math.max( getTaps()[ i ].getPathModified( conf ), modified ); return modified; } @Override public void sink( TupleEntry tupleEntry, OutputCollector outputCollector ) throws IOException { for( int i = 0; i < taps.length; i++ ) taps[ i ].sink( tupleEntry, ( (MultiSinkCollector) outputCollector ).collectors[ i ] ); } @Override public Scheme getScheme() { if( super.getScheme() != null ) return super.getScheme(); Set<Comparable> fieldNames = new LinkedHashSet<Comparable>(); for( int i = 0; i < getTaps().length; i++ ) { for( Object o : getTaps()[ i ].getSinkFields() ) fieldNames.add( (Comparable) o ); } Fields allFields = new Fields( fieldNames.toArray( new Comparable[fieldNames.size()] ) ); setScheme( new SequenceFile( allFields ) ); return super.getScheme(); } @Override public String toString() { return "MultiSinkTap[" + ( taps == null ? "none" : Arrays.asList( taps ) ) + ']'; } @Override public boolean equals( Object o ) { if( this == o ) return true; if( !( o instanceof MultiSinkTap ) ) return false; if( !super.equals( o ) ) return false; MultiSinkTap that = (MultiSinkTap) o; if( !Arrays.equals( taps, that.taps ) ) return false; return true; } @Override public int hashCode() { int result = super.hashCode(); result = 31 * result + ( taps != null ? Arrays.hashCode( taps ) : 0 ); return result; } }