/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.tap.hadoop;
import java.io.IOException;
import cascading.tap.Tap;
import cascading.tap.TapException;
import cascading.tuple.Tuple;
import cascading.tuple.TupleIterator;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.Logger;
/**
* Class TapIterator is an implementation of {@link TupleIterator}. It is returned by {@link cascading.tap.Tap} instances when
* opening the taps resource for reading.
*/
public class TapIterator implements TupleIterator
{
/** Field LOG */
private static final Logger LOG = Logger.getLogger( TapIterator.class );
/** Field tap */
private final Tap tap;
/** Field inputFormat */
private InputFormat inputFormat;
/** Field conf */
private final JobConf conf;
/** Field splits */
private InputSplit[] splits;
/** Field reader */
private RecordReader reader;
/** Field key */
private Object key;
/** Field value */
private Object value;
/** Field currentSplit */
private int currentSplit = 0;
/** Field currentTuple */
private Tuple currentTuple;
/** Field complete */
private boolean complete = false;
/**
* Constructor TapIterator creates a new TapIterator instance.
*
* @param conf of type JobConf
* @throws IOException when
*/
public TapIterator( Tap tap, JobConf conf ) throws IOException
{
this.tap = tap;
this.conf = new JobConf( conf );
initalize();
}
private void initalize() throws IOException
{
tap.sourceInit( conf );
if( !tap.pathExists( conf ) )
{
complete = true;
return;
}
inputFormat = conf.getInputFormat();
if( inputFormat instanceof JobConfigurable )
( (JobConfigurable) inputFormat ).configure( conf );
splits = inputFormat.getSplits( conf, 1 );
if( splits.length == 0 )
{
complete = true;
return;
}
reader = makeReader( currentSplit );
key = reader.createKey();
value = reader.createValue();
if( LOG.isDebugEnabled() )
{
LOG.debug( "found splits: " + splits.length );
LOG.debug( "using key: " + key.getClass().getName() );
LOG.debug( "using value: " + value.getClass().getName() );
}
}
private RecordReader makeReader( int currentSplit ) throws IOException
{
if( LOG.isDebugEnabled() )
LOG.debug( "reading split: " + currentSplit );
return inputFormat.getRecordReader( splits[ currentSplit ], conf, Reporter.NULL );
}
/**
* Method hasNext returns true if there more {@link Tuple} instances available.
*
* @return boolean
*/
public boolean hasNext()
{
getNextTuple();
return !complete;
}
/**
* Method next returns the next {@link Tuple}.
*
* @return Tuple
*/
public Tuple next()
{
try
{
getNextTuple();
return currentTuple;
}
finally
{
currentTuple = null;
}
}
private void getNextTuple()
{
if( currentTuple != null || reader == null )
return;
try
{
if( reader.next( key, value ) )
{
currentTuple = tap.source( key, value );
getNextTuple(); // handles case where currentTuple is returned null from the source
}
else if( currentSplit < splits.length - 1 )
{
reader.close();
reader = makeReader( ++currentSplit );
getNextTuple();
}
else
{
complete = true;
}
}
catch( IOException exception )
{
throw new TapException( "could not get next tuple", exception );
}
}
public void remove()
{
throw new UnsupportedOperationException( "unimplemented" );
}
public void close()
{
try
{
if( reader != null )
reader.close();
}
catch( IOException exception )
{
LOG.warn( "exception closing iteraor", exception );
}
}
}