/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.tap;
import java.io.IOException;
import java.net.URI;
import java.util.Set;
import cascading.CascadingException;
import cascading.flow.Scope;
import cascading.scheme.Scheme;
import cascading.scheme.SequenceFile;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
/**
 * Class TempHfs creates a temporary {@link Tap} instance for use internally.
 * <p/>
 * The backing path is not fixed at construction time. It is derived from the given name the first
 * time {@link #getURIScheme(JobConf)}, {@link #sourceInit(JobConf)} or {@link #sinkInit(JobConf)}
 * is called, unless a path has already been set.
 */
public class TempHfs extends Hfs
{
  /** Field name */
  final String name;
  /** Field schemeClass, null when the instance was constructed with an explicit scheme */
  private Class schemeClass;
  /** Field temporaryPath, null until the temporary path has been created by this instance */
  private String temporaryPath;

  /** Class NullScheme is a noop scheme used as a placeholder */
  private static class NullScheme extends Scheme
  {
    @Override
    public void sourceInit( Tap tap, JobConf conf ) throws IOException
    {
      // do nothing
    }

    @Override
    public void sinkInit( Tap tap, JobConf conf ) throws IOException
    {
      conf.setOutputKeyClass( Tuple.class );
      conf.setOutputValueClass( Tuple.class );
      conf.setOutputFormat( NullOutputFormat.class );
    }

    @Override
    public Tuple source( Object key, Object value )
    {
      return null;
    }

    @Override
    public void sink( TupleEntry tupleEntry, OutputCollector outputCollector ) throws IOException
    {
      // do nothing, all output is discarded
    }
  }

  /**
   * Constructor TempHfs creates a new TempHfs instance backed by a {@link SequenceFile} scheme.
   *
   * @param name of type String
   */
  public TempHfs( String name )
  {
    super( new SequenceFile()
    {
    } );
    this.name = name;
  }

  /**
   * Constructor TempHfs creates a new TempHfs instance.
   *
   * @param name   of type String
   * @param isNull of type boolean, when true a noop placeholder scheme is used instead of a
   *               {@link SequenceFile}
   */
  public TempHfs( String name, boolean isNull )
  {
    super( isNull ? new NullScheme() : new SequenceFile()
    {
    } );
    this.name = name;
  }

  /**
   * Constructor TempHfs creates a new TempHfs instance whose scheme is instantiated from the given
   * class when {@link #outgoingScopeFor(Set)} is called.
   *
   * @param name        of type String
   * @param schemeClass of type Class, {@link SequenceFile} is used when null
   */
  public TempHfs( String name, Class schemeClass )
  {
    this.name = name;
    this.schemeClass = schemeClass == null ? SequenceFile.class : schemeClass;
  }

  /**
   * Method getSchemeClass returns the scheme class of this TempHfs object.
   *
   * @return the schemeClass (type Class), null if this instance was created with an explicit scheme
   */
  public Class getSchemeClass()
  {
    return schemeClass;
  }

  /**
   * Lazily assigns the temporary path of this tap. Does nothing if a path is already set.
   *
   * @param conf of type JobConf, used to look up the temporary root path
   */
  private void makeTemporaryFile( JobConf conf )
  {
    // init stringPath as path is transient
    if( stringPath != null )
      return;

    temporaryPath = makeTemporaryPathDir( name );
    stringPath = new Path( getTempPath( conf ), temporaryPath ).toString();
  }

  @Override
  public URI getURIScheme( JobConf jobConf ) throws IOException
  {
    // the path must exist before the superclass can resolve it
    makeTemporaryFile( jobConf );

    return super.getURIScheme( jobConf );
  }

  /**
   * Method outgoingScopeFor computes the outgoing scope from the first incoming scope and, when a
   * scheme class was given at construction, installs a new scheme instance declaring those fields.
   *
   * @param incoming of type Set<Scope>
   * @return Scope wrapping the outgoing fields
   * @throws CascadingException if the scheme class cannot be instantiated
   */
  @Override
  public Scope outgoingScopeFor( Set<Scope> incoming )
  {
    // if incoming is Each, both value and group fields are the same
    // if incoming is Every, group fields are only those grouped on
    // if incoming is Group, value fields are all the fields
    Scope scope = incoming.iterator().next();
    Fields outgoingFields = scope.isGroup() ? scope.getOutValuesFields() : scope.getOutGroupingFields();

    // instances created with an explicit scheme have no scheme class; keep the scheme they were
    // given instead of failing with a NullPointerException
    if( schemeClass != null )
      setScheme( createScheme( outgoingFields ) );

    return new Scope( outgoingFields );
  }

  /** Instantiates the configured scheme class through its single {@link Fields} argument constructor. */
  private Scheme createScheme( Fields fields )
  {
    try
    {
      return (Scheme) schemeClass.getConstructor( Fields.class ).newInstance( fields );
    }
    catch( Exception exception )
    {
      // chain the cause so the underlying reflection failure is not lost
      throw new CascadingException( "unable to create specified scheme: " + schemeClass.getName(), exception );
    }
  }

  @Override
  public void sourceInit( JobConf conf ) throws IOException
  {
    makeTemporaryFile( conf );
    super.sourceInit( conf );
  }

  @Override
  public void sinkInit( JobConf conf ) throws IOException
  {
    makeTemporaryFile( conf );
    super.sinkInit( conf );
  }

  /** Always returns false for temporary taps. */
  @Override
  public boolean isWriteDirect()
  {
    return false;
  }

  /**
   * Method deletePath deletes the temporary path, if one was ever created by this instance.
   *
   * @param conf of type JobConf
   * @return true if no temporary path was created, or if both the superclass delete and the
   *         recursive delete of the temporary directory report success
   * @throws IOException when the underlying file system fails
   */
  @Override
  public boolean deletePath( JobConf conf ) throws IOException
  {
    if( temporaryPath == null ) // never initialized
      return true;

    // the second delete is only attempted when the superclass delete succeeds
    return super.deletePath( conf ) && getFileSystem( conf ).delete( new Path( getTempPath( conf ), temporaryPath ), true );
  }

  @Override
  public String toString()
  {
    return getClass().getSimpleName() + "[\"" + getScheme() + "\"]" + "[" + name + "]";
  }

  @Override
  public boolean equals( Object object )
  {
    if( this == object )
      return true;

    if( object == null || getClass() != object.getClass() )
      return false;

    if( !super.equals( object ) )
      return false;

    TempHfs tempHfs = (TempHfs) object;

    return name != null ? name.equals( tempHfs.name ) : tempHfs.name == null;
  }

  @Override
  public int hashCode()
  {
    // don't use super hashCode() as path changes during runtime
    // only the name is hashed so that instances considered equal share a hash code; the identity
    // hash must not be mixed in as equals() is not identity based
    return name != null ? 31 * name.hashCode() : 0;
  }
}