/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import cascading.flow.Flow;
import cascading.flow.MultiMapReducePlanner;
import cascading.scheme.SequenceFile;
import cascading.scheme.TextLine;
import cascading.tap.Hfs;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntryIterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
/**
 * Base class for Cascading tests that can optionally run against an in-process Hadoop
 * mini cluster (a {@link MiniDFSCluster} plus a {@link MiniMRCluster}).
 * <p/>
 * Cluster mode is only honored when both the constructor flag and the
 * {@code test.cluster.enabled} system property allow it; otherwise tests run with a plain
 * local {@link JobConf}. The cluster state ({@code dfs}, {@code mr}, {@code jobConf},
 * {@code properties}) is held in static fields so the cluster is started at most once per
 * JVM and shared by every test instance — the first test to call {@link #setUp()} fixes
 * the configuration for the rest of the run.
 */
public class ClusterTestCase extends CascadingTestCase
  {
  /** System property that may veto cluster testing at runtime ("test.cluster.enabled"). */
  public static final String CLUSTER_TESTING_PROPERTY = "test.cluster.enabled";

  // static so the mini cluster is started at most once per JVM and shared across tests
  transient private static MiniDFSCluster dfs;
  transient private static FileSystem fileSys;
  transient private static MiniMRCluster mr;
  transient private static JobConf jobConf;
  transient private static Map<Object, Object> properties = new HashMap<Object, Object>();

  transient private boolean enableCluster;

  int numMapTasks = 4;
  int numReduceTasks = 1;

  // value of the "log4j.logger" system property, captured at construction time and
  // forwarded into the flow properties by setUp()
  private String logger;

  /**
   * Creates a test case, requesting cluster mode when {@code enableCluster} is true.
   *
   * @param string        test name, passed to the JUnit superclass
   * @param enableCluster true to request the mini cluster; may still be vetoed by the
   *                      {@link #CLUSTER_TESTING_PROPERTY} system property
   */
  public ClusterTestCase( String string, boolean enableCluster )
    {
    super( string );

    // cluster mode requires both the constructor flag and the system property (which
    // defaults to the requested value when unset) to agree
    this.enableCluster = enableCluster
      && Boolean.parseBoolean( System.getProperty( CLUSTER_TESTING_PROPERTY, Boolean.toString( enableCluster ) ) );

    this.logger = System.getProperty( "log4j.logger" );
    }

  /**
   * Creates a test case, additionally overriding the default map/reduce task counts
   * applied to the shared {@link JobConf} in {@link #setUp()}.
   *
   * @param string         test name, passed to the JUnit superclass
   * @param enableCluster  true to request the mini cluster
   * @param numMapTasks    number of map tasks to configure (default 4)
   * @param numReduceTasks number of reduce tasks to configure (default 1)
   */
  public ClusterTestCase( String string, boolean enableCluster, int numMapTasks, int numReduceTasks )
    {
    this( string, enableCluster );

    this.numMapTasks = numMapTasks;
    this.numReduceTasks = numReduceTasks;
    }

  /** Creates a test case with cluster mode disabled. */
  public ClusterTestCase( String string )
    {
    super( string );
    }

  /** Creates a test case with cluster mode disabled. */
  public ClusterTestCase()
    {
    }

  /** Returns true when this instance resolved to cluster mode at construction time. */
  public boolean isEnableCluster()
    {
    return enableCluster;
    }

  /**
   * Lazily initializes the shared {@link JobConf}, starting the mini DFS and MR clusters
   * when cluster mode is enabled. Once the conf exists, subsequent calls (from any
   * instance) are no-ops, so this instance's task counts and logger setting only take
   * effect if it is the first to run.
   *
   * @throws IOException if the mini cluster fails to start
   */
  public void setUp() throws IOException
    {
    if( jobConf != null )
      return; // already initialized by an earlier test in this JVM

    if( !enableCluster )
      {
      jobConf = new JobConf();
      }
    else
      {
      // redirect Hadoop's working/log directories into the build tree
      System.setProperty( "test.build.data", "build" );
      new File( "build/test/log" ).mkdirs();
      System.setProperty( "hadoop.log.dir", "build/test/log" );

      Configuration conf = new Configuration();

      dfs = new MiniDFSCluster( conf, 4, true, null );
      fileSys = dfs.getFileSystem();
      mr = new MiniMRCluster( 4, fileSys.getUri().toString(), 1 );

      jobConf = mr.createJobConf();

      jobConf.set( "mapred.child.java.opts", "-Xmx512m" );
      // speculative execution only adds noise and runtime in a single-JVM test cluster
      jobConf.setMapSpeculativeExecution( false );
      jobConf.setReduceSpeculativeExecution( false );
      }

    jobConf.setNumMapTasks( numMapTasks );
    jobConf.setNumReduceTasks( numReduceTasks );

    if( logger != null )
      properties.put( "log4j.logger", logger );

    Flow.setJobPollingInterval( properties, 500 ); // should speed up tests

    MultiMapReducePlanner.setJobConf( properties, jobConf );
    }

  /** Returns a defensive copy of the shared flow properties. */
  public Map<Object, Object> getProperties()
    {
    return new HashMap<Object, Object>( properties );
    }

  /** Returns a defensive copy of the shared {@link JobConf}. */
  public JobConf getJobConf()
    {
    return new JobConf( jobConf );
    }

  /**
   * Returns the mini cluster's {@link FileSystem} when running in cluster mode, otherwise
   * the filesystem resolved from the shared {@link JobConf}.
   */
  public FileSystem getFileSystem() throws IOException
    {
    if( fileSys != null )
      return fileSys;

    return FileSystem.get( jobConf );
    }

  /** Returns the given path fully qualified against the current {@link FileSystem}. */
  public String makeQualifiedPath( String path ) throws IOException
    {
    return new Path( path ).makeQualified( getFileSystem() ).toString();
    }

  /**
   * Copies the given local file into the cluster DFS at the same path, if cluster mode is
   * enabled and the file is not already present. A no-op in local mode, where the local
   * file is readable as-is.
   */
  protected void copyFromLocal( String inputFile ) throws IOException
    {
    if( !enableCluster )
      return;

    Path path = new Path( inputFile );

    if( !fileSys.exists( path ) )
      FileUtil.copy( new File( inputFile ), fileSys, path, false, jobConf );
    }

  public void tearDown() throws IOException
    {
    // do nothing, let the jvm shut things down
    }

  /** Asserts the text file at {@code path} contains exactly {@code length} lines. */
  public void validateLengthText( String path, int length ) throws IOException
    {
    validateLengthText( path, length, null );
    }

  /**
   * Asserts the text file at {@code path} contains exactly {@code length} lines,
   * optionally matching each against {@code regex}.
   */
  public void validateLengthText( String path, int length, String regex ) throws IOException
    {
    validateTapLength( new Hfs( new TextLine( new Fields( "line" ) ), path ), path, length, regex );
    }

  /** Asserts the sequence file at {@code path} contains exactly {@code length} tuples. */
  public void validateLengthSequence( String path, Fields fields, int length ) throws IOException
    {
    validateLengthSequence( path, fields, length, null );
    }

  /**
   * Asserts the sequence file at {@code path} contains exactly {@code length} tuples,
   * optionally matching each against {@code regex}.
   */
  public void validateLengthSequence( String path, Fields fields, int length, String regex ) throws IOException
    {
    validateTapLength( new Hfs( new SequenceFile( fields ), path ), path, length, regex );
    }

  /**
   * Shared implementation for the validateLength* methods: asserts the tap's path exists,
   * opens it, and delegates to {@link CascadingTestCase} validation, always closing the
   * iterator — even on assertion failure.
   */
  private void validateTapLength( Hfs tap, String path, int length, String regex ) throws IOException
    {
    assertTrue( "path does not exist: " + path, tap.pathExists( getJobConf() ) );

    TupleEntryIterator iterator = tap.openForRead( getJobConf() );

    try
      {
      validateLength( iterator, length, -1, regex == null ? null : Pattern.compile( regex ) );
      }
    finally
      {
      iterator.close();
      }
    }
  }