/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; import cascading.flow.Flow; import cascading.flow.MultiMapReducePlanner; import cascading.scheme.SequenceFile; import cascading.scheme.TextLine; import cascading.tap.Hfs; import cascading.tuple.Fields; import cascading.tuple.TupleEntryIterator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MiniMRCluster; /** * */ public class ClusterTestCase extends CascadingTestCase { public static final String CLUSTER_TESTING_PROPERTY = "test.cluster.enabled"; transient private static MiniDFSCluster dfs; transient private static FileSystem fileSys; transient private static MiniMRCluster mr; transient private static JobConf jobConf; transient private static Map<Object, Object> properties = new HashMap<Object, Object>(); transient private boolean enableCluster; int numMapTasks = 4; int numReduceTasks = 1; private String logger; public ClusterTestCase( String string, boolean enableCluster ) { super( string ); if( !enableCluster ) this.enableCluster = false; else this.enableCluster = Boolean.parseBoolean( System.getProperty( CLUSTER_TESTING_PROPERTY, Boolean.toString( enableCluster ) ) ); this.logger = System.getProperty( "log4j.logger" ); } public ClusterTestCase( String string, boolean enableCluster, int numMapTasks, int numReduceTasks ) { this( string, enableCluster ); this.numMapTasks = numMapTasks; this.numReduceTasks = numReduceTasks; } public ClusterTestCase( String string ) { super( string ); } public ClusterTestCase() { } public boolean isEnableCluster() { return enableCluster; } public void setUp() throws IOException { if( jobConf != null ) return; if( !enableCluster ) { jobConf = new JobConf(); } else { System.setProperty( "test.build.data", "build" ); new File( "build/test/log" ).mkdirs(); System.setProperty( "hadoop.log.dir", "build/test/log" ); Configuration conf = new Configuration(); dfs = new MiniDFSCluster( conf, 4, true, null ); fileSys = dfs.getFileSystem(); mr = new MiniMRCluster( 4, fileSys.getUri().toString(), 1 ); jobConf = mr.createJobConf(); jobConf.set( "mapred.child.java.opts", "-Xmx512m" ); jobConf.setMapSpeculativeExecution( false ); jobConf.setReduceSpeculativeExecution( false ); } jobConf.setNumMapTasks( numMapTasks ); jobConf.setNumReduceTasks( numReduceTasks ); if( logger != null ) properties.put( "log4j.logger", logger ); Flow.setJobPollingInterval( properties, 500 ); // should speed up tests MultiMapReducePlanner.setJobConf( properties, jobConf ); } public Map<Object, Object> getProperties() { return new HashMap<Object, Object>( properties ); } public JobConf getJobConf() { return new JobConf( jobConf ); } public FileSystem getFileSystem() throws IOException { if( fileSys != null ) return fileSys; return FileSystem.get( jobConf ); } public String makeQualifiedPath( String path ) throws IOException { return new Path( path ).makeQualified( getFileSystem() ).toString(); } protected void copyFromLocal( String inputFile ) throws IOException { if( !enableCluster ) return; Path path = new Path( inputFile ); if( !fileSys.exists( path ) ) FileUtil.copy( new File( inputFile ), fileSys, path, false, jobConf ); } public void tearDown() throws IOException { // do nothing, let the jvm shut things down } public void validateLengthText( String path, int length ) throws IOException { validateLengthText( path, length, null ); } public void validateLengthText( String path, int length, String regex ) throws IOException { Hfs tap = new Hfs( new TextLine( new Fields( "line" ) ), path ); assertTrue( "path does not exist: " + path, tap.pathExists( getJobConf() ) ); TupleEntryIterator iterator = tap.openForRead( getJobConf() ); try { validateLength( iterator, length, -1, regex == null ? null : Pattern.compile( regex ) ); } finally { if( iterator != null ) iterator.close(); } } public void validateLengthSequence( String path, Fields fields, int length ) throws IOException { validateLengthSequence( path, fields, length, null ); } public void validateLengthSequence( String path, Fields fields, int length, String regex ) throws IOException { Hfs tap = new Hfs( new SequenceFile( fields ), path ); assertTrue( "path does not exist: " + path, tap.pathExists( getJobConf() ) ); TupleEntryIterator iterator = tap.openForRead( getJobConf() ); try { validateLength( iterator, length, -1, regex == null ? null : Pattern.compile( regex ) ); } finally { if( iterator != null ) iterator.close(); } } }