package com.scaleunlimited.cascading.hadoop.test;

import java.io.File;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRClientCluster;
import org.apache.hadoop.mapred.MiniMRClientClusterFactory;

import com.scaleunlimited.cascading.hadoop.HadoopPlatform;

/**
 * A {@link HadoopPlatform} backed by an in-process {@link MiniDFSCluster} and
 * {@link MiniMRClientCluster}, intended for tests.
 *
 * <p>Both clusters are started by the constructor. Callers should invoke
 * {@link #shutdown()} when done so that the clusters are stopped and the
 * directories they leave under "target" are removed.
 */
@SuppressWarnings({ "rawtypes", "serial" })
public class MiniClusterPlatform extends HadoopPlatform {

    private static final String MR_IDENTIFIER = "com.scaleunlimited.cascading.MiniClusterPlatform";

    private static final String DEFAULT_LOGDIR_NAME = "minicluster-logs";
    // private static final String DEFAULT_TEMPDIR_NAME = "minicluster-tmp";

    private MiniMRClientCluster _mr2 = null;
    MiniDFSCluster _dfs = null;

    /**
     * Creates a platform with a single container and the default log directory.
     *
     * @param applicationJarClass class passed through to the {@link HadoopPlatform} constructor
     * @throws IOException if the log/data directories cannot be prepared or a cluster fails to start
     */
    public MiniClusterPlatform(Class applicationJarClass) throws IOException {
        this(applicationJarClass, 1);
    }

    /**
     * Creates a platform with the default log directory.
     *
     * @param applicationJarClass class passed through to the {@link HadoopPlatform} constructor
     * @param numContainers value handed to {@link MiniMRClientClusterFactory} when creating the MR cluster
     * @throws IOException if the log/data directories cannot be prepared or a cluster fails to start
     */
    public MiniClusterPlatform(Class applicationJarClass, int numContainers) throws IOException {
        this(applicationJarClass, numContainers, null);
    }

    /**
     * Creates a platform and starts the DFS and MR mini clusters.
     *
     * @param applicationJarClass class passed through to the {@link HadoopPlatform} constructor
     * @param numContainers value handed to {@link MiniMRClientClusterFactory} when creating the MR cluster
     * @param logDirName path of the log directory, or {@code null} to use
     *        "minicluster-logs" under the directory named by {@code java.io.tmpdir}
     * @throws IOException if the log/data directories cannot be prepared or a cluster fails to start
     */
    public MiniClusterPlatform(Class applicationJarClass, int numContainers, String logDirName) throws IOException {
        super(applicationJarClass);

        setupMiniClusterPlatform(numContainers, logDirName);
    }

    /**
     * Prepares the log and DFS data directories, starts the DFS and MR mini
     * clusters, and replaces {@code _conf} with a {@link JobConf} built from
     * the configuration reported by the running MR cluster.
     *
     * @param numContainers value handed to {@link MiniMRClientClusterFactory}
     * @param logDirName path of the log directory, or {@code null} for the default
     * @throws IOException if a directory cannot be deleted or a cluster fails to start
     */
    private void setupMiniClusterPlatform(int numContainers, String logDirName) throws IOException {
        String sysTmpDir = System.getProperty("java.io.tmpdir");
        if (logDirName == null) {
            File defaultLogDir = new File(sysTmpDir, DEFAULT_LOGDIR_NAME);
            logDirName = defaultLogDir.getAbsolutePath();
        }

        // mkdirs() creates the full path (including parents); deleteDirectory()
        // then removes the leaf and anything left in it from an earlier run.
        // NOTE(review): this leaves the log directory itself absent when
        // setLogDir() is called - presumably it is re-created later; confirm.
        File logDir = new File(logDirName);
        logDir.mkdirs();
        FileUtils.deleteDirectory(logDir);
        setLogDir(logDir);

        // Start from a clean DFS data directory.
        File dfsDir = new File("build/test/data");
        FileUtils.deleteDirectory(dfsDir);

        // Get rid of warnings that we get from bogus security settings.
        System.setProperty("java.security.krb5.realm", "");
        System.setProperty("java.security.krb5.kdc", "");

        System.clearProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA);

        Configuration conf = new HdfsConfiguration();

        // TODO seems like we'd want to set up other config values, e.g. some candidates are:
        //
        // yarn.nodemanager.local-dirs - ignored?
        // yarn.nodemanager.log-dirs - ignored?
        // dfs.data.dir - ignored, use MiniDFSCluster.HDFS_MINIDFS_BASEDIR
        // mapreduce.map.memory.mb - ???
        // mapreduce.reduce.memory.mb - ???
        // mapreduce.map.java.opts - ???
        // mapreduce.reduce.java.opts - ???
        // yarn.nodemanager.resource.memory-mb - ???
        // yarn.scheduler.minimum-allocation-mb - ???
        // yarn.scheduler.maximum-allocation-mb - ???
        // yarn.app.mapreduce.am.resource.mb - ???
        // yarn.app.mapreduce.am.command-opts - ???
        // yarn.nodemanager.resource.cpu-vcores - ???
        // mapreduce.map.cpu.vcores - ??? (defaults to 1)
        // mapreduce.reduce.cpu.vcores - ??? (defaults to 1)

        conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, dfsDir.getAbsolutePath());
        _dfs = new MiniDFSCluster.Builder(conf).build();

        // From here on the DFS cluster is running; if anything below fails we
        // must shut it down ourselves, because the caller never gets an
        // instance on which to call shutdown().
        boolean started = false;
        try {
            _dfs.waitClusterUp();

            _mr2 = MiniMRClientClusterFactory.create(this.getClass(), MR_IDENTIFIER, numContainers, conf);
            _mr2.start();

            // Update _conf to match what we get back from the minicluster
            Configuration newConf = _mr2.getConfig();
            _conf = new JobConf(newConf);

            started = true;
        } finally {
            if (!started) {
                _dfs.shutdown();
            }
        }
    }

    /**
     * Stops the MR and DFS mini clusters and removes the directories they
     * leave behind under "target" in the current working directory.
     *
     * <p>The DFS cluster is shut down even if stopping the MR cluster throws.
     *
     * @throws InterruptedException declared for backward compatibility with existing callers
     * @throws IOException if stopping the MR cluster or deleting a directory fails
     */
    public void shutdown() throws InterruptedException, IOException {
        try {
            if (_mr2 != null) {
                _mr2.stop();
            }
        } finally {
            if (_dfs != null) {
                _dfs.shutdown();
                // Note that we don't wait for the cluster to be down, since
                // isClusterUp() always returns true.
            }
        }

        // Sadly, the Hadoop 2.2 MiniDFSCluster always put stuff into a "target"
        // directory that's relative to the CWD. So we'll need to get rid of "target/MR_IDENTIFIER"
        // and also target/test-dir (hard-coded as well)
        File targetDir = new File("target");
        File idDir = new File(targetDir, MR_IDENTIFIER);
        FileUtils.deleteDirectory(idDir);

        File testDir = new File(targetDir, "test-dir");
        FileUtils.deleteDirectory(testDir);

        // And now, if "target" is empty, we should get rid of it too, since we created it.
        // File.list() reports both files and subdirectories (so a "target" that
        // still holds other build output is left alone) and returns null when
        // "target" does not exist or is not a directory.
        String[] leftovers = targetDir.list();
        if ((leftovers != null) && (leftovers.length == 0)) {
            FileUtils.deleteDirectory(targetDir);
        }
    }
}