/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tez.mapreduce.hadoop;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.VertexLocationHint.TaskLocationHint;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Unit tests for {@link MRHelpers}.
 *
 * <p>The split-generation tests run against an in-process
 * {@link MiniDFSCluster} that is started once in {@link #setup()} and shut
 * down in {@link #tearDown()}. The remaining tests only exercise
 * configuration handling and do not touch the file system.
 */
public class TestMRHelpers {

  /** Mini DFS cluster shared by every test in this class. */
  protected static MiniDFSCluster dfsCluster;

  /** Configuration used to start the cluster and to read split meta info. */
  private static final Configuration conf = new Configuration();

  /** File system handle obtained from {@link #dfsCluster}. */
  private static FileSystem remoteFs;

  /** Qualified path of the input file that splits are generated for. */
  private static Path testFilePath;

  /** Qualified directory receiving splits generated via the old (mapred) API. */
  private static Path oldSplitsDir;

  /** Qualified directory receiving splits generated via the new (mapreduce) API. */
  private static Path newSplitsDir;

  /** Local directory used as the mini cluster base dir and for scratch files. */
  private static final String TEST_ROOT_DIR = "target"
      + Path.SEPARATOR + TestMRHelpers.class.getName() + "-tmpDir";

  /**
   * Starts a two-datanode mini DFS cluster, writes a Yarn configuration out
   * as a local XML file and copies it to {@code /tmp/input/test.xml} on the
   * cluster so the split tests have a non-empty input. Also creates the two
   * directories into which the split files are written.
   *
   * @throws IOException if a file system operation on the cluster fails
   * @throws RuntimeException if the cluster cannot be started or the local
   *         input file cannot be written
   */
  @BeforeClass
  public static void setup() throws IOException {
    try {
      conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
      dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
          .format(true).racks(null).build();
      remoteFs = dfsCluster.getFileSystem();
    } catch (IOException io) {
      throw new RuntimeException("problem starting mini dfs cluster", io);
    }

    Configuration testConf = new YarnConfiguration(
        dfsCluster.getFileSystem().getConf());

    File testConfFile = new File(TEST_ROOT_DIR, "test.xml");
    try {
      // FileOutputStream creates the file itself. The stream is closed
      // explicitly (try/finally rather than try-with-resources, matching the
      // language level used by the rest of this file) so that the content is
      // fully on disk before it is copied to the cluster below.
      FileOutputStream out = new FileOutputStream(testConfFile);
      try {
        testConf.writeXml(out);
      } finally {
        out.close();
      }
      testConfFile.deleteOnExit();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }

    remoteFs.mkdirs(new Path("/tmp/input/"));
    remoteFs.mkdirs(new Path("/tmp/splitsDirNew/"));
    remoteFs.mkdirs(new Path("/tmp/splitsDirOld/"));

    testFilePath = remoteFs.makeQualified(new Path("/tmp/input/test.xml"));
    remoteFs.copyFromLocalFile(new Path(testConfFile.getAbsolutePath()),
        testFilePath);
    FileStatus fsStatus = remoteFs.getFileStatus(testFilePath);
    Assert.assertTrue(fsStatus.getLen() > 0);

    oldSplitsDir = remoteFs.makeQualified(new Path("/tmp/splitsDirOld/"));
    newSplitsDir = remoteFs.makeQualified(new Path("/tmp/splitsDirNew/"));
  }

  /**
   * Shuts down the mini DFS cluster started by {@link #setup()}, if any, so
   * it does not outlive this test class.
   */
  @AfterClass
  public static void tearDown() {
    if (dfsCluster != null) {
      dfsCluster.shutdown();
      dfsCluster = null;
    }
  }

  /**
   * Reads the split meta info stored in {@code inputSplitsDir}, builds one
   * {@link TaskLocationHint} per split from that split's locations (passing
   * {@code null} as the second constructor argument) and asserts that the
   * resulting list equals {@code actual}.
   *
   * @param inputSplitsDir directory containing the generated split meta info
   * @param actual location hints reported by the code under test
   * @throws Exception if the split meta info cannot be read
   */
  private void verifyLocationHints(Path inputSplitsDir,
      List<TaskLocationHint> actual) throws Exception {
    JobID jobId = new JobID("dummy", 1);
    TaskSplitMetaInfo[] splitsInfo =
        SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs,
            conf, inputSplitsDir);
    List<TaskLocationHint> expected =
        new ArrayList<TaskLocationHint>(splitsInfo.length);
    for (TaskSplitMetaInfo splitInfo : splitsInfo) {
      expected.add(new TaskLocationHint(
          new HashSet<String>(Arrays.asList(splitInfo.getLocations())),
          null));
    }
    Assert.assertEquals(expected, actual);
  }

  /**
   * Asserts that {@code info} points at the split and split-meta-info files
   * inside {@code splitsDir}, that the directory contains exactly those two
   * files and that both are non-empty, and finally that the location hints
   * in {@code info} match the meta info on disk.
   *
   * @param splitsDir directory the splits were generated into
   * @param info result returned by {@code MRHelpers.generateInputSplits}
   * @throws Exception if listing the directory or reading the meta info fails
   */
  private void verifyGeneratedSplits(Path splitsDir, InputSplitInfo info)
      throws Exception {
    Assert.assertEquals(
        new Path(splitsDir, MRHelpers.JOB_SPLIT_RESOURCE_NAME),
        info.getSplitsFile());
    Assert.assertEquals(
        new Path(splitsDir, MRHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME),
        info.getSplitsMetaInfoFile());

    RemoteIterator<LocatedFileStatus> files =
        remoteFs.listFiles(splitsDir, false);

    boolean foundSplitsFile = false;
    boolean foundMetaFile = false;
    int totalFilesFound = 0;

    while (files.hasNext()) {
      LocatedFileStatus status = files.next();
      String fName = status.getPath().getName();
      totalFilesFound++;
      if (fName.equals(MRHelpers.JOB_SPLIT_RESOURCE_NAME)) {
        foundSplitsFile = true;
      } else if (fName.equals(MRHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
        foundMetaFile = true;
      } else {
        Assert.fail("Found invalid file in splits dir, filename=" + fName);
      }
      Assert.assertTrue(status.getLen() > 0);
    }

    Assert.assertEquals(2, totalFilesFound);
    Assert.assertTrue(foundSplitsFile);
    Assert.assertTrue(foundMetaFile);

    verifyLocationHints(splitsDir, info.getTaskLocationHints());
  }

  /**
   * Generates input splits for the test input file with the new-API mapper
   * flag set and the mapreduce {@link TextInputFormat} as input format.
   *
   * @param inputSplitsDir directory to write the split files into
   * @return the split info returned by {@code MRHelpers.generateInputSplits}
   * @throws Exception if split generation fails
   */
  private InputSplitInfo generateNewSplits(Path inputSplitsDir)
      throws Exception {
    JobConf jobConf = new JobConf();
    jobConf.setUseNewMapper(true);
    jobConf.setClass(MRJobConfig.INPUT_FORMAT_CLASS_ATTR, TextInputFormat.class,
        InputFormat.class);
    jobConf.set(TextInputFormat.INPUT_DIR, testFilePath.toString());

    return MRHelpers.generateInputSplits(jobConf, inputSplitsDir);
  }

  /** Verifies the files and location hints produced for new-API splits. */
  @Test
  public void testNewSplitsGen() throws Exception {
    InputSplitInfo info = generateNewSplits(newSplitsDir);
    verifyGeneratedSplits(newSplitsDir, info);
  }

  /**
   * Generates input splits for the test input file with the new-API mapper
   * flag cleared and the mapred {@code TextInputFormat} as input format.
   *
   * @param inputSplitsDir directory to write the split files into
   * @return the split info returned by {@code MRHelpers.generateInputSplits}
   * @throws Exception if split generation fails
   */
  private InputSplitInfo generateOldSplits(Path inputSplitsDir)
      throws Exception {
    JobConf jobConf = new JobConf();
    jobConf.setUseNewMapper(false);
    jobConf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    jobConf.set(TextInputFormat.INPUT_DIR, testFilePath.toString());

    return MRHelpers.generateInputSplits(jobConf, inputSplitsDir);
  }

  /** Verifies the files and location hints produced for old-API splits. */
  @Test
  public void testOldSplitsGen() throws Exception {
    InputSplitInfo info = generateOldSplits(oldSplitsDir);
    verifyGeneratedSplits(oldSplitsDir, info);
  }

  /**
   * Checks that {@code MRHelpers.updateLocalResourcesForInputSplits} throws a
   * {@link RuntimeException} when the map already holds a {@code job.split}
   * entry, and that on an empty map it registers exactly the split and
   * split-meta-info resources.
   */
  @Test
  public void testInputSplitLocalResourceCreation() throws Exception {
    InputSplitInfo inputSplitInfo = generateOldSplits(oldSplitsDir);
    Map<String, LocalResource> localResources =
        new HashMap<String, LocalResource>();

    // A pre-existing "job.split" key must be rejected.
    localResources.put("job.split", null);
    try {
      MRHelpers.updateLocalResourcesForInputSplits(remoteFs,
          inputSplitInfo, localResources);
      fail("Expected failure for job.split override in local resources map");
    } catch (RuntimeException expected) {
      // Expected
    }

    localResources.clear();
    MRHelpers.updateLocalResourcesForInputSplits(remoteFs,
        inputSplitInfo, localResources);

    Assert.assertEquals(2, localResources.size());
    Assert.assertTrue(localResources.containsKey(
        MRHelpers.JOB_SPLIT_RESOURCE_NAME));
    Assert.assertTrue(localResources.containsKey(
        MRHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));
  }

  /**
   * Builds a configuration (without default resources) that sets distinct
   * admin opts, user opts and log level for map and for reduce tasks.
   *
   * @return the populated configuration
   */
  private Configuration createConfForJavaOptsTest() {
    Configuration optsConf = new Configuration(false);
    optsConf.set(MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS, "fooMapAdminOpts");
    optsConf.set(MRJobConfig.MAP_JAVA_OPTS, "fooMapJavaOpts");
    optsConf.set(MRJobConfig.MAP_LOG_LEVEL, "FATAL");
    optsConf.set(MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS, "fooReduceAdminOpts");
    optsConf.set(MRJobConfig.REDUCE_JAVA_OPTS, "fooReduceJavaOpts");
    optsConf.set(MRJobConfig.REDUCE_LOG_LEVEL, "TRACE");
    return optsConf;
  }

  /**
   * Map java opts must contain only the map settings, with the admin opts
   * ahead of the user opts, and the map log level on the Tez root logger.
   */
  @Test
  public void testMapJavaOptions() {
    Configuration optsConf = createConfForJavaOptsTest();
    String opts = MRHelpers.getMapJavaOpts(optsConf);

    Assert.assertTrue(opts.contains("fooMapAdminOpts"));
    Assert.assertTrue(opts.contains(" fooMapJavaOpts "));
    Assert.assertFalse(opts.contains("fooReduceAdminOpts "));
    Assert.assertFalse(opts.contains(" fooReduceJavaOpts "));
    Assert.assertTrue(opts.indexOf("fooMapAdminOpts")
        < opts.indexOf("fooMapJavaOpts"));
    Assert.assertTrue(opts.contains(" -D"
        + TezConfiguration.TEZ_ROOT_LOGGER_NAME + "=FATAL"));
    Assert.assertFalse(opts.contains(" -D"
        + TezConfiguration.TEZ_ROOT_LOGGER_NAME + "=TRACE"));
  }

  /**
   * Reduce java opts must contain only the reduce settings, with the admin
   * opts ahead of the user opts, and the reduce log level on the Tez root
   * logger.
   */
  @Test
  public void testReduceJavaOptions() {
    Configuration optsConf = createConfForJavaOptsTest();
    String opts = MRHelpers.getReduceJavaOpts(optsConf);

    Assert.assertFalse(opts.contains("fooMapAdminOpts"));
    Assert.assertFalse(opts.contains(" fooMapJavaOpts "));
    Assert.assertTrue(opts.contains("fooReduceAdminOpts"));
    Assert.assertTrue(opts.contains(" fooReduceJavaOpts "));
    Assert.assertTrue(opts.indexOf("fooReduceAdminOpts")
        < opts.indexOf("fooReduceJavaOpts"));
    Assert.assertFalse(opts.contains(" -D"
        + TezConfiguration.TEZ_ROOT_LOGGER_NAME + "=FATAL"));
    Assert.assertTrue(opts.contains(" -D"
        + TezConfiguration.TEZ_ROOT_LOGGER_NAME + "=TRACE"));
  }

  /**
   * Map and reduce container resources must match the MRJobConfig defaults
   * when nothing is configured and the explicit values once they are set.
   */
  @Test
  public void testContainerResourceConstruction() {
    JobConf jobConf = new JobConf(new Configuration());

    Resource mapResource = MRHelpers.getMapResource(jobConf);
    Resource reduceResource = MRHelpers.getReduceResource(jobConf);

    Assert.assertEquals(MRJobConfig.DEFAULT_MAP_CPU_VCORES,
        mapResource.getVirtualCores());
    Assert.assertEquals(MRJobConfig.DEFAULT_MAP_MEMORY_MB,
        mapResource.getMemory());
    Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_CPU_VCORES,
        reduceResource.getVirtualCores());
    Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_MEMORY_MB,
        reduceResource.getMemory());

    jobConf.setInt(MRJobConfig.MAP_CPU_VCORES, 2);
    jobConf.setInt(MRJobConfig.MAP_MEMORY_MB, 123);
    jobConf.setInt(MRJobConfig.REDUCE_CPU_VCORES, 20);
    jobConf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 1234);

    mapResource = MRHelpers.getMapResource(jobConf);
    reduceResource = MRHelpers.getReduceResource(jobConf);

    Assert.assertEquals(2, mapResource.getVirtualCores());
    Assert.assertEquals(123, mapResource.getMemory());
    Assert.assertEquals(20, reduceResource.getVirtualCores());
    Assert.assertEquals(1234, reduceResource.getMemory());
  }

  /**
   * Builds a job configuration with different {@code foo}/{@code bar}
   * environment values and log levels for map and reduce tasks, plus an
   * admin user environment entry for {@code LD_LIBRARY_PATH}.
   *
   * @return the populated configuration
   */
  private Configuration setupConfigForMREnvTest() {
    JobConf jobConf = new JobConf(new Configuration());
    jobConf.set(MRJobConfig.MAP_ENV, "foo=map1,bar=map2");
    jobConf.set(MRJobConfig.REDUCE_ENV, "foo=red1,bar=red2");
    jobConf.set(MRJobConfig.MAP_LOG_LEVEL, "TRACE");
    jobConf.set(MRJobConfig.REDUCE_LOG_LEVEL, "FATAL");
    jobConf.set(MRJobConfig.MAPRED_ADMIN_USER_ENV,
        "LD_LIBRARY_PATH=$TEZ_ADMIN_ENV_TEST/lib/native");
    return jobConf;
  }

  /**
   * Assertions shared by the map and reduce environment tests: the expected
   * keys are present and {@code LD_LIBRARY_PATH} has the expected value.
   *
   * @param env environment map populated by the code under test
   */
  private void testCommonEnvSettingsForMRTasks(Map<String, String> env) {
    Assert.assertTrue(env.containsKey("foo"));
    Assert.assertTrue(env.containsKey("bar"));
    Assert.assertTrue(env.containsKey(Environment.LD_LIBRARY_PATH.name()));
    Assert.assertTrue(env.containsKey(Environment.SHELL.name()));
    Assert.assertTrue(env.containsKey("HADOOP_ROOT_LOGGER"));
    Assert.assertEquals("$PWD:$TEZ_ADMIN_ENV_TEST/lib/native",
        env.get(Environment.LD_LIBRARY_PATH.name()));

    // TEZ-273 will reinstate this or similar.
    // for (String val : YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH) {
    //   Assert.assertTrue(env.get(Environment.CLASSPATH.name()).contains(val));
    // }
    // Assert.assertTrue(0 ==
    //     env.get(Environment.CLASSPATH.name()).indexOf(Environment.PWD.$()));
  }

  /** The map task environment must carry the map-specific values. */
  @Test
  public void testMREnvSetupForMap() {
    Configuration envConf = setupConfigForMREnvTest();
    Map<String, String> env = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(envConf, env, true);
    testCommonEnvSettingsForMRTasks(env);
    Assert.assertEquals("map1", env.get("foo"));
    Assert.assertEquals("map2", env.get("bar"));
  }

  /** The reduce task environment must carry the reduce-specific values. */
  @Test
  public void testMREnvSetupForReduce() {
    Configuration envConf = setupConfigForMREnvTest();
    Map<String, String> env = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRTasks(envConf, env, false);
    testCommonEnvSettingsForMRTasks(env);
    Assert.assertEquals("red1", env.get("foo"));
    Assert.assertEquals("red2", env.get("bar"));
  }

  /** Both overloads of getBaseMRConfiguration must return a non-null result. */
  @Test
  public void testGetBaseMRConf() {
    Configuration baseConf = MRHelpers.getBaseMRConfiguration();
    Assert.assertNotNull(baseConf);
    baseConf = MRHelpers.getBaseMRConfiguration(new YarnConfiguration());
    Assert.assertNotNull(baseConf);
  }

  /**
   * AM java opts must be the admin opts followed by the user opts, separated
   * by a single space and without the surrounding whitespace of the inputs.
   */
  @Test
  public void testMRAMJavaOpts() {
    Configuration amConf = new Configuration();
    amConf.set(MRJobConfig.MR_AM_ADMIN_COMMAND_OPTS, " -Dadminfoobar ");
    amConf.set(MRJobConfig.MR_AM_COMMAND_OPTS, " -Duserfoo ");
    String opts = MRHelpers.getMRAMJavaOpts(amConf);
    Assert.assertEquals("-Dadminfoobar -Duserfoo", opts);
  }

  /**
   * AM environment must contain admin-only and user-only entries unchanged;
   * a key set in both must hold the admin value and the user value joined
   * by the platform path separator.
   */
  @Test
  public void testMRAMEnvironmentSetup() {
    Configuration amConf = new Configuration();
    amConf.set(MRJobConfig.MR_AM_ADMIN_USER_ENV, "foo=bar,admin1=foo1");
    amConf.set(MRJobConfig.MR_AM_ENV, "foo=bar2,user=foo2");
    Map<String, String> env = new HashMap<String, String>();
    MRHelpers.updateEnvironmentForMRAM(amConf, env);
    Assert.assertEquals("foo1", env.get("admin1"));
    Assert.assertEquals("foo2", env.get("user"));
    Assert.assertEquals(("bar" + File.pathSeparator + "bar2"), env.get("foo"));
  }
}