/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.internal.app.runtime.batch.distributed;
import co.cask.cdap.internal.app.runtime.distributed.LocalizeResource;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
/**
* A helper class for dealing with MapReduce framework localization and classpath settings based on different
* hadoop configurations
*/
public final class MapReduceContainerHelper {
private static final Logger LOG = LoggerFactory.getLogger(MapReduceContainerHelper.class);
/**
* Returns a list of path to be used for the MapReduce framework classpath.
*
* @param hConf the configuration for the job.
* @param result a list for appending MR framework classpath
* @return the same {@code result} list from the argument
*/
public static List<String> getMapReduceClassPath(Configuration hConf, List<String> result) {
String framework = hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
// For classpath config get from the hConf, we splits it with both "," and ":" because one can set
// the conf with something like "path1,path2:path3" and
// it should become "path1:path2:path3" in the target JVM process
Splitter splitter = Splitter.on(Pattern.compile(",|" + File.pathSeparatorChar)).trimResults().omitEmptyStrings();
// If MR framework is non specified, use yarn.application.classpath and mapreduce.application.classpath
// Otherwise, only use the mapreduce.application.classpath
if (framework == null) {
String yarnClassPath = hConf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
Joiner.on(",").join(YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH));
Iterables.addAll(result, splitter.split(yarnClassPath));
}
// Add MR application classpath
Iterables.addAll(result, splitter.split(hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH,
MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH)));
return result;
}
/**
* Gets the MapReduce framework URI based on the {@code mapreduce.application.framework.path} setting.
*
* @param hConf the job configuration
* @return the framework URI or {@code null} if not present or if the URI in the config is invalid.
*/
@Nullable
public static URI getFrameworkURI(Configuration hConf) {
String framework = hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
if (framework == null) {
return null;
}
try {
// Parse the path. It can contains '#' to represent the localized file name
URI uri = new URI(framework);
String linkName = uri.getFragment();
// The following resolution logic is copied from JobSubmitter in MR.
FileSystem fs = FileSystem.get(hConf);
Path frameworkPath = fs.makeQualified(new Path(uri.getScheme(), uri.getAuthority(), uri.getPath()));
FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), hConf);
frameworkPath = fc.resolvePath(frameworkPath);
uri = frameworkPath.toUri();
// If doesn't have localized name (in the URI fragment), then use the last part of the URI path as name
if (linkName == null) {
linkName = uri.getPath();
int idx = linkName.lastIndexOf('/');
if (idx >= 0) {
linkName = linkName.substring(idx + 1);
}
}
return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, linkName);
} catch (URISyntaxException e) {
LOG.warn("Failed to parse {} as a URI. MapReduce framework path is not used. Check the setting for {}.",
framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
} catch (IOException e) {
LOG.warn("Failed to resolve {} URI. MapReduce framework path is not used. Check the setting for {}.",
framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
}
return null;
}
/**
* Sets the resources that need to be localized to program runner twill container that used as MapReduce client.
*
* @param hConf The hadoop configuration
* @param localizeResources the map to be updated with the localized resources.
* @return a list of extra classpaths need to be set for the program runner container.
*/
public static List<String> localizeFramework(Configuration hConf, Map<String, LocalizeResource> localizeResources) {
try {
URI frameworkURI = getFrameworkURI(hConf);
// If MR Application framework is used, need to localize the framework file to Twill container
if (frameworkURI != null) {
URI uri = new URI(frameworkURI.getScheme(), frameworkURI.getAuthority(), frameworkURI.getPath(), null, null);
localizeResources.put(frameworkURI.getFragment(), new LocalizeResource(uri, true));
}
return ImmutableList.copyOf(getMapReduceClassPath(hConf, new ArrayList<String>()));
} catch (URISyntaxException e) {
// Shouldn't happen since the frameworkURI is already parsed.
throw Throwables.propagate(e);
}
}
private MapReduceContainerHelper() {
// no-op
}
}