/**
*
*/
package org.archive.hadoop.util;
import java.io.IOException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
/**
* @author kenji
*
*/
public class HadoopUtil {
// code stolen from org.apche.hadoop.hbase.mapreduce.TableMapReduceUtil.
// too bad this is not part of Hadoop rather than Hbase. I just made it free of
// generics-related warnings.
static Log LOG = LogFactory.getLog(HadoopUtil.class);
/**
* Add the jars containing the given classes to the job's configuration
* such that JobClient will ship them to the cluster and add them to
* the DistributedCache.
*/
public static void addDependencyJars(Configuration conf,
Class<?>... classes) throws IOException {
FileSystem localFs = FileSystem.getLocal(conf);
Set<String> jars = new HashSet<String>();
// Add jars that are already in the tmpjars variable
jars.addAll( conf.getStringCollection("tmpjars") );
// Add jars containing the specified classes
for (Class<?> clazz : classes) {
if (clazz == null) continue;
String pathStr = findContainingJar(clazz);
if (pathStr == null) {
LOG.warn("Could not find jar for class " + clazz +
" in order to ship it to the cluster.");
continue;
}
Path path = new Path(pathStr);
if (!localFs.exists(path)) {
LOG.warn("Could not validate jar file " + path + " for class "
+ clazz);
continue;
}
jars.add(path.makeQualified(localFs).toString());
}
if (jars.isEmpty()) return;
conf.set("tmpjars",
StringUtils.arrayToString(jars.toArray(new String[0])));
}
/**
* Find a jar that contains a class of the same name, if any.
* It will return a jar file, even if that is not the first thing
* on the class path that has a class with the same name.
*
* This is shamelessly copied from JobConf
*
* @param my_class the class to find.
* @return a jar file that contains the class, or null.
* @throws IOException
*/
private static String findContainingJar(Class<?> my_class) {
ClassLoader loader = my_class.getClassLoader();
String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
try {
for(Enumeration<URL> itr = loader.getResources(class_file);
itr.hasMoreElements();) {
URL url = itr.nextElement();
if ("jar".equals(url.getProtocol())) {
String toReturn = url.getPath();
if (toReturn.startsWith("file:")) {
toReturn = toReturn.substring("file:".length());
}
// URLDecoder is a misnamed class, since it actually decodes
// x-www-form-urlencoded MIME type rather than actual
// URL encoding (which the file path has). Therefore it would
// decode +s to ' 's which is incorrect (spaces are actually
// either unencoded or encoded as "%20"). Replace +s first, so
// that they are kept sacred during the decoding process.
toReturn = toReturn.replaceAll("\\+", "%2B");
toReturn = URLDecoder.decode(toReturn, "UTF-8");
return toReturn.replaceAll("!.*$", "");
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
return null;
}
}