package hip.ch8;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

/**
 * Static helpers that register local JAR files in a job's {@code tmpjars}
 * configuration property.
 */
public class JobHelper {

  public static final Log log = LogFactory.getLog(JobHelper.class.getName());

  /** Configuration key holding the comma-separated list of JARs for the job. */
  private static final String TMPJARS_KEY = "tmpjars";

  /**
   * Adds every regular file directly inside {@code dir} whose name ends with
   * {@code .jar} to {@code localUrls}, qualified against {@code fs}.
   * Sub-directories are not descended into.
   *
   * @param dir       directory to scan; {@code null} is ignored
   * @param fs        file system used to qualify each path
   * @param localUrls set that receives the qualified path strings
   */
  public static void addDirToCache(File dir, FileSystem fs,
                                   Set<String> localUrls) {
    if (null == dir) {
      return;
    }

    // listFiles() yields null when dir is not a directory or cannot be read.
    File[] children = dir.listFiles();
    if (null == children) {
      return;
    }

    for (File libfile : children) {
      if (libfile.exists() && !libfile.isDirectory()
          && libfile.getName().endsWith(".jar")) {
        addToCache(libfile.toString(), fs, localUrls);
      }
    }
  }

  /**
   * Qualifies {@code file} against {@code fs} and adds the resulting string
   * to {@code localUrls}.
   *
   * @param file      path of the file to add; {@code null} is ignored
   * @param fs        file system used to qualify the path
   * @param localUrls set that receives the qualified path string
   */
  public static void addToCache(String file, FileSystem fs,
                                Set<String> localUrls) {
    if (null == file) {
      return;
    }
    Path p = new Path(file);
    String qualified = p.makeQualified(fs).toString();
    localUrls.add(qualified);
  }

  /**
   * Registers every entry of the {@code MVN_CLASSPATH} system property
   * (entries separated by {@code :}) with the job via
   * {@link #addJarForJob(Configuration, String)}. Does nothing when the
   * property is not set.
   *
   * @param conf job configuration to update
   * @throws IOException if the local file system cannot be obtained
   */
  public static void addToCache(Configuration conf) throws IOException {
    String mavenDependencies = System.getProperty("MVN_CLASSPATH");
    if (null == mavenDependencies) {
      // split(null, ...) would return null and break the loop below.
      return;
    }
    for (String path
        : org.apache.commons.lang.StringUtils.split(mavenDependencies, ":")) {
      addJarForJob(conf, path);
    }
  }

  /**
   * Appends {@code localFile}, qualified against the local file system, to
   * the comma-separated {@code tmpjars} property of {@code conf}.
   *
   * @param conf      job configuration to update
   * @param localFile path of the local file; {@code null} leaves
   *                  {@code conf} untouched
   * @throws IOException if the local file system cannot be obtained
   */
  public static void addJarForJob(Configuration conf, String localFile)
      throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Set<String> localUrls = new HashSet<String>();

    addToCache(localFile, fs, localUrls);

    // If we didn't put anything in our set, then there's nothing to cache.
    if (localUrls.isEmpty()) {
      return;
    }

    // Add these to the 'tmpjars' array, which the MR JobSubmitter
    // will upload to HDFS and put in the DistributedCache libjars.
    String tmpjars = conf.get(TMPJARS_KEY);
    StringBuilder sb = new StringBuilder();
    // Only emit the separator when there is a previous, non-empty value;
    // otherwise the list would start with a stray comma.
    if (null != tmpjars && tmpjars.length() > 0) {
      sb.append(tmpjars);
      sb.append(",");
    }
    sb.append(
        StringUtils.arrayToString(localUrls.toArray(new String[0])));
    conf.set(TMPJARS_KEY, sb.toString());
  }
}