/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.blur.mapreduce.lib;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.blur.lucene.security.DocumentVisibility;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.StringUtils;

/**
 * This utility code was taken from HBase to locate classes and the jar files
 * to add to the MapReduce Job.
 */
public class BlurMapReduceUtil {

  private static final Log LOG = LogFactory.getLog(BlurMapReduceUtil.class);

  /** Configuration key holding the comma separated list of jars to ship with the job. */
  private static final String TMP_JARS = "tmpjars";

  /** File name prefix used to recognise a Blur jar on the class path. */
  private static final String BLUR_JAR_PREFIX = "blur-";

  /**
   * Add the Blur dependency jars as well as jars for any of the configured job
   * classes to the job configuration, so that JobClient will ship them to the
   * cluster and add them to the DistributedCache.
   *
   * @param job
   *          the job whose configuration is updated.
   * @throws IOException
   *           if a jar cannot be located or validated, or if one of the job's
   *           configured classes cannot be loaded.
   */
  public static void addDependencyJars(Job job) throws IOException {
    try {
      addDependencyJars(job.getConfiguration(), org.apache.zookeeper.ZooKeeper.class, job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(), job.getInputFormatClass(), job.getOutputKeyClass(),
          job.getOutputValueClass(), job.getOutputFormatClass(), job.getPartitionerClass(), job.getCombinerClass(),
          DocumentVisibility.class);
      addAllJarsInBlurLib(job.getConfiguration());
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
  }

  /**
   * Adds all the class path entries that live in the same directory as the
   * first blur jar found on the class path (any entry whose file name starts
   * with "blur-"). Entries are merged with whatever is already configured
   * under "tmpjars". Nothing is changed when no blur jar is on the class path.
   *
   * @param conf
   *          the configuration whose "tmpjars" value is updated.
   * @throws IOException
   *           if the local file system cannot be accessed.
   */
  public static void addAllJarsInBlurLib(Configuration conf) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>(conf.getStringCollection(TMP_JARS));

    String classPath = System.getProperty("java.class.path");
    if (classPath == null) {
      return;
    }
    // The class path separator is platform specific (':' or ';'), so it must
    // not be hard-coded. It is quoted because String.split expects a regex.
    String[] files = classPath.split(java.util.regex.Pattern.quote(java.io.File.pathSeparator));

    String blurLibPath = getPath(BLUR_JAR_PREFIX, files);
    if (blurLibPath == null) {
      return;
    }
    for (String pathStr : getPathes(blurLibPath, files)) {
      Path path = new Path(pathStr);
      if (!addIfExists(localFs, jars, path)) {
        LOG.warn("Could not validate jar file " + path);
      }
    }
    storeJars(conf, jars);
  }

  /**
   * Returns the class path entries located below the given directory.
   *
   * @param path
   *          the directory (without trailing separator) to match.
   * @param files
   *          the class path entries.
   * @return the entries that start with {@code path} immediately followed by a
   *         directory separator.
   */
  private static List<String> getPathes(String path, String[] files) {
    List<String> pathes = new ArrayList<String>();
    int prefixLength = path.length();
    for (String file : files) {
      // Require a separator right after the prefix so that "/opt/blur/lib"
      // does not also pick up entries from a sibling such as "/opt/blur/lib2".
      if (file.length() > prefixLength && file.startsWith(path) && isSeparator(file.charAt(prefixLength))) {
        pathes.add(file);
      }
    }
    return pathes;
  }

  /**
   * Finds the directory of the first class path entry whose file name starts
   * with the given prefix.
   *
   * @param startsWith
   *          the file name prefix to look for.
   * @param files
   *          the class path entries.
   * @return the directory part of the first matching entry (without trailing
   *         separator), or null if no entry matches.
   */
  private static String getPath(String startsWith, String[] files) {
    for (String file : files) {
      int lastSeparator = Math.max(file.lastIndexOf('/'), file.lastIndexOf(java.io.File.separatorChar));
      if (lastSeparator < 0) {
        // No directory component: there is no directory prefix to return, and
        // substring(0, -1) would throw.
        continue;
      }
      if (file.startsWith(startsWith, lastSeparator + 1)) {
        return file.substring(0, lastSeparator);
      }
    }
    return null;
  }

  /** Returns true if the character separates directories in a class path entry. */
  private static boolean isSeparator(char c) {
    return c == '/' || c == java.io.File.separatorChar;
  }

  /**
   * Adds the fully qualified form of the path to the set if it exists on the
   * local file system.
   *
   * @return true if the path exists and was added, false otherwise.
   */
  private static boolean addIfExists(FileSystem localFs, Set<String> jars, Path path) throws IOException {
    if (!localFs.exists(path)) {
      return false;
    }
    jars.add(path.makeQualified(localFs.getUri(), localFs.getWorkingDirectory()).toString());
    return true;
  }

  /** Writes the jars back to "tmpjars"; leaves the configuration untouched when the set is empty. */
  private static void storeJars(Configuration conf, Set<String> jars) {
    if (jars.isEmpty()) {
      return;
    }
    conf.set(TMP_JARS, StringUtils.arrayToString(jars.toArray(new String[0])));
  }

  /**
   * Add the jars containing the given classes to the job's configuration such
   * that JobClient will ship them to the cluster and add them to the
   * DistributedCache.
   *
   * @param conf
   *          the configuration whose "tmpjars" value is updated.
   * @param classes
   *          the classes whose jars should be shipped; null elements are
   *          skipped.
   * @throws IOException
   *           if the local file system cannot be accessed or locating a jar
   *           fails.
   */
  public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);

    // Start from the jars that are already in the tmpjars variable
    Set<String> jars = new HashSet<String>(conf.getStringCollection(TMP_JARS));

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
      if (clazz == null) {
        continue;
      }
      String pathStr = findOrCreateJar(clazz);
      if (pathStr == null) {
        LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
        continue;
      }
      Path path = new Path(pathStr);
      if (!addIfExists(localFs, jars, path)) {
        LOG.warn("Could not validate jar file " + path + " for class " + clazz);
      }
    }
    storeJars(conf, jars);
  }

  /**
   * If org.apache.hadoop.util.JarFinder is available (0.23+ hadoop), finds the
   * Jar for a class or creates it if it doesn't exist. If the class is in a
   * directory in the classpath, it creates a Jar on the fly with the contents
   * of the directory and returns the path to that Jar. If a Jar is created, it
   * is created in the system temporary directory.
   *
   * Otherwise, returns an existing jar that contains a class of the same name.
   *
   * @param my_class
   *          the class to find.
   * @return a jar file that contains the class, or null.
   * @throws IOException
   *           if JarFinder was invoked and threw an exception.
   */
  private static String findOrCreateJar(Class<?> my_class) throws IOException {
    try {
      Class<?> jarFinder = Class.forName("org.apache.hadoop.util.JarFinder");
      // hadoop-0.23 has a JarFinder class that will create the jar
      // if it doesn't exist. Note that this is needed to run the mapreduce
      // unit tests post-0.23, because mapreduce v2 requires the relevant jars
      // to be in the mr cluster to do output, split, etc. At unit test time,
      // the project jars do not exist, so we need to create some. Note that we
      // can safely fall back to findContainingJar for pre-0.23 mapreduce.
      Method m = jarFinder.getMethod("getJar", Class.class);
      return (String) m.invoke(null, my_class);
    } catch (InvocationTargetException ite) {
      // function was properly called, but threw its own exception
      throw new IOException(ite.getCause());
    } catch (Exception e) {
      // Deliberately ignored: any other failure means JarFinder could not be
      // reached through reflection, so the legacy lookup below is used.
    }

    LOG.debug("New JarFinder: org.apache.hadoop.util.JarFinder.getJar " + "not available. Using old findContainingJar");
    return findContainingJar(my_class);
  }

  /**
   * Find a jar that contains a class of the same name, if any. It will return a
   * jar file, even if that is not the first thing on the class path that has a
   * class with the same name.
   *
   * This is shamelessly copied from JobConf
   *
   * @param my_class
   *          the class to find.
   * @return a jar file that contains the class, or null.
   * @throws RuntimeException
   *           wrapping any IOException raised while reading the class loader
   *           resources.
   */
  private static String findContainingJar(Class<?> my_class) {
    ClassLoader loader = my_class.getClassLoader();
    if (loader == null) {
      // Classes loaded by the bootstrap loader report a null class loader;
      // the system class loader can still enumerate their resources.
      loader = ClassLoader.getSystemClassLoader();
    }
    String class_file = my_class.getName().replace('.', '/') + ".class";
    try {
      for (Enumeration<URL> itr = loader.getResources(class_file); itr.hasMoreElements();) {
        URL url = itr.nextElement();
        if ("jar".equals(url.getProtocol())) {
          String toReturn = url.getPath();
          if (toReturn.startsWith("file:")) {
            toReturn = toReturn.substring("file:".length());
          }
          // URLDecoder is a misnamed class, since it actually decodes
          // x-www-form-urlencoded MIME type rather than actual
          // URL encoding (which the file path has). Therefore it would
          // decode +s to ' 's which is incorrect (spaces are actually
          // either unencoded or encoded as "%20"). Replace +s first, so
          // that they are kept sacred during the decoding process.
          toReturn = toReturn.replace("+", "%2B");
          toReturn = URLDecoder.decode(toReturn, "UTF-8");
          // Strip the "!/path/inside/jar" suffix, leaving only the jar file.
          return toReturn.replaceAll("!.*$", "");
        }
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    return null;
  }
}