// JarManager.java example

// Explorer
// spork-streaming-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.impl.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.net.URLDecoder;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import java.util.jar.JarEntry;
import java.util.jar.JarInputStream;
import java.util.jar.JarOutputStream;
import java.util.zip.ZipEntry;

import org.antlr.runtime.CommonTokenStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
import org.apache.pig.impl.PigContext;
import org.apache.tools.bzip2r.BZip2Constants;
import org.codehaus.jackson.annotate.JsonPropertyOrder;
import org.codehaus.jackson.map.annotate.JacksonStdImpl;
import org.joda.time.DateTime;

import com.google.common.collect.Multimaps;

import dk.brics.automaton.Automaton;

/**
 * Static helpers that assemble the job jar: they locate the jar files that contain the
 * required classes, merge their entries into one output jar, and append the script files and
 * the serialized {@code PigContext}.
 */
public class JarManager {

    private static final Log log = LogFactory.getLog(JarManager.class);

    /**
     * A container class to track the Jar files that need to be merged together to submit to Hadoop.
     */
    private static class JarListEntry {
        /**
         * The name of the Jar file to merge in.
         */
        final String jar;
        /**
         * If this field is not null, only entries that start with this prefix will be merged in.
         */
        final String prefix;

        JarListEntry(String jar, String prefix) {
            this.jar = jar;
            this.prefix = prefix;
        }

        /**
         * Two entries are equal when both the jar name and the (possibly null) prefix match.
         */
        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof JarListEntry)) {
                return false;
            }
            JarListEntry other = (JarListEntry) obj;
            if (!jar.equals(other.jar)) {
                return false;
            }
            if (prefix == null) {
                return other.prefix == null;
            }
            return prefix.equals(other.prefix);
        }

        @Override
        public int hashCode() {
            return jar.hashCode() + (prefix == null ? 1 : prefix.hashCode());
        }
    }

    /**
     * The packages whose containing jars are always merged into the job jar. Each constant pairs
     * an entry-name prefix with a class that is used to locate the jar providing that package.
     */
    private static enum DefaultPigPackages {

        PIG("org/apache/pig", PigMapReduce.class),
        BZIP2R("org/apache/tools/bzip2r", BZip2Constants.class),
        AUTOMATON("dk/brics/automaton", Automaton.class),
        ANTLR("org/antlr/runtime", CommonTokenStream.class),
        GUAVA("com/google/common", Multimaps.class),
        JACKSON_CORE("org/codehaus/jackson", JsonPropertyOrder.class),
        JACKSON_MAPPER("org/codehaus/jackson", JacksonStdImpl.class),
        JODATIME("org/joda/time", DateTime.class);

        private final String pkgPrefix;
        private final Class<?> pkgClass;

        DefaultPigPackages(String pkgPrefix, Class<?> pkgClass) {
            this.pkgPrefix = pkgPrefix;
            this.pkgClass = pkgClass;
        }

        /**
         * @return the entry-name prefix that restricts which jar entries are merged.
         */
        public String getPkgPrefix() {
            return pkgPrefix;
        }

        /**
         * @return the class used to find the jar that contains this package.
         */
        public Class<?> getPkgClass() {
            return pkgClass;
        }
    }

    /**
     * Create a jarfile in a temporary path, that is a merge of all the jarfiles containing the
     * functions and the core pig classes.
     * <p>
     * Entries are written in this order: the default Pig packages, the jars of the given
     * functions, the script jars, the script files, and finally an entry named
     * {@code pigContext} holding the serialized context. When two sources provide an entry with
     * the same name, only the first one is kept. On success the jar stream wrapping {@code os}
     * is closed.
     *
     * @param os
     *            the stream the merged jar is written to.
     * @param funcs
     *            the functions that will be used in a job and whose jar files need to be included
     *            in the final merged jar file.
     * @param pigContext
     *            the context supplying the class aliases, script jars and script files; it is
     *            also serialized into the jar.
     * @throws ClassNotFoundException
     * @throws IOException
     *             if a jar or script file cannot be found or read, or the output cannot be
     *             written.
     */
    @SuppressWarnings("deprecation")
    public static void createJar(OutputStream os, Set<String> funcs, PigContext pigContext) throws ClassNotFoundException, IOException {
        Vector<JarListEntry> jarList = new Vector<JarListEntry>();
        for (DefaultPigPackages pkgToSend : DefaultPigPackages.values()) {
            addContainingJar(jarList, pkgToSend.getPkgClass(), pkgToSend.getPkgPrefix(), pigContext);
        }

        for (String func : funcs) {
            Class<?> clazz = pigContext.getClassForAlias(func);
            if (clazz != null) {
                addContainingJar(jarList, clazz, null, pigContext);
            }
        }

        // Names of the entries written so far; used to skip duplicates.
        HashMap<String, String> contents = new HashMap<String, String>();
        JarOutputStream jarFile = new JarOutputStream(os);
        for (JarListEntry jarEntry : jarList) {
            mergeJar(jarFile, jarEntry.jar, jarEntry.prefix, contents);
        }
        for (String scriptJar : pigContext.scriptJars) {
            mergeJar(jarFile, scriptJar, null, contents);
        }
        for (String path : pigContext.scriptFiles) {
            log.debug("Adding entry " + path + " to job jar" );
            InputStream stream = openScriptStream(new File(path), path);
            addStreamAndClose(jarFile, path, stream, contents);
        }
        for (Map.Entry<String, File> entry : pigContext.getScriptFiles().entrySet()) {
            log.debug("Adding entry " + entry.getKey() + " to job jar" );
            File scriptFile = entry.getValue();
            InputStream stream = openScriptStream(scriptFile, scriptFile.getPath());
            addStreamAndClose(jarFile, entry.getKey(), stream, contents);
        }

        log.debug("Adding entry pigContext to job jar" );
        jarFile.putNextEntry(new ZipEntry("pigContext"));
        ObjectOutputStream contextOut = new ObjectOutputStream(jarFile);
        contextOut.writeObject(pigContext);
        // Push out anything the object stream may still hold before the jar is finished.
        contextOut.flush();
        jarFile.close();
    }

    /**
     * Opens a script file for reading, falling back to a class-path resource when the file does
     * not exist on disk.
     *
     * @param file
     *            the file to open if it exists.
     * @param resourcePath
     *            the resource name looked up through the Pig class loader otherwise; also used
     *            in the error message.
     * @return an open stream; the caller is responsible for closing it.
     * @throws IOException
     *             if neither the file nor the resource can be found.
     */
    private static InputStream openScriptStream(File file, String resourcePath) throws IOException {
        InputStream stream;
        if (file.exists()) {
            stream = new FileInputStream(file);
        } else {
            stream = PigContext.getClassLoader().getResourceAsStream(resourcePath);
        }
        if (stream == null) {
            throw new IOException("Cannot find " + resourcePath);
        }
        return stream;
    }

    /**
     * Adds a stream to the jar under the given name and closes the stream afterwards, whether or
     * not the copy succeeded.
     */
    private static void addStreamAndClose(JarOutputStream jarFile, String name, InputStream stream,
            Map<String, String> contents) throws IOException {
        try {
            addStream(jarFile, name, stream, contents);
        } finally {
            stream.close();
        }
    }

    /**
     * Creates a Classloader based on the passed jarFile and any extra jar files.
     *
     * @param jarFile
     *            the jar file to be part of the newly created Classloader. This jar file plus any
     *            jars in the extraJars list will constitute the classpath. May be null, in which
     *            case only the extra jars are used.
     * @param pigContext
     *            the context whose extraJars list is appended to the classpath.
     * @return the new Classloader.
     * @throws MalformedURLException
     */
    static ClassLoader createCl(String jarFile, PigContext pigContext) throws MalformedURLException {
        int len = pigContext.extraJars.size();
        int passedJar = jarFile == null ? 0 : 1;
        URL[] urls = new URL[len + passedJar];
        if (jarFile != null) {
            urls[0] = new URL("file:" + jarFile);
        }
        for (int i = 0; i < len; i++) {
            urls[i + passedJar] = new URL("file:" + pigContext.extraJars.get(i));
        }
        return new URLClassLoader(urls, PigMapReduce.class.getClassLoader());
    }

    /**
     * Merge one Jar file into another.
     *
     * @param jarFile
     *            the stream of the target jar file.
     * @param jar
     *            the name of the jar file to be merged.
     * @param prefix
     *            if not null, only entries in jar that start with this prefix will be merged.
     * @param contents
     *            the current contents of jarFile. (Use to prevent duplicate entries.)
     * @throws FileNotFoundException
     * @throws IOException
     */
    private static void mergeJar(JarOutputStream jarFile, String jar, String prefix, Map<String, String> contents)
            throws FileNotFoundException, IOException {
        InputStream rawInput = new FileInputStream(jar);
        log.debug("Adding jar " + jar + (prefix != null ? " for prefix "+prefix : "" ) + " to job jar" );
        mergeAndClose(jarFile, rawInput, prefix, contents);
    }

    /**
     * Merge the Jar found at the given URL into another.
     *
     * @param jarFile
     *            the stream of the target jar file.
     * @param jar
     *            the location of the jar file to be merged.
     * @param prefix
     *            if not null, only entries in jar that start with this prefix will be merged.
     * @param contents
     *            the current contents of jarFile. (Use to prevent duplicate entries.)
     * @throws FileNotFoundException
     * @throws IOException
     */
    private static void mergeJar(JarOutputStream jarFile, URL jar, String prefix, Map<String, String> contents)
            throws FileNotFoundException, IOException {
        mergeAndClose(jarFile, jar.openStream(), prefix, contents);
    }

    /**
     * Reads the raw stream as a jar, merges its entries into the target jar and always closes the
     * source, so that no file handle is left open per merged jar.
     */
    private static void mergeAndClose(JarOutputStream jarFile, InputStream rawInput, String prefix,
            Map<String, String> contents) throws IOException {
        try {
            JarInputStream jarInput = new JarInputStream(rawInput);
            try {
                mergeJar(jarFile, jarInput, prefix, contents);
            } finally {
                jarInput.close();
            }
        } finally {
            // Also covers the case where the JarInputStream constructor itself failed.
            rawInput.close();
        }
    }

    /**
     * Copies every entry of an already opened jar stream into the target jar. The source stream
     * is not closed here; that is left to the caller.
     *
     * @param jarFile
     *            the stream of the target jar file.
     * @param jarInput
     *            the jar to read entries from.
     * @param prefix
     *            if not null, only entries in jarInput that start with this prefix will be merged.
     * @param contents
     *            the current contents of jarFile. (Use to prevent duplicate entries.)
     * @throws FileNotFoundException
     * @throws IOException
     */
    private static void mergeJar(JarOutputStream jarFile, JarInputStream jarInput, String prefix, Map<String, String> contents)
            throws FileNotFoundException, IOException {
        JarEntry entry;
        while ((entry = jarInput.getNextJarEntry()) != null) {
            if (prefix != null && !entry.getName().startsWith(prefix)) {
                continue;
            }
            addStream(jarFile, entry.getName(), jarInput, contents);
        }
    }

    /**
     * Adds a stream to a Jar file.
     *
     * @param os
     *            the OutputStream of the Jar file to which the stream will be added.
     * @param name
     *            the name of the stream.
     * @param is
     *            the stream to add. It is read but not closed by this method.
     * @param contents
     *            the current contents of the Jar file. (We use this to avoid adding two streams
     *            with the same name.)
     * @throws IOException
     */
    private static void addStream(JarOutputStream os, String name, InputStream is, Map<String, String> contents)
            throws IOException {
        // Values stored in contents are never null, so a key lookup is enough.
        if (contents.containsKey(name)) {
            return;
        }
        contents.put(name, "");
        os.putNextEntry(new JarEntry(name));
        byte[] buffer = new byte[4096];
        int rc;
        while ((rc = is.read(buffer)) > 0) {
            os.write(buffer, 0, rc);
        }
    }

    /**
     * Adds the Jar file containing the given class to the list of jar files to be merged.
     * Nothing is added when no jar can be found for the class, when the jar is listed in the
     * context's skipJars and no prefix is given, or when the same jar/prefix pair is already in
     * the list.
     *
     * @param jarList
     *            the list of jar files to be merged.
     * @param clazz
     *            the class in a jar file to be merged.
     * @param prefix
     *            if not null, only resources from the jar file that start with this prefix will be
     *            merged.
     * @param pigContext
     *            the context providing the skipJars collection.
     */
    private static void addContainingJar(Vector<JarListEntry> jarList, Class<?> clazz, String prefix, PigContext pigContext) {
        String jar = findContainingJar(clazz);
        // Handle the "not found" case first so skipJars is never queried with null.
        if (jar == null) {
            log.warn("Couldn't find the jar for " + clazz.getName() + ", skip it");
            return;
        }
        if (prefix == null && pigContext.skipJars.contains(jar)) {
            return;
        }
        JarListEntry jarListEntry = new JarListEntry(jar, prefix);
        if (!jarList.contains(jarListEntry)) {
            jarList.add(jarListEntry);
        }
    }

    /**
     * Find a jar that contains a class of the same name, if any. It will return a jar file, even if
     * that is not the first thing on the class path that has a class with the same name.
     *
     * @param my_class
     *            the class to find
     * @return a jar file that contains the class, or null
     * @throws RuntimeException
     *             wrapping any IOException raised while searching the class loader.
     */
    public static String findContainingJar(Class<?> my_class) {
        ClassLoader loader = PigContext.getClassLoader();
        String class_file = my_class.getName().replace('.', '/') + ".class";
        try {
            Enumeration<URL> itr = null;
            //Try to find the class in registered jars
            if (loader instanceof URLClassLoader) {
                itr = ((URLClassLoader) loader).findResources(class_file);
            }
            //Try system classloader if not URLClassLoader or no resources found in URLClassLoader
            if (itr == null || !itr.hasMoreElements()) {
                itr = loader.getResources(class_file);
            }
            while (itr.hasMoreElements()) {
                URL url = itr.nextElement();
                if (!"jar".equals(url.getProtocol())) {
                    continue;
                }
                String toReturn = url.getPath();
                if (toReturn.startsWith("file:")) {
                    toReturn = toReturn.substring("file:".length());
                }
                // URLDecoder is a misnamed class, since it actually decodes
                // x-www-form-urlencoded MIME type rather than actual
                // URL encoding (which the file path has). Therefore it would
                // decode +s to ' 's which is incorrect (spaces are actually
                // either unencoded or encoded as "%20"). Replace +s first, so
                // that they are kept sacred during the decoding process.
                toReturn = toReturn.replace("+", "%2B");
                toReturn = URLDecoder.decode(toReturn, "UTF-8");
                // Drop the "!/path/inside/jar" suffix, leaving only the jar file path.
                return toReturn.replaceAll("!.*$", "");
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return null;
    }

}