/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.templeton.tool;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.ws.rs.core.UriBuilder;

import org.apache.hadoop.hive.common.LogUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.hcatalog.templeton.UgiFactory;

/**
 * General utility methods.
 */
public class TempletonUtils {
  private static final Logger LOG = LoggerFactory.getLogger(TempletonUtils.class);

  /**
   * Is the string non-empty?
   */
  public static boolean isset(String s) {
    return (s != null) && (s.length() > 0);
  }

  /**
   * Is the character non-zero?
   */
  public static boolean isset(char ch) {
    return (ch != 0);
  }

  /**
   * Is the array non-empty?
   */
  public static <T> boolean isset(T[] a) {
    return (a != null) && (a.length > 0);
  }

  /**
   * Is the collection non-empty?
   */
  public static <T> boolean isset(Collection<T> col) {
    return (col != null) && (!col.isEmpty());
  }

  /**
   * Is the map non-empty?
   */
  public static <K, V> boolean isset(Map<K, V> col) {
    return (col != null) && (!col.isEmpty());
  }

  //looking for map 100% reduce 100%
  public static final Pattern JAR_COMPLETE = Pattern.compile(" map \\d+%\\s+reduce \\d+%$");
  public static final Pattern PIG_COMPLETE = Pattern.compile(" \\d+% complete$");
  //looking for map = 100%, reduce = 100%
  public static final Pattern HIVE_COMPLETE = Pattern.compile(" map = (\\d+%),\\s+reduce = (\\d+%).*$");

  /**
   * Hive on Tez produces a progress report that looks like this:
   *   Map 1: -/-  Reducer 2: 0/1
   *   Map 1: -/-  Reducer 2: 0(+1)/1
   *   Map 1: -/-  Reducer 2: 1/1
   *
   * "-/-" means there are no tasks (yet);
   * "0/1" means 1 task in total, 0 completed;
   * "1(+2)/3" means 3 tasks in total, 1 completed and 2 running.
   *
   * HIVE-8495, in particular
   * https://issues.apache.org/jira/secure/attachment/12675504/Screen%20Shot%202014-10-16%20at%209.35.26%20PM.png
   * has more examples.
   * To report progress, we assume all tasks are of equal size and compute "completed"
   * as a percentage of "total".
   * "(Map|Reducer) (\\d+:) ((-/-)|(\\d+(\\(\\+\\d+\\))?/\\d+))" is the complete pattern,
   * but we drop "-/-" to exclude groups that don't add information, such as "Map 1: -/-".
   */
  public static final Pattern HIVE_TEZ_COMPLETE = Pattern.compile("(Map|Reducer) (\\d+:) (\\d+(\\(\\+\\d+\\))?/\\d+)");
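  // Worked example (hypothetical input, derived from the pattern above and the
  // parsing logic in extractPercentComplete below): a report line such as
  //   "Map 1: 2/4 Reducer 2: 1(+1)/2"
  // yields two matches, "Map 1: 2/4" and "Reducer 2: 1(+1)/2"; the counters give
  // completed = 2 + 1 = 3 and total = 4 + 2 = 6, so the progress is reported as
  // "50% complete".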
  /**
   * Pig on Tez produces a progress report that looks like this:
   *   DAG Status: status=RUNNING, progress=TotalTasks: 3 Succeeded: 0 Running: 0 Failed: 0 Killed: 0
   *
   * Use Succeeded/TotalTasks to report progress.
   * There is a hole here: Pig might launch more than one DAG. If that happens, the user
   * may see progress rewind, since the percentage is computed against the new DAG. To fix
   * this, we would need Pig to print the total number of DAGs on the console, and WebHCat
   * to track completed DAGs.
   */
  public static final Pattern PIG_TEZ_COMPLETE = Pattern.compile("progress=TotalTasks: (\\d+) Succeeded: (\\d+)");
  public static final Pattern TEZ_COUNTERS = Pattern.compile("\\d+");

  /**
   * Extract the percent complete line from Pig or Jar jobs.
   */
  public static String extractPercentComplete(String line) {
    Matcher jar = JAR_COMPLETE.matcher(line);
    if (jar.find())
      return jar.group().trim();

    Matcher pig = PIG_COMPLETE.matcher(line);
    if (pig.find())
      return pig.group().trim();

    Matcher hive = HIVE_COMPLETE.matcher(line);
    if (hive.find()) {
      return "map " + hive.group(1) + " reduce " + hive.group(2);
    }
    Matcher hiveTez = HIVE_TEZ_COMPLETE.matcher(line);
    if (hiveTez.find()) {
      int totalTasks = 0;
      int completedTasks = 0;
      do {
        //here each group looks something like "Map 2: 2/4" or "Reducer 3: 1(+2)/4";
        //just parse the numbers, ignoring the one from "Map 2" and the one from "(+2)" if present
        Matcher counts = TEZ_COUNTERS.matcher(hiveTez.group());
        List<String> items = new ArrayList<String>(4);
        while (counts.find()) {
          items.add(counts.group());
        }
        completedTasks += Integer.parseInt(items.get(1));
        if (items.size() == 3) {
          totalTasks += Integer.parseInt(items.get(2));
        } else {
          totalTasks += Integer.parseInt(items.get(3));
        }
      } while (hiveTez.find());
      if (totalTasks == 0) {
        return "0% complete (0 total tasks)";
      }
      return completedTasks * 100 / totalTasks + "% complete";
    }
    Matcher pigTez = PIG_TEZ_COMPLETE.matcher(line);
    if (pigTez.find()) {
      int totalTasks = Integer.parseInt(pigTez.group(1));
      int completedTasks = Integer.parseInt(pigTez.group(2));
      if (totalTasks == 0) {
        return "0% complete (0 total tasks)";
      }
      return completedTasks * 100 / totalTasks + "% complete";
    }
    return null;
  }

  public static final Pattern JAR_ID = Pattern.compile(" Running job: (\\S+)$");
  public static final Pattern PIG_ID = Pattern.compile(" HadoopJobId: (\\S+)$");
  public static final Pattern[] ID_PATTERNS = {JAR_ID, PIG_ID};

  /**
   * Extract the job id from jar jobs.
   */
  public static String extractChildJobId(String line) {
    for (Pattern p : ID_PATTERNS) {
      Matcher m = p.matcher(line);
      if (m.find())
        return m.group(1);
    }

    return null;
  }

  /**
   * Take an array of strings and encode it into one string.
   */
  public static String encodeArray(String[] plain) {
    if (plain == null)
      return null;

    String[] escaped = new String[plain.length];

    for (int i = 0; i < plain.length; ++i) {
      // treat null elements as empty strings; escape without mutating the caller's array
      escaped[i] = StringUtils.escapeString(plain[i] == null ? "" : plain[i]);
    }

    return StringUtils.arrayToString(escaped);
  }
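  // For example (a round trip with decodeArray below, hypothetical input):
  //   encodeArray(new String[]{"a,b", null, "c"}) produces the string a\,b,,c
  //   (the embedded comma is backslash-escaped), and decodeArray of that string
  //   yields {"a,b", "", "c"} -- note that null elements come back as empty strings.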
  /**
   * Encode a List into a string.
   */
  public static String encodeArray(List<String> list) {
    if (list == null)
      return null;

    String[] array = new String[list.size()];
    return encodeArray(list.toArray(array));
  }

  /**
   * Take an encoded string and decode it into an array of strings.
   */
  public static String[] decodeArray(String s) {
    if (s == null)
      return null;

    String[] escaped = StringUtils.split(s);
    String[] plain = new String[escaped.length];

    for (int i = 0; i < escaped.length; ++i)
      plain[i] = StringUtils.unEscapeString(escaped[i]);

    return plain;
  }

  public static String[] hadoopFsListAsArray(String files, Configuration conf, String user)
    throws URISyntaxException, FileNotFoundException, IOException, InterruptedException {
    if (files == null || conf == null) {
      return null;
    }
    String[] dirty = files.split(",");
    String[] clean = new String[dirty.length];

    for (int i = 0; i < dirty.length; ++i)
      clean[i] = hadoopFsFilename(dirty[i], conf, user);

    return clean;
  }

  public static String hadoopFsListAsString(String files, Configuration conf, String user)
    throws URISyntaxException, FileNotFoundException, IOException, InterruptedException {
    if (files == null || conf == null) {
      return null;
    }

    return StringUtils.arrayToString(hadoopFsListAsArray(files, conf, user));
  }

  public static String hadoopFsFilename(String fname, Configuration conf, String user)
    throws URISyntaxException, FileNotFoundException, IOException, InterruptedException {
    Path p = hadoopFsPath(fname, conf, user);
    if (p == null)
      return null;
    else
      return p.toString();
  }

  /**
   * Returns all files (non-recursive) in {@code dirName}
   */
  public static List<Path> hadoopFsListChildren(String dirName, Configuration conf, String user)
    throws URISyntaxException, IOException, InterruptedException {
    Path p = hadoopFsPath(dirName, conf, user);
    FileSystem fs = p.getFileSystem(conf);
    if (!fs.exists(p)) {
      return Collections.emptyList();
    }
    FileStatus[] children = fs.listStatus(p);
    if (!isset(children)) {
      return Collections.emptyList();
    }
    List<Path> files = new ArrayList<Path>();
    for (FileStatus stat : children) {
      files.add(stat.getPath());
    }
    return files;
  }

  /**
   * @return true iff we are sure the file is not there.
   */
  public static boolean hadoopFsIsMissing(FileSystem fs, Path p) {
    try {
      return !fs.exists(p);
    } catch (Throwable t) {
      // Got an error, might be there anyway due to a
      // permissions problem.
      return false;
    }
  }
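  // Illustrative behavior of addUserHomeDirectoryIfApplicable below, assuming
  // user "joe" and hypothetical example paths:
  //   ""                        -> "/user/joe"                (empty path)
  //   "data/file.txt"           -> "/user/joe/data/file.txt"  (relative path)
  //   "/tmp/file.txt"           -> "/tmp/file.txt"            (absolute path, unchanged)
  //   "hdfs://nn:8020/tmp/file" -> unchanged; the path component is already absolute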
  public static String addUserHomeDirectoryIfApplicable(String origPathStr, String user)
    throws IOException, URISyntaxException {
    URI uri = new URI(origPathStr);
    if (uri.getPath().isEmpty()) {
      String newPath = "/user/" + user;
      uri = UriBuilder.fromUri(uri).replacePath(newPath).build();
    } else if (!new Path(uri.getPath()).isAbsolute()) {
      String newPath = "/user/" + user + "/" + uri.getPath();
      uri = UriBuilder.fromUri(uri).replacePath(newPath).build();
    }
    // no work needed for absolute paths
    return uri.toString();
  }

  public static Path hadoopFsPath(String fname, final Configuration conf, String user)
    throws URISyntaxException, IOException, InterruptedException {
    if (fname == null || conf == null) {
      return null;
    }

    UserGroupInformation ugi;
    if (user != null) {
      ugi = UgiFactory.getUgi(user);
    } else {
      ugi = UserGroupInformation.getLoginUser();
    }
    final String finalFName = fname;

    final FileSystem defaultFs = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
      @Override
      public FileSystem run() throws URISyntaxException, IOException, InterruptedException {
        return FileSystem.get(new URI(finalFName), conf);
      }
    });

    fname = addUserHomeDirectoryIfApplicable(fname, user);
    URI u = new URI(fname);
    Path p = new Path(u).makeQualified(defaultFs);

    if (hadoopFsIsMissing(defaultFs, p))
      throw new FileNotFoundException("File " + fname + " does not exist.");

    FileSystem.closeAllForUGI(ugi);
    return p;
  }

  /**
   * GET the given url. Returns the number of bytes received.
   */
  public static int fetchUrl(URL url)
    throws IOException {
    URLConnection cnx = url.openConnection();
    // close the stream when done to avoid leaking the connection's resources
    try (InputStream in = cnx.getInputStream()) {
      byte[] buf = new byte[8192];
      int total = 0;
      int len = 0;
      while ((len = in.read(buf)) >= 0)
        total += len;

      return total;
    }
  }

  /**
   * Set the environment variables to specify the hadoop user.
   */
  public static Map<String, String> hadoopUserEnv(String user, String overrideClasspath) {
    HashMap<String, String> env = new HashMap<String, String>();
    env.put("HADOOP_USER_NAME", user);

    if (overrideClasspath != null) {
      env.put("HADOOP_USER_CLASSPATH_FIRST", "true");
      String cur = System.getenv("HADOOP_CLASSPATH");
      if (TempletonUtils.isset(cur))
        overrideClasspath = overrideClasspath + ":" + cur;
      env.put("HADOOP_CLASSPATH", overrideClasspath);
    }

    return env;
  }

  /**
   * Replaces all occurrences of "\," with ","; returns {@code s} if no modifications are needed.
   */
  public static String unEscapeString(String s) {
    return s != null && s.contains("\\,") ? StringUtils.unEscapeString(s) : s;
  }
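  // For example (hypothetical inputs), unEscapeString("2008-9-1,2008-9-2") is
  // returned as-is, while the string 2008-9-1\,2008-9-2 (with an escaped comma)
  // becomes "2008-9-1,2008-9-2".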
  /**
   * Find a jar that contains a class of the same name and whose
   * file name matches the given pattern.
   *
   * @param clazz the class to find.
   * @param fileNamePattern regex pattern that must match the jar's full path
   * @return a jar file that contains the class, or null
   */
  public static String findContainingJar(Class<?> clazz, String fileNamePattern) {
    ClassLoader loader = clazz.getClassLoader();
    String classFile = clazz.getName().replaceAll("\\.", "/") + ".class";
    try {
      for (final Enumeration<URL> itr = loader.getResources(classFile); itr.hasMoreElements();) {
        final URL url = itr.nextElement();
        if ("jar".equals(url.getProtocol())) {
          String toReturn = url.getPath();
          if (fileNamePattern == null || toReturn.matches(fileNamePattern)) {
            toReturn = URLDecoder.decode(toReturn, "UTF-8");
            return toReturn.replaceAll("!.*$", "");
          }
        }
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    return null;
  }

  public static StringBuilder dumpPropMap(String header, Properties props) {
    Map<String, String> map = new HashMap<String, String>();
    for (Map.Entry<Object, Object> ent : props.entrySet()) {
      map.put(ent.getKey().toString(), ent.getValue() == null ? null : ent.getValue().toString());
    }
    return dumpPropMap(header, map);
  }

  public static StringBuilder dumpPropMap(String header, Map<String, String> map) {
    StringBuilder sb = new StringBuilder("START").append(header).append(":\n");
    List<String> propKeys = new ArrayList<String>(map.keySet());
    Collections.sort(propKeys);
    for (String propKey : propKeys) {
      if (propKey.toLowerCase().contains("path")) {
        // print multi-entry path values one entry per line for readability
        StringTokenizer st = new StringTokenizer(map.get(propKey), File.pathSeparator);
        if (st.countTokens() > 1) {
          sb.append(propKey).append("=\n");
          while (st.hasMoreTokens()) {
            sb.append("  ").append(st.nextToken()).append(File.pathSeparator).append('\n');
          }
        } else {
          sb.append(propKey).append('=').append(map.get(propKey)).append('\n');
        }
      } else {
        sb.append(propKey).append('=').append(LogUtils.maskIfPassword(propKey, map.get(propKey)));
        sb.append('\n');
      }
    }
    return sb.append("END").append(header).append('\n');
  }
}
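// Usage sketch for dumpPropMap (hypothetical "env" header and properties, assuming
// a Unix path separator of ':'): dumpPropMap("env", props) produces output shaped like
//   STARTenv:
//   java.class.path=
//     /opt/hive/lib/a.jar:
//     /opt/hive/lib/b.jar:
//   user.name=hcat
//   ENDenv
// The header is appended verbatim after START/END, entries are sorted by key, and
// values whose keys look like passwords are masked by LogUtils.maskIfPassword.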