/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.DataOutputStream; import java.io.File; import java.io.FileFilter; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Enumeration; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Comparator; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.ProcessTree; import org.apache.hadoop.util.Shell; import org.apache.log4j.Appender; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; /** * A simple logger to handle the task-specific user logs. * This class uses the system property <code>hadoop.log.dir</code>. * */ public class TaskLog { private static final Log LOG = LogFactory.getLog(TaskLog.class); private static final File LOG_DIR = getLogDir( System.getProperty("hadoop.log.dir")); private static final File JT_LOG_DIR = getLogDir( System.getProperty("hadoop.log.dir") + File.separator + "jtlogs"); static File getLogDir(String hadoopLogDir) { return new File(hadoopLogDir, "userlogs").getAbsoluteFile(); } // localFS is set in (and used by) writeToIndexFile() static LocalFileSystem localFS = null; static { if (!LOG_DIR.exists()) { LOG_DIR.mkdirs(); } } public static File getTaskLogFile(TaskAttemptID taskid, LogName filter) { return new File(getBaseDir(taskid.toString()), filter.toString()); } /** * @deprecated Instead use * {@link #getAllLogsFileDetails(TaskAttemptID, boolean)} to get * the details of all log-files and then use the particular * log-type's detail to call getRealTaskLogFileLocation(String, * LogName) real log-location */ @Deprecated public static File getRealTaskLogFileLocation(TaskAttemptID taskid, LogName filter) { LogFileDetail l; try { Map<LogName, LogFileDetail> allFilesDetails = getAllLogsFileDetails(taskid, false); l = allFilesDetails.get(filter); } catch (IOException ie) { LOG.error("getTaskLogFileDetailgetAllLogsFileDetails threw an exception " + ie); return null; } return new File(getBaseDir(l.location), filter.toString()); } /** * Get the real task-log file-path * * @param location Location of the log-file. This should point to an * attempt-directory. * @param filter * @return * @throws IOException */ static String getRealTaskLogFilePath(String location, LogName filter) throws IOException { return FileUtil.makeShellPath(new File(getBaseDir(location), filter.toString())); } static class LogFileDetail { final static String LOCATION = "LOG_DIR:"; String location; long start; long length; } static Map<LogName, LogFileDetail> getAllLogsFileDetails( TaskAttemptID taskid, boolean isCleanup) throws IOException { Map<LogName, LogFileDetail> allLogsFileDetails = new HashMap<LogName, LogFileDetail>(); File indexFile = getIndexFile(taskid.toString(), isCleanup); BufferedReader fis = new BufferedReader(new java.io.FileReader(indexFile)); //the format of the index file is //LOG_DIR: <the dir where the task logs are really stored> //stdout:<start-offset in the stdout file> <length> //stderr:<start-offset in the stderr file> <length> //syslog:<start-offset in the syslog file> <length> String str = fis.readLine(); if (str == null) { //the file doesn't have anything throw new IOException ("Index file for the log of " + taskid+" doesn't exist."); } String loc = str.substring(str.indexOf(LogFileDetail.LOCATION)+ LogFileDetail.LOCATION.length()); //special cases are the debugout and profile.out files. They are guaranteed //to be associated with each task attempt since jvm reuse is disabled //when profiling/debugging is enabled for (LogName filter : new LogName[] { LogName.DEBUGOUT, LogName.PROFILE }) { LogFileDetail l = new LogFileDetail(); l.location = loc; l.length = new File(getBaseDir(l.location), filter.toString()).length(); l.start = 0; allLogsFileDetails.put(filter, l); } str = fis.readLine(); while (str != null) { LogFileDetail l = new LogFileDetail(); l.location = loc; int idx = str.indexOf(':'); LogName filter = LogName.valueOf(str.substring(0, idx).toUpperCase()); str = str.substring(idx + 1); String[] startAndLen = str.split(" "); l.start = Long.parseLong(startAndLen[0]); l.length = Long.parseLong(startAndLen[1]); allLogsFileDetails.put(filter, l); str = fis.readLine(); } fis.close(); return allLogsFileDetails; } private static File getTmpIndexFile(String taskid) { return new File(getBaseDir(taskid), "log.tmp"); } public static File getIndexFile(String taskid) { return getIndexFile(taskid, false); } public static File getIndexFile(String taskid, boolean isCleanup) { if (isCleanup) { return new File(getBaseDir(taskid), "log.index.cleanup"); } else { return new File(getBaseDir(taskid), "log.index"); } } static File getBaseDir(String taskid) { return new File(LOG_DIR, taskid); } static File getJTBaseDir(String taskid) { return new File(JT_LOG_DIR, taskid); } static final List<LogName> LOGS_TRACKED_BY_INDEX_FILES = Arrays.asList(LogName.STDOUT, LogName.STDERR, LogName.SYSLOG); private static TaskAttemptID currentTaskid; /** * Map to store previous and current lengths. */ private static Map<LogName, Long[]> logLengths = new HashMap<LogName, Long[]>(); static { for (LogName logName : LOGS_TRACKED_BY_INDEX_FILES) { logLengths.put(logName, new Long[] { Long.valueOf(0L), Long.valueOf(0L) }); } } static void writeToIndexFile(TaskAttemptID firstTaskid, TaskAttemptID currentTaskid, boolean isCleanup, Map<LogName, Long[]> lengths) throws IOException { // To ensure atomicity of updates to index file, write to temporary index // file first and then rename. File tmpIndexFile = getTmpIndexFile(currentTaskid.toString()); BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(tmpIndexFile,false)); DataOutputStream dos = new DataOutputStream(bos); //the format of the index file is //LOG_DIR: <the dir where the task logs are really stored> //STDOUT: <start-offset in the stdout file> <length> //STDERR: <start-offset in the stderr file> <length> //SYSLOG: <start-offset in the syslog file> <length> dos.writeBytes(LogFileDetail.LOCATION + firstTaskid.toString() + "\n"); for (LogName logName : LOGS_TRACKED_BY_INDEX_FILES) { Long[] lens = lengths.get(logName); dos.writeBytes(logName.toString() + ":" + lens[0].toString() + " " + Long.toString(lens[1].longValue() - lens[0].longValue()) + "\n");} dos.close(); File indexFile = getIndexFile(currentTaskid.toString(), isCleanup); Path indexFilePath = new Path(indexFile.getAbsolutePath()); Path tmpIndexFilePath = new Path(tmpIndexFile.getAbsolutePath()); if (localFS == null) {// set localFS once localFS = FileSystem.getLocal(new Configuration()); } localFS.rename (tmpIndexFilePath, indexFilePath); } public synchronized static void syncLogs(TaskAttemptID firstTaskid, TaskAttemptID taskid) throws IOException { syncLogs(firstTaskid, taskid, false); } @SuppressWarnings("unchecked") public synchronized static void syncLogs(TaskAttemptID firstTaskid, TaskAttemptID taskid, boolean isCleanup) throws IOException { System.out.flush(); System.err.flush(); Enumeration<Logger> allLoggers = LogManager.getCurrentLoggers(); while (allLoggers.hasMoreElements()) { Logger l = allLoggers.nextElement(); Enumeration<Appender> allAppenders = l.getAllAppenders(); while (allAppenders.hasMoreElements()) { Appender a = allAppenders.nextElement(); if (a instanceof TaskLogAppender) { ((TaskLogAppender)a).flush(); } } } // set start and end for (LogName logName : LOGS_TRACKED_BY_INDEX_FILES) { if (currentTaskid != taskid) { // Set start = current-end logLengths.get(logName)[0] = Long.valueOf(getTaskLogFile(firstTaskid, logName).length()); } // Set current end logLengths.get(logName)[1] = Long.valueOf(getTaskLogFile(firstTaskid, logName).length()); } if (currentTaskid != taskid) { if (currentTaskid != null) { LOG.info("Starting logging for a new task " + taskid + " in the same JVM as that of the first task " + firstTaskid); } currentTaskid = taskid; } writeToIndexFile(firstTaskid, taskid, isCleanup, logLengths); } /** * The filter for userlogs. */ public static enum LogName { /** Log on the stdout of the task. */ STDOUT ("stdout"), /** Log on the stderr of the task. */ STDERR ("stderr"), /** Log on the map-reduce system logs of the task. */ SYSLOG ("syslog"), /** The java profiler information. */ PROFILE ("profile.out"), /** Log the debug script's stdout */ DEBUGOUT ("debugout"); private String prefix; private LogName(String prefix) { this.prefix = prefix; } @Override public String toString() { return prefix; } } /** * Purge old user logs, and clean up half of the logs if the number of logs * exceed the limit. * * @throws IOException */ static void cleanup(int logsRetainHours, int logsNumberLimit ) throws IOException { cleanup(TaskLog.getUserLogDir(), logsRetainHours, logsNumberLimit); } static void cleanup(File logDir, int logsRetainHours, int logsNumberLimit ) throws IOException { //Delete logs of tasks if the number of files exceed fileNumberLimit LOG.info("TaskLog.cleanup: logsRetainHours=" + logsRetainHours + " logsNumberLimit=" + logsNumberLimit); File[] totalLogs = logDir.listFiles(); long purgeTimeStamp = System.currentTimeMillis() - (logsRetainHours*60L*60*1000); if (totalLogs != null) { int numLogs = totalLogs.length; if (totalLogs.length > logsNumberLimit) { // sorting files first according to last modified time. Arrays.sort(totalLogs, new Comparator<File>() { public int compare(File f1, File f2) { return Long.valueOf(f1.lastModified()).compareTo(f2.lastModified()); } }); // deleting half of the oldest logs for (int i=0; i < numLogs / 2; i++) { LOG.info("TaskLog.cleanup deleting " + totalLogs[i]); FileUtil.fullyDelete(totalLogs[i]); } // now, delete the remaining ones that are older than purgeTimeStamp // since the array is sorted, break the loop once we pass the boundary for (int i = numLogs / 2; i < numLogs; i++) { if (totalLogs[i].lastModified() < purgeTimeStamp) { LOG.info("TaskLog.cleanup deleting " + totalLogs[i]); FileUtil.fullyDelete(totalLogs[i]); } else { break; } } } else { // if the number of logs is less than limit, only delete the files that are // older than PurgeTimeStamp for (int i=0; i < numLogs; i++) { if (totalLogs[i].lastModified() < purgeTimeStamp) { LOG.info("TaskLog.cleanup deleting " + totalLogs[i]); FileUtil.fullyDelete(totalLogs[i]); } } } } } static class Reader extends InputStream { private long bytesRemaining; private FileInputStream file ; public Reader(TaskAttemptID taskid, LogName kind, long start, long end) throws IOException { this(taskid, kind, start, end, false); } /** * Read a log file from start to end positions. The offsets may be negative, * in which case they are relative to the end of the file. For example, * Reader(taskid, kind, 0, -1) is the entire file and * Reader(taskid, kind, -4197, -1) is the last 4196 bytes. * @param taskid the id of the task to read the log file for * @param kind the kind of log to read * @param start the offset to read from (negative is relative to tail) * @param end the offset to read upto (negative is relative to tail) * @param isCleanup whether the attempt is cleanup attempt or not * @throws IOException */ public Reader(TaskAttemptID taskid, LogName kind, long start, long end, boolean isCleanup) throws IOException { // find the right log file LogFileDetail fileDetail; File jtLog = null; try { Map<LogName, LogFileDetail> allFilesDetails = getAllLogsFileDetails(taskid, isCleanup); fileDetail = allFilesDetails.get(kind); } catch (IOException e) { jtLog = new File(getJTBaseDir(taskid.toString()), kind.toString()); if (!jtLog.isFile()) { throw e; } fileDetail = new LogFileDetail(); fileDetail.location = taskid.toString(); fileDetail.start = 0; fileDetail.length = jtLog.length(); } // calculate the start and stop long size = fileDetail.length; if (start < 0) { start += size + 1; } if (end < 0) { end += size + 1; } start = Math.max(0, Math.min(start, size)); end = Math.max(0, Math.min(end, size)); start += fileDetail.start; end += fileDetail.start; bytesRemaining = end - start; if (jtLog == null) { file = new FileInputStream(new File(getBaseDir(fileDetail.location), kind.toString())); } else { file = new FileInputStream(jtLog); } // skip upto start long pos = 0; while (pos < start) { long result = file.skip(start - pos); if (result < 0) { bytesRemaining = 0; break; } pos += result; } } @Override public int read() throws IOException { int result = -1; if (bytesRemaining > 0) { bytesRemaining -= 1; result = file.read(); } return result; } @Override public int read(byte[] buffer, int offset, int length) throws IOException { length = (int) Math.min(length, bytesRemaining); int bytes = file.read(buffer, offset, length); if (bytes > 0) { bytesRemaining -= bytes; } return bytes; } @Override public int available() throws IOException { return (int) Math.min(bytesRemaining, file.available()); } @Override public void close() throws IOException { file.close(); } } private static final String bashCommand = "bash"; private static final String tailCommand = "tail"; /** * Get the desired maximum length of task's logs. * @param conf the job to look in * @return the number of bytes to cap the log files at */ public static long getTaskLogLength(JobConf conf) { return conf.getLong("mapred.userlog.limit.kb", 100) * 1024; } /** * Wrap a command in a shell to capture stdout and stderr to files. * If the tailLength is 0, the entire output will be saved. * @param cmd The command and the arguments that should be run * @param stdoutFilename The filename that stdout should be saved to * @param stderrFilename The filename that stderr should be saved to * @param tailLength The length of the tail to be saved. * @return the modified command that should be run */ public static List<String> captureOutAndError(List<String> cmd, File stdoutFilename, File stderrFilename, long tailLength ) throws IOException { return captureOutAndError(null, cmd, stdoutFilename, stderrFilename, tailLength, false); } /** * Wrap a command in a shell to capture stdout and stderr to files. * Setup commands such as setting memory limit can be passed which * will be executed before exec. * If the tailLength is 0, the entire output will be saved. * @param setup The setup commands for the execed process. * @param cmd The command and the arguments that should be run * @param stdoutFilename The filename that stdout should be saved to * @param stderrFilename The filename that stderr should be saved to * @param tailLength The length of the tail to be saved. * @return the modified command that should be run */ public static List<String> captureOutAndError(List<String> setup, List<String> cmd, File stdoutFilename, File stderrFilename, long tailLength ) throws IOException { return captureOutAndError(setup, cmd, stdoutFilename, stderrFilename, tailLength, false); } /** * Wrap a command in a shell to capture stdout and stderr to files. * Setup commands such as setting memory limit can be passed which * will be executed before exec. * If the tailLength is 0, the entire output will be saved. * @param setup The setup commands for the execed process. * @param cmd The command and the arguments that should be run * @param stdoutFilename The filename that stdout should be saved to * @param stderrFilename The filename that stderr should be saved to * @param tailLength The length of the tail to be saved. * @deprecated pidFiles are no more used. Instead pid is exported to * env variable JVM_PID. * @return the modified command that should be run */ @Deprecated public static List<String> captureOutAndError(List<String> setup, List<String> cmd, File stdoutFilename, File stderrFilename, long tailLength, String pidFileName ) throws IOException { return captureOutAndError(setup, cmd, stdoutFilename, stderrFilename, tailLength, false, pidFileName); } /** * Wrap a command in a shell to capture stdout and stderr to files. * Setup commands such as setting memory limit can be passed which * will be executed before exec. * If the tailLength is 0, the entire output will be saved. * @param setup The setup commands for the execed process. * @param cmd The command and the arguments that should be run * @param stdoutFilename The filename that stdout should be saved to * @param stderrFilename The filename that stderr should be saved to * @param tailLength The length of the tail to be saved. * @param useSetsid Should setsid be used in the command or not. * @deprecated pidFiles are no more used. Instead pid is exported to * env variable JVM_PID. * @return the modified command that should be run * */ @Deprecated public static List<String> captureOutAndError(List<String> setup, List<String> cmd, File stdoutFilename, File stderrFilename, long tailLength, boolean useSetsid, String pidFileName ) throws IOException { return captureOutAndError(setup,cmd, stdoutFilename, stderrFilename, tailLength, useSetsid); } /** * Wrap a command in a shell to capture stdout and stderr to files. * Setup commands such as setting memory limit can be passed which * will be executed before exec. * If the tailLength is 0, the entire output will be saved. * @param setup The setup commands for the execed process. * @param cmd The command and the arguments that should be run * @param stdoutFilename The filename that stdout should be saved to * @param stderrFilename The filename that stderr should be saved to * @param tailLength The length of the tail to be saved. * @param useSetsid Should setsid be used in the command or not. * @return the modified command that should be run */ public static List<String> captureOutAndError(List<String> setup, List<String> cmd, File stdoutFilename, File stderrFilename, long tailLength, boolean useSetsid ) throws IOException { List<String> result = new ArrayList<String>(3); result.add(bashCommand); result.add("-c"); String mergedCmd = buildCommandLine(setup, cmd, stdoutFilename, stderrFilename, tailLength, useSetsid); result.add(mergedCmd.toString()); return result; } static String buildCommandLine(List<String> setup, List<String> cmd, File stdoutFilename, File stderrFilename, long tailLength, boolean useSetSid) throws IOException { String stdout = FileUtil.makeShellPath(stdoutFilename); String stderr = FileUtil.makeShellPath(stderrFilename); StringBuffer mergedCmd = new StringBuffer(); if (!Shell.WINDOWS) { mergedCmd.append(" export JVM_PID=`echo $$` ; "); } if (setup != null && setup.size() > 0) { mergedCmd.append(addCommand(setup, false)); mergedCmd.append(";"); } if (tailLength > 0) { mergedCmd.append("("); } else if (ProcessTree.isSetsidAvailable && useSetSid && !Shell.WINDOWS) { mergedCmd.append("exec setsid "); } else { mergedCmd.append("exec "); } mergedCmd.append(addCommand(cmd, true)); mergedCmd.append(" < /dev/null "); if (tailLength > 0) { mergedCmd.append(" | "); mergedCmd.append(tailCommand); mergedCmd.append(" -c "); mergedCmd.append(tailLength); mergedCmd.append(" >> "); mergedCmd.append(stdout); mergedCmd.append(" ; exit $PIPESTATUS ) 2>&1 | "); mergedCmd.append(tailCommand); mergedCmd.append(" -c "); mergedCmd.append(tailLength); mergedCmd.append(" >> "); mergedCmd.append(stderr); mergedCmd.append(" ; exit $PIPESTATUS"); } else { mergedCmd.append(" 1>> "); mergedCmd.append(stdout); mergedCmd.append(" 2>> "); mergedCmd.append(stderr); } return mergedCmd.toString(); } /** * Add quotes to each of the command strings and * return as a single string * @param cmd The command to be quoted * @param isExecutable makes shell path if the first * argument is executable * @return returns The quoted string. * @throws IOException */ public static String addCommand(List<String> cmd, boolean isExecutable) throws IOException { StringBuffer command = new StringBuffer(); for(String s: cmd) { command.append('\''); if (isExecutable) { // the executable name needs to be expressed as a shell path for the // shell to find it. command.append(FileUtil.makeShellPath(new File(s))); isExecutable = false; } else { command.append(s); } command.append('\''); command.append(" "); } return command.toString(); } /** * Wrap a command in a shell to capture debug script's * stdout and stderr to debugout. * @param cmd The command and the arguments that should be run * @param debugoutFilename The filename that stdout and stderr * should be saved to. * @return the modified command that should be run * @throws IOException */ public static List<String> captureDebugOut(List<String> cmd, File debugoutFilename ) throws IOException { String debugout = FileUtil.makeShellPath(debugoutFilename); List<String> result = new ArrayList<String>(3); result.add(bashCommand); result.add("-c"); StringBuffer mergedCmd = new StringBuffer(); mergedCmd.append("exec "); boolean isExecutable = true; for(String s: cmd) { if (isExecutable) { // the executable name needs to be expressed as a shell path for the // shell to find it. mergedCmd.append(FileUtil.makeShellPath(new File(s))); isExecutable = false; } else { mergedCmd.append(s); } mergedCmd.append(" "); } mergedCmd.append(" < /dev/null "); mergedCmd.append(" >"); mergedCmd.append(debugout); mergedCmd.append(" 2>&1 "); result.add(mergedCmd.toString()); return result; } public static File getUserLogDir() { return LOG_DIR; } } // TaskLog