/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapred; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.mapred.TaskLog; import org.apache.hadoop.mapred.TaskLog.LogName; import org.apache.hadoop.mapred.TaskLog.LogFileDetail; import org.apache.hadoop.util.StringUtils; class TaskLogsMonitor extends Thread { static final Log LOG = LogFactory.getLog(TaskLogsMonitor.class); long mapRetainSize, reduceRetainSize; public TaskLogsMonitor(long mapRetSize, long reduceRetSize) { mapRetainSize = mapRetSize; reduceRetainSize = reduceRetSize; LOG.info("Starting logs' monitor with mapRetainSize=" + mapRetainSize + " and reduceRetainSize=" + reduceRetSize); } /** * The list of tasks that have finished and so need their logs to be * truncated. */ private Map<TaskAttemptID, PerJVMInfo> finishedJVMs = new HashMap<TaskAttemptID, PerJVMInfo>(); private static final int DEFAULT_BUFFER_SIZE = 4 * 1024; static final int MINIMUM_RETAIN_SIZE_FOR_TRUNCATION = 0; private static class PerJVMInfo { List<Task> allAttempts; public PerJVMInfo(List<Task> allAtmpts) { this.allAttempts = allAtmpts; } } /** * Process(JVM/debug script) has finished. Asynchronously truncate the logs of * all the corresponding tasks to the configured limit. In case of JVM, both * the firstAttempt as well as the list of all attempts that ran in the same * JVM have to be passed. For debug script, the (only) attempt itself should * be passed as both the firstAttempt as well as the list of attempts. * * @param firstAttempt * @param isTaskCleanup */ void addProcessForLogTruncation(TaskAttemptID firstAttempt, List<Task> allAttempts) { LOG.info("Adding the jvm with first-attempt " + firstAttempt + " for logs' truncation"); PerJVMInfo lInfo = new PerJVMInfo(allAttempts); synchronized (finishedJVMs) { finishedJVMs.put(firstAttempt, lInfo); finishedJVMs.notify(); } } /** * Process the removed task's logs. This involves truncating them to * retainSize. */ void truncateLogs(TaskAttemptID firstAttempt, PerJVMInfo lInfo) { // Read the log-file details for all the attempts that ran in this JVM Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails; try { taskLogFileDetails = getAllLogsFileDetails(lInfo.allAttempts); } catch (IOException e) { LOG.warn( "Exception in truncateLogs while getting allLogsFileDetails()." + " Ignoring the truncation of logs of this process.", e); return; } Map<Task, Map<LogName, LogFileDetail>> updatedTaskLogFileDetails = new HashMap<Task, Map<LogName, LogFileDetail>>(); File attemptLogDir = TaskLog.getBaseDir(firstAttempt.toString()); FileWriter tmpFileWriter; FileReader logFileReader; // Now truncate file by file logNameLoop: for (LogName logName : LogName.values()) { File logFile = TaskLog.getTaskLogFile(firstAttempt, logName); if (!logFile.exists()) { continue; } // //// Optimization: if no task is over limit, just skip truncation-code if (logFile.exists() && !isTruncationNeeded(lInfo, taskLogFileDetails, logName)) { LOG.debug("Truncation is not needed for " + logFile.getAbsolutePath()); continue; } // //// End of optimization // Truncation is needed for this log-file. Go ahead now. File tmpFile = new File(attemptLogDir, "truncate.tmp"); try { tmpFileWriter = new FileWriter(tmpFile); } catch (IOException ioe) { LOG.warn("Cannot open " + tmpFile.getAbsolutePath() + " for writing truncated log-file " + logFile.getAbsolutePath() + ". Continuing with other log files. ", ioe); continue; } try { logFileReader = new FileReader(logFile); } catch (FileNotFoundException fe) { LOG.warn("Cannot open " + logFile.getAbsolutePath() + " for reading. Continuing with other log files"); if (!tmpFile.delete()) { LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath()); } continue; } long newCurrentOffset = 0; // Process each attempt from the ordered list passed. for (Task task : lInfo.allAttempts) { // Truncate the log files of this task-attempt so that only the last // retainSize many bytes of this log file is retained and the log // file is reduced in size saving disk space. long retainSize = (task.isMapTask() ? mapRetainSize : reduceRetainSize); LogFileDetail newLogFileDetail = new LogFileDetail(); try { newLogFileDetail = truncateALogFileOfAnAttempt(task.getTaskID(), taskLogFileDetails.get(task).get(logName), retainSize, tmpFileWriter, logFileReader); } catch (IOException ioe) { LOG.warn("Cannot truncate the log file " + logFile.getAbsolutePath() + ". Caught exception while handling " + task.getTaskID(), ioe); // revert back updatedTaskLogFileDetails revertIndexFileInfo(lInfo, taskLogFileDetails, updatedTaskLogFileDetails, logName); if (!tmpFile.delete()) { LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath()); } continue logNameLoop; } // Track information for updating the index file properly. // Index files don't track DEBUGOUT and PROFILE logs, so skip'em. if (TaskLog.LOGS_TRACKED_BY_INDEX_FILES.contains(logName)) { if (!updatedTaskLogFileDetails.containsKey(task)) { updatedTaskLogFileDetails.put(task, new HashMap<LogName, LogFileDetail>()); } // newLogFileDetail already has the location and length set, just // set the start offset now. newLogFileDetail.start = newCurrentOffset; updatedTaskLogFileDetails.get(task).put(logName, newLogFileDetail); newCurrentOffset += newLogFileDetail.length; } } try { tmpFileWriter.close(); } catch (IOException ioe) { LOG.warn("Couldn't close the tmp file " + tmpFile.getAbsolutePath() + ". Deleting it.", ioe); revertIndexFileInfo(lInfo, taskLogFileDetails, updatedTaskLogFileDetails, logName); if (!tmpFile.delete()) { LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath()); } continue; } if (!tmpFile.renameTo(logFile)) { // If the tmpFile cannot be renamed revert back // updatedTaskLogFileDetails to maintain the consistency of the // original log file revertIndexFileInfo(lInfo, taskLogFileDetails, updatedTaskLogFileDetails, logName); if (!tmpFile.delete()) { LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath()); } } } // Update the index files updateIndicesAfterLogTruncation(firstAttempt, updatedTaskLogFileDetails); } /** * @param lInfo * @param taskLogFileDetails * @param updatedTaskLogFileDetails * @param logName */ private void revertIndexFileInfo(PerJVMInfo lInfo, Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails, Map<Task, Map<LogName, LogFileDetail>> updatedTaskLogFileDetails, LogName logName) { if (TaskLog.LOGS_TRACKED_BY_INDEX_FILES.contains(logName)) { for (Task task : lInfo.allAttempts) { if (!updatedTaskLogFileDetails.containsKey(task)) { updatedTaskLogFileDetails.put(task, new HashMap<LogName, LogFileDetail>()); } updatedTaskLogFileDetails.get(task).put(logName, taskLogFileDetails.get(task).get(logName)); } } } /** * Get the logFileDetails of all the list of attempts passed. * * @param lInfo * @return a map of task to the log-file detail * @throws IOException */ private Map<Task, Map<LogName, LogFileDetail>> getAllLogsFileDetails( final List<Task> allAttempts) throws IOException { Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails = new HashMap<Task, Map<LogName, LogFileDetail>>(); for (Task task : allAttempts) { Map<LogName, LogFileDetail> allLogsFileDetails; allLogsFileDetails = TaskLog.getAllLogsFileDetails(task.getTaskID(), task.isTaskCleanupTask()); taskLogFileDetails.put(task, allLogsFileDetails); } return taskLogFileDetails; } /** * Check if truncation of logs is needed for the given jvmInfo. If all the * tasks that ran in a JVM are within the log-limits, then truncation is not * needed. Otherwise it is needed. * * @param lInfo * @param taskLogFileDetails * @param logName * @return true if truncation is needed, false otherwise */ private boolean isTruncationNeeded(PerJVMInfo lInfo, Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails, LogName logName) { boolean truncationNeeded = false; LogFileDetail logFileDetail = null; for (Task task : lInfo.allAttempts) { long taskRetainSize = (task.isMapTask() ? mapRetainSize : reduceRetainSize); Map<LogName, LogFileDetail> allLogsFileDetails = taskLogFileDetails.get(task); logFileDetail = allLogsFileDetails.get(logName); if (taskRetainSize > MINIMUM_RETAIN_SIZE_FOR_TRUNCATION && logFileDetail.length > taskRetainSize) { truncationNeeded = true; break; } } return truncationNeeded; } /** * Truncate the log file of this task-attempt so that only the last retainSize * many bytes of each log file is retained and the log file is reduced in size * saving disk space. * * @param taskID Task whose logs need to be truncated * @param oldLogFileDetail contains the original log details for the attempt * @param taskRetainSize retain-size * @param tmpFileWriter New log file to write to. Already opened in append * mode. * @param logFileReader Original log file to read from. * @return * @throws IOException */ private LogFileDetail truncateALogFileOfAnAttempt( final TaskAttemptID taskID, final LogFileDetail oldLogFileDetail, final long taskRetainSize, final FileWriter tmpFileWriter, final FileReader logFileReader) throws IOException { LogFileDetail newLogFileDetail = new LogFileDetail(); // ///////////// Truncate log file /////////////////////// // New location of log file is same as the old newLogFileDetail.location = oldLogFileDetail.location; if (taskRetainSize > MINIMUM_RETAIN_SIZE_FOR_TRUNCATION && oldLogFileDetail.length > taskRetainSize) { LOG.info("Truncating logs for " + taskID + " from " + oldLogFileDetail.length + "bytes to " + taskRetainSize + "bytes."); newLogFileDetail.length = taskRetainSize; } else { LOG.info("No truncation needed for " + taskID + " length is " + oldLogFileDetail.length + " retain size " + taskRetainSize + "bytes."); newLogFileDetail.length = oldLogFileDetail.length; } long charsSkipped = logFileReader.skip(oldLogFileDetail.length - newLogFileDetail.length); if (charsSkipped != oldLogFileDetail.length - newLogFileDetail.length) { throw new IOException("Erroneously skipped " + charsSkipped + " instead of the expected " + (oldLogFileDetail.length - newLogFileDetail.length)); } long alreadyRead = 0; while (alreadyRead < newLogFileDetail.length) { char tmpBuf[]; // Temporary buffer to read logs if (newLogFileDetail.length - alreadyRead >= DEFAULT_BUFFER_SIZE) { tmpBuf = new char[DEFAULT_BUFFER_SIZE]; } else { tmpBuf = new char[(int) (newLogFileDetail.length - alreadyRead)]; } int bytesRead = logFileReader.read(tmpBuf); if (bytesRead < 0) { break; } else { alreadyRead += bytesRead; } tmpFileWriter.write(tmpBuf); } // ////// End of truncating log file /////////////////////// return newLogFileDetail; } /** * Truncation of logs is done. Now sync the index files to reflect the * truncated sizes. * * @param firstAttempt * @param updatedTaskLogFileDetails */ private void updateIndicesAfterLogTruncation(TaskAttemptID firstAttempt, Map<Task, Map<LogName, LogFileDetail>> updatedTaskLogFileDetails) { for (Entry<Task, Map<LogName, LogFileDetail>> entry : updatedTaskLogFileDetails.entrySet()) { Task task = entry.getKey(); Map<LogName, LogFileDetail> logFileDetails = entry.getValue(); Map<LogName, Long[]> logLengths = new HashMap<LogName, Long[]>(); // set current and previous lengths for (LogName logName : TaskLog.LOGS_TRACKED_BY_INDEX_FILES) { logLengths.put(logName, new Long[] { Long.valueOf(0L), Long.valueOf(0L) }); LogFileDetail lfd = logFileDetails.get(logName); if (lfd != null) { // Set previous lengths logLengths.get(logName)[0] = Long.valueOf(lfd.start); // Set current lengths logLengths.get(logName)[1] = Long.valueOf(lfd.start + lfd.length); } } try { TaskLog.writeToIndexFile(firstAttempt, task.getTaskID(), task.isTaskCleanupTask(), logLengths); } catch (IOException ioe) { LOG.warn("Exception in updateIndicesAfterLogTruncation : " + StringUtils.stringifyException(ioe)); LOG.warn("Exception encountered while updating index file of task " + task.getTaskID() + ". Ignoring and continuing with other tasks."); } } } /** * * @throws IOException */ void monitorTaskLogs() throws IOException { Map<TaskAttemptID, PerJVMInfo> tasksBeingTruncated = new HashMap<TaskAttemptID, PerJVMInfo>(); // Start monitoring newly added finishedJVMs synchronized (finishedJVMs) { tasksBeingTruncated.clear(); tasksBeingTruncated.putAll(finishedJVMs); finishedJVMs.clear(); } for (Entry<TaskAttemptID, PerJVMInfo> entry : tasksBeingTruncated.entrySet()) { truncateLogs(entry.getKey(), entry.getValue()); } } @Override public void run() { while (true) { try { monitorTaskLogs(); try { synchronized (finishedJVMs) { while (finishedJVMs.isEmpty()) { finishedJVMs.wait(); } } } catch (InterruptedException e) { LOG.warn(getName() + " is interrupted. Returning"); return; } } catch (Throwable e) { LOG.warn(getName() + " encountered an exception while monitoring : " + StringUtils.stringifyException(e)); LOG.info("Ingoring the exception and continuing monitoring."); } } } }