/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SecureIOUtils;
import org.apache.hadoop.mapred.TaskLog;
import org.apache.hadoop.mapred.TaskLog.LogName;
import org.apache.hadoop.mapred.TaskLog.LogFileDetail;
import org.apache.hadoop.mapreduce.server.tasktracker.JVMInfo;
import org.apache.hadoop.mapreduce.server.tasktracker.userlogs.UserLogManager;
/**
* The class for truncating the user logs.
* Should be used only by {@link UserLogManager}.
*
*/
/**
 * The class for truncating the user logs.
 * Should be used only by {@link UserLogManager}.
 */
public class TaskLogsTruncater {
  static final Log LOG = LogFactory.getLog(TaskLogsTruncater.class);

  /** Configuration key: number of bytes of a map task's user log to retain. */
  static final String MAP_USERLOG_RETAIN_SIZE =
      "mapreduce.cluster.map.userlog.retain-size";
  /** Configuration key: number of bytes of a reduce task's user log to retain. */
  static final String REDUCE_USERLOG_RETAIN_SIZE =
      "mapreduce.cluster.reduce.userlog.retain-size";
  /** Default retain-size; values <= MINIMUM_RETAIN_SIZE_FOR_TRUNCATION disable truncation. */
  static final int DEFAULT_RETAIN_SIZE = -1;
  /** Marker written at the head of a log file that has been truncated. */
  static final String TRUNCATED_MSG =
      "[ ... this log file was truncated because of excess length]\n";

  /** Bytes of map/reduce user log to retain, from configuration. */
  long mapRetainSize, reduceRetainSize;

  /**
   * @param conf configuration supplying the map/reduce retain-sizes
   */
  public TaskLogsTruncater(Configuration conf) {
    mapRetainSize = conf.getLong(MAP_USERLOG_RETAIN_SIZE, DEFAULT_RETAIN_SIZE);
    reduceRetainSize = conf.getLong(REDUCE_USERLOG_RETAIN_SIZE,
        DEFAULT_RETAIN_SIZE);
    LOG.info("Initializing logs' truncater with mapRetainSize=" + mapRetainSize
        + " and reduceRetainSize=" + reduceRetainSize);
  }

  /** Size of the temporary buffer used while copying retained log bytes. */
  private static final int DEFAULT_BUFFER_SIZE = 4 * 1024;
  /** Retain-sizes at or below this value mean "never truncate". */
  static final int MINIMUM_RETAIN_SIZE_FOR_TRUNCATION = 0;

  /**
   * Check the log file sizes generated by the attempts that ran in a
   * particular JVM
   * @param lInfo info about the JVM (log location and the attempts it ran)
   * @return is truncation required?
   * @throws IOException
   */
  public boolean shouldTruncateLogs(JVMInfo lInfo) throws IOException {
    // Read the log-file details for all the attempts that ran in this JVM
    Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails;
    try {
      taskLogFileDetails = getAllLogsFileDetails(lInfo.getAllAttempts());
    } catch (IOException e) {
      // If the index can't be read, err on the side of doing nothing.
      LOG.warn(
          "Exception in truncateLogs while getting allLogsFileDetails()."
              + " Ignoring the truncation of logs of this process.", e);
      return false;
    }

    File attemptLogDir = lInfo.getLogLocation();

    for (LogName logName : LogName.values()) {
      File logFile = new File(attemptLogDir, logName.toString());
      if (logFile.exists()) {
        if (!isTruncationNeeded(lInfo, taskLogFileDetails, logName)) {
          LOG.debug("Truncation is not needed for "
              + logFile.getAbsolutePath());
        } else return true; // at least one log is over its limit
      }
    }
    return false;
  }

  /**
   * Process the removed task's logs. This involves truncating them to
   * retainSize.
   *
   * <p>For each log kind: the retained tail of every attempt's section is
   * copied into a {@code truncate.tmp} file which then atomically replaces
   * the original via rename. On any failure the in-memory index updates are
   * rolled back so the on-disk index stays consistent with the untouched
   * original file.
   */
  public void truncateLogs(JVMInfo lInfo) {
    Task firstAttempt = lInfo.getAllAttempts().get(0);
    String owner;
    try {
      // All attempts in one JVM share the same log-dir owner.
      owner = TaskLog.obtainLogDirOwner(firstAttempt.getTaskID());
    } catch (IOException ioe) {
      LOG.error("Unable to create a secure IO context to truncate logs for " +
          firstAttempt, ioe);
      return;
    }

    // Read the log-file details for all the attempts that ran in this JVM
    Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails;
    try {
      taskLogFileDetails = getAllLogsFileDetails(lInfo.getAllAttempts());
    } catch (IOException e) {
      LOG.warn(
          "Exception in truncateLogs while getting allLogsFileDetails()."
              + " Ignoring the truncation of logs of this process.", e);
      return;
    }

    // set this boolean to true if any of the log files is truncated
    boolean indexModified = false;

    Map<Task, Map<LogName, LogFileDetail>> updatedTaskLogFileDetails =
        new HashMap<Task, Map<LogName, LogFileDetail>>();

    // Make a copy of original indices into updated indices
    for (LogName logName : LogName.values()) {
      copyOriginalIndexFileInfo(lInfo, taskLogFileDetails,
          updatedTaskLogFileDetails, logName);
    }

    File attemptLogDir = lInfo.getLogLocation();
    FileOutputStream tmpFileOutputStream;
    FileInputStream logFileInputStream;

    // Now truncate file by file
    logNameLoop: for (LogName logName : LogName.values()) {
      File logFile = new File(attemptLogDir, logName.toString());

      // //// Optimization: if no task is over limit, just skip truncation-code
      if (logFile.exists()
          && !isTruncationNeeded(lInfo, taskLogFileDetails, logName)) {
        LOG.debug("Truncation is not needed for "
            + logFile.getAbsolutePath());
        continue;
      }
      // //// End of optimization

      // Truncation is needed for this log-file. Go ahead now.

      // ////// Open truncate.tmp file for writing //////
      File tmpFile = new File(attemptLogDir, "truncate.tmp");
      try {
        tmpFileOutputStream = SecureIOUtils.createForWrite(tmpFile, 0644);
      } catch (IOException ioe) {
        LOG.warn("Cannot open " + tmpFile.getAbsolutePath()
            + " for writing truncated log-file "
            + logFile.getAbsolutePath()
            + ". Continuing with other log files. ", ioe);
        continue;
      }
      // ////// End of opening truncate.tmp file //////

      // ////// Open logFile for reading //////
      try {
        logFileInputStream = SecureIOUtils.openForRead(logFile, owner);
      } catch (IOException ioe) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Cannot open " + logFile.getAbsolutePath()
              + " for reading. Continuing with other log files", ioe);
        }
        // Clean up the half-created tmp file before moving on.
        try {
          tmpFileOutputStream.close();
        } catch (IOException e) {
          LOG.warn("Cannot close tmpFileOutputStream for "
              + tmpFile.getAbsolutePath(), e);
        }
        if (!tmpFile.delete()) {
          LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath());
        }
        continue;
      }
      // ////// End of opening logFile for reading //////

      long newCurrentOffset = 0;
      // Process each attempt from the ordered list passed.
      for (Task task : lInfo.getAllAttempts()) {
        // Truncate the log files of this task-attempt so that only the last
        // retainSize many bytes of this log file is retained and the log
        // file is reduced in size saving disk space.
        long retainSize =
            (task.isMapTask() ? mapRetainSize : reduceRetainSize);
        LogFileDetail newLogFileDetail = null;
        try {
          newLogFileDetail =
              truncateALogFileOfAnAttempt(task.getTaskID(),
                  taskLogFileDetails.get(task).get(logName), retainSize,
                  tmpFileOutputStream, logFileInputStream, logName);
        } catch (IOException ioe) {
          LOG.warn("Cannot truncate the log file "
              + logFile.getAbsolutePath()
              + ". Caught exception while handling " + task.getTaskID(),
              ioe);
          // revert back updatedTaskLogFileDetails
          copyOriginalIndexFileInfo(lInfo, taskLogFileDetails,
              updatedTaskLogFileDetails, logName);
          try {
            logFileInputStream.close();
          } catch (IOException e) {
            LOG.warn("Cannot close logFileInputStream for "
                + logFile.getAbsolutePath(), e);
          }
          try {
            tmpFileOutputStream.close();
          } catch (IOException e) {
            LOG.warn("Cannot close tmpFileOutputStream for "
                + tmpFile.getAbsolutePath(), e);
          }
          if (!tmpFile.delete()) {
            LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath());
          }
          // Abandon this log kind entirely; the original file is untouched.
          continue logNameLoop;
        }

        // Track information for updating the index file properly.
        // Index files don't track DEBUGOUT and PROFILE logs, so skip'em.
        if (TaskLog.LOGS_TRACKED_BY_INDEX_FILES.contains(logName)) {
          if (!updatedTaskLogFileDetails.containsKey(task)) {
            updatedTaskLogFileDetails.put(task,
                new HashMap<LogName, LogFileDetail>());
          }
          // newLogFileDetail already has the location and length set, just
          // set the start offset now.
          newLogFileDetail.start = newCurrentOffset;
          updatedTaskLogFileDetails.get(task).put(logName, newLogFileDetail);
          newCurrentOffset += newLogFileDetail.length;
          indexModified = true; // set the flag
        }
      }

      // ////// Close the file streams ////////////
      try {
        tmpFileOutputStream.close();
      } catch (IOException ioe) {
        LOG.warn("Couldn't close the tmp file " + tmpFile.getAbsolutePath()
            + ". Deleting it.", ioe);
        copyOriginalIndexFileInfo(lInfo, taskLogFileDetails,
            updatedTaskLogFileDetails, logName);
        if (!tmpFile.delete()) {
          LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath());
        }
        continue;
      } finally {
        try {
          logFileInputStream.close();
        } catch (IOException e) {
          LOG.warn("Cannot close logFileInputStream for "
              + logFile.getAbsolutePath(), e);
        }
      }
      // ////// End of closing the file streams ////////////

      // ////// Commit the changes from tmp file to the logFile ////////////
      if (!tmpFile.renameTo(logFile)) {
        // If the tmpFile cannot be renamed revert back
        // updatedTaskLogFileDetails to maintain the consistency of the
        // original log file
        copyOriginalIndexFileInfo(lInfo, taskLogFileDetails,
            updatedTaskLogFileDetails, logName);
        if (!tmpFile.delete()) {
          LOG.warn("Cannot delete tmpFile " + tmpFile.getAbsolutePath());
        }
      }
      // ////// End of committing the changes to the logFile ////////////
    }

    if (indexModified) {
      // Update the index files
      updateIndicesAfterLogTruncation(attemptLogDir.toString(),
          updatedTaskLogFileDetails);
    }
  }

  /**
   * Copy the original per-attempt index entries for {@code logName} into the
   * updated map. Used both to seed the updated map and to roll it back after
   * a failed truncation.
   *
   * @param lInfo the JVM whose attempts are copied
   * @param taskLogFileDetails original index details
   * @param updatedTaskLogFileDetails map receiving the (restored) entries
   * @param logName the log kind to copy; no-op for kinds not tracked by
   *        index files
   */
  private void copyOriginalIndexFileInfo(JVMInfo lInfo,
      Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails,
      Map<Task, Map<LogName, LogFileDetail>> updatedTaskLogFileDetails,
      LogName logName) {
    if (TaskLog.LOGS_TRACKED_BY_INDEX_FILES.contains(logName)) {
      for (Task task : lInfo.getAllAttempts()) {
        if (!updatedTaskLogFileDetails.containsKey(task)) {
          updatedTaskLogFileDetails.put(task,
              new HashMap<LogName, LogFileDetail>());
        }
        updatedTaskLogFileDetails.get(task).put(logName,
            taskLogFileDetails.get(task).get(logName));
      }
    }
  }

  /**
   * Get the logFileDetails of all the list of attempts passed.
   * @param allAttempts the attempts we are interested in
   *
   * @return a map of task to the log-file detail
   * @throws IOException if an attempt's index file cannot be read
   */
  private Map<Task, Map<LogName, LogFileDetail>> getAllLogsFileDetails(
      final List<Task> allAttempts) throws IOException {
    Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails =
        new HashMap<Task, Map<LogName, LogFileDetail>>();
    for (Task task : allAttempts) {
      Map<LogName, LogFileDetail> allLogsFileDetails;
      allLogsFileDetails =
          TaskLog.getAllLogsFileDetails(task.getTaskID(),
              task.isTaskCleanupTask());
      taskLogFileDetails.put(task, allLogsFileDetails);
    }
    return taskLogFileDetails;
  }

  /**
   * Check if truncation of logs is needed for the given jvmInfo. If all the
   * tasks that ran in a JVM are within the log-limits, then truncation is not
   * needed. Otherwise it is needed.
   *
   * @param lInfo the JVM whose attempts are checked
   * @param taskLogFileDetails index details for those attempts
   * @param logName the log kind being checked
   * @return true if truncation is needed, false otherwise
   */
  private boolean isTruncationNeeded(JVMInfo lInfo,
      Map<Task, Map<LogName, LogFileDetail>> taskLogFileDetails,
      LogName logName) {
    boolean truncationNeeded = false;
    LogFileDetail logFileDetail = null;
    for (Task task : lInfo.getAllAttempts()) {
      long taskRetainSize =
          (task.isMapTask() ? mapRetainSize : reduceRetainSize);
      Map<LogName, LogFileDetail> allLogsFileDetails =
          taskLogFileDetails.get(task);
      logFileDetail = allLogsFileDetails.get(logName);
      // A non-positive retain-size disables truncation entirely.
      if (taskRetainSize > MINIMUM_RETAIN_SIZE_FOR_TRUNCATION
          && logFileDetail.length > taskRetainSize) {
        truncationNeeded = true;
        break;
      }
    }
    return truncationNeeded;
  }

  /**
   * Truncate the log file of this task-attempt so that only the last retainSize
   * many bytes of each log file is retained and the log file is reduced in size
   * saving disk space.
   *
   * @param taskID Task whose logs need to be truncated
   * @param oldLogFileDetail contains the original log details for the attempt
   * @param taskRetainSize retain-size
   * @param tmpFileOutputStream New log file to write to. Already opened in append
   *          mode.
   * @param logFileInputStream Original log file to read from.
   * @return the log-file detail (location and length) of the section written
   *         to the tmp file; start offset is left for the caller to set
   * @throws IOException if reading/skipping/writing fails
   */
  private LogFileDetail truncateALogFileOfAnAttempt(
      final TaskAttemptID taskID, final LogFileDetail oldLogFileDetail,
      final long taskRetainSize,
      final FileOutputStream tmpFileOutputStream,
      final FileInputStream logFileInputStream, final LogName logName)
      throws IOException {
    LogFileDetail newLogFileDetail = new LogFileDetail();
    long logSize = 0;

    // ///////////// Truncate log file ///////////////////////

    // New location of log file is same as the old
    newLogFileDetail.location = oldLogFileDetail.location;
    if (taskRetainSize > MINIMUM_RETAIN_SIZE_FOR_TRUNCATION
        && oldLogFileDetail.length > taskRetainSize) {
      LOG.info("Truncating " + logName + " logs for " + taskID + " from "
          + oldLogFileDetail.length + "bytes to " + taskRetainSize
          + "bytes.");
      logSize = taskRetainSize;
      // TRUNCATED_MSG is ASCII, so the platform default charset is safe here.
      byte[] truncatedMsgBytes = TRUNCATED_MSG.getBytes();
      tmpFileOutputStream.write(truncatedMsgBytes);
      newLogFileDetail.length += truncatedMsgBytes.length;
    } else {
      LOG.debug("No truncation needed for " + logName + " logs for " + taskID
          + " length is " + oldLogFileDetail.length + " retain size "
          + taskRetainSize + "bytes.");
      logSize = oldLogFileDetail.length;
    }

    // Skip ahead so that only the last logSize bytes remain to be copied.
    long bytesSkipped =
        logFileInputStream.skip(oldLogFileDetail.length - logSize);
    if (bytesSkipped != oldLogFileDetail.length - logSize) {
      throw new IOException("Erroneously skipped " + bytesSkipped
          + " instead of the expected "
          + (oldLogFileDetail.length - logSize)
          + " while truncating " + logName + " logs for " + taskID);
    }

    // Copy the retained tail. InputStream.read may return fewer bytes than
    // requested, so write only the bytes actually read; writing the whole
    // buffer on a short read would pad the new log with stale/zero bytes.
    byte[] tmpBuf = new byte[DEFAULT_BUFFER_SIZE]; // reused copy buffer
    long alreadyRead = 0;
    while (alreadyRead < logSize) {
      int toRead = (int) Math.min(tmpBuf.length, logSize - alreadyRead);
      int bytesRead = logFileInputStream.read(tmpBuf, 0, toRead);
      if (bytesRead < 0) {
        break; // unexpected EOF; stop copying what isn't there
      }
      alreadyRead += bytesRead;
      tmpFileOutputStream.write(tmpBuf, 0, bytesRead);
    }
    newLogFileDetail.length += logSize;
    // ////// End of truncating log file ///////////////////////

    return newLogFileDetail;
  }

  /**
   * Truncation of logs is done. Now sync the index files to reflect the
   * truncated sizes.
   *
   * @param location the attempt log directory whose index files are rewritten
   * @param updatedTaskLogFileDetails the post-truncation details per task
   */
  private void updateIndicesAfterLogTruncation(String location,
      Map<Task, Map<LogName, LogFileDetail>> updatedTaskLogFileDetails) {
    for (Entry<Task, Map<LogName, LogFileDetail>> entry :
        updatedTaskLogFileDetails.entrySet()) {
      Task task = entry.getKey();
      Map<LogName, LogFileDetail> logFileDetails = entry.getValue();
      Map<LogName, Long[]> logLengths = new HashMap<LogName, Long[]>();
      // set current and previous lengths
      for (LogName logName : TaskLog.LOGS_TRACKED_BY_INDEX_FILES) {
        logLengths.put(logName, new Long[] { Long.valueOf(0L),
            Long.valueOf(0L) });
        LogFileDetail lfd = logFileDetails.get(logName);
        if (lfd != null) {
          // Set previous lengths
          logLengths.get(logName)[0] = Long.valueOf(lfd.start);
          // Set current lengths
          logLengths.get(logName)[1] = Long.valueOf(lfd.start + lfd.length);
        }
      }
      try {
        TaskLog.writeToIndexFile(location, task.getTaskID(),
            task.isTaskCleanupTask(), logLengths);
      } catch (IOException ioe) {
        LOG.warn("Exception encountered while updating index file of task "
            + task.getTaskID()
            + ". Ignoring and continuing with other tasks.", ioe);
      }
    }
  }

  /**
   * Command-line entry point: reads the list of tasks that ran in a JVM from
   * the file named by {@code args[0]} and truncates their logs.
   *
   * <p>File format: an int count, then per task a boolean (true = map task,
   * false = reduce task) followed by the serialized Task fields.
   */
  public static void main(String args[]) throws IOException {
    String taskRanFile = args[0];
    Configuration conf = new Configuration();

    // read the Task objects from the file
    LocalFileSystem lfs = FileSystem.getLocal(conf);
    List<Task> taskAttemptsRan = new ArrayList<Task>();
    FSDataInputStream din = lfs.open(new Path(taskRanFile));
    try {
      int numTasksRan = din.readInt();
      for (int i = 0; i < numTasksRan; i++) {
        Task t;
        if (din.readBoolean()) {
          t = new MapTask();
        } else {
          t = new ReduceTask();
        }
        t.readFields(din);
        taskAttemptsRan.add(t);
      }
    } finally {
      // The original leaked this stream; close it before truncating.
      din.close();
    }

    Task firstTask = taskAttemptsRan.get(0);
    TaskLogsTruncater trunc = new TaskLogsTruncater(conf);
    trunc.truncateLogs(new JVMInfo(
        TaskLog.getAttemptDir(firstTask.getTaskID(),
            firstTask.isTaskCleanupTask()),
        taskAttemptsRan));
    System.exit(0);
  }
}