/*******************************************************************************
* Copyright (c) 2010 Arapiki Solutions Inc.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* "Peter Smith <psmith@arapiki.com>" - initial API and
* implementation and/or initial documentation
*******************************************************************************/
package com.buildml.scanner.legacy;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.zip.GZIPInputStream;
import com.buildml.model.IActionMgr;
import com.buildml.model.IActionMgr.OperationType;
import com.buildml.model.IBuildStore;
import com.buildml.model.IFileMgr;
import com.buildml.model.IFileMgr.PathType;
import com.buildml.scanner.FatalBuildScannerError;
import com.buildml.utils.errors.ErrorCode;
import com.buildml.utils.string.ShellCommandUtils;
/**
* This class parses the output from a CFS (capture file system)
* trace file (by default, "cfs.trace") and creates a corresponding BuildStore.
* Typically this class would only be instantiated by the LegacyBuildScanner class.
*
* @author "Peter Smith <psmith@arapiki.com>"
*/
/* package */ class TraceFileScanner {
/*
* Important note: the content of this file must be kept in sync with the
* interposer functions in CFS. If any changes are made to the data being
* stored in the trace buffer, the follow methods must also be updated.
*/
/*=====================================================================================*
* TYPES/FIELDS
*=====================================================================================*/
/*
* Each entry in the trace buffer has a corresponding tag to state what
* operation is being traced. See trace_file_format.h (in CaptureFS)
* for details.
*/
/** The end of file has been reached. Note this tag isn't actually stored in the trace file. */
private static final int TRACE_FILE_EOF = -1;
/** CFS is registering the existence of a source file. */
private static final int TRACE_FILE_REGISTER = 1;
/** A file has been opened for write. */
private static final int TRACE_FILE_WRITE = 2;
/** A file has been opened for read. */
private static final int TRACE_FILE_READ = 3;
/** A file has been opened for update. */
private static final int TRACE_FILE_MODIFY = 4;
/** A file has been deleted. */
private static final int TRACE_FILE_DELETE = 5;
/** A file has been renamed. */
private static final int TRACE_FILE_RENAME = 6;
/** A new symlink has been created. */
private static final int TRACE_FILE_NEW_LINK = 7;
/** A new program has been executed. */
private static final int TRACE_FILE_NEW_PROGRAM = 8;
/** A directory has been opened for write. */
private static final int TRACE_DIR_WRITE = 9;
/** A directory has been opened for read. */
private static final int TRACE_DIR_READ = 10;
/** A directory has been opened for update. */
private static final int TRACE_DIR_MODIFY = 11;
/** A directory has been deleted. */
private static final int TRACE_DIR_DELETE = 12;
/**
* When reading data from the trace file, the amount of data we should read each time.
*/
private static final int readBufferMax = 65536;
/** The input stream for reading the trace file. */
private InputStream inputStream;
/** Tracks the current position within the input stream */
private int traceFilePos;
/** The BuildStore we should add trace file information to (null = don't add to BuildStore). */
private IBuildStore buildStore;
/** The ActionMgr object contained within our BuildStore (null = don't add to BuildStore). */
private IActionMgr actionMgr;
/** The FileMgr object contained within our BuildStore (null = don't add to BuildStore). */
private IFileMgr fileMgr;
/** The PrintStream to write debug information to (null = don't write debug information). */
private PrintStream debugStream;
/** The amount of debug output to provide (0, 1 or 2). */
private int debugLevel;
/**
* An in-memory buffer of bytes read from the trace file. This will
* be at most readBufferMax bytes in size.
*/
private byte[] readBuffer;
/** The index of the next byte within readBuffer to be processed. */
private int bufferOffset = 0;
/** The number of bytes still to be processed from readBuffer. */
private int bytesRemaining = 0;
/**
* Mapping between the process numbers that CFS (and the trace file) provides us with
* to the "action ID" numbers that BuildStore uses. As we encounter new processes, and
* add them to the BuildStore, we'll be allocated corresponding build action IDs.
*/
private HashMap<Integer, Integer> processToActionMap = null;
/*=====================================================================================*
* CONSTRUCTORS
*=====================================================================================*/
/**
* Instantiate a new TraceFileScanner object. The trace file is opened, ready
* to have trace data read from it.
*
* @param fileName Name of the trace file to read.
* @param buildStore The BuildStore to add the trace file information to (possibly null).
* @param debugStream The PrintStream to write debug information to (possibly null).
* @param debugLevel The amount of debug information desired (0, 1 or 2).
* @throws IOException If opening the file fails.
*/
/* package */ TraceFileScanner(
String fileName,
IBuildStore buildStore,
PrintStream debugStream,
int debugLevel) throws IOException {
/* save the BuildStore and PrintStream, so that other methods can use them */
this.buildStore = buildStore;
this.debugStream = debugStream;
this.debugLevel = debugLevel;
/* these objects are part of our BuildStore object */
if (buildStore != null) {
this.actionMgr = buildStore.getActionMgr();
this.fileMgr = buildStore.getFileMgr();
}
/*
* Create a map between CFS process numbers and BuildStore action numbers.
* Insert the parent/root ID numbers to signify the first process/action
*/
processToActionMap = new HashMap<Integer, Integer>();
processToActionMap.put(Integer.valueOf(0), actionMgr.getRootAction("root"));
/* set up input stream, and variables for reading it */
inputStream = new GZIPInputStream(new FileInputStream(fileName));
readBuffer = new byte[readBufferMax];
bufferOffset = 0;
bytesRemaining = 0;
traceFilePos = 0;
}
/*=====================================================================================*
* PUBLIC METHODS
*=====================================================================================*/
/**
* Parse the whole trace file and process the data. As each tag is read, the values
* associated with that tag are also fetched and processed. The appropriate methods
* are then called on the BuildStore (and it's managers) to reconstruct the build process.
*
* @throws IOException If an I/O operation occurs while reading the file.
*/
public void parse() throws IOException {
/* we'll do a lot of writing to the database, speed up access in favour of safety. */
boolean prevState = buildStore.setFastAccessMode(true);
String fileName = null;
boolean eof = false;
do {
/* all records start with a tag, followed by a process number */
int tagTraceFilePos = traceFilePos;
int tag = getTag();
int processNum = getInt();
/* do something different for each tag */
switch (tag) {
case TRACE_FILE_EOF:
eof = true;
break;
case TRACE_FILE_REGISTER:
fileName = getString();
debugln(1, "Registered file: " + fileName);
break;
case TRACE_FILE_WRITE:
case TRACE_DIR_WRITE:
fileName = getString();
addPathAccess(fileName, processNum, OperationType.OP_WRITE,
(tag == TRACE_FILE_WRITE) ? PathType.TYPE_FILE : PathType.TYPE_DIR);
break;
case TRACE_FILE_READ:
case TRACE_DIR_READ:
fileName = getString();
addPathAccess(fileName, processNum, OperationType.OP_READ,
(tag == TRACE_FILE_READ) ? PathType.TYPE_FILE : PathType.TYPE_DIR);
break;
case TRACE_FILE_MODIFY:
case TRACE_DIR_MODIFY:
fileName = getString();
addPathAccess(fileName, processNum, OperationType.OP_MODIFIED,
(tag == TRACE_FILE_MODIFY) ? PathType.TYPE_FILE : PathType.TYPE_DIR);
break;
case TRACE_FILE_DELETE:
case TRACE_DIR_DELETE:
fileName = getString();
addPathAccess(fileName, processNum, OperationType.OP_DELETE,
(tag == TRACE_FILE_DELETE) ? PathType.TYPE_FILE : PathType.TYPE_DIR);
break;
case TRACE_FILE_RENAME:
break;
case TRACE_FILE_NEW_LINK:
break;
case TRACE_FILE_NEW_PROGRAM:
addBuildAction(processNum);
break;
default:
throw new FatalBuildScannerError("Invalid tag in trace file: " + tag +
" at trace file position " + tagTraceFilePos);
}
} while (!eof);
buildStore.setFastAccessMode(prevState);
}
/*-------------------------------------------------------------------------------------*/
/**
* Close the trace file.
*
* @throws IOException If closing the file fails.
*/
public void close() throws IOException {
inputStream.close();
}
/*=====================================================================================*
* PRIVATE METHODS
*=====================================================================================*/
/**
* Create a new build action in the BuildStore.
*
* @param processNum The Unix process ID of the scanned process.
* @throws IOException A problem occurred while reading the trace file.
*/
private void addBuildAction(int processNum) throws IOException {
/* which process spawned this new process? */
int parentProcessNum = getInt();
/* what was the current working directory for the process */
String cwd = getString();
debug(1, "New Process " + processNum + " (parent " + parentProcessNum +
", directory " + cwd + ") - ");
/* fetch all the command line arguments */
int argCount = getInt();
StringBuffer commandArgs = new StringBuffer();
boolean first = true;
while (argCount-- != 0) {
String arg = getString();
if (first) {
first = false; /* no space separator required */
} else {
commandArgs.append(' '); /* put a space between arguments */
}
String quotedArg = ShellCommandUtils.shellEscapeString(arg);
commandArgs.append(quotedArg);
}
String command = commandArgs.toString();
debugln(1, command);
debugln(2, "Environment");
while (true) {
String env = getString();
if (env.isEmpty()){ break; }
debugln(2, " - " + env);
}
/* Update the BuildStore */
if (buildStore != null) {
/* map the process number (from cfs) into the BuildStore's actionId */
Integer parentActionId = getActionId(parentProcessNum);
/* fetch the current working directory ID */
int actionDirId = fileMgr.addDirectory(cwd);
if (actionDirId == ErrorCode.BAD_PATH){
throw new FatalBuildScannerError("Invalid current working directory: " + cwd);
}
/* add the new action to the build store */
int newActionId = actionMgr.addShellCommandAction(parentActionId, actionDirId, command);
/* associate CFS's process number with BuildStore's actionID */
setActionId(processNum, newActionId);
}
}
/*-------------------------------------------------------------------------------------*/
/**
* Add a new mapping from a CFS process number to a BuildStore actionID.
*
* @param processNum The CFS process number.
* @param newActionId The corresponding BuildStore actionId.
* @throws FatalBuildScannerError If there's already a mapping for this process.
*/
private void setActionId(int processNum, int newActionId) {
if (processToActionMap.get(Integer.valueOf(processNum)) != null){
throw new FatalBuildScannerError("Process number " + processNum +
" appears to have been created twice.");
}
processToActionMap.put(Integer.valueOf(processNum), Integer.valueOf(newActionId));
}
/*-------------------------------------------------------------------------------------*/
/**
* Given a CFS process number, translate it into a BuildStore action number.
*
* @param processNum The CFS process number.
* @return The corresponding BuildStore action number.
* @throws FatalBuildScannerError Ff the process-to-action mapping is unknown.
*/
private int getActionId(int processNum) {
Integer actionId = processToActionMap.get(Integer.valueOf(processNum));
if (actionId == null){
throw new FatalBuildScannerError("Process number " + processNum +
" does not have an assigned action number");
}
return actionId;
}
/*-------------------------------------------------------------------------------------*/
/**
* Record a file access in the BuildStore, based on a file access that was noted
* in the trace file.
*
* @param fileName The name of the file that was accessed.
* @param processNum The Unix process ID of the process that did the accessing.
* @param direction The type of access (read, write).
* @param type Is this a file, directory or symlink.
*/
private void addPathAccess(String fileName, int processNum, OperationType direction, PathType type) {
/* debug output */
String opString;
switch(direction) {
case OP_READ:
opString = "reading";
break;
case OP_WRITE:
opString = "writing";
break;
case OP_MODIFIED:
opString="modified";
break;
case OP_DELETE:
opString="deleted";
break;
default:
opString="unknown operation";
break;
}
debugln(1, "Process " + processNum + " " + opString + " " +
fileName +
" (" + ((type == PathType.TYPE_FILE) ? "File" :
(type == PathType.TYPE_DIR) ? "Directory" : "Symlink") + ")");
/* get the BuildStore actionId for the current process */
int actionId = getActionId(processNum);
/* get the BuildStore fileId */
int fileId;
if (type == PathType.TYPE_FILE) {
fileId = fileMgr.addFile(fileName);
} else if (type == PathType.TYPE_DIR) {
fileId = fileMgr.addDirectory(fileName);
} else {
fileId = fileMgr.addSymlink(fileName);
}
if (fileId < 0){
String msg = "Failed to add file: " + fileName + " to the BuildML database.";
int existingPathId = fileMgr.getPath(fileName);
if (existingPathId >= 0) {
PathType existingType = fileMgr.getPathType(existingPathId);
if (existingType != type) {
msg += "\nThere is already a path with this name, but with a " +
"different type (file versus directory).\n";
msg += "You must modify the legacy build system to fix this problem.";
}
}
throw new FatalBuildScannerError(msg);
}
/* add the file access information to the build store */
actionMgr.addFileAccess(actionId, fileId, direction);
/*
* If the file we are accessing has been trashed, that's because it was a temporary
* file that's no longer required. Because temporary file names can sometimes be
* reused, we need to permanently empty the trash, otherwise we can't re-add the
* same path.
*/
if (fileMgr.isPathTrashed(fileId)) {
buildStore.emptyTrash();
}
}
/*-------------------------------------------------------------------------------------*/
/**
* Read a single byte from the trace file, and return it as an
* integer.
*
* @return The byte of data, or TRACE_FILE_EOF (-1) if there's no more data left.
* @throws IOException If anything abnormal happens when reading the data.
*/
private int getByte() throws IOException {
/* if there's no data left in the in-memory buffer, read some more */
if (bytesRemaining == 0){
bytesRemaining = inputStream.read(readBuffer);
bufferOffset = 0;
}
/* if there are no more bytes in the input stream, inform the caller */
if (bytesRemaining == -1) {
return TRACE_FILE_EOF;
}
bytesRemaining--;
int val = readBuffer[bufferOffset++];
/* track the position within the input stream (regardless of buffer location) */
traceFilePos++;
/* Java doesn't have unsigned bytes, so do the adjustment */
if (val < 0) {
val += 256;
}
return val;
}
/*-------------------------------------------------------------------------------------*/
/**
* Fetch a trace file tag from the trace file.
*
* @return The next tag in the file (e.g. TRACE_FILE_READ).
* @throws IOException If something fails when reading the file.
*/
private int getTag() throws IOException {
return getByte();
}
/*-------------------------------------------------------------------------------------*/
/**
* Fetch a NUL-terminated string from the trace file.
*
* @return The string that was read from the trace file.
* @throws IOException If something fails when reading the file. For example,
* if the EOF is reached before a NUL character is seen.
*/
private String getString() throws IOException {
StringBuffer buf = new StringBuffer(256);
while (true) {
int val = getByte();
/* a nul-byte is the end of the C-style string */
if (val == 0) {
break;
/* but if we see an EOF in the middle of the string, error */
} else if (val == TRACE_FILE_EOF) {
throw new IOException("File appears to be truncated");
}
buf.append((char)val);
}
return buf.toString();
}
/*-------------------------------------------------------------------------------------*/
/**
* Fetch a 4-byte little-endian integer from the trace file.
*
* @return The integer that was read from the trace file.
* @throws IOException If something fails while reading the integer.
*/
private int getInt() throws IOException {
/* TODO: optimize if this ends up being slow */
int dig1 = getByte();
int dig2 = getByte();
int dig3 = getByte();
int dig4 = getByte();
/* numbers are stored in little-endian order */
return (dig4 << 24) | (dig3 << 16) | (dig2 << 8) | dig1;
}
/*-------------------------------------------------------------------------------------*/
/**
* Like println(), but display the message to the debug stream (if defined). Only
* display the message if this message's debug level is less than or equal to the
* overall debug level setting.
*
* @param level The debug level for this message.
* @param message The message to be displayed on the debug stream.
*/
private void debugln(int level, String message) {
if ((debugStream != null) && (level <= debugLevel)) {
debugStream.println(message);
}
}
/*-------------------------------------------------------------------------------------*/
/**
* Like print(), but display the message to the debug stream (if defined).
*
* @param level The debug level for this message.
* @param message The message to be displayed on the debug stream.
*/
private void debug(int level, String message) {
if ((debugStream != null) && (level <= debugLevel)) {
debugStream.print(message);
}
}
/*-------------------------------------------------------------------------------------*/
}