/*
* file: MppCleanUtility.java
* author: Jon Iles
* copyright: (c) Packwood Software 2008
* date: 07/02/2008
*/
/*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
*/
package net.sf.mpxj.utility;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import net.sf.mpxj.MPXJException;
import net.sf.mpxj.ProjectFile;
import net.sf.mpxj.ProjectProperties;
import net.sf.mpxj.Resource;
import net.sf.mpxj.Task;
import net.sf.mpxj.common.NumberHelper;
import net.sf.mpxj.mpp.MPPReader;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
* This class allows the caller to replace the content of an MPP file
* to make it anonymous, in such a way that the structure of the project
* is maintained unchanged. The point of this exercise is to allow end
* customers who use MPXJ functionality to submit problematic project files
* obtain support. The fact that the structure of the file is maintained
* unchanged means that it is likely that the problem with the file will
* still be apparent. It also means that end users are more likely to
* submit these files as, along with the removal of sensitive information, this
* utility means that no user effort is required to modify the file
* before it is sent to the organisation providing support.
*
* Note the following items are made anonymous:
* - Task Names
* - Resource Names
* - Resource Initials
* - Project Summary Data
*/
public class MppCleanUtility
{
/**
* Main method.
*
* @param args array of command line arguments
*/
public static void main(String[] args)
{
try
{
if (args.length != 2)
{
System.out.println("Usage: MppClean <input mpp file name> <output mpp file name>");
}
else
{
System.out.println("Clean started.");
long start = System.currentTimeMillis();
MppCleanUtility clean = new MppCleanUtility();
clean.process(args[0], args[1]);
long elapsed = System.currentTimeMillis() - start;
System.out.println("Clean completed in " + elapsed + "ms");
}
}
catch (Exception ex)
{
ex.printStackTrace();
}
}
/**
* Process an MPP file to make it anonymous.
*
* @param input input file name
* @param output output file name
* @throws Exception
*/
private void process(String input, String output) throws MPXJException, IOException
{
//
// Extract the project data
//
MPPReader reader = new MPPReader();
m_project = reader.read(input);
String varDataFileName;
String projectDirName;
int mppFileType = NumberHelper.getInt(m_project.getProjectProperties().getMppFileType());
switch (mppFileType)
{
case 8:
{
projectDirName = " 1";
varDataFileName = "FixDeferFix 0";
break;
}
case 9:
{
projectDirName = " 19";
varDataFileName = "Var2Data";
break;
}
case 12:
{
projectDirName = " 112";
varDataFileName = "Var2Data";
break;
}
default:
{
throw new IllegalArgumentException("Unsupported file type " + mppFileType);
}
}
//
// Load the raw file
//
FileInputStream is = new FileInputStream(input);
POIFSFileSystem fs = new POIFSFileSystem(is);
is.close();
//
// Locate the root of the project file system
//
DirectoryEntry root = fs.getRoot();
m_projectDir = (DirectoryEntry) root.getEntry(projectDirName);
//
// Process Tasks
//
Map<String, String> replacements = new HashMap<String, String>();
for (Task task : m_project.getAllTasks())
{
mapText(task.getName(), replacements);
}
processReplacements(((DirectoryEntry) m_projectDir.getEntry("TBkndTask")), varDataFileName, replacements, true);
//
// Process Resources
//
replacements.clear();
for (Resource resource : m_project.getAllResources())
{
mapText(resource.getName(), replacements);
mapText(resource.getInitials(), replacements);
}
processReplacements((DirectoryEntry) m_projectDir.getEntry("TBkndRsc"), varDataFileName, replacements, true);
//
// Process project properties
//
replacements.clear();
ProjectProperties properties = m_project.getProjectProperties();
mapText(properties.getProjectTitle(), replacements);
processReplacements(m_projectDir, "Props", replacements, true);
replacements.clear();
mapText(properties.getProjectTitle(), replacements);
mapText(properties.getSubject(), replacements);
mapText(properties.getAuthor(), replacements);
mapText(properties.getKeywords(), replacements);
mapText(properties.getComments(), replacements);
processReplacements(root, "\005SummaryInformation", replacements, false);
replacements.clear();
mapText(properties.getManager(), replacements);
mapText(properties.getCompany(), replacements);
mapText(properties.getCategory(), replacements);
processReplacements(root, "\005DocumentSummaryInformation", replacements, false);
//
// Write the replacement raw file
//
FileOutputStream os = new FileOutputStream(output);
fs.writeFilesystem(os);
os.flush();
os.close();
}
/**
* Extracts a block of data from the MPP file, and iterates through the map
* of find/replace pairs to make the data anonymous.
*
* @param parentDirectory parent directory object
* @param fileName target file name
* @param replacements find/replace data
* @param unicode true for double byte text
* @throws IOException
*/
private void processReplacements(DirectoryEntry parentDirectory, String fileName, Map<String, String> replacements, boolean unicode) throws IOException
{
//
// Populate a list of keys and sort into descending order of length
//
List<String> keys = new ArrayList<String>(replacements.keySet());
Collections.sort(keys, new Comparator<String>()
{
@Override public int compare(String o1, String o2)
{
return (o2.length() - o1.length());
}
});
//
// Extract the raw file data
//
DocumentEntry targetFile = (DocumentEntry) parentDirectory.getEntry(fileName);
DocumentInputStream dis = new DocumentInputStream(targetFile);
int dataSize = dis.available();
byte[] data = new byte[dataSize];
dis.read(data);
dis.close();
//
// Replace the text
//
for (String findText : keys)
{
String replaceText = replacements.get(findText);
replaceData(data, findText, replaceText, unicode);
}
//
// Remove the document entry
//
targetFile.delete();
//
// Replace it with a new one
//
parentDirectory.createDocument(fileName, new ByteArrayInputStream(data));
}
/**
* Converts plan text into anonymous text. Preserves upper case, lower case,
* punctuation, whitespace and digits while making the text unreadable.
*
* @param oldText text to replace
* @param replacements map of find/replace pairs
*/
private void mapText(String oldText, Map<String, String> replacements)
{
char c2 = 0;
if (oldText != null && oldText.length() != 0 && !replacements.containsKey(oldText))
{
StringBuilder newText = new StringBuilder(oldText.length());
for (int loop = 0; loop < oldText.length(); loop++)
{
char c = oldText.charAt(loop);
if (Character.isUpperCase(c))
{
newText.append('X');
}
else
{
if (Character.isLowerCase(c))
{
newText.append('x');
}
else
{
if (Character.isDigit(c))
{
newText.append('0');
}
else
{
if (Character.isLetter(c))
{
// Handle other codepages etc. If possible find a way to
// maintain the same code page as original.
// E.g. replace with a character from the same alphabet.
// This 'should' work for most cases
if (c2 == 0)
{
c2 = c;
}
newText.append(c2);
}
else
{
newText.append(c);
}
}
}
}
}
replacements.put(oldText, newText.toString());
}
}
/**
* For a given find/replace pair, iterate through the supplied block of data
* and perform a find and replace.
*
* @param data data block
* @param findText text to find
* @param replaceText replacement text
* @param unicode true if text is double byte
*/
private void replaceData(byte[] data, String findText, String replaceText, boolean unicode)
{
boolean replaced = false;
byte[] findBytes = getBytes(findText, unicode);
byte[] replaceBytes = getBytes(replaceText, unicode);
int endIndex = data.length - findBytes.length;
for (int index = 0; index <= endIndex; index++)
{
if (compareBytes(findBytes, data, index))
{
System.arraycopy(replaceBytes, 0, data, index, replaceBytes.length);
index += replaceBytes.length;
System.out.println(findText + " -> " + replaceText);
replaced = true;
}
}
if (!replaced)
{
System.out.println("Failed to find " + findText);
}
}
/**
* Convert a Java String instance into the equivalent array of single or
* double bytes.
*
* @param value Java String instance representing text
* @param unicode true if double byte characters are required
* @return byte array representing the supplied text
*/
private byte[] getBytes(String value, boolean unicode)
{
byte[] result;
if (unicode)
{
int start = 0;
// Get the bytes in UTF-16
byte[] bytes;
try
{
bytes = value.getBytes("UTF-16");
}
catch (UnsupportedEncodingException e)
{
bytes = value.getBytes();
}
if (bytes.length > 2 && bytes[0] == -2 && bytes[1] == -1)
{
// Skip the unicode identifier
start = 2;
}
result = new byte[bytes.length - start];
for (int loop = start; loop < bytes.length - 1; loop += 2)
{
// Swap the order here
result[loop - start] = bytes[loop + 1];
result[loop + 1 - start] = bytes[loop];
}
}
else
{
result = new byte[value.length() + 1];
System.arraycopy(value.getBytes(), 0, result, 0, value.length());
}
return (result);
}
/**
* Compare an array of bytes with a subsection of a larger array of bytes.
*
* @param lhs small array of bytes
* @param rhs large array of bytes
* @param rhsOffset offset into larger array of bytes
* @return true if a match is found
*/
private boolean compareBytes(byte[] lhs, byte[] rhs, int rhsOffset)
{
boolean result = true;
for (int loop = 0; loop < lhs.length; loop++)
{
if (lhs[loop] != rhs[rhsOffset + loop])
{
result = false;
break;
}
}
return (result);
}
private ProjectFile m_project;
private DirectoryEntry m_projectDir;
}