/*
* RHQ Management Platform
* Copyright (C) 2005-2010 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation, and/or the GNU Lesser
* General Public License, version 2.1, also as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License and the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* and the GNU Lesser General Public License along with this program;
* if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.rhq.core.util.updater;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;
import org.rhq.core.util.MessageDigestGenerator;
/**
 * A sorted map that maps a filename to the hashcode (MD5 digest string) generated
 * from that file's content. The keys of the map are file paths (stored internally
 * with '/' as the path separator); the values are unique hashcodes generated from
 * the content of the files.
 * Note that the keys can be either relative or absolute paths. If relative, some outside
 * entity will need to know how to resolve those relative paths (i.e. some outside
 * entity would need to know the top level root directory for all relative paths).
 *
 * @author John Mazzitelli
 */
public class FileHashcodeMap extends TreeMap<String, String> {
    /** hashcode used when a directory exists but its list of files could not be obtained */
    public static final String UNKNOWN_DIR_HASHCODE = "?UNKNOWN_DIR_HASHCODE?";

    /** hashcode used when a file exists but its content could not be digested */
    public static final String UNKNOWN_FILE_HASHCODE = "?UNKNOWN_FILE_HASHCODE?";

    /** hashcode used to mark a file that was in the original map but no longer exists on disk */
    public static final String DELETED_FILE_HASHCODE = "?DELETED_FILE_HASHCODE?";

    /** placeholder hashcode for map entries that represent directories (directories are never digested) */
    public static final String DIRECTORY_HASHCODE = "?DIRECTORY?";

    private static final long serialVersionUID = 1L;

    /** separates the path column from the hashcode column in the persisted file format */
    private static final String COLUMN_SEPARATOR = "\t";

    /**
     * Given a directory, this will recursively traverse that directory's files/subdirectories and
     * generate the hashcode map for all files it encounters and add that data to the given map.
     * If given a regular file, a single entry is added to the given map.
     * Note that the returned map will have all relative paths as keys (relative to <code>rootDir</code>).
     * Also note that <code>ignoreRegex</code> is matched against relative paths using '/' as the
     * path separator, with directories given a trailing '/' (the same matching semantics used
     * during {@link #rescan(File, Pattern, boolean)}).
     *
     * @param rootDir existing directory to scan and generate hashcodes for all its files
     * @param ignoreRegex a regular expression that indicates which files/directories should be ignored.
     *                    If a relative file/directory path matches this regex, it will be skipped.
     * @param ignored a set that will contain those files/directories that were ignored while scanning
     *                the root dir; may be <code>null</code> if the caller does not need that information
     * @return the map containing all files found and their generated hashcodes
     * @throws Exception if failed to generate hashcode for the directory
     */
    public static FileHashcodeMap generateFileHashcodeMap(File rootDir, Pattern ignoreRegex, Set<String> ignored)
        throws Exception {
        if (ignored == null) {
            ignored = new HashSet<String>();
        } else {
            ignored.clear(); // start fresh, in case caller left some old data around
        }

        FileHashcodeMap map = new FileHashcodeMap();
        generateFileHashcodeMapRecursive(map, rootDir.getAbsolutePath(), 0, rootDir, ignoreRegex, ignored);
        return map;
    }

    /**
     * Given a directory, this will recursively traverse that directory's files/subdirectories and
     * generate the hashcode map for all files it encounters and add that data to the given map.
     * If given a regular file, a single entry is added to the given map.
     *
     * @param map the map where the hashcode data is stored
     * @param rootPath the top root directory that is being scanned - all files in the returned map
     *                 will be relative to this
     * @param level the level deep in the file hierarchy currently being processed (0==at top root dir)
     * @param fileOrDir existing directory to scan and generate hashcodes for all its files, or existing
     *                  file to generate hashcode for
     * @param ignoreRegex a regular expression that indicates which files/directories should be ignored.
     *                    If a relative file/directory path matches this regex, it will be skipped.
     * @param ignored a set that will contain those files/directories that were ignored while scanning
     *                the root dir
     *
     * @throws Exception if failed to generate hashcode for the file/directory
     */
    private static void generateFileHashcodeMapRecursive(FileHashcodeMap map, String rootPath, int level,
        File fileOrDir, Pattern ignoreRegex, Set<String> ignored) throws Exception {

        if (fileOrDir == null || !fileOrDir.exists()) {
            throw new Exception("Non-existent file/directory provided: " + fileOrDir);
        }

        // get path relative to the top root node; at the very top, the relative path is just the name
        String path;
        if (level == 0) {
            path = fileOrDir.getName();
        } else {
            path = fileOrDir.getAbsolutePath().substring(rootPath.length() + 1);
        }

        // if this path is one the caller wants us to ignore, then return immediately.
        // match against the converted ('/'-separated) form, giving directories a trailing slash,
        // so the same ignore regex behaves identically here and in lookForNewFilesRecursive
        if (ignoreRegex != null) {
            String matchPath = map.convertPath(path);
            if (fileOrDir.isDirectory()) {
                matchPath = map.convertPath(path + File.separator);
            }
            if (ignoreRegex.matcher(matchPath).matches()) {
                ignored.add(map.convertPath(path));
                return;
            }
        }

        if (fileOrDir.isDirectory()) {
            // we never calculate hashcodes for our own install metadata
            if (fileOrDir.getName().equals(DeploymentsMetadata.METADATA_DIR)) {
                return;
            }

            File[] children = fileOrDir.listFiles();
            if (children != null) {
                for (File child : children) {
                    generateFileHashcodeMapRecursive(map, rootPath, level + 1, child, ignoreRegex, ignored);
                }
            } else {
                // listFiles returned null - the directory was unreadable; record it as unknown
                map.put(path, UNKNOWN_DIR_HASHCODE);
            }
        } else {
            String hashcode;
            try {
                hashcode = MessageDigestGenerator.getDigestString(fileOrDir);
            } catch (Exception e) {
                // could not digest the file (e.g. unreadable) - record it as unknown rather than abort
                hashcode = UNKNOWN_FILE_HASHCODE;
            }
            map.put(path, hashcode);
        }

        return;
    }

    /**
     * Loads in the file that contains file/hashcode map data. The expected format is one entry
     * per line: <code>path&lt;TAB&gt;hashcode</code>, as written by {@link #storeToFile(File)}.
     *
     * @param file the file to load
     * @return map of files/hashcodes found in the file
     * @throws Exception if the file cannot be read or a line is not in the two-column format
     */
    public static FileHashcodeMap loadFromFile(File file) throws Exception {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            FileHashcodeMap map = new FileHashcodeMap();
            int lineNumber = 1;
            String line = reader.readLine();
            while (line != null) {
                String[] columns = line.split(COLUMN_SEPARATOR);
                if (columns.length != 2) {
                    throw new Exception("Format error in [" + file.getAbsolutePath() + "] at line #[" + lineNumber
                        + "]: " + line);
                }
                map.put(columns[0], columns[1]);
                lineNumber++;
                line = reader.readLine();
            }
            return map;
        } finally {
            reader.close();
        }
    }

    /**
     * Takes all map entries in this object and writes them to the given file such that it can later
     * be loaded in via {@link #loadFromFile(File)}.
     *
     * @param file the file to store the entries to
     * @throws Exception if failed to store the entries to the given file
     */
    public void storeToFile(File file) throws Exception {
        PrintWriter writer = new PrintWriter(file);
        try {
            for (Map.Entry<String, String> entry : entrySet()) {
                writer.println(entry.getKey() + COLUMN_SEPARATOR + entry.getValue());
            }
            // PrintWriter silently swallows I/O errors - surface them so callers know the store failed
            if (writer.checkError()) {
                throw new Exception("Failed to store the file/hashcode data to [" + file.getAbsolutePath() + "]");
            }
        } finally {
            writer.close();
        }
        return;
    }

    /**
     * This rescans a set of files (found in this map) and returns a map with an updated, current set of hashcodes.
     *
     * If this original map has relative paths, they will be considered relative to the given
     * root directory. If a file is not found, it will still be in the returned map but its
     * hashcode will be {@link #DELETED_FILE_HASHCODE}.
     *
     * The root directory is also scanned for new files that are not in this original
     * map - if new files are found (and they do not match the ignoreRegex), they are added to the
     * returned map. Note that if <code>reportNewRootFilesAsNew</code> is false, and if new files
     * are found in the top root directory and they are not related to the deployment fileset,
     * they will not be added to the returned map.
     *
     * @param rootDir directory where the relative paths are expected to be
     * @param ignoreRegex if relative paths of files under rootDir match this, they will be ignored.
     *                    This will eliminate files/directories from being considered "new" because
     *                    they aren't in original.
     * @param reportNewRootFilesAsNew if <code>false</code>, unrelated files found directly in the
     *                    root dir are not reported as new; they are recorded in the returned
     *                    map's skipped set instead
     * @return a map with current files/hashcodes, including files that were not found in original.
     *         the returned object also has additional info such as those files that were added,
     *         deleted, changed from this original. It also indicates what was ignored during the rescan.
     * @throws Exception if the rescan failed
     */
    public ChangesFileHashcodeMap rescan(File rootDir, Pattern ignoreRegex, boolean reportNewRootFilesAsNew)
        throws Exception {

        // start with a copy of this original map; entries are updated/removed as we rescan
        ChangesFileHashcodeMap current = new ChangesFileHashcodeMap(this);

        // go through our original files and recalculate their hashcodes
        for (Map.Entry<String, String> entry : entrySet()) {
            String originalFileString = entry.getKey();

            // if we are now to ignore this file, don't put it in our current map and skip to the next file
            if (ignoreRegex != null && ignoreRegex.matcher(originalFileString).matches()) {
                current.remove(originalFileString);
                current.getIgnored().add(originalFileString);
                continue;
            }

            File originalFile = new File(originalFileString);
            if (!originalFile.isAbsolute()) {
                originalFile = new File(rootDir, originalFileString);
            }

            if (originalFile.exists()) {
                String currentHashcode;
                if (originalFile.isDirectory()) {
                    currentHashcode = FileHashcodeMap.DIRECTORY_HASHCODE;
                } else {
                    try {
                        currentHashcode = MessageDigestGenerator.getDigestString(originalFile);
                    } catch (Exception e) {
                        // same convention as the scan methods: record unreadable files as unknown
                        currentHashcode = UNKNOWN_FILE_HASHCODE;
                    }
                }
                current.put(originalFileString, currentHashcode);

                // if file has been changed, mark it as such in our return map
                String originalHashcode = entry.getValue();
                if (!currentHashcode.equals(originalHashcode)) {
                    current.getChanges().put(originalFileString, currentHashcode);
                }
            } else {
                // file has been deleted! still put an entry in our returned map but mark it as deleted
                current.put(originalFileString, DELETED_FILE_HASHCODE);
                current.getDeletions().put(originalFileString, DELETED_FILE_HASHCODE);
            }
        }

        // now recursively traverse the root directory and look for new files that aren't in our original map
        // files that have been added need to be put into our returned map and also marked as added
        FileHashcodeMap newFiles = new FileHashcodeMap();
        Set<String> skippedFiles = new HashSet<String>();
        lookForNewFilesRecursive(newFiles, skippedFiles, rootDir.getAbsolutePath(), 0, rootDir, ignoreRegex, current
            .getIgnored(), reportNewRootFilesAsNew);
        current.putAll(newFiles);
        current.getAdditions().putAll(newFiles);
        current.getSkipped().addAll(skippedFiles);

        return current;
    }

    /**
     * This looks for new files under the given fileOrDir and adds them to <code>newFiles</code>.
     *
     * @param newFiles the map where the new, current file/hashcode data will be stored
     * @param skippedFiles a set where names of unrelated files/directories are stored. The names
     *                     found here after this method returns are those files/dirs that were found
     *                     in the top level root dir, but were skipped over and not processed. This will not
     *                     contain paths with subdirectories - they will only be a filename with no paths
     *                     because they are located directly under the root dir.
     * @param rootPath the top root directory that is being scanned
     * @param level the level deep in the file hierarchy currently being processed (0==at top root dir)
     * @param fileOrDir existing directory/file to rescan
     * @param ignoreRegex a regular expression that indicates which files/directories should be ignored.
     *                    If a relative file/directory path matches this regex, it will be skipped.
     * @param ignored a set that will contain those files/directories that were ignored while scanning
     *                the root dir
     * @param reportNewRootFilesAsNew if false, ignore unrelated files at the root dir location (level=0)
     *
     * @throws Exception if a file/directory could not be examined
     */
    private void lookForNewFilesRecursive(FileHashcodeMap newFiles, Set<String> skippedFiles, String rootPath,
        int level, File fileOrDir, Pattern ignoreRegex, Set<String> ignored, boolean reportNewRootFilesAsNew)
        throws Exception {

        if (fileOrDir == null || !fileOrDir.exists()) {
            throw new Exception("Non-existent file/directory provided: " + fileOrDir);
        }

        // get path relative to the top root node
        String path;
        if (level == 0) {
            path = fileOrDir.getName();
        } else {
            path = fileOrDir.getAbsolutePath().substring(rootPath.length() + 1);
        }
        path = convertPath(path);

        // if this path is one the caller wants us to ignore, then return immediately
        if (ignoreRegex != null) {
            String matchPath = path;
            // a directory wouldn't match the pattern unless it has a trailing slash
            if (fileOrDir.isDirectory()) {
                matchPath = convertPath(path + File.separator);
            }
            if (ignoreRegex.matcher(matchPath).matches()) {
                ignored.add(path);
                return;
            }
        }

        if (fileOrDir.isDirectory()) {
            // we never calculate hashcodes for our own install metadata
            if (fileOrDir.getName().equals(DeploymentsMetadata.METADATA_DIR)) {
                return;
            }

            // If we are currently at the top level and we are not to report unrelated files as new
            // then we need to find out which files ARE related first. Related files are those files
            // already in our Map at the root level (i.e. they are just a filename, no parent paths)
            // plus those in our Map with a top parent dir name that match a child directory of
            // fileOrDir (which is the root dir if we are at level=0).
            // Note that we use File API to manipulate filenames/parents to ensure we do this right
            // on Windows (taking care of relative paths with drive letters, e.g. C:subdir/file.txt).
            HashSet<String> relatedTopLevelFiles = null;
            if (level == 0 && !reportNewRootFilesAsNew) {
                relatedTopLevelFiles = new HashSet<String>();
                for (String relatedFilePath : keySet()) { // loop through our Map key entries, these are the "related" files
                    File relatedFile = new File(relatedFilePath);
                    if (!relatedFile.isAbsolute()) {
                        String topLevelName = relatedFile.getName(); // prime the pump; if we have no parent, this is our file already at the top root dir
                        File parent = relatedFile.getParentFile();
                        // walk up the file hierarchy until we hit the top parent - this is the related dir found in our top root
                        while (parent != null) {
                            topLevelName = parent.getName();
                            parent = parent.getParentFile();
                        }
                        relatedTopLevelFiles.add(topLevelName); // this is a file or dir at the top root dir
                    }
                }
            }

            // we also care about directories and we're reporting them as newFiles
            if (level > 0 && reportNewRootFilesAsNew) {
                // if the directory is not yet known to us, add it to the map of new files
                if (!containsKey(path)) {
                    // just insert directory with non-important hash value
                    newFiles.put(path, DIRECTORY_HASHCODE);
                }
            }

            File[] children = fileOrDir.listFiles();
            if (children != null) {
                for (File child : children) {
                    // skip this child if we are at the top root dir and it is not related to our fileset
                    if (relatedTopLevelFiles == null || relatedTopLevelFiles.contains(child.getName())) {
                        lookForNewFilesRecursive(newFiles, skippedFiles, rootPath, level + 1, child, ignoreRegex,
                            ignored, reportNewRootFilesAsNew);
                    } else {
                        skippedFiles.add(child.getName());
                    }
                }
            }
        } else {
            // if the file is not yet known to us, add it to the map of new files
            if (!containsKey(path)) {
                String hashcode;
                try {
                    hashcode = MessageDigestGenerator.getDigestString(fileOrDir);
                } catch (Exception e) {
                    // could not digest the file - record it as unknown rather than abort the scan
                    hashcode = UNKNOWN_FILE_HASHCODE;
                }
                newFiles.put(path, hashcode);
            }
        }

        return;
    }

    /**
     * If a file was not readable or its hashcode could not be generated for some reason, its path
     * will be returned as a key to the returned map. The value will be {@link #UNKNOWN_FILE_HASHCODE}.
     * If a directory was not readable or its list of files could not be retrieved for some reason, its path
     * will be returned as a key to the returned map. The value will be {@link #UNKNOWN_DIR_HASHCODE}.
     *
     * @return map of file or directories whose hashcodes could not be determined. Will be <code>null</code> if
     *         the map is fully complete and all content was able to have its hashcodes generated.
     */
    public Map<String, String> getUnknownContent() {
        Map<String, String> unknowns = null;
        for (Map.Entry<String, String> entry : entrySet()) {
            if (entry.getValue().equals(UNKNOWN_DIR_HASHCODE) || entry.getValue().equals(UNKNOWN_FILE_HASHCODE)) {
                if (unknowns == null) {
                    unknowns = new HashMap<String, String>();
                }
                unknowns.put(entry.getKey(), entry.getValue());
            }
        }
        return unknowns;
    }

    @Override
    public String put(String key, String value) {
        // normalize the key so all paths are stored with '/' separators
        return super.put(convertPath(key), value);
    }

    @Override
    public void putAll(Map<? extends String, ? extends String> map) {
        // funnel through put() so every key is normalized via convertPath; without this override,
        // TreeMap's sorted-map fast path could insert unconverted keys when this map is empty
        for (Map.Entry<? extends String, ? extends String> entry : map.entrySet()) {
            put(entry.getKey(), entry.getValue());
        }
    }

    @Override
    public String get(Object key) {
        return super.get(convertPath((String) key));
    }

    @Override
    public String remove(Object key) {
        return super.remove(convertPath((String) key));
    }

    @Override
    public boolean containsKey(Object key) {
        return super.containsKey(convertPath((String) key));
    }

    /**
     * Converts the path to the form that will be stored internally - that is, with '/' used
     * as the file separator on all platforms.
     *
     * @param path a filepath to be converted; may be <code>null</code>
     *
     * @return the converted path that is to be used to store in the internal sets
     */
    public String convertPath(String path) {
        if (File.separatorChar != '/') {
            if (path != null) {
                path = path.replace(File.separatorChar, '/');
            }
        }
        return path;
    }
}