FileHashcodeMap.java example

Explorer

rhq-master
- etc
  - agentspawn
    - src
      - main
        java
        org
        rhq
        perftest
        AgentSpawn.java
        SpawnedAgentClassLoader.java
  - apt
    - src
      - main
        java
        org
        rhq
        plugins
        apt
        AptSourcesComponent.java
        AptSourcesDiscoveryComponent.java
      - test
        java
        org
        rhq
        plugins
        apt
        AptSourcesComponentTest.java
  - augeas-utils
    - src
      - AugeasTree.java
      - Node.java
  - classloaders-test
    - dummy1
      - src
        main
        java
        dummy
        Dummy.java
    - dummy2
      - src
        main
        java
        dummy
        Dummy.java
    - dummy3
      - src
        main
        java
        dummy
        Dummy.java
    - plugin1
      - src
        main
        java
        org
        rhq
        plugin1
        SamplePlugin1ADiscoveryComponent.java
        SamplePlugin1AServerComponent.java
        SamplePlugin1BDiscoveryComponent.java
        SamplePlugin1BServerComponent.java
    - plugin2
      - src
        main
        java
        org
        rhq
        plugin2
        SamplePlugin2ADiscoveryComponent.java
        SamplePlugin2AServerComponent.java
        SamplePlugin2BDiscoveryComponent.java
        SamplePlugin2BServerComponent.java
  - dev-utils
    - TestConfigFileBuilder.java
  - guiperftests
    - src
      - test
        java
        org
        rhq
        guiperftest
        GuiPerfTestCase.java
        GuiPerfTestSuite.java
        SahiUtility.java
  - m2
    - smartgwt-war-archetype
      - src
        main
        resources
        archetype-resources
        src
        main
        java
        client
        Application.java
        test
        java
        ApplicationTestCase.java
  - rhq-ircBot
    - src
      - main
        java
        org
        rhq
        etc
        ircbot
        BugResolver.java
        BugzillaResolver.java
        Color.java
        GDocParser.java
        JiraResolver.java
        RhqIrcBot.java
        RhqIrcBotListener.java
        XTrustAllProvider.java
  - rhq-plugin-archetype
    - src
      - main
        resources
        archetype-resources
        src
        main
        java
        Holder.java
  - samples
    - custom-serverplugin
      - src
        main
        java
        org
        custom
        MyLifecycleListener.java
        MyScheduledJob.java
    - embedded-ext-plugin
      - src
        main
        java
        org
        rhq
        sample
        embeddedextplugin
        EmbeddedExtensionDiscoveryComponent.java
    - simplereport-serverplugin
      - src
        main
        java
        org
        custom
        SimpleReportsPluginComponent.java
    - skeleton-plugin
      - src
        main
        java
        org
        rhq
        sample
        skeletonplugin
        SamplePluginChildDiscoveryComponent.java
        SamplePluginChildServiceComponent.java
        SamplePluginDiscoveryComponent.java
        SamplePluginServerComponent.java
    - skinny-platform
      - src
        main
        java
        org
        rhq
        plugins
        skinnyplatform
        PlatformComponent.java
        PlatformDiscoveryComponent.java
  - test-apps
    - javaee6-test-app
      - src
        main
        java
        org
        rhq
        test
        apps
        javaee6
        HelloService.java
        HelloServiceMBean.java
        SimpleEntity.java
        SimpleQueueMDB.java
        SimpleSingleton.java
        SimpleStateful.java
        SimpleStateless.java
        SimpleTopicMDB.java
        SimpleWebService.java
        SimpleWebServiceImpl.java
    - javaee6-test-ear
      - ejb
        src
        main
        java
        org
        rhq
        test
        apps
        javaee6
        ear
        SimpleEntity.java
        SimpleSingleton.java
        SimpleStateful.java
        SimpleStateless.java
  - zipLengthCheck
    - src
      - main
        java
        org
        rhq
        devUtil
        zipCheck
        ZipCheck.java
- modules

/*
 * RHQ Management Platform
 * Copyright (C) 2005-2010 Red Hat, Inc.
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation, and/or the GNU Lesser
 * General Public License, version 2.1, also as published by the Free
 * Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License and the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License
 * and the GNU Lesser General Public License along with this program;
 * if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
package org.rhq.core.util.updater;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Pattern;

import org.rhq.core.util.MessageDigestGenerator;

/**
 * This provides a hashmap that maps a filename to its hashcode value (md5).
 * The key to the map will be filenames; the values are unique hashcodes generated from
 * the content of the files.
 * Note that the keys can be either relative or absolute paths. If relative, some outside
 * entity will need to know how to resolve those relative paths (i.e. some outside
 * entity would need to know the top level root directory for all relative paths).
 * 
 * @author John Mazzitelli
 */
public class FileHashcodeMap extends TreeMap<String, String> {
    public static final String UNKNOWN_DIR_HASHCODE = "?UNKNOWN_DIR_HASHCODE?";
    public static final String UNKNOWN_FILE_HASHCODE = "?UNKNOWN_FILE_HASHCODE?";
    public static final String DELETED_FILE_HASHCODE = "?DELETED_FILE_HASHCODE?";
    public static final String DIRECTORY_HASHCODE ="?DIRECTORY?";

    private static final long serialVersionUID = 1L;
    private static final String COLUMN_SEPARATOR = "\t";

    /**
     * Given a directory, this will recursively traverse that directory's files/subdirectories and
     * generate the hashcode map for all files it encounters and add that data to the given map.
     * If given a regular file, a single entry is added to the given map.
     * Note that the returned map will have all relative paths as keys (relative to <code>rootDir</code>).
     * Also note that <code>ignoreRegex</code> is matched against relative paths.
     * 
     * @param rootDir existing directory to scan and generate hashcodes for all its files
     * @param ignoreRegex a regular expression that indicates which files/directories should be ignored.
     *                    If a relative file/directory path matches this regex, it will be skipped.
     * @param ignored a set that will contain those files/directories that were ignored while scanning the root dir 
     * @returns the map containing all files found and their generated hashcodes
     * @throws Exception if failed to generate hashcode for the directory
     */
    public static FileHashcodeMap generateFileHashcodeMap(File rootDir, Pattern ignoreRegex, Set<String> ignored)
        throws Exception {

        if (ignored == null) {
            ignored = new HashSet<String>();
        } else {
            ignored.clear(); // start fresh, in case caller left some old data around
        }

        FileHashcodeMap map = new FileHashcodeMap();
        generateFileHashcodeMapRecursive(map, rootDir.getAbsolutePath(), 0, rootDir, ignoreRegex, ignored);
        return map;
    }

    /**
     * Given a directory, this will recursively traverse that directory's files/subdirectories and
     * generate the hashcode map for all files it encounters and add that data to the given map.
     * If given a regular file, a single entry is added to the given map.
     * 
     * @param map         the map where the hashcode data is stored
     * @param rootPath    the top root directory that is being scanned - all files in the returned map will be relative to this
     * @param level       the level deep in the file hierarchy currently being processed (0==at top root dir)
     * @param fileOrDir   existing directory to scan and generate hashcodes for all its files, or existing
     *                    file to generate hashcode for
     * @param ignoreRegex a regular expression that indicates which files/directories should be ignored.
     *                    If a relative file/directory path matches this regex, it will be skipped.
     * @param ignored a set that will contain those files/directories that were ignored while scanning the root dir 
     *
     * @throws Exception if failed to generate hashcode for the file/directory
     */
    private static void generateFileHashcodeMapRecursive(FileHashcodeMap map, String rootPath, int level,
        File fileOrDir, Pattern ignoreRegex, Set<String> ignored) throws Exception {

        if (fileOrDir == null || !fileOrDir.exists()) {
            throw new Exception("Non-existent file/directory provided: " + fileOrDir);
        }

        // get path relative to the top root node
        String path;
        if (level == 0) {
            path = fileOrDir.getName();
        } else {
            path = fileOrDir.getAbsolutePath().substring(rootPath.length() + 1);
        }

        // if this path is one the caller wants us to ignore, then return immediately
        if (ignoreRegex != null && ignoreRegex.matcher(path).matches()) {
            ignored.add(path);
            return;
        }

        if (fileOrDir.isDirectory()) {
            // we never calculate hashcodes for our own install metadata
            if (fileOrDir.getName().equals(DeploymentsMetadata.METADATA_DIR)) {
                return;
            }

            File[] children = fileOrDir.listFiles();
            if (children != null) {
                for (File child : children) {
                    generateFileHashcodeMapRecursive(map, rootPath, level + 1, child, ignoreRegex, ignored);
                }
            } else {
                map.put(path, UNKNOWN_DIR_HASHCODE);
            }
        } else {
            String hashcode;
            try {
                hashcode = MessageDigestGenerator.getDigestString(fileOrDir);
            } catch (Exception e) {
                hashcode = UNKNOWN_FILE_HASHCODE;
            }
            map.put(path, hashcode);
        }

        return;
    }

    /**
     * Loads in the file that contains file/hashcode map data.
     *
     * @param file the file to load
     * @return map of files/hashcodes found in the file
     * @throws Exception
     */
    public static FileHashcodeMap loadFromFile(File file) throws Exception {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            FileHashcodeMap map = new FileHashcodeMap();
            int lineNumber = 1;
            String line = reader.readLine();
            while (line != null) {
                String[] columns = line.split(COLUMN_SEPARATOR);
                if (columns.length != 2) {
                    throw new Exception("Format error in [" + file.getAbsolutePath() + "] at line #[" + lineNumber
                        + "]: " + line);
                }

                map.put(columns[0], columns[1]);

                lineNumber++;
                line = reader.readLine();
            }
            return map;
        } finally {
            reader.close();
        }
    }

    /**
     * Takes all map entries in this object and writes them to the given file such that it can later
     * be loaded in via {@link #loadFromFile(File)}.
     * 
     * @param file the file to store the entries to
     * @throws Exception if failed to store the entries to the given file
     */
    public void storeToFile(File file) throws Exception {
        PrintWriter writer = new PrintWriter(file);
        try {
            for (Map.Entry<String, String> entry : entrySet()) {
                writer.println(entry.getKey() + COLUMN_SEPARATOR + entry.getValue());
            }
        } finally {
            writer.close();
        }
        return;
    }

    /**
     * This rescans a set of files (found in this map) and returns a map with an updated, current set of hashcodes.
     *
     * If this original map has relative paths, they will be considered relative to the given
     * root directory. If a file is not found, it will still be in the returned map but its
     * hashcode will be {@link #DELETED_FILE_HASHCODE}.
     * 
     * The root directory is also scanned for new files that are not in this original
     * map - if new files are found (and they do not match the ignoreRegex), they are added to the
     * returned map. Note that if <code>reportNewRootFilesAsNew</code> is false, and if new files
     * are found in the top root directory and they are not related to the deployment fileset,
     * they will not be added to the returned map.
     *
     * @param rootDir directory where the relative paths are expected to be
     * @param ignoreRegex if relative paths of files under rootDir match this, they will be ignored.
     *                    This will eliminate files/directories from being considered "new" because
     *                    they aren't in original.
     * @param reportNewRootFilesAsNew do not report as new any unrelated files found in the root dir
     * @return a map with current files/hashcodes, including files that were not found in original. 
     *         the returned object also has additional info such as those files that were added,
     *         deleted, changed from this original. It also indicates what was ignored during the rescan. 
     * @throws Exception
     */
    public ChangesFileHashcodeMap rescan(File rootDir, Pattern ignoreRegex, boolean reportNewRootFilesAsNew)
        throws Exception {
        ChangesFileHashcodeMap current = new ChangesFileHashcodeMap(this);

        // go through our original files and recalculate their hashcodes
        for (Map.Entry<String, String> entry : entrySet()) {
            String originalFileString = entry.getKey();

            // if we are now to ignore this file, don't put it in our current map and skip to the next file
            if (ignoreRegex != null && ignoreRegex.matcher(originalFileString).matches()) {
                current.remove(originalFileString);
                current.getIgnored().add(originalFileString);
                continue;
            }

            File originalFile = new File(originalFileString);
            if (!originalFile.isAbsolute()) {
                originalFile = new File(rootDir, originalFileString);
            }

            if (originalFile.exists()) {
                String currentHashcode = FileHashcodeMap.DIRECTORY_HASHCODE;
                if (!originalFile.isDirectory()) {
                    currentHashcode = MessageDigestGenerator.getDigestString(originalFile);
                }

                current.put(originalFileString, currentHashcode);

                // if file has been changed, mark it as such in our return map
                String originalHashcode = entry.getValue();
                if (!currentHashcode.equals(originalHashcode)) {
                    current.getChanges().put(originalFileString, currentHashcode);
                }
            } else {
                // file has been deleted! still put an entry in our returned map but mark it as deleted
                current.put(originalFileString, DELETED_FILE_HASHCODE);
                current.getDeletions().put(originalFileString, DELETED_FILE_HASHCODE);
            }
        }

        // now recursively traverse the root directory and look for new files that aren't in our original map
        // files that have been added need to be put into our returned map and also marked as added
        FileHashcodeMap newFiles = new FileHashcodeMap();
        Set<String> skippedFiles = new HashSet<String>();
        lookForNewFilesRecursive(newFiles, skippedFiles, rootDir.getAbsolutePath(), 0, rootDir, ignoreRegex, current
            .getIgnored(), reportNewRootFilesAsNew);
        current.putAll(newFiles);
        current.getAdditions().putAll(newFiles);
        current.getSkipped().addAll(skippedFiles);

        return current;
    }

    /**
     * This looks for new files under the given fileOrDir and adds them to <code>newFiles</code>.
     * 
     * @param newFiles    the map where the new, current file/hashcode data will be stored
     * @param skippedFiles a set where names of unrelated files/directories are stored. The names
     *                     found here after this method returns are those files/dirs that were found
     *                     in the top level root dir, but were skipped over and not processed. This will not
     *                     contain paths with subdirectories - they will only be a filename with no paths
     *                     because they are located directly under the root dir.
     * @param rootPath the top root directory that is being scanned
     * @param level the level deep in the file hierarchy currently being processed (0==at top root dir)
     * @param fileOrDir existing directory/file to rescan
     * @param ignoreRegex a regular expression that indicates which files/directories should be ignored.
     *                    If a relative file/directory path matches this regex, it will be skipped.
     * @param ignored a set that will contain those files/directories that were ignored while scanning the root dir
     * @param reportNewRootFilesAsNew if false, ignore unrelated files at the root dir location (level=0)
     *
     * @throws Exception 
     */
    private void lookForNewFilesRecursive(FileHashcodeMap newFiles, Set<String> skippedFiles, String rootPath,
        int level, File fileOrDir, Pattern ignoreRegex, Set<String> ignored, boolean reportNewRootFilesAsNew)
        throws Exception {

        if (fileOrDir == null || !fileOrDir.exists()) {
            throw new Exception("Non-existent file/directory provided: " + fileOrDir);
        }

        // get path relative to the top root node
        String path;
        if (level == 0) {
            path = fileOrDir.getName();
        } else {
            path = fileOrDir.getAbsolutePath().substring(rootPath.length() + 1);
        }

        path = convertPath(path);

        // if this path is one the caller wants us to ignore, then return immediately
        if (ignoreRegex != null) {
            String matchPath = path;
            // directory wouldn't match pattern unless i has trailing slash
            if (fileOrDir.isDirectory()) {
                matchPath = convertPath(path+File.separator);
            }
            if (ignoreRegex.matcher(matchPath).matches()) {
                ignored.add(path);
                return;
            }
        }

        if (fileOrDir.isDirectory()) {
            // we never calculate hashcodes for our own install metadata
            if (fileOrDir.getName().equals(DeploymentsMetadata.METADATA_DIR)) {
                return;
            }

            // If we are currently at the top level and we are not to report unrelated files as new
            // then we need to find out which files ARE related first. Related files are those files
            // already in our Map at the root level (i.e. they are just a filename, no parent paths)
            // plus those in our Map with a top parent dir name that match a child directory of
            // fileOrDir (which is the root dir if we are at level=0).
            // Note that we use File API to manipulate filenames/parents to ensure we do this right
            // on Windows (taking care of relative paths with drive letters, e.g. C:subdir/file.txt).
            HashSet<String> relatedTopLevelFiles = null;
            if (level == 0 && !reportNewRootFilesAsNew) {
                relatedTopLevelFiles = new HashSet<String>();
                for (String relatedFilePath : keySet()) { // loop through our Map key entries, these are the "related" files
                    File relatedFile = new File(relatedFilePath);
                    if (!relatedFile.isAbsolute()) {
                        String topLevelName = relatedFile.getName(); // prime the pump; if we have no parent, this is our file already at the top root dir
                        File parent = relatedFile.getParentFile();
                        // walk up the file hierarchy until we hit the top parent - this is the related dir found in our top root
                        while (parent != null) {
                            topLevelName = parent.getName();
                            parent = parent.getParentFile();
                        }
                        relatedTopLevelFiles.add(topLevelName); // this is a file or dir at the top root dir                             
                    }
                }
            }
            // we also care about directories and we're reporting them as newFiles
            if (level > 0 && reportNewRootFilesAsNew) {
             // if the file is not yet known to us, add it to the map of new files
                if (!containsKey(path)) {
                    // just insert directory with non-important hash value
                    newFiles.put(path, DIRECTORY_HASHCODE);
                }
            }

            File[] children = fileOrDir.listFiles();
            if (children != null) {
                for (File child : children) {
                    // skip this child if we are at the top root dir and it is not related to our fileset
                    if (relatedTopLevelFiles == null || relatedTopLevelFiles.contains(child.getName())) {
                        lookForNewFilesRecursive(newFiles, skippedFiles, rootPath, level + 1, child, ignoreRegex,
                            ignored, reportNewRootFilesAsNew);
                    } else {
                        skippedFiles.add(child.getName());
                    }
                }
            }
        } else {
            // if the file is not yet known to us, add it to the map of new files
            if (!containsKey(path)) {
                String hashcode;
                try {
                    hashcode = MessageDigestGenerator.getDigestString(fileOrDir);
                } catch (Exception e) {
                    hashcode = UNKNOWN_FILE_HASHCODE;
                }
                newFiles.put(path, hashcode);
            }
        }

        return;
    }

    /**
     * If a file was not readable or its hashcode could not be generated for some reason, its path
     * will be returned as a key to the returned map. The value will be {@link #UNKNOWN_FILE_HASHCODE}.
     * If a directory was not readable or its list of files could not be retrieved for some reason, its path
     * will be returned as a key to the returned map. The value will be {@link #UNKNOWN_DIR_HASHCODE}.
     *  
     * @return map of file or directories whose hashcodes could not be determined. Will be <code>null</code> if
     *         the map is fully complete and all content was able to have its hashcodes generated.
     */
    public Map<String, String> getUnknownContent() {
        Map<String, String> unknowns = null;

        for (Map.Entry<String, String> entry : entrySet()) {
            if (entry.getValue().equals(UNKNOWN_DIR_HASHCODE) || entry.getValue().equals(UNKNOWN_FILE_HASHCODE)) {
                if (unknowns == null) {
                    unknowns = new HashMap<String, String>();
                }
                unknowns.put(entry.getKey(), entry.getValue());
            }
        }

        return unknowns;
    }

    @Override
    public String put(String key, String value) {
        return super.put(convertPath(key), value);
    }

    @Override
    public String get(Object key) {
        return super.get(convertPath((String) key));
    }

    @Override
    public String remove(Object key) {
        return super.remove(convertPath((String) key));
    }

    @Override
    public boolean containsKey(Object key) {
        return super.containsKey(convertPath((String) key));
    }

    /**
     * Converts the path to the form that will be stored internally.
     * 
     * @param path a filepath to be converted
     * 
     * @return the converted path that is to be used to store in the internal sets.
     */
    public String convertPath(String path) {
        if (File.separatorChar != '/') {
            if (path != null) {
                path = path.replace(File.separatorChar, '/');
            }
        }

        return path;
    }
}