/*
 * Copyright (c) 2002-2009, The DSpace Foundation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of the DSpace Foundation nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */
package org.dspace.checker;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.sql.SQLException;
import java.util.Date;

import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Utils;

/**
 * <p>
 * Main class for the checksum checker tool, which calculates checksums for each
 * bitstream whose ID is in the most_recent_checksum table, and compares it
 * against the last calculated checksum for that bitstream.
 * </p>
 *
 * @author Jim Downing
 * @author Grace Carpenter
 * @author Nathan Sarr
 *
 * @todo the accessor methods are currently unused - are they useful?
 * @todo check for any existing resource problems
 */
public final class CheckerCommand {
    /** Usual Log4J logger. */
    private static final Logger LOG = Logger.getLogger(CheckerCommand.class);

    /** Default digest algorithm (MD5). */
    private static final String DEFAULT_DIGEST_ALGORITHM = "MD5";

    /** Size (4 KB) of the byte array used as read buffer while digesting. */
    private static final int BYTE_ARRAY_SIZE = 4 * 1024;

    /** BitstreamInfoDAO dependency. */
    private BitstreamInfoDAO bitstreamInfoDAO = null;

    /** BitstreamDAO dependency. */
    private BitstreamDAO bitstreamDAO = null;

    /** Checksum history data access object. */
    private ChecksumHistoryDAO checksumHistoryDAO = null;

    /** Start time for current process. */
    private Date processStartDate = null;

    /** Dispatcher to be used for processing run. */
    private BitstreamDispatcher dispatcher = null;

    /**
     * Container/logger with details about each bitstream and checksum results.
     */
    private ChecksumResultsCollector collector = null;

    /** Report all processing, not only bitstreams whose check did not match. */
    private boolean reportVerbose = false;

    /**
     * Default constructor; instantiates the DAO dependencies directly.
     */
    public CheckerCommand() {
        bitstreamInfoDAO = new BitstreamInfoDAO();
        bitstreamDAO = new BitstreamDAO();
        checksumHistoryDAO = new ChecksumHistoryDAO();
    }

    /**
     * <p>
     * Uses the options set up on this checker to determine a mode of execution,
     * and then accepts bitstream ids from the dispatcher and checks their
     * bitstreams against the db records.
     * </p>
     *
     * <p>
     * N.B. a valid BitstreamDispatcher must be provided using
     * {@link #setDispatcher(BitstreamDispatcher)} before calling this method.
     * If no collector has been set, a {@code ResultsLogger} is created for the
     * run.
     * </p>
     *
     * @throws IllegalStateException
     *             if no dispatcher has been set
     */
    public void process() {
        LOG.debug("Begin Checker Processing");

        if (dispatcher == null) {
            throw new IllegalStateException("No BitstreamDispatcher provided");
        }

        if (collector == null) {
            collector = new ResultsLogger(processStartDate);
        }

        // update missing bitstreams that were entered into the
        // bitstream table - this always done.
        bitstreamInfoDAO.updateMissingBitstreams();

        int id = dispatcher.next();

        while (id != BitstreamDispatcher.SENTINEL) {
            LOG.debug("Processing bitstream id = " + id);
            BitstreamInfo info = checkBitstream(id);

            // In non-verbose mode only results other than a checksum match
            // are handed to the collector.
            if (reportVerbose
                    || (info.getChecksumCheckResult() != ChecksumCheckResults.CHECKSUM_MATCH)) {
                collector.collect(info);
            }

            id = dispatcher.next();
        }
    }

    /**
     * Check a specified bitstream.
     *
     * @param id
     *            the bitstream id
     *
     * @return the information about the bitstream and its checksum data
     */
    private BitstreamInfo checkBitstream(final int id) {
        // get bitstream info from bitstream table
        BitstreamInfo info = bitstreamInfoDAO.findByBitstreamId(id);

        // requested id was not found in bitstream
        // or most_recent_checksum table
        if (info == null) {
            // Note: this case should only occur if id is requested at
            // command line, since ref integrity checks should
            // prevent id from appearing in most_recent_checksum
            // but not bitstream table, or vice versa
            info = new BitstreamInfo(id);
            processNullInfoBitstream(info);
        } else if (!info.getToBeProcessed()) {
            // most_recent_checksum.to_be_processed is marked
            // 'false' for this bitstream id.
            // Do not do any db updates
            info.setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_NOT_PROCESSED);
        } else if (info.getDeleted()) {
            // bitstream id is marked 'deleted' in bitstream table.
            processDeletedBitstream(info);
        } else {
            processBitstream(info);
        }

        return info;
    }

    /**
     * Digest the stream and get the checksum value. The stream is read to its
     * end but is not closed here; closing it is left to the caller.
     *
     * @param stream
     *            InputStream to digest.
     * @param algorithm
     *            name of the digest algorithm, passed to
     *            {@link MessageDigest#getInstance(String)}.
     * @return the digest, converted to text by {@code Utils.toHex}
     *
     * @throws java.security.NoSuchAlgorithmException
     *             if the requested algorithm is not provided by the system
     *             security provider.
     * @throws java.io.IOException
     *             If an exception arises whilst reading the stream
     */
    private String digestStream(InputStream stream, String algorithm)
            throws java.security.NoSuchAlgorithmException, java.io.IOException {
        // create the digest stream
        DigestInputStream dStream = new DigestInputStream(stream,
                MessageDigest.getInstance(algorithm));

        byte[] bytes = new byte[BYTE_ARRAY_SIZE];

        // make sure all the data is read by the digester; the bytes themselves
        // are discarded, only the digest side effect is of interest
        while (dStream.read(bytes, 0, BYTE_ARRAY_SIZE) != -1) {
            // no-op
        }

        return Utils.toHex(dStream.getMessageDigest().digest());
    }

    /**
     * Compares two checksums.
     *
     * @param checksumA
     *            the first checksum
     * @param checksumB
     *            the second checksum
     *
     * @return a result code from ChecksumCheckResults:
     *         CHECKSUM_PREV_NOT_FOUND if either argument is null,
     *         CHECKSUM_MATCH if they are equal, CHECKSUM_NO_MATCH otherwise
     */
    private String compareChecksums(String checksumA, String checksumB) {
        String result = ChecksumCheckResults.CHECKSUM_NO_MATCH;

        if ((checksumA == null) || (checksumB == null)) {
            result = ChecksumCheckResults.CHECKSUM_PREV_NOT_FOUND;
        } else if (checksumA.equals(checksumB)) {
            result = ChecksumCheckResults.CHECKSUM_MATCH;
        }

        return result;
    }

    /**
     * Process bitstream that was marked 'deleted' in bitstream table. A deleted
     * bitstream should only be checked once; afterwards it should be marked
     * 'to_be_processed=false'. Note that to_be_processed must be manually
     * updated in db to allow for future processing.
     *
     * @param info
     *            a deleted bitstream.
     */
    private void processDeletedBitstream(BitstreamInfo info) {
        info.setProcessStartDate(new Date());
        info.setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_MARKED_DELETED);
        info.setProcessEndDate(new Date());
        info.setToBeProcessed(false);
        bitstreamInfoDAO.update(info);
        checksumHistoryDAO.insertHistory(info);
    }

    /**
     * Process bitstream whose ID was not found in most_recent_checksum or
     * bitstream table. No updates can be done. The missing bitstream is output
     * to the log file.
     *
     * @param info
     *            A not found BitStreamInfo
     * @todo is this method required?
     */
    private void processNullInfoBitstream(BitstreamInfo info) {
        info.setInfoFound(false);
        info.setProcessStartDate(new Date());
        info.setProcessEndDate(new Date());
        info.setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_INFO_NOT_FOUND);
    }

    /**
     * <p>
     * Process general case bitstream: retrieve its content, calculate the
     * checksum, compare it with the stored one and record the outcome.
     * </p>
     *
     * <p>
     * Note: bitstream will have timestamp indicating it was "checked", even if
     * actual checksumming never took place.
     * </p>
     *
     * @todo Why does bitstream have a timestamp indicating it's checked if
     *       checksumming doesn't occur?
     *
     * @param info
     *            BitstreamInfo to handle
     */
    private void processBitstream(BitstreamInfo info) {
        info.setProcessStartDate(new Date());

        if (info.getChecksumAlgorithm() == null) {
            info.setChecksumAlgorithm(DEFAULT_DIGEST_ALGORITHM);
        }

        // Declared outside the try so it can be released in the finally block.
        InputStream bitstream = null;

        try {
            bitstream = bitstreamDAO.getBitstream(info.getBitstreamId());

            info.setBitstreamFound(true);

            String checksum = digestStream(bitstream, info.getChecksumAlgorithm());

            info.setCalculatedChecksum(checksum);

            // compare new checksum to previous checksum
            info.setChecksumCheckResult(compareChecksums(
                    info.getStoredChecksum(), info.getCalculatedChecksum()));
        } catch (IOException e) {
            // bitstream located, but file missing from asset store
            info.setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_NOT_FOUND);
            info.setToBeProcessed(false);
            LOG.error("Error retrieving bitstream ID " + info.getBitstreamId()
                    + " from " + "asset store.", e);
        } catch (SQLException e) {
            // ??this code only executes if an sql
            // exception occurs in *DSpace* code, probably
            // indicating a general db problem?
            info.setChecksumCheckResult(ChecksumCheckResults.BITSTREAM_INFO_NOT_FOUND);
            LOG.error("Error retrieving metadata for bitstream ID "
                    + info.getBitstreamId(), e);
        } catch (NoSuchAlgorithmException e) {
            info.setChecksumCheckResult(ChecksumCheckResults.CHECKSUM_ALGORITHM_INVALID);
            info.setToBeProcessed(false);
            LOG.error("Invalid digest algorithm type for bitstream ID "
                    + info.getBitstreamId(), e);
        } finally {
            // Release the stream so a long checker run does not leak one
            // handle per bitstream. A failure to close is only logged: it must
            // not overwrite the check result nor prevent the db updates below.
            if (bitstream != null) {
                try {
                    bitstream.close();
                } catch (IOException e) {
                    LOG.warn("Error closing stream for bitstream ID "
                            + info.getBitstreamId(), e);
                }
            }

            info.setProcessEndDate(new Date());

            // record new checksum and comparison result in db
            bitstreamInfoDAO.update(info);
            checksumHistoryDAO.insertHistory(info);
        }
    }

    /**
     * Get dispatcher being used by this run of the checker.
     *
     * @return the dispatcher being used by this run.
     */
    public BitstreamDispatcher getDispatcher() {
        return dispatcher;
    }

    /**
     * Set the dispatcher to be used by this run of the checker.
     *
     * @param dispatcher
     *            Dispatcher to use.
     */
    public void setDispatcher(BitstreamDispatcher dispatcher) {
        this.dispatcher = dispatcher;
    }

    /**
     * Get the collector that holds/logs the results for this process run.
     *
     * @return The ChecksumResultsCollector being used.
     */
    public ChecksumResultsCollector getCollector() {
        return collector;
    }

    /**
     * Set the collector that holds/logs the results for this process run.
     *
     * @param collector
     *            the collector to be used for this run
     */
    public void setCollector(ChecksumResultsCollector collector) {
        this.collector = collector;
    }

    /**
     * Get time at which checker process began.
     *
     * @return start time
     */
    public Date getProcessStartDate() {
        return processStartDate;
    }

    /**
     * Set time at which checker process began.
     *
     * @param startDate
     *            start time
     */
    public void setProcessStartDate(Date startDate) {
        processStartDate = startDate;
    }

    /**
     * Determine whether all processing is reported.
     *
     * @return true if every checked bitstream is passed to the collector,
     *         false if only bitstreams whose result is not a checksum match
     *         are passed on
     */
    public boolean isReportVerbose() {
        return reportVerbose;
    }

    /**
     * Set whether all processing is reported.
     *
     * @param reportVerbose
     *            true to pass every checked bitstream to the collector, false
     *            to pass on only those whose result is not a checksum match.
     */
    public void setReportVerbose(boolean reportVerbose) {
        this.reportVerbose = reportVerbose;
    }
}