package dk.kb.yggdrasil.warc;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

import org.jwat.warc.WarcDigest;

import dk.kb.yggdrasil.exceptions.ArgumentCheck;
import dk.kb.yggdrasil.exceptions.YggdrasilException;

/**
 * Helper class for calculating checksums and wrapping them as {@link WarcDigest} objects.
 *
 * <p>Every calculation resets and reuses the single {@link MessageDigest} held by the
 * instance, so one instance must not be used by several threads at the same time without
 * external synchronization.
 */
public class Digest {

    /** The message digest, reset and reused for every calculation. */
    private final MessageDigest md;

    /** The name of the digest algorithm, used as the label of the produced WarcDigest. */
    private final String digestType;

    /** Size (in bytes) of the buffer used when reading the content to digest. */
    private static final int BYTE_ARRAY_SIZE_FOR_DIGEST = 4096;

    /** Lowercase hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Constructor.
     * @param digestType Type of Digest used (SHA-1 or similar). Validated with
     * {@code ArgumentCheck.checkNotNullOrEmpty}.
     * @throws YggdrasilException If the given algorithm is unrecognized.
     */
    public Digest(String digestType) throws YggdrasilException {
        ArgumentCheck.checkNotNullOrEmpty(digestType, "String digestType");
        this.digestType = digestType;
        try {
            md = MessageDigest.getInstance(digestType);
        } catch (NoSuchAlgorithmException e) {
            throw new YggdrasilException("The digestType '" + digestType
                    + "' is unrecognized", e);
        }
    }

    /**
     * Create a WarcDigest based on the content of the given file.
     * @param fileToDigest The file to digest. Validated with
     * {@code ArgumentCheck.checkExistsNormalFile}.
     * @return a WarcDigest based on the given file.
     * @throws YggdrasilException If the file could not be opened, read or closed, or if the
     * checksum could not be calculated.
     */
    public WarcDigest getDigestOfFile(File fileToDigest) throws YggdrasilException {
        ArgumentCheck.checkExistsNormalFile(fileToDigest, "File fileToDigest");
        try {
            // Opened before the inner try: if opening fails there is nothing to close.
            FileInputStream fis = new FileInputStream(fileToDigest);
            try {
                byte[] checksumBytes = calculateChecksumWithMessageDigest(fis);
                String checksum = encodeBase16(checksumBytes);
                return createWarcDigest(checksum);
            } finally {
                fis.close();
            }
        } catch (IOException e) {
            throw new YggdrasilException("Could not calculate checksum of file '"
                    + fileToDigest + "'", e);
        }
    }

    /**
     * Create a WarcDigest based on the given bytes.
     * @param bytesToDigest The bytes to digest. Validated with
     * {@code ArgumentCheck.checkNotNullOrEmpty}.
     * @return a WarcDigest based on the given bytes.
     * @throws YggdrasilException If the checksum could not be calculated.
     */
    public WarcDigest getDigestOfBytes(byte[] bytesToDigest) throws YggdrasilException {
        ArgumentCheck.checkNotNullOrEmpty(bytesToDigest, "byte[] bytesToDigest");
        ByteArrayInputStream bais = new ByteArrayInputStream(bytesToDigest);
        byte[] checksumBytes = calculateChecksumWithMessageDigest(bais);
        String checksum = encodeBase16(checksumBytes);
        return createWarcDigest(checksum);
    }

    /**
     * Calculates the checksum of an InputStream. The stream is read until end-of-stream
     * but is not closed by this method.
     * @param content The content to calculate the checksum of.
     * @return The raw (not yet hex-encoded) digest bytes.
     * @throws YggdrasilException If something goes wrong while reading or digesting.
     */
    private byte[] calculateChecksumWithMessageDigest(InputStream content)
            throws YggdrasilException {
        byte[] buffer = new byte[BYTE_ARRAY_SIZE_FOR_DIGEST];
        try {
            // Discard any state left over from a previous calculation.
            md.reset();
            int bytesRead;
            // -1 is the documented end-of-stream marker of InputStream.read(byte[]).
            while ((bytesRead = content.read(buffer)) != -1) {
                md.update(buffer, 0, bytesRead);
            }
            return md.digest();
        } catch (Exception e) {
            // Deliberately broad: every failure is reported to callers as a YggdrasilException.
            throw new YggdrasilException("Cannot calculate the checksum.", e);
        }
    }

    /**
     * Encodes bytes as a Base16 (hexadecimal) string: two lowercase hex digits per byte.
     * @param data The bytes to encode.
     * @return The hexadecimal representation of the data, or null if a null is given.
     */
    private static String encodeBase16(byte[] data) {
        if (data == null) {
            return null;
        }
        StringBuilder hex = new StringBuilder(data.length * 2);
        for (int i = 0; i < data.length; i++) {
            // Mask to treat the byte as unsigned before splitting it into two nibbles.
            int v = data[i] & 0xff;
            hex.append(HEX_DIGITS[v >>> 4]);
            hex.append(HEX_DIGITS[v & 0x0f]);
        }
        return hex.toString();
    }

    /**
     * Creates the WarcDigest for the checksum by parsing the labelled digest string
     * {@code digestType + ":" + checksum}.
     * @param checksum The checksum for the warc-digest.
     * @return The WarcDigest with the algorithm for this instance and the given checksum.
     */
    private WarcDigest createWarcDigest(String checksum) {
        return WarcDigest.parseWarcDigest(digestType + ":" + checksum);
    }
}