package com.lightboxtechnologies.spectrum; import java.io.IOException; import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.Arrays; import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.binary.Hex; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.commons.io.output.NullOutputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.lightboxtechnologies.io.IOUtils; import org.sleuthkit.hadoop.core.SKJobFactory; import org.sleuthkit.hadoop.core.SKMapper; public class MD5Checker extends Configured implements Tool { public static class Mapper extends SKMapper<ImmutableHexWritable, FsEntry, Text, Text> { private static final Log LOG = LogFactory.getLog(MD5Checker.class.getName()); private final Text outKey = new Text(); private final Text outVal = new Text(); @Override protected void map(ImmutableHexWritable key, FsEntry value, Context context) throws IOException, InterruptedException { final byte[] actual_md5 = (byte[]) value.get("md5"); if (actual_md5 == null) { return; } final MessageDigest hasher = FsEntryUtils.getHashInstance("MD5"); final DigestInputStream in = new DigestInputStream(value.getInputStream(), hasher); IOUtils.copyLarge( in, NullOutputStream.NULL_OUTPUT_STREAM, new byte[1024 * 1024] ); final byte[] expected_md5 = hasher.digest(); if (!Arrays.equals(actual_md5, expected_md5)) { LOG.error( value.fullPath() + ": " + Hex.encodeHexString(actual_md5) + " != " + Hex.encodeHexString(expected_md5) ); } outKey.set(value.fullPath()); outVal.set( Hex.encodeHexString(actual_md5) + ' ' + Hex.encodeHexString(expected_md5) ); context.write(outKey, outVal); } } public int run(String[] args) throws ClassNotFoundException, DecoderException, IOException, InterruptedException { final String imageID = args[0]; final String friendlyName = args[1]; final Configuration conf = getConf(); final Job job = SKJobFactory.createJobFromConf( imageID, friendlyName, "MD5Checker", conf ); job.setJarByClass(MD5Checker.class); job.setMapperClass(MD5Checker.Mapper.class); job.setNumReduceTasks(0); job.setInputFormatClass(FsEntryHBaseInputFormat.class); FsEntryHBaseInputFormat.setupJob(job, imageID); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path("md5checker")); return job.waitForCompletion(true) ? 0 : 1; } public static void main(String[] args) throws Exception { System.exit( ToolRunner.run(HBaseConfiguration.create(), new MD5Checker(), args) ); } }