/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading. If not, see <http://www.gnu.org/licenses/>.
 */

package cascading.tap.hadoop;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.log4j.Logger;

/**
 * Class FSDigestInputStream is an {@link FSInputStream} implementation that can verify a
 * {@link MessageDigest} and will count the number of bytes read for use in progress status.
 * <p/>
 * The digest is only computed when an expected digest hex string is supplied; it covers the bytes
 * actually read through this stream and is checked when the stream is closed.
 * <p/>
 * Seeking is forward-only and is implemented by reading and discarding bytes.
 */
public class FSDigestInputStream extends FSInputStream
  {
  /** Field LOG */
  private static final Logger LOG = Logger.getLogger( FSDigestInputStream.class );

  /** Field SKIP_BUFFER_SIZE is the size of the scratch buffer used when seeking forward */
  private static final int SKIP_BUFFER_SIZE = 50 * 1024;

  /** Field count is the number of bytes successfully read so far, a long so it cannot overflow past 2GB */
  long count = 0;
  /** Field inputStream is the wrapped stream, a {@link DigestInputStream} when digestHex is not null */
  InputStream inputStream;
  /** Field digestHex is the expected digest as a hex string, or null if no verification is requested */
  String digestHex;
  /** Field closed guards against verifying the (already reset) digest a second time */
  private boolean closed = false;

  /**
   * Constructor FSDigestInputStream creates a new FSDigestInputStream instance that uses an MD5 digest.
   *
   * @param inputStream of type InputStream
   * @param digestHex   of type String, may be null to disable digest verification
   * @throws IOException if unable to get md5 digest
   */
  public FSDigestInputStream( InputStream inputStream, String digestHex ) throws IOException
    {
    this( inputStream, getMD5Digest(), digestHex );
    }

  /**
   * Constructor FSDigestInputStream creates a new FSDigestInputStream instance.
   * <p/>
   * If digestHex is null, the given stream is used directly and messageDigest is ignored.
   *
   * @param inputStream   of type InputStream
   * @param messageDigest of type MessageDigest
   * @param digestHex     of type String, may be null to disable digest verification
   */
  public FSDigestInputStream( InputStream inputStream, MessageDigest messageDigest, String digestHex )
    {
    this.inputStream = digestHex == null ? inputStream : new DigestInputStream( inputStream, messageDigest );
    this.digestHex = digestHex;
    }

  /**
   * Method getMD5Digest returns a new MD5 {@link MessageDigest} instance.
   *
   * @return the MD5Digest (type MessageDigest)
   * @throws IOException when the MD5 algorithm is not available, the original exception is set as the cause
   */
  private static MessageDigest getMD5Digest() throws IOException
    {
    try
      {
      return MessageDigest.getInstance( "MD5" );
      }
    catch( NoSuchAlgorithmException exception )
      {
      IOException ioException = new IOException( "digest not found: " + exception.getMessage() );

      // keep the root cause without relying on the IOException(String, Throwable) constructor
      ioException.initCause( exception );

      throw ioException;
      }
    }

  /**
   * Method read reads a single byte and advances the position only if a byte was actually returned.
   *
   * @return the byte read, or -1 at end of stream
   * @throws IOException when the underlying stream fails
   */
  @Override
  public int read() throws IOException
    {
    int result = inputStream.read();

    // -1 signals end of stream, no byte was consumed
    if( result != -1 )
      count++;

    return result;
    }

  /**
   * Method read reads up to len bytes into the given buffer and advances the position by the
   * number of bytes actually read.
   *
   * @param b   of type byte[]
   * @param off of type int
   * @param len of type int
   * @return the number of bytes read, or -1 at end of stream
   * @throws IOException when the underlying stream fails
   */
  @Override
  public int read( byte[] b, int off, int len ) throws IOException
    {
    int result = inputStream.read( b, off, len );

    // a negative result is the end of stream marker, not a byte count
    if( result > 0 )
      count += result;

    return result;
    }

  /**
   * Method close closes the underlying stream and, if an expected digest was given, compares it
   * (ignoring hex case) against the digest of the bytes read. Subsequent calls have no effect.
   *
   * @throws IOException when the underlying stream fails to close, or the digests do not match
   */
  @Override
  public void close() throws IOException
    {
    // MessageDigest#digest() resets the digest, so verifying twice would report a false mismatch
    if( closed )
      return;

    closed = true;

    inputStream.close();

    LOG.info( "closing stream, testing digest: [" + ( digestHex == null ? "none" : digestHex ) + "]" );

    if( digestHex == null )
      return;

    MessageDigest messageDigest = ( (DigestInputStream) inputStream ).getMessageDigest();
    String actualHex = new String( Hex.encodeHex( messageDigest.digest() ) );

    // encodeHex emits lower case, the expected value may have been supplied in upper case
    if( !actualHex.equalsIgnoreCase( digestHex ) )
      {
      String message = "given digest: [" + digestHex + "], does not match input stream digest: [" + actualHex + "]";
      LOG.error( message );
      throw new IOException( message );
      }
    }

  /**
   * Method seek moves forward to the given position by reading and discarding bytes.
   *
   * @param pos of type long
   * @throws IOException when pos is before the current position, or the stream ends before pos is reached
   */
  @Override
  public void seek( long pos ) throws IOException
    {
    if( getPos() == pos )
      return;

    if( getPos() > pos )
      throw new IOException( "cannot seek to " + pos + ", currently at " + getPos() );

    long remaining = pos - getPos();
    byte[] bytes = new byte[ SKIP_BUFFER_SIZE ];

    while( remaining > 0 )
      {
      int read = read( bytes, 0, (int) Math.min( remaining, bytes.length ) );

      // without this check an end of stream would never terminate the loop
      if( read < 0 )
        throw new EOFException( "cannot seek to " + pos + ", reached end of stream at " + getPos() );

      remaining -= read;
      }
    }

  /**
   * Method getPos returns the number of bytes read from this stream so far.
   *
   * @return the current position (type long)
   * @throws IOException never thrown by this implementation
   */
  @Override
  public long getPos() throws IOException
    {
    return count;
    }

  /**
   * Method seekToNewSource always returns false, this implementation never switches sources.
   *
   * @param targetPos of type long
   * @return false
   * @throws IOException never thrown by this implementation
   */
  @Override
  public boolean seekToNewSource( long targetPos ) throws IOException
    {
    return false;
    }
  }