/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading. If not, see <http://www.gnu.org/licenses/>.
 */

package cascading.tap.hadoop;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

import cascading.util.S3Util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Progressable;
import org.apache.log4j.Logger;
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
import org.jets3t.service.model.S3Bucket;
import org.jets3t.service.model.S3Object;

/**
 * Class S3HttpFileSystem provides a basic {@link FileSystem} for reading and writing remote S3 data.
 * <p/>
 * To use this FileSystem, reference your S3 resources with the following URI pattern:<br/>
 * s3tp://AWS_ACCESS_KEY_ID:AWS_SECRET_ACCESS_KEY@bucketname/key
 * <p/>
 * Optionally these configuration/system properties can be set, instead of stuffing values into the URL authority:
 * "fs.s3tp.awsAccessKeyId" and "fs.s3tp.awsSecretAccessKey".
 * <p/>
 * Files written through this FileSystem are buffered completely in memory and only sent to S3 when the
 * returned stream is closed, see {@link #create(Path, FsPermission, boolean, int, short, long, Progressable)}.
 *
 * @deprecated use the Hadoop NativeS3FileSystem (s3n://) instead
 */
@Deprecated
public class S3HttpFileSystem extends StreamedFileSystem
  {
  /** Field LOG */
  private static final Logger LOG = Logger.getLogger( S3HttpFileSystem.class );

  /** Field S3TP_SCHEME is the URI scheme name used for this FileSystem */
  public static final String S3TP_SCHEME = "s3tp";

  /** Field uri holds the scheme and authority this instance was initialized with, see {@link #initialize(URI, Configuration)} */
  private URI uri;
  /** Field s3Service is the service handle passed to every {@link S3Util} call */
  private RestS3Service s3Service;
  /** Field s3Bucket is the bucket resolved from the initializing URI */
  private S3Bucket s3Bucket;

  /**
   * Method initialize prepares this instance for the given URI. It logs a deprecation warning, stores the
   * configuration, and resolves the S3 service and bucket through {@link S3Util}.
   * <p/>
   * The access key and secret are read from the configuration properties "fs.s3tp.awsAccessKeyId" and
   * "fs.s3tp.awsSecretAccessKey", falling back to the System properties of the same name. Either may be
   * null, in which case they are handed to {@link S3Util} as null.
   *
   * @param uri  of type URI, the s3tp URI identifying the bucket
   * @param conf of type Configuration
   * @throws IOException when thrown by the underlying {@link S3Util} calls
   */
  @Override
  public void initialize( URI uri, Configuration conf ) throws IOException
    {
    // the Hadoop replacement class is NativeS3FileSystem, registered under the s3n scheme
    LOG.warn( "the S3HttpFileSystem (s3tp://) is deprecated, please use the Hadoop NativeS3FileSystem (s3n://)" );

    setConf( conf );

    String key = conf.get( "fs.s3tp.awsAccessKeyId", System.getProperty( "fs.s3tp.awsAccessKeyId" ) );
    String secret = conf.get( "fs.s3tp.awsSecretAccessKey", System.getProperty( "fs.s3tp.awsSecretAccessKey" ) );

    this.s3Service = S3Util.getS3Service( uri, key, secret );
    this.s3Bucket = S3Util.getS3Bucket( uri );

    // only scheme and authority are retained, path/query/fragment of the given URI are dropped
    this.uri = URI.create( uri.getScheme() + "://" + uri.getAuthority() );
    }

  /**
   * Method getUri returns the scheme and authority this FileSystem was initialized with.
   *
   * @return URI, null if {@link #initialize(URI, Configuration)} has not been called
   */
  @Override
  public URI getUri()
    {
    return uri;
    }

  /**
   * Method create returns a stream that buffers everything written to it in memory, computing an MD5 digest
   * along the way. The data is sent to S3 as a single object when the stream is closed.
   * <p/>
   * The permission, bufferSize, replication, blockSize, and progress arguments are not used.
   *
   * @param path        of type Path, the object to create
   * @param permission  of type FsPermission, ignored
   * @param overwrite   of type boolean, when false an existing path causes an IOException
   * @param bufferSize  of type int, ignored
   * @param replication of type short, ignored
   * @param blockSize   of type long, ignored
   * @param progress    of type Progressable, ignored
   * @return FSDataOutputStream
   * @throws IOException when overwrite is false and the path exists, or the MD5 digest is unavailable
   */
  @Override
  public FSDataOutputStream create( final Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress ) throws IOException
    {
    if( !overwrite && exists( path ) )
      throw new IOException( "file already exists: " + path );

    if( LOG.isDebugEnabled() )
      LOG.debug( "creating file: " + path );

    final ByteArrayOutputStream stream = new ByteArrayOutputStream();
    final DigestOutputStream digestStream = new DigestOutputStream( stream, getMD5Digest() );

    return new FSDataOutputStream( digestStream, null )
    {
    /** Guards against a repeated close() putting the object a second time */
    private boolean closed = false;

    @Override
    public void close() throws IOException
      {
      // closing an already closed stream must have no effect
      if( closed )
        return;

      // marked before the upload so a failed put is reported once through the thrown exception
      closed = true;

      super.close();

      S3Object object = S3Util.getObject( s3Service, s3Bucket, path, S3Util.Request.CREATE_OBJECT );

      object.setContentType( "text/plain" ); // todo use 'binary/octet-stream'
      object.setMd5Hash( digestStream.getMessageDigest().digest() );

      // todo buffer to disk instead
      byte[] bytes = stream.toByteArray();

      object.setDataInputStream( new ByteArrayInputStream( bytes ) );
      object.setContentLength( bytes.length );

      if( LOG.isDebugEnabled() )
        LOG.debug( "putting file: " + path );

      S3Util.putObject( s3Service, s3Bucket, object );
      }
    };
    }

  /**
   * Method open returns a stream over the S3 object at the given path. The object stream is wrapped in a
   * {@link FSDigestInputStream} together with the value of getMD5SumFor() for the path.
   *
   * @param path of type Path
   * @param i    of type int, the requested buffer size, ignored
   * @return FSDataInputStream
   * @throws IOException when thrown by the underlying {@link S3Util} calls
   */
  @Override
  public FSDataInputStream open( Path path, int i ) throws IOException
    {
    if( LOG.isDebugEnabled() )
      LOG.debug( "opening file: " + path );

    S3Object object = S3Util.getObject( s3Service, s3Bucket, path, S3Util.Request.OBJECT );

    FSDigestInputStream inputStream = new FSDigestInputStream( S3Util.getObjectInputStream( object ), getMD5SumFor( getConf(), path ) );

    // ctor requires Seekable or PositionedReadable stream
    return new FSDataInputStream( inputStream );
    }

  /**
   * Method mkdirs puts a directory object for the given path, unless the path already resolves to a directory.
   * <p/>
   * The fsPermission argument is not used. This method always returns true if no exception is thrown.
   *
   * @param path         of type Path
   * @param fsPermission of type FsPermission, ignored
   * @return boolean, always true
   * @throws IOException when thrown by the underlying {@link S3Util} calls
   */
  @Override
  public boolean mkdirs( Path path, FsPermission fsPermission ) throws IOException
    {
    if( LOG.isDebugEnabled() )
      LOG.debug( "making dirs for: " + path );

    S3Object directory = S3Util.getObject( s3Service, s3Bucket, path, S3Util.Request.DETAILS );

    // nothing to do if the directory is already there
    if( directory != null && S3Util.isDirectory( directory ) )
      return true;

    directory = S3Util.getObject( s3Service, s3Bucket, path, S3Util.Request.CREATE_DIR );

    S3Util.putObject( s3Service, s3Bucket, directory );

    return true;
    }

  /**
   * Method delete removes the given path by delegating to {@link #delete(Path, boolean)} with recursive set to true.
   *
   * @param path of type Path
   * @return boolean, the result of {@link #delete(Path, boolean)}
   * @throws IOException when thrown by {@link #delete(Path, boolean)}
   */
  @Deprecated
  @Override
  public boolean delete( Path path ) throws IOException
    {
    return delete( path, true );
    }

  /**
   * Method delete removes the object at the given path via {@link S3Util#deleteObject}.
   * <p/>
   * The recursive argument is not consulted by this method, the same call is made for either value.
   *
   * @param path      of type Path
   * @param recursive of type boolean, ignored
   * @return boolean, the value returned by {@link S3Util#deleteObject}
   * @throws IOException when thrown by the underlying {@link S3Util} call
   */
  @Override
  public boolean delete( Path path, boolean recursive ) throws IOException
    {
    if( LOG.isDebugEnabled() )
      LOG.debug( "deleting file: " + path );

    return S3Util.deleteObject( s3Service, s3Bucket, path );
    }

  /**
   * Method exists tests for the given path by requesting the object details.
   *
   * @param path of type Path
   * @return boolean, true if the details request returns a non-null object
   * @throws IOException when thrown by the underlying {@link S3Util} call
   */
  @Override
  public boolean exists( Path path ) throws IOException
    {
    if( LOG.isDebugEnabled() )
      LOG.debug( "testing file: " + path );

    return S3Util.getObject( s3Service, s3Bucket, path, S3Util.Request.DETAILS ) != null;
    }

  /**
   * Method listStatus returns a status for every object listed under the given path. If one of the listed
   * objects has exactly the key of the path and is not a directory, only the status of that object is returned.
   *
   * @param path of type Path
   * @return FileStatus[], one element per listed object, or a single element on an exact file match
   * @throws IOException when thrown by the underlying {@link S3Util} calls
   */
  @Override
  public FileStatus[] listStatus( Path path ) throws IOException
    {
    if( LOG.isDebugEnabled() )
      LOG.debug( "listing path: " + path );

    // todo: content-type not returned on list
    S3Object[] objects = S3Util.listObjects( s3Service, s3Bucket, path );

    // if an object is an exact match, and is a file, just return the file status
    String key = S3Util.getKeyFrom( path );

    for( S3Object object : objects )
      {
      if( object.getKey().equals( key ) && !S3Util.isDirectory( object ) )
        return new FileStatus[]{makeStatus( object )};
      }

    FileStatus[] status = new FileStatus[objects.length];

    for( int i = 0; i < objects.length; i++ )
      status[ i ] = makeStatus( objects[ i ] );

    return status;
    }

  /**
   * Method getFileStatus returns the status of the object at the given path.
   *
   * @param path of type Path
   * @return FileStatus
   * @throws FileNotFoundException when the details request returns no object for the path
   * @throws IOException           when thrown by the underlying {@link S3Util} call
   */
  @Override
  public FileStatus getFileStatus( Path path ) throws IOException
    {
    S3Object object = S3Util.getObject( s3Service, s3Bucket, path, S3Util.Request.DETAILS );

    if( LOG.isDebugEnabled() )
      LOG.debug( "returning status for: " + path );

    if( object == null )
      throw new FileNotFoundException( "file does not exist: " + path );

    return makeStatus( object );
    }

  /**
   * Method makeStatus converts the given S3 object into a {@link StreamedFileStatus}, using the object's
   * content length, last modified time, and MD5 hash. The path is the object key resolved against this
   * FileSystem's URI, replication is fixed at 1, and the block size is the FileSystem default.
   *
   * @param object of type S3Object
   * @return StreamedFileStatus
   */
  private StreamedFileStatus makeStatus( S3Object object )
    {
    return new StreamedFileStatus( object.getContentLength(), S3Util.isDirectory( object ), 1, getDefaultBlockSize(), object.getLastModifiedDate().getTime(), new Path( uri.toString() + "/", object.getKey() ), object.getMd5HashAsHex() );
    }

  /**
   * Method getMD5Digest returns a new MD5 {@link MessageDigest} instance.
   *
   * @return MessageDigest
   * @throws IOException wrapping the {@link NoSuchAlgorithmException} if MD5 is not available
   */
  private MessageDigest getMD5Digest() throws IOException
    {
    try
      {
      return MessageDigest.getInstance( "MD5" );
      }
    catch( NoSuchAlgorithmException exception )
      {
      // keep the original exception as the cause so its stack trace is not lost
      IOException ioException = new IOException( "digest not found: " + exception.getMessage() );

      ioException.initCause( exception );

      throw ioException;
      }
    }
  }