/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.tap.hadoop; import java.io.FileNotFoundException; import java.io.IOException; import java.net.HttpURLConnection; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.log4j.Logger; /** * Class HttpFileSystem provides a basic read-only {@link FileSystem} for accessing remote HTTP and HTTPS data. * <p/> * To use this FileSystem, just use regular http:// or https:// URLs. */ public class HttpFileSystem extends StreamedFileSystem { /** Field LOG */ private static final Logger LOG = Logger.getLogger( HttpFileSystem.class ); /** Field HTTP_SCHEME */ public static final String HTTP_SCHEME = "http"; /** Field HTTPS_SCHEME */ public static final String HTTPS_SCHEME = "https"; static { HttpURLConnection.setFollowRedirects( true ); } /** Field scheme */ private String scheme; /** Field authority */ private String authority; @Override public void initialize( URI uri, Configuration configuration ) throws IOException { setConf( configuration ); scheme = uri.getScheme(); authority = uri.getAuthority(); } @Override public URI getUri() { try { return new URI( scheme, authority, null, null, null ); } catch( URISyntaxException exception ) { throw new RuntimeException( "failed parsing uri", exception ); } } @Override public FileStatus[] globStatus( Path path, PathFilter pathFilter ) throws IOException { FileStatus fileStatus = getFileStatus( path ); if( fileStatus == null ) return null; return new FileStatus[]{fileStatus}; } @Override public FSDataInputStream open( Path path, int i ) throws IOException { URL url = makeUrl( path ); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setRequestMethod( "GET" ); connection.connect(); debugConnection( connection ); return new FSDataInputStream( new FSDigestInputStream( connection.getInputStream(), getMD5SumFor( getConf(), path ) ) ); } @Override public boolean exists( Path path ) throws IOException { URL url = makeUrl( path ); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setRequestMethod( "HEAD" ); connection.connect(); debugConnection( connection ); return connection.getResponseCode() == 200; } @Override public FileStatus getFileStatus( Path path ) throws IOException { URL url = makeUrl( path ); HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setRequestMethod( "HEAD" ); connection.connect(); debugConnection( connection ); if( connection.getResponseCode() != 200 ) throw new FileNotFoundException( "could not find file: " + path ); long length = connection.getHeaderFieldInt( "Content-Length", 0 ); length = length < 0 ? 0 : length; // queries may return -1 long modified = connection.getHeaderFieldDate( "Last-Modified", System.currentTimeMillis() ); return new FileStatus( length, false, 1, getDefaultBlockSize(), modified, path ); } private void debugConnection( HttpURLConnection connection ) throws IOException { if( LOG.isDebugEnabled() ) { LOG.debug( "connection.getURL() = " + connection.getURL() ); LOG.debug( "connection.getRequestMethod() = " + connection.getRequestMethod() ); LOG.debug( "connection.getResponseCode() = " + connection.getResponseCode() ); LOG.debug( "connection.getResponseMessage() = " + connection.getResponseMessage() ); LOG.debug( "connection.getContentLength() = " + connection.getContentLength() ); } } private URL makeUrl( Path path ) throws IOException { if( path.toString().startsWith( scheme ) ) return URI.create( path.toString() ).toURL(); try { return new URI( scheme, authority, path.toString(), null, null ).toURL(); } catch( URISyntaxException exception ) { throw new IOException( exception.getMessage() ); } } }