package ecologylab.net; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.SocketTimeoutException; import java.net.URL; import java.net.URLConnection; import java.util.HashMap; import ecologylab.collections.CollectionTools; import ecologylab.generic.Debug; /** * Combines URLConnection with InputStream, providing convenience. * * @author andruid */ public class PURLConnection extends Debug { protected ParsedURL purl; protected InputStream inputStream; protected HttpURLConnection urlConnection; protected String mimeType; /** * If true, a timeout occurred during connect(). */ boolean timeout = false; boolean good = false; /** Fill out the instance of this resulting from a succcessful connect(). * @param purl TODO * @param urlConnection * @param inputStream */ public PURLConnection(ParsedURL purl) { this.purl = purl; } public PURLConnection(ParsedURL purl, HttpURLConnection urlConnection, InputStream inputStream) { this.purl = purl; this.inputStream = inputStream; this.urlConnection = urlConnection; this.good = true; } public void connect(ConnectionHelper connectionHelper, String userAgent, int connectionTimeout, int readTimeout) { // get an InputStream, and set the mimeType, if not bad if (purl.isFile()) { File file = purl.file(); if (file.isDirectory()) connectionHelper.handleFileDirectory(file); else { String suffix = purl.suffix(); if (suffix != null) { if (connectionHelper.parseFilesWithSuffix(suffix)) { try { fileConnect(); } catch (FileNotFoundException e) { error("Can't open because FileNotFoundException"); } } } } } else { networkConnectAndCatch(connectionHelper, userAgent, connectionTimeout, readTimeout); } } public void fileConnect() throws FileNotFoundException { inputStream = new FileInputStream(purl.file()); good = true; } public void streamConnect(InputStream inputStream) { this.inputStream = inputStream; good = true; } public void networkConnectAndCatch(ConnectionHelper connectionHelper, String userAgent) { networkConnectAndCatch(connectionHelper, userAgent, ParsedURL.CONNECT_TIMEOUT, ParsedURL.READ_TIMEOUT); } /** * @param connectionHelper * @param userAgent * @param connectionTimeout * @param readTimeout */ public void networkConnectAndCatch(ConnectionHelper connectionHelper, String userAgent, int connectionTimeout, int readTimeout) { try { networkConnect(connectionHelper, userAgent, connectionTimeout, readTimeout); } catch (SocketTimeoutException e) { timeout = true; cleanup(e); } catch (FileNotFoundException e) { cleanup(e); } catch (IOException e) { cleanup(e); } catch (Exception e) // catch all exceptions, including security { cleanup(e); } } public void networkConnect(ConnectionHelperJustRemote connectionHelper, String userAgent) throws IOException { networkConnect(connectionHelper, userAgent, ParsedURL.CONNECT_TIMEOUT, ParsedURL.READ_TIMEOUT); } /** * @param connectionHelper * @param userAgent * @param connectionTimeout * @param readTimeout * @throws IOException * @throws Exception */ public void networkConnect(ConnectionHelperJustRemote connectionHelper, String userAgent, int connectionTimeout, int readTimeout) throws IOException { URL url = purl.url(); urlConnection = (HttpURLConnection) url.openConnection(); // hack so google thinks we're a normal browser // (otherwise, it wont serve us) // connection.setRequestProperty("user-agent", GOOGLE_BOT_USER_AGENT_0); urlConnection.setRequestProperty("user-agent", userAgent); // Set the connection and read timeout. urlConnection.setConnectTimeout(connectionTimeout); urlConnection.setReadTimeout(readTimeout); /* * //TODO include more structure instead of this total hack! if * ("nytimes.com".equals(this.domain())) { String auth = new * sun.misc.BASE64Encoder().encode("fred66:fred66".getBytes()); * connection.setRequestProperty("Authorization", auth); } */ urlConnection.getContentLength(); String mimeType = urlConnection.getContentType(); // no one uses the encoding header: connection.getContentEncoding(); String unsupportedCharset = NetTools.isCharsetSupported(mimeType); if (unsupportedCharset != null) { String message = "Cant process charset " + unsupportedCharset + " in " + this; connectionHelper.displayStatus(message); error(message); } else { // notice if url changed between request and retrieved connection // if so, this is a server-side redirect URL connectionURL = urlConnection.getURL(); if (!url.equals(connectionURL)) // follow redirects! { // avoid doubly stuffed urls //TODO -- does this test belong here????? String connectionFile = connectionURL.getFile(); String file = url.getFile(); if ((file.indexOf("http://") == -1) && (connectionFile.indexOf("http://") == -1)) // if ((path.indexOf("http://") != -1) || (connectionPath.indexOf("http://") != -1)) { if (connectionHelper.processRedirect(connectionURL)) inputStream = urlConnection.getInputStream(); this.good = true; } else { println("WEIRD: skipping double stuffed url: " + connectionURL); } } else { // no redirect, eveything is kewl inputStream = urlConnection.getInputStream(); this.good = true; } } } private void cleanup(Exception e) { error("connect() " + e); close(); } public void recycle() { close(); // purl.recycle(); // purl = null; } public void reconnect() { if (purl != null && purl.isFile() && inputStream ==null) { try { inputStream = new FileInputStream(purl.file()); } catch (FileNotFoundException e) { e.printStackTrace(); } } } /** * Close the InputStream, and disconnect the URLConnection. */ public void close() { // parsing done. now free resources asap to avert leaking and memory fragmentation // (this is a known problem w java.net.HttpURLConnection) InputStream inputStream = this.inputStream; if (inputStream != null) { NetTools.close(inputStream); this.inputStream = null; } if (urlConnection != null) { urlConnection.disconnect(); this.urlConnection = null; } mimeType = null; } /** * @return Returns the inputStream. */ public InputStream inputStream() { return inputStream; } /** * @return Returns the urlConnection. */ public URLConnection urlConnection() { return urlConnection; } /** * Find the mime type returned by the web server to the URLConnection, in its header. * Thus, if there is no URLConnection (as for local file system), this always returns null. * * @return the mime type or null */ public String mimeType() { String result = this.mimeType; if ((result == null) && (urlConnection != null)) { result = urlConnection.getContentType(); if (result != null) { // create the appropriate DocumentType object // lookout for mime types with charset appened int semicolonIndex = result.indexOf(';'); if (semicolonIndex > 0) result = result.substring(0, semicolonIndex); this.mimeType = result; } } return result; } @Override public String toString() { String u = urlConnection != null ? urlConnection.toString() : "null"; String p = purl != null ? purl.toString() : "null"; String f = purl != null ? (purl.file() != null ? "path: "+ purl.file().toString() + " exists: " + purl.file().exists(): "null file") : "null purl"; StringBuilder sb = new StringBuilder(); sb.append("urlConnection: "); sb.append(u); sb.append("\t purl: "); sb.append(p); sb.append("\t file: "); sb.append(f); return sb.toString(); } public ParsedURL getPurl() { return purl; } static final String[] noAlphaMimeStrings = { "image/jpeg", "image/bmp", }; static final HashMap noAlphaMimeMap = CollectionTools .buildHashMapFromStrings(noAlphaMimeStrings); public boolean isNoAlpha() { return mimeType != null && noAlphaMimeMap.containsKey(mimeType); } public boolean getTimeout() { return timeout; } public boolean isGood() { return good; } public void setMimeType(String mimeType) { this.mimeType = mimeType; } }