// Copyright (c) 1999 Dustin Sallings <dustin@spy.net>

package net.spy.net;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import net.spy.util.CloseUtil;
import net.spy.util.SpyUtil;

/**
 * Oversimplified HTTP document fetcher: fetches the contents of a URL.
 *
 * <p>
 * The document is fetched lazily the first time any accessor needs it and
 * the resulting text is cached for the lifetime of the instance.  If a
 * fetch fails with an exception nothing is cached, so the next accessor
 * call attempts the fetch again.
 * </p>
 */
public class HTTPFetch {

    // The URL to fetch.
    private URL url;
    // Cached document body; null until the first successful fetch.
    private String contents=null;
    // Cached result of SpyUtil.deHTML(contents); null until requested.
    private String stripped=null;
    // Request headers to send (may be null).
    private Map<String, List<String>> headers=null;
    // If-Modified-Since value to send; only sent when greater than zero.
    private long ifModifiedSince=0;
    // Response metadata, populated as a side effect of the fetch.
    private int status=0;
    private long lastModified=0;
    private Map<String, List<String>> responseHeaders=null;

    /**
     * Get an HTTPFetch instance for the given URL.
     *
     * @param u the URL to fetch
     */
    public HTTPFetch(URL u) {
        this(u, null);
    }

    /**
     * Get an HTTPFetch instance for the given URL and request headers.
     *
     * @param u URL to fetch
     * @param head Map of header name to the values to send with the
     *             request (may be null for no extra headers).  The map
     *             is stored by reference, not copied.
     */
    public HTTPFetch(URL u, Map<String, List<String>> head) {
        url=u;
        headers=head;
    }

    /**
     * Get the response headers from the request (will force a content
     * fetch).
     *
     * @return a copy of the header fields reported by the connection
     * @throws IOException if there is a problem accessing the URL
     */
    public Map<String, List<String>> getResponseHeaders() throws IOException {
        getData();
        return responseHeaders;
    }

    /**
     * Set the ifModifiedSince value for the request.
     *
     * <p>
     * The value is only applied when it is greater than zero, and it only
     * has an effect if it is set before the document has been fetched.
     * </p>
     *
     * @param to the value handed to
     *           {@link HttpURLConnection#setIfModifiedSince(long)}
     */
    public void setIfModifiedSince(long to) {
        ifModifiedSince=to;
    }

    /**
     * Get the HTTP status from this request (will force a content fetch).
     *
     * @return the response code reported by the connection
     * @throws IOException if there is a problem accessing the URL
     */
    public int getStatus() throws IOException {
        getData();
        return status;
    }

    /**
     * Get the last modified date of this response (will force a content
     * fetch).
     *
     * @return the value reported by
     *         {@link HttpURLConnection#getLastModified()}
     * @throws IOException if there is a problem accessing the URL
     */
    public long getLastModified() throws IOException {
        getData();
        return lastModified;
    }

    /**
     * Get a list containing the individual lines of the document
     * returned from the URL.
     *
     * <p>
     * Lines are split on runs of CR/LF characters using a
     * {@link StringTokenizer}, so empty lines in the document do not
     * appear in the result.
     * </p>
     *
     * @return a new list of the non-empty lines of the document
     * @exception IOException thrown when something fails.
     */
    public List<String> getLines() throws IOException {
        List<String> a = new ArrayList<String>();
        StringTokenizer st=new StringTokenizer(getData(), "\r\n");
        while(st.hasMoreTokens()) {
            a.add(st.nextToken());
        }
        return a;
    }

    /**
     * Return the contents of the URL as a whole string.
     *
     * <p>
     * The document is read line by line and every line is re-terminated
     * with a single "\n", so the original line endings are not preserved.
     * The result is cached; later calls return the cached string without
     * touching the network.
     * </p>
     *
     * @return the contents from the URL as a String
     * @throws IOException if there is a problem accessing the URL
     */
    public String getData() throws IOException {
        if(contents==null) {
            StringBuilder sb=new StringBuilder(256);
            BufferedReader br = null;
            try {
                br=getReader();
                String line;
                while( (line=br.readLine()) != null) {
                    sb.append(line);
                    sb.append("\n");
                }
            } finally {
                CloseUtil.close(br);
            }
            // Only cache once the whole document was read successfully.
            contents=sb.toString();
        }
        return contents;
    }

    /**
     * Return the contents of the URL after passing them through
     * {@link SpyUtil#deHTML}, i.e. with the HTML tags stripped out.
     * The stripped form is cached after the first call.
     *
     * @return the stripped document
     * @exception Exception thrown when something fails.
     */
    public String getStrippedData() throws Exception {
        getData();
        if(stripped==null) {
            stripped=SpyUtil.deHTML(contents);
        }
        return stripped;
    }

    /**
     * Open the connection, send the request and get a reader over the
     * response body.  As a side effect this records the response status,
     * headers and last-modified value on this instance.
     *
     * @return a reader over the response body; the caller must close it
     * @throws IOException if the connection or request fails
     */
    private BufferedReader getReader() throws IOException {
        HttpURLConnection uc = (HttpURLConnection)url.openConnection();
        if(headers!=null) {
            for(Map.Entry<String, List<String>> me: headers.entrySet()) {
                List<String> values = me.getValue();
                if(values == null) {
                    // Nothing to send for this header name.
                    continue;
                }
                // setRequestProperty replaces any earlier value for the
                // key, so only the first value may use it; the remaining
                // values must be added or all but the last would be lost.
                boolean first = true;
                for(String val : values) {
                    if(first) {
                        uc.setRequestProperty(me.getKey(), val);
                        first = false;
                    } else {
                        uc.addRequestProperty(me.getKey(), val);
                    }
                }
            }
        }
        // Set the ifModifiedSince if we have one
        if(ifModifiedSince > 0) {
            uc.setIfModifiedSince(ifModifiedSince);
        }
        InputStream i = uc.getInputStream();
        // Collect some data about this request
        status=uc.getResponseCode();
        responseHeaders=new HashMap<String, List<String>>(
            uc.getHeaderFields());
        lastModified=uc.getLastModified();
        // NOTE(review): the body is decoded with the platform default
        // charset; any charset declared by the server is ignored.
        BufferedReader br = new BufferedReader(new InputStreamReader(i));
        return br;
    }

    /**
     * Fetch the URL given as the first argument and print its stripped
     * contents to standard output.
     *
     * @param args command line arguments; args[0] is the URL to fetch
     * @throws Exception if the fetch or the stripping fails
     */
    public static void main(String args[]) throws Exception {
        if(args.length < 1) {
            throw new IllegalArgumentException("Usage: HTTPFetch <url>");
        }
        HTTPFetch hf=new HTTPFetch(new URL(args[0]));
        System.out.println(hf.getStrippedData());
    }
}