package org.wikipedia.miner.web.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.Authenticator;
import java.net.HttpURLConnection;
import java.net.PasswordAuthentication;
import java.net.URL;
import java.util.Properties;

/**
 * Fetches the textual content of web pages over HTTP, optionally through a
 * proxy described by a {@link HubConfiguration}.
 *
 * <p>Constructing an instance has JVM-wide side effects: the proxy host/port
 * are written to system properties and, when credentials are configured, the
 * default {@link Authenticator} is replaced.
 */
public class WebContentRetriever {

    /**
     * Applies the proxy settings found in {@code config} to the running JVM.
     *
     * <p>Each setting is applied only when the configuration returns a
     * non-null value for it; the authenticator is installed only when both
     * the proxy user and the proxy password are non-null.
     *
     * @param config source of the proxy host, port, user and password; must not be null
     */
    public WebContentRetriever(HubConfiguration config) {

        String proxyHost = config.getProxyHost();
        String proxyPort = config.getProxyPort();

        // These are global JVM properties consulted by the standard HTTP protocol handler.
        Properties systemSettings = System.getProperties();
        if (proxyHost != null) {
            systemSettings.setProperty("http.proxyHost", proxyHost);
        }
        if (proxyPort != null) {
            systemSettings.setProperty("http.proxyPort", proxyPort);
        }

        final String proxyUser = config.getProxyUser();
        final String proxyPassword = config.getProxyPassword();

        if (proxyUser != null && proxyPassword != null) {
            // NOTE(review): this authenticator answers every authentication request
            // with the proxy credentials, not only proxy challenges; it does not
            // inspect the requestor type.
            Authenticator.setDefault(new Authenticator() {
                @Override
                protected PasswordAuthentication getPasswordAuthentication() {
                    return new PasswordAuthentication(proxyUser, proxyPassword.toCharArray());
                }
            });
        }
    }

    /**
     * Downloads the resource at {@code url} and returns its body as text.
     *
     * <p>The response is decoded as UTF-8 regardless of any charset the server
     * declares. It is read line by line, and every line is terminated with a
     * single {@code '\n'} in the result, so original line terminators are
     * normalised and a trailing newline is always present for non-empty
     * content. Redirects are followed. The response stream is closed before
     * this method returns, whether it completes normally or throws.
     *
     * @param url the location to fetch; its connection must be an {@link HttpURLConnection}
     * @return the decoded response body, or an empty string if the body is empty
     * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
     * @throws IOException if connecting to or reading from the URL fails
     */
    public String getWebContent(URL url) throws UnsupportedEncodingException, IOException {

        HttpURLConnection con = (HttpURLConnection) url.openConnection();
        con.setInstanceFollowRedirects(true);

        BufferedReader input = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8"));
        try {
            // StringBuilder: the buffer never leaves this method, so no synchronisation is needed.
            StringBuilder content = new StringBuilder();

            String line;
            while ((line = input.readLine()) != null) {
                content.append(line).append('\n');
            }

            return content.toString();
        } finally {
            // Closing the reader closes the connection's input stream, releasing the socket
            // (or returning it to the keep-alive pool) even when reading fails part-way.
            input.close();
        }
    }
}