package utils;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.net.Authenticator;
import java.net.PasswordAuthentication;
import java.net.URL;
import java.net.URLConnection;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
//import org.apache.commons.codec.binary.Base64;

/**
 * Static helpers for downloading a page (or any URL) and extracting the text
 * found between caller-supplied start and end markers.
 *
 * <p>The most recently downloaded page is kept in a single-entry cache that is
 * keyed by the URL string only; the cookie and credentials are not part of the
 * key. The cache fields are plain static fields accessed without
 * synchronization.
 *
 * <p>Every download installs a new process-wide default
 * {@link Authenticator} built from the supplied username and password.
 */
public class WebUtilities {

    private static final Logger logger = Logger.getLogger(WebUtilities.class);

    /** Separator placed between the values returned by {@code getPageNestedSubstrings}. */
    private static final String DELIMITER = ",";

    /** Character encoding used to decode downloaded pages. */
    private static final String PAGE_ENCODING = "UTF-8";

    /** URL of the page currently held in {@link #cachedPage}, or {@code null} if nothing is cached. */
    private static String cachedURLString;

    /** Content of the page last downloaded through the substring helpers. */
    private static String cachedPage;

    /**
     * Returns the text between the first {@code startTag} and the following
     * {@code endTag} on the page, authenticating with the given credentials.
     *
     * @param surl     URL of the page
     * @param startTag marker preceding the wanted text
     * @param endTag   marker following the wanted text
     * @param username user name offered if the server requests authentication, may be {@code null}
     * @param password password offered if the server requests authentication, may be {@code null}
     * @return the text between the markers, or {@code null} if they are not found
     * @throws IOException if the page cannot be downloaded
     */
    public static String getPageSubstring(String surl, String startTag, String endTag,
            String username, String password) throws IOException {
        return getPageSubstring(surl, startTag, endTag, null, username, password);
    }

    /**
     * Returns the text between the first {@code startTag} and the following
     * {@code endTag} on the page, sending the given cookie with the request.
     *
     * @param surl     URL of the page
     * @param startTag marker preceding the wanted text
     * @param endTag   marker following the wanted text
     * @param cookie   value of the {@code Cookie} request header, may be {@code null}
     * @return the text between the markers, or {@code null} if they are not found
     * @throws IOException if the page cannot be downloaded
     */
    public static String getPageSubstring(String surl, String startTag, String endTag,
            String cookie) throws IOException {
        // Delegate to the full overload; calling this same 4-argument
        // signature again would recurse until the stack overflows.
        return getPageSubstring(surl, startTag, endTag, cookie, null, null);
    }

    /**
     * Returns the text between the first {@code startTag} and the following
     * {@code endTag} on the page, without cookie or credentials.
     *
     * @param surl     URL of the page
     * @param startTag marker preceding the wanted text
     * @param endTag   marker following the wanted text
     * @return the text between the markers, or {@code null} if they are not found
     * @throws IOException if the page cannot be downloaded
     */
    public static String getPageSubstring(String surl, String startTag, String endTag)
            throws IOException {
        return getPageSubstring(surl, startTag, endTag, null, null, null);
    }

    /**
     * Same as the 8-argument {@code getPageNestedSubstring}, without cookie or credentials.
     *
     * @param surl          URL of the page
     * @param outerStartTag marker preceding the outer section
     * @param outerEndTag   marker following the outer section
     * @param innerStartTag marker preceding the wanted text inside the outer section
     * @param innerEndTag   marker following the wanted text inside the outer section
     * @return the first inner match, or {@code null} if the markers are not found
     * @throws IOException if the page cannot be downloaded
     */
    public static String getPageNestedSubstring(String surl, String outerStartTag,
            String outerEndTag, String innerStartTag, String innerEndTag) throws IOException {
        return getPageNestedSubstring(surl, outerStartTag, outerEndTag, innerStartTag,
                innerEndTag, null, null, null);
    }

    /**
     * Same as the 8-argument {@code getPageNestedSubstrings}, without cookie or credentials.
     *
     * @param surl          URL of the page
     * @param outerStartTag marker preceding the outer section
     * @param outerEndTag   marker following the outer section
     * @param innerStartTag marker preceding each wanted text inside the outer section
     * @param innerEndTag   marker following each wanted text inside the outer section
     * @return the trimmed inner matches joined with commas, or an empty string if there are none
     * @throws IOException if the page cannot be downloaded
     */
    public static String getPageNestedSubstrings(String surl, String outerStartTag,
            String outerEndTag, String innerStartTag, String innerEndTag) throws IOException {
        return getPageNestedSubstrings(surl, outerStartTag, outerEndTag, innerStartTag,
                innerEndTag, null, null, null);
    }

    /**
     * Finds the first section delimited by the outer markers and returns every
     * text enclosed by the inner markers within that section.
     *
     * @param surl          URL of the page
     * @param outerStartTag marker preceding the outer section
     * @param outerEndTag   marker following the outer section
     * @param innerStartTag marker preceding each wanted text inside the outer section
     * @param innerEndTag   marker following each wanted text inside the outer section
     * @param cookie        value of the {@code Cookie} request header, may be {@code null}
     * @param username      user name offered if the server requests authentication, may be {@code null}
     * @param password      password offered if the server requests authentication, may be {@code null}
     * @return the trimmed inner matches joined with commas, or an empty string if there are none
     * @throws IOException if the page cannot be downloaded
     */
    public static String getPageNestedSubstrings(String surl, String outerStartTag,
            String outerEndTag, String innerStartTag, String innerEndTag, String cookie,
            String username, String password) throws IOException {
        String page = fetchPageCached(surl, cookie, username, password);
        String outerSubstring = StringUtils.substringBetween(page, outerStartTag, outerEndTag);
        String[] innerSubstrings =
                StringUtils.substringsBetween(outerSubstring, innerStartTag, innerEndTag);
        if (innerSubstrings == null) {
            return "";
        }

        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < innerSubstrings.length; i++) {
            if (i > 0) {
                joined.append(DELIMITER);
            }
            joined.append(StringUtils.trim(innerSubstrings[i]));
        }
        return joined.toString();
    }

    /**
     * Removes every occurrence of the literal below from the string, then trims it.
     *
     * NOTE(review): the removed literal is reproduced as it appears in the
     * source; the method name suggests it was meant to be a non-breaking
     * space (or the text of an HTML nbsp entity) - confirm before relying on it.
     */
    private static String removeBlanksAndNbsp(String theString) {
        theString = StringUtils.remove(theString, " ");
        theString = StringUtils.trim(theString);
        return theString;
    }

    /**
     * Finds the first section delimited by the outer markers and returns the
     * first text enclosed by the inner markers within that section.
     *
     * @param surl          URL of the page
     * @param outerStartTag marker preceding the outer section
     * @param outerEndTag   marker following the outer section
     * @param innerStartTag marker preceding the wanted text inside the outer section
     * @param innerEndTag   marker following the wanted text inside the outer section
     * @param cookie        value of the {@code Cookie} request header, may be {@code null}
     * @param username      user name offered if the server requests authentication, may be {@code null}
     * @param password      password offered if the server requests authentication, may be {@code null}
     * @return the first inner match, or {@code null} if the markers are not found
     * @throws IOException if the page cannot be downloaded
     */
    public static String getPageNestedSubstring(String surl, String outerStartTag,
            String outerEndTag, String innerStartTag, String innerEndTag, String cookie,
            String username, String password) throws IOException {
        String page = fetchPageCached(surl, cookie, username, password);
        String outerSubstring = StringUtils.substringBetween(page, outerStartTag, outerEndTag);
        return StringUtils.substringBetween(outerSubstring, innerStartTag, innerEndTag);
    }

    /**
     * Returns the text between the first {@code startTag} and the following
     * {@code endTag} on the page.
     *
     * @param surl     URL of the page
     * @param startTag marker preceding the wanted text
     * @param endTag   marker following the wanted text
     * @param cookie   value of the {@code Cookie} request header, may be {@code null}
     * @param username user name offered if the server requests authentication, may be {@code null}
     * @param password password offered if the server requests authentication, may be {@code null}
     * @return the text between the markers, or {@code null} if they are not found
     * @throws IOException if the page cannot be downloaded
     */
    public static String getPageSubstring(String surl, String startTag, String endTag,
            String cookie, String username, String password) throws IOException {
        String page = fetchPageCached(surl, cookie, username, password);
        return StringUtils.substringBetween(page, startTag, endTag);
    }

    /**
     * Downloads the content behind {@code surl} and decodes it as UTF-8.
     * This method always downloads; it neither reads nor updates the page cache.
     *
     * @param surl     URL of the page
     * @param cookie   value of the {@code Cookie} request header, may be {@code null}
     * @param username user name offered if the server requests authentication, may be {@code null}
     * @param password password offered if the server requests authentication, may be {@code null}
     * @return the page content
     * @throws IOException if the URL is malformed or the page cannot be read
     */
    public static String readPageIntoString(String surl, String cookie, String username,
            String password) throws IOException {
        Authenticator.setDefault(new MyAuthenticator(username, password));
        URL url = new URL(surl);
        InputStream is = openStream(url, cookie);
        try {
            return IOUtils.toString(is, PAGE_ENCODING);
        } finally {
            IOUtils.closeQuietly(is);
        }
    }

    /**
     * Downloads the content behind {@code url} into {@code file}, using a
     * single request that carries the given cookie.
     *
     * @param url      URL to download
     * @param file     destination file
     * @param cookie   value of the {@code Cookie} request header, may be {@code null}
     * @param username user name offered if the server requests authentication, may be {@code null}
     * @param password password offered if the server requests authentication, may be {@code null}
     * @throws IOException if the URL cannot be read or the file cannot be written
     */
    public static void readURLIntoFile(URL url, File file, String cookie, String username,
            String password) throws IOException {
        Authenticator.setDefault(new MyAuthenticator(username, password));
        InputStream is = openStream(url, cookie);
        try {
            logger.info(url.toString());
            // Write the bytes of the connection opened above, so the cookie
            // applies to the download instead of being lost on a second request.
            FileOutputStream out = FileUtils.openOutputStream(file);
            try {
                IOUtils.copy(is, out);
                // Close explicitly so a failure while flushing is reported.
                out.close();
            } finally {
                IOUtils.closeQuietly(out);
            }
        } finally {
            IOUtils.closeQuietly(is);
        }
    }

    /**
     * Returns the cached page when {@code surl} equals the cached URL,
     * otherwise downloads the page and stores it as the new cache entry.
     * URLs are compared case-sensitively because path and query are
     * case-sensitive.
     */
    private static String fetchPageCached(String surl, String cookie, String username,
            String password) throws IOException {
        if (cachedURLString == null || !cachedURLString.equals(surl)) {
            String page = readPageIntoString(surl, cookie, username, password);
            cachedPage = page;
            cachedURLString = surl;
        }
        return cachedPage;
    }

    /** Opens a connection to {@code url}, adds the cookie header if one is given, and returns its stream. */
    private static InputStream openStream(URL url, String cookie) throws IOException {
        URLConnection conn = url.openConnection();
        // Set cookie if provided
        if (cookie != null) {
            conn.setRequestProperty("Cookie", cookie);
        }
        return conn.getInputStream();
    }

    /**
     * Authenticator that answers every authentication request with one fixed
     * username/password pair.
     */
    static class MyAuthenticator extends Authenticator {

        private final String user;
        private final String passw;

        /**
         * @param username user name to offer, may be {@code null}
         * @param password password to offer, may be {@code null}
         */
        public MyAuthenticator(String username, String password) {
            user = username;
            passw = password;
        }

        /**
         * @return the configured credentials, or {@code null} (no credentials
         *         available) when the username or the password is missing
         */
        @Override
        public PasswordAuthentication getPasswordAuthentication() {
            if (user == null || passw == null) {
                return null;
            }
            return new PasswordAuthentication(user, passw.toCharArray());
        }
    }
}