/*
 * GNU LESSER GENERAL PUBLIC LICENSE Copyright (C) 2006 The Lobo Project
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Contact info: lobochief@users.sourceforge.net
 */
/*
 * Created on Jun 12, 2005
 */
package com.nvarghese.beowulf.common.cobra.util;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLStreamHandler;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TimeZone;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * Static helpers for inspecting, resolving and normalizing {@link URL}s and
 * for reading cache/charset information from a {@link URLConnection}.
 */
public class Urls {

	private static final Logger logger = Logger.getLogger(Urls.class.getName());

	/**
	 * RFC 1123 date format pinned to GMT. {@link SimpleDateFormat} is not
	 * thread-safe; this class synchronizes on the instance before parsing.
	 */
	public static final DateFormat PATTERN_RFC1123 = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);

	/**
	 * Path rewrites applied before {@link URI#normalize()}, compiled once.
	 */
	private static final Map<Pattern, String> PATH_REPLACEMENTS = new HashMap<Pattern, String>(5);

	/**
	 * Handler attached to every URL produced by {@link #normalizeURL(URL)}.
	 * Its {@code openConnection} returns {@code null}, so those URLs are
	 * usable as identifiers only, not for opening connections directly.
	 */
	private static final URLStreamHandler NULL_CONNECTION_HANDLER = new URLStreamHandler() {

		@Override
		protected URLConnection openConnection(URL u) throws IOException {
			return null;
		}
	};

	static {
		DateFormat df = PATTERN_RFC1123;
		df.setTimeZone(TimeZone.getTimeZone("GMT"));

		/*
		 * Root directory traversal: rewrites /../../a/b/c.php to /a/b/c.php.
		 * Only leading slashes and whole "." / ".." segments are consumed, so
		 * a first segment that merely starts with a dot (for example
		 * "/.well-known") is left intact.
		 */
		PATH_REPLACEMENTS.put(Pattern.compile("^(?:/+|\\.{1,2}(?:/+|$))+"), "/");
	}

	private Urls() {
		super();
	}

	/**
	 * Whether the URL refers to a resource in the local file system. A
	 * {@code jar:} URL is local when the URL of its archive is local.
	 *
	 * @param url
	 *            the URL to inspect
	 * @return <code>true</code> for host-less {@code file:} URLs and for
	 *         {@code jar:} URLs wrapping one
	 */
	public static boolean isLocal(final java.net.URL url) {
		if (isLocalFile(url)) {
			return true;
		}
		String protocol = url.getProtocol();
		if (!"jar".equalsIgnoreCase(protocol)) {
			return false;
		}
		// The part before '!' is the URL of the archive itself.
		String path = url.getPath();
		int emIdx = path.lastIndexOf('!');
		String subUrlString = emIdx == -1 ? path : path.substring(0, emIdx);
		try {
			return isLocal(new URL(subUrlString));
		} catch (java.net.MalformedURLException mfu) {
			return false;
		}
	}

	/**
	 * Whether the URL is a file in the local file system.
	 *
	 * @param url
	 *            the URL to inspect
	 * @return <code>true</code> if the protocol is {@code file} and no host is
	 *         present
	 */
	public static boolean isLocalFile(final java.net.URL url) {
		String scheme = url.getProtocol();
		return "file".equalsIgnoreCase(scheme) && !hasHost(url);
	}

	/**
	 * Whether the URL carries a non-empty host component.
	 *
	 * @param url
	 *            the URL to inspect
	 * @return <code>true</code> if the host is neither <code>null</code> nor
	 *         empty
	 */
	public static boolean hasHost(final java.net.URL url) {
		String host = url.getHost();
		return host != null && !"".equals(host);
	}

	/**
	 * Creates an absolute URL in a manner equivalent to major browsers.
	 *
	 * @param baseUrl
	 *            the context URL, may be <code>null</code>
	 * @param relativeUrl
	 *            the spec to resolve against the base
	 * @return the resolved URL after path normalization
	 * @throws java.net.MalformedURLException
	 *             if the spec cannot be parsed as a URL
	 */
	public static URL createURL(final URL baseUrl, final String relativeUrl) throws java.net.MalformedURLException {
		return normalizeURL(new URL(baseUrl, relativeUrl));
	}

	/**
	 * Normalizes the path of a URL: leading traversal segments are collapsed
	 * to the root and the remainder is passed through
	 * {@link URI#normalize()} until it stops changing.
	 * <p>
	 * If the URL is not a valid URI the problem is logged and the URL is
	 * returned with its path untouched. Either way the returned URL carries a
	 * handler whose {@code openConnection} returns <code>null</code>.
	 *
	 * @param url
	 *            the URL to normalize
	 * @return a URL equal to the input except for its normalized path
	 * @throws MalformedURLException
	 *             if the URL cannot be re-parsed
	 */
	private static URL normalizeURL(final URL url) throws MalformedURLException {
		URL newURL = new URL(null, url.toString(), NULL_CONNECTION_HANDLER);
		try {
			// Reject URLs that are not valid URIs before touching the path.
			newURL.toURI();

			String relative = newURL.getPath();
			String prevPath = relative;
			boolean changed = true;
			while (changed) {
				if (relative != null && !"".equals(relative)) {
					for (Map.Entry<Pattern, String> replacement : PATH_REPLACEMENTS.entrySet()) {
						relative = replacement.getKey().matcher(relative).replaceAll(replacement.getValue());
					}
				}
				relative = new URI(relative).normalize().toString();
				if (prevPath.equals(relative)) {
					// Nothing left to normalize.
					changed = false;
				} else {
					prevPath = relative;
				}
			}

			if (url.toURI().isAbsolute()) {
				// Rebuild from components: a textual replace of the old path
				// would also rewrite identical text in the query or the "//"
				// that follows the scheme.
				newURL = new URL(null, withPath(url, relative), NULL_CONNECTION_HANDLER);
			}
		} catch (URISyntaxException e) {
			logger.log(Level.WARNING, "normalizeURL(): URI Syntax Error: [" + url + "].", e);
		}
		return newURL;
	}

	/**
	 * Builds the external form of the URL with its path swapped for the
	 * given one; protocol, authority, query and fragment are kept.
	 */
	private static String withPath(final URL url, final String path) {
		StringBuilder external = new StringBuilder();
		external.append(url.getProtocol()).append(':');
		String authority = url.getAuthority();
		if (authority != null && authority.length() > 0) {
			external.append("//").append(authority);
		}
		external.append(path);
		String query = url.getQuery();
		if (query != null) {
			external.append('?').append(query);
		}
		String ref = url.getRef();
		if (ref != null) {
			external.append('#').append(ref);
		}
		return external.toString();
	}

	/**
	 * Returns the time when the document should be considered expired. The time
	 * will be zero if the document always needs to be revalidated. It will be
	 * <code>null</code> if no expiration time is specified.
	 *
	 * @param connection
	 *            connection whose <code>Cache-Control</code> and
	 *            <code>Expires</code> headers are read
	 * @param baseTime
	 *            time in milliseconds that relative lifetimes are added to
	 * @return expiration time in milliseconds, zero, or <code>null</code>
	 */
	public static Long getExpiration(final URLConnection connection, final long baseTime) {
		String cacheControl = connection.getHeaderField("Cache-Control");
		if (cacheControl != null) {
			StringTokenizer tok = new StringTokenizer(cacheControl, ",");
			while (tok.hasMoreTokens()) {
				// Fixed locale: the default one may not map 'I' to 'i'.
				String token = tok.nextToken().trim().toLowerCase(Locale.US);
				if ("must-revalidate".equals(token)) {
					return Long.valueOf(0);
				} else if (token.startsWith("max-age")) {
					int eqIdx = token.indexOf('=');
					if (eqIdx != -1) {
						String value = token.substring(eqIdx + 1).trim();
						try {
							int seconds = Integer.parseInt(value);
							// 1000L keeps the product in long arithmetic; as an
							// int it overflows past roughly 24.8 days.
							return Long.valueOf(baseTime + seconds * 1000L);
						} catch (NumberFormatException nfe) {
							logger.warning("getExpiration(): Bad Cache-Control max-age value: " + value);
							// ignore and keep scanning the remaining directives
						}
					}
				}
			}
		}
		String expires = connection.getHeaderField("Expires");
		if (expires != null) {
			try {
				synchronized (PATTERN_RFC1123) {
					Date expDate = PATTERN_RFC1123.parse(expires);
					return Long.valueOf(expDate.getTime());
				}
			} catch (java.text.ParseException pe) {
				// Not a date: fall back to reading the value as seconds.
				try {
					int seconds = Integer.parseInt(expires);
					return Long.valueOf(baseTime + seconds * 1000L);
				} catch (NumberFormatException nfe) {
					logger.warning("getExpiration(): Bad Expires header value: " + expires);
				}
			}
		}
		return null;
	}

	/**
	 * Collects the header fields of a connection as name/value pairs, in the
	 * order the connection reports them. Fields without a key are skipped.
	 *
	 * @param connection
	 *            the connection to read headers from
	 * @return list of {@code NameValuePair} entries
	 */
	public static List getHeaders(final URLConnection connection) {
		// Random access index recommended.
		List<NameValuePair> headers = new ArrayList<NameValuePair>();
		for (int n = 0;; n++) {
			String value = connection.getHeaderField(n);
			if (value == null) {
				break;
			}
			// Key may be null for n == 0.
			String key = connection.getHeaderFieldKey(n);
			if (key != null) {
				headers.add(new NameValuePair(key, value));
			}
		}
		return headers;
	}

	/**
	 * Resolves user-style input to a URL. The spec is first resolved against
	 * the base; if that fails the spec is interpreted as an absolute file
	 * path, a host name, or a drive-letter path.
	 *
	 * @param baseURL
	 *            context URL, may be <code>null</code>; it is ignored when the
	 *            spec names a different protocol
	 * @param spec
	 *            the text to interpret
	 * @return the guessed URL
	 * @throws MalformedURLException
	 *             if no interpretation yields a URL, or the result has a host
	 *             and contains blanks
	 */
	public static URL guessURL(URL baseURL, String spec) throws MalformedURLException {
		URL finalURL;
		try {
			if (baseURL != null) {
				int colonIdx = spec.indexOf(':');
				String newProtocol = colonIdx == -1 ? null : spec.substring(0, colonIdx);
				// Only a syntactically valid scheme counts as a protocol; a
				// colon inside a relative path or query must not drop the base.
				if (newProtocol != null && isValidScheme(newProtocol)
						&& !newProtocol.equalsIgnoreCase(baseURL.getProtocol())) {
					baseURL = null;
				}
			}
			finalURL = createURL(baseURL, spec);
		} catch (MalformedURLException mfu) {
			spec = spec.trim();
			int idx = spec.indexOf(':');
			if (idx == -1) {
				int slashIdx = spec.indexOf('/');
				if (slashIdx == 0) {
					// A file, absolute
					finalURL = new URL("file:" + spec);
				} else if (slashIdx == -1) {
					// No slash, no colon, must be host.
					finalURL = new URL(baseURL, "http://" + spec);
				} else {
					String possibleHost = spec.substring(0, slashIdx).toLowerCase(Locale.US);
					if (Domains.isLikelyHostName(possibleHost)) {
						finalURL = new URL(baseURL, "http://" + spec);
					} else {
						finalURL = new URL(baseURL, "file:" + spec);
					}
				}
			} else if (idx == 1) {
				// Likely a drive
				finalURL = new URL(baseURL, "file:" + spec);
			} else {
				throw mfu;
			}
		}
		if (!"".equals(finalURL.getHost()) && finalURL.toExternalForm().indexOf(' ') != -1) {
			throw new MalformedURLException("There are blanks in the URL: " + finalURL.toExternalForm());
		}
		return finalURL;
	}

	/**
	 * Whether the text has the shape of a URI scheme: an ASCII letter followed
	 * by ASCII letters, digits, '+', '-' or '.'.
	 */
	private static boolean isValidScheme(final String candidate) {
		int length = candidate.length();
		if (length == 0 || !isAsciiLetter(candidate.charAt(0))) {
			return false;
		}
		for (int i = 1; i < length; i++) {
			char c = candidate.charAt(i);
			boolean digit = c >= '0' && c <= '9';
			if (!isAsciiLetter(c) && !digit && c != '+' && c != '-' && c != '.') {
				return false;
			}
		}
		return true;
	}

	/** Whether the character is in a-z or A-Z. */
	private static boolean isAsciiLetter(final char c) {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
	}

	/**
	 * Same as {@link #guessURL(URL, String)} without a base URL.
	 *
	 * @param spec
	 *            the text to interpret
	 * @return the guessed URL
	 * @throws MalformedURLException
	 *             if no interpretation yields a URL
	 */
	public static URL guessURL(final String spec) throws MalformedURLException {
		return guessURL(null, spec);
	}

	/**
	 * Extracts the <code>charset</code> parameter from the content type of a
	 * connection, falling back to a default when it is absent.
	 *
	 * @param connection
	 *            the connection to inspect
	 * @return the declared charset, or the default for the connection's URL
	 */
	public static String getCharset(final URLConnection connection) {
		String contentType = connection.getContentType();
		if (contentType == null) {
			return getDefaultCharset(connection);
		}
		StringTokenizer tok = new StringTokenizer(contentType, ";");
		if (tok.hasMoreTokens()) {
			// The first token is the media type itself; parameters follow.
			tok.nextToken();
			while (tok.hasMoreTokens()) {
				String assignment = tok.nextToken().trim();
				int eqIdx = assignment.indexOf('=');
				if (eqIdx != -1) {
					String varName = assignment.substring(0, eqIdx).trim();
					if ("charset".equalsIgnoreCase(varName)) {
						String varValue = assignment.substring(eqIdx + 1);
						return Strings.unquote(varValue.trim());
					}
				}
			}
		}
		return getDefaultCharset(connection);
	}

	/**
	 * Default charset for a connection: the <code>file.encoding</code> system
	 * property for local files, <code>ISO-8859-1</code> otherwise or when the
	 * property is unset.
	 */
	private static String getDefaultCharset(final URLConnection connection) {
		URL url = connection.getURL();
		if (Urls.isLocalFile(url)) {
			String charset = System.getProperty("file.encoding");
			return charset == null ? "ISO-8859-1" : charset;
		}
		return "ISO-8859-1";
	}

	/**
	 * Renders the URL without its fragment (ref).
	 *
	 * @param url
	 *            the URL to render
	 * @return protocol, optional user info, host and port, then the file part
	 */
	public static String getNoRefForm(URL url) {
		String host = url.getHost();
		int port = url.getPort();
		String portText = port == -1 ? "" : ":" + port;
		String userInfo = url.getUserInfo();
		String userInfoText = userInfo == null || userInfo.length() == 0 ? "" : userInfo + "@";
		String hostPort = host == null || host.length() == 0 ? "" : "//" + userInfoText + host + portText;
		return url.getProtocol() + ":" + hostPort + url.getFile();
	}

	/**
	 * Comparison that does not consider Ref.
	 *
	 * @param url1
	 *            first URL
	 * @param url2
	 *            second URL
	 * @return <code>true</code> if host, protocol, port, file and user info
	 *         all match
	 */
	public static boolean sameNoRefURL(final URL url1, final URL url2) {
		// NOTE(review): Objects is not imported, so it presumably resolves to
		// a helper class in this package rather than java.util.Objects.
		return Objects.equals(url1.getHost(), url2.getHost()) && Objects.equals(url1.getProtocol(), url2.getProtocol())
				&& url1.getPort() == url2.getPort() && Objects.equals(url1.getFile(), url2.getFile())
				&& Objects.equals(url1.getUserInfo(), url2.getUserInfo());
	}
}