/* * aitools utilities * Copyright (C) 2006 Noel Bush * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ package org.aitools.util.resource; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.net.URLConnection; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.aitools.util.runtime.DeveloperError; import org.aitools.util.runtime.UserError; import org.apache.log4j.Logger; /** * <code>URLTools</code> contains helper methods for dealing with URLs. * * @author <a href="mailto:noel@aitools.org">Noel Bush</a> */ public class URLTools { /** A slash. */ private static final String SLASH = "/"; /** The string ":/". */ private static final String COLON_SLASH = ":/"; /** A dot (period). */ private static final String DOT = "."; /** The empty string. */ private static final String EMPTY_STRING = ""; private static final Logger logger = Logger.getLogger("programd"); /** * Indicates whether the given URLs are effectively equal, in terms of "effectively" resolving to the same thing. * Probably there is a more standard way to do this. * * @param path1 * @param path2 * @return whether the given URLs are effectively equal */ public static boolean areEffectivelyEqual(URL path1, URL path2) { if (path1 == null || path2 == null) { return false; } if (!(path1.getProtocol() == null && path2.getProtocol() == null || !path1.getProtocol() .equals(path2.getProtocol()))) { return false; } if (!(path1.getAuthority() == null && path2.getAuthority() == null || path1.getAuthority().equals( path2.getAuthority()))) { return false; } if (path1.getPort() != path2.getPort()) { return false; } if (!(path1.getHost() == null && path2.getHost() == null || !path1.getHost().equals(path2.getHost()))) { return false; } if (!(path1.getQuery() == null && path2.getQuery() == null || !path1.getQuery().equals(path2.getQuery()))) { return false; } if (!(path1.getRef() == null && path2.getRef() == null || !path1.getRef().equals(path2.getRef()))) { return false; } if (!(path1.getUserInfo() == null && path2.getUserInfo() == null || !path1.getUserInfo() .equals(path2.getUserInfo()))) { return false; } return pathsAreEquivalent(path1.getPath(), path2.getPath()); } /** * Same as {@link #contextualize(URL, URL)}, except both <code>context</code> and <code>subject</code> are Strings. * * @param context * @param subject * @return the result of "contextualizing" the given <code>subject</code> in the <code>context</code> */ public static URL contextualize(String context, String subject) { try { return contextualize(createValidURL(context, false), subject); } catch (FileNotFoundException e) { throw new DeveloperError("Given subject cannot be contextualized in given context.", e); } } /** * Same as {@link #contextualize(URL, URL)}, except the <code>subject</code> is a String which is supposed to * <i>not</i> be absolute (a quick check is made of this, and if the <code>subject</code> does look absolute, it is * made into a URL and sent to {@link #contextualize(URL, URL)}). * * @param context * @param subject * @return the result of "contextualizing" the given <code>subject</code> in the <code>context</code> */ public static URL contextualize(URL context, String subject) { // Avoid the most obvious problem... String _subject = escape(subject.replace(File.separatorChar, '/')); URL _context = null; try { _context = new URL(escape(context.toString().replace(File.separatorChar, '/'))); } catch (MalformedURLException e) { // Do nothing, but we may fail. logger.warn(String.format("Could not escape context URL \"%s\".", _context)); } if (_context == null) { throw new DeveloperError(String.format("Escaped context became null: \"%s\".", context)); } if (_context.toString().equals(_subject)) { return _context; } // See if the subject seems to specify a URL, and if so, send it to the other method. if (_subject.matches("^[a-z]+:/.*")) { try { return contextualize(_context, new URL(_subject)); } catch (MalformedURLException e) { throw new DeveloperError(String.format("Subject URL is malformed: \"%s\".", _subject), e); } } if (probablyIsNotFile(_context)) { URI resolved = null; try { String contextString = _context.toString(); int colon = contextString.indexOf(COLON_SLASH); if (colon > -1) { try { resolved = new URI(_context.getProtocol() + ':' + new URI(contextString.substring(colon + 1) + SLASH).resolve(_subject).toString()); } catch (IllegalArgumentException e) { throw new UserError(String.format("Could not resolve \"%s\" against \"%s\".", _subject, _context), e); } } else { resolved = _context.toURI().resolve(_subject); } } catch (URISyntaxException e) { throw new DeveloperError(String.format("Context URL is malformed. (\"%s\")", _context), e); } if (resolved.isAbsolute()) { try { return resolved.toURL(); } catch (MalformedURLException e) { throw new DeveloperError(String.format("URI cannot be converted to URL (\"%s\")", resolved.toString()), e); } } // otherwise... throw new DeveloperError(String.format("URI is not absolute (\"%s\")", resolved.toString()), new IllegalArgumentException()); } // If the context *does* specify a file, then we need to remove the file (get the parent) first. URL parent = getParent(_context); if (!parent.getFile().equals(_context.getFile())) { return contextualize(parent, _subject); } // otherwise... try { return new URL(_context.getProtocol(), _context.getHost(), _context.getPort(), _subject); } catch (MalformedURLException e) { throw new DeveloperError("Given subject cannot be contextualized in given context.", e); } } /** * <p> * Tries to put the <code>subject</code> in the "context" of the <code>context</code>. If the * <code>context</code> URL does not appear to specify a file, this will essentially be the equivalent of * {@link java.net.URI#resolve(URI) URI.resolve}; if a file <i>is</i> specified by <code>context</code>, and if * <code>subject</code> is relative, then this will replace the file component of <code>context</code> with * <code>subject</code>. * </p> * <p> * If <code>subject</code> is not relative, this will throw a {@link java.net.MalformedURLException * MalformedURLException} is thrown. * </p> * * @param context * @param subject * @return the result of "contextualizing" the given <code>subject</code> in the <code>context</code> */ public static URL contextualize(URL context, URL subject) { if (context.equals(subject)) { return subject; } if (probablyIsNotFile(context)) { // Transform the subject into a URI for manipulation. URI subjectURI = null; try { subjectURI = subject.toURI(); } catch (URISyntaxException e) { throw new DeveloperError(String.format("Subject URL is malformed: \"%s\".", subject), e); } // If the subject has "file" scheme, try to make sure it is absolute (in a file path sense). if (subjectURI.getScheme().equals(Filesystem.FILE)) { String originalPath = subjectURI.getPath(); if (originalPath != null) { String path; try { path = Filesystem.getBestFile(originalPath).toURI().toURL().getPath(); } catch (MalformedURLException e) { throw new DeveloperError(String.format("Error getting URL from file \"%s\".", originalPath), e); } if (!pathsAreEquivalent(path, originalPath)) { try { subjectURI = new URI(Filesystem.FILE, subjectURI.getAuthority(), path, subjectURI.getQuery(), subjectURI.getFragment()); } catch (URISyntaxException e) { throw new DeveloperError(String.format("Error resolving file URI \"%s\".", subjectURI), e); } } } } // If the subject is absolute (in a URI sense), if (subjectURI.isAbsolute()) { // then we just return it. try { return subjectURI.toURL(); } catch (MalformedURLException e) { throw new DeveloperError("Subject URL is malformed.", e); } } // otherwise, we try resolving it against the context. try { return context.toURI().resolve(subjectURI).toURL(); } catch (URISyntaxException e) { throw new DeveloperError("Context URL is malformed.", e); } catch (MalformedURLException e) { throw new DeveloperError("Given subject cannot be contextualized in given context.", e); } } // If the context *does* specify a file, then we need to remove the file (get the parent) first. URL parent = getParent(context); if (!parent.getFile().equals(context.getFile())) { return contextualize(parent, subject); } // otherwise... try { return new URL(context.getProtocol(), context.getHost(), context.getPort(), subject.getPath()); } catch (MalformedURLException e) { throw new DeveloperError("Given subject cannot be contextualized in given context.", e); } } /** * Attempts to create the given <code>path</code> into a valid URL, using a few heuristics. Tries to validate the * given path (if it is a file). * * @param path * @return a valid URL, if possible * @throws FileNotFoundException */ public static URL createValidURL(String path) throws FileNotFoundException { return createValidURL(path, null, true); } /** * Attempts to create the given <code>path</code> into a valid URL, using a few heuristics. * * @param path * @param tryToValidate whether the method should try to validate the existence of the path * @return a valid URL, if possible * @throws FileNotFoundException */ public static URL createValidURL(String path, boolean tryToValidate) throws FileNotFoundException { return createValidURL(path, null, tryToValidate); } /** * Attempts to create the given <code>path</code> into a valid URL, using a few heuristics. Tries to validate the * given path (if it is a file). * * @param path * @param context the context in which to resolve relative URLs (may be null) * @return a valid URL, if possible * @throws FileNotFoundException */ public static URL createValidURL(String path, URL context) throws FileNotFoundException { return createValidURL(path, context, true); } /** * Attempts to create the given <code>path</code> into a valid URL, using a few heuristics. * * @param path * @param context the context in which to resolve relative URLs (may be null) * @param tryToValidate whether the method should try to validate the existence of the path * @return a valid URL, if possible * @throws FileNotFoundException */ public static URL createValidURL(String path, URL context, boolean tryToValidate) throws FileNotFoundException { URL url = null; if (path == null) { throw new NullPointerException("path may not be null for createValidURL()."); } // See if this already appears to be a URL (over-simple heuristic). if (path.indexOf(COLON_SLASH) > 0) { try { url = new URL(path); } catch (MalformedURLException e) { throw new DeveloperError(String.format("Cannot convert to URL: \"%s\"", path), e); } } // If it is not a URL by itself, try contextualizing it. else if (context != null) { // Otherwise... url = URLTools.contextualize(context, path); } // But if there's no context, try treating it as a file. else { File file = new File(path); try { url = file.toURI().toURL(); } catch (MalformedURLException e) { throw new DeveloperError(String.format("Malformed URL: \"%s\"", path), e); } } // Now try to validate, if requested. if (tryToValidate) { if (seemsToExist(url)) { return url; } throw new FileNotFoundException(String.format("Could not find \"%s\"", url)); } // otherwise return url; } /** * URL-decodes the given string using UTF-8. * * @param value * @return the encoded string */ public static String decodeUTF8(String value) { try { return URLDecoder.decode(value, "utf-8"); } catch (UnsupportedEncodingException e) { assert false : "This platform does not support UTF-8!"; } return ""; } /** * URL-encodes the given string using UTF-8. * * @param value * @return the encoded string */ public static String encodeUTF8(String value) { try { return URLEncoder.encode(value, "utf-8"); } catch (UnsupportedEncodingException e) { assert false : "This platform does not support UTF-8!"; } return ""; } /** * Does very minimal URL escaping -- just enough to avoid complaints from the URI & URL constructors (maybe). * * @param url the URL to escape * @return the escaped URL */ public static String escape(String url) { return url.replace(" ", "%20"); } /** * Returns the common parent of all the given URLs. If they don't have one, returns null. * * @param urls * @return the common parent */ public static URL getCommonParent(URL... urls) { return getCommonParent(null, null, urls); } /** * Returns the common parent of all the given URLs. If they don't have one, returns the given fallback. * * @param fallback * @param urls * @return the common parent */ public static URL getCommonParent(URL fallback, URL... urls) { return getCommonParent(fallback, null, urls); } private static URL getCommonParent(URL fallback, URL candidate, URL... urls) { int length = urls.length; if (length == 0) { return fallback; } if (length == 1) { return getParent(urls[0]); } URL url0 = urls[0]; URL url1 = urls[1]; URL parent1 = url1; do { URL parent0 = url0; parent1 = getParent(parent1); do { parent0 = getParent(parent0); if (parent0.equals(parent1)) { if (candidate == null || candidate.equals(parent0)) { if (length == 2) { return parent0; } return getCommonParent(fallback, parent0, Arrays.asList(urls).subList(1, length).toArray(new URL[] {})); } } } while (!parent0.equals(getParent(parent0))); } while (!parent1.equals(getParent(parent1))); return fallback; } /** * Tries to get the last modified timestamp for the path. * * @param path the URL to check * @return the apparent last modified timestamp, or 0 if cannot be determined */ public static long getLastModified(URL path) { URLConnection connection = null; try { connection = path.openConnection(); } catch (IOException e) { return 0; } if (connection == null) { return 0; } return connection.getLastModified(); } /** * Returns whatever part of the given path follows its final slash, unless that slash is the last character, in which * case the portion returned is that which follows the second-to-last slash. If the path does not contain a slash, * then it is returned unaltered. * * @param path * @return the last component of the path */ public static URL getLastPathComponent(URL path) { if (path.getPath().indexOf('/') == -1) { return path; } try { return new URL(path.getProtocol(), path.getHost(), path.getPath().replaceAll("^.*/(.+)/?$", "$1") + (path.getQuery() != null ? "?" + path.getQuery() : "")); } catch (MalformedURLException e) { throw new RuntimeException(String.format("Could not get the last path component of \"%s\".", path)); } } /** * @param url some URL * * @return the "parent" of the given URL, if possible */ public static URL getParent(URL url) { String file = url.getFile(); String parent = file.replaceAll("^(.+)/[^/]+/?$", "$1"); try { return new URL(url.getProtocol(), url.getHost(), url.getPort(), parent); } catch (MalformedURLException e) { throw new RuntimeException(String.format("Could not determine parent of \"%s\".", url)); } } /** * Take a path spec that may, or may not, use glob-style wildcards to indicate multiple files, and returns a list of * URLs pointing to those files. * * @param pathspec the path specification that may point to one or many files * @param context * @return a list of URLs */ public static List<URL> getURLs(String pathspec, URL context) { ArrayList<URL> result = new ArrayList<URL>(); if (pathspec.indexOf('*') != -1 || pathspec.indexOf('?') != -1) { List<File> files; try { files = Filesystem.glob(pathspec); } catch (FileNotFoundException e) { throw new UserError(String.format("File not found when globbing \"%s\".", pathspec), e); } int fileCount = files.size(); for (int index = 0; index < fileCount; index++) { try { result.add(createValidURL(files.get(index).getAbsolutePath(), context)); } catch (FileNotFoundException e) { throw new UserError(String.format("Could not find file \"%s\" from \"%s\".", files.get(index), pathspec), e); } } } else { try { result.add(createValidURL(pathspec, context)); } catch (FileNotFoundException e) { throw new UserError(String.format("Could not find file \"%s\".", pathspec), e); } } return result; } /** * Using some rather uncomfortable heuristics, judges whether two given paths are (probably) equivalent, by ignoring * certain differences like platform-specific path separators vs. the URI/URL standard slash, and the use of a Windows * drive letter preceded, or not, by a slash. Yuck. * * @param path1 * @param path2 * @return whether or not they are (probably) equivalent */ public static boolean pathsAreEquivalent(String path1, String path2) { // Get out as fast as possible. if (path1.equals(path2)) { return true; } // Now try fixing path separators and check again. String _path1 = path1.replace(File.separatorChar, '/'); String _path2 = path2.replace(File.separatorChar, '/'); if (_path1.equals(_path2)) { return true; } // Try removing trailing slashes. _path1 = _path1.replaceAll("^(.+)/+$", "$1"); _path2 = _path2.replaceAll("^(.+)/+$", "$1"); if (_path1.equals(_path2)) { return true; } // Now try the yucky drive letter check. _path1 = _path1.replaceAll("^/(\\p{Upper}:)", "$1"); _path2 = _path2.replaceAll("^/(\\p{Upper}:)", "$1"); // This is the last check, so return whether or not these match. return _path1.equals(_path2); } /** * Uses a couple of simple heuristics to guess whether a given URL probably is not pointing at a file. * * NOTE: This is <em>way</em> imperfect! :-) * * @param file the path to check * @return whether it probably is not a file */ private static boolean probablyIsNotFile(String file) { /* * If the part of the context URL after the last "/" does not contain a ".", this is good enough (for our purposes) * to regard this as "not specifying a file", even though, of course, it could actually point to one. * * We first test the simpler cases that contextFile is "" or "/", or ends with "/". */ int slash = file.lastIndexOf(SLASH); return slash == -1 || file.equals(EMPTY_STRING) || file.equals(SLASH) || file.endsWith(SLASH) || slash < file.length() - 1 && !file.substring(slash).contains(DOT); } /** * Uses a couple of simple heuristics to guess whether a given URL probably is not pointing at a file. * * NOTE: This is <em>way</em> imperfect! :-) * * @param url the URL to check * @return whether it probably is not a file */ private static boolean probablyIsNotFile(URL url) { return probablyIsNotFile(url.getFile()); } /** * Attempts to "relativize" the given subject to the given context, first trying to create a URL from the subject. * * @param context * @param subject * @return the subject, relative to the context */ public static URL relativize(URL context, String subject) { try { return relativize(context, new URL(subject)); } catch (MalformedURLException e) { throw new DeveloperError(String.format("Cannot create URL from subject \"%s\".", subject), e); } } /** * A smarter version of {@link URI#relativize(URI)}. In addition to performing the relativation (and catching * exceptions when converting from URL to URI and back again), this will try to replace common "parent" portions of * the resulting path with "../" structures. * * @param context * @param subject * @return the subject relative to the context */ public static URL relativize(URL context, URL subject) { // Quick shortcut if they're equal if (areEffectivelyEqual(context, subject)) { if (probablyIsNotFile(context)) { try { return new URL(context.getProtocol() + ":."); } catch (MalformedURLException e) { throw new DeveloperError("Couldn't construct a \".\" URL.", e); } } return context; } /* * // Quick shortcut if they're files in the same directory. if (getParent(context).equals(getParent(subject))) { * return (getLastPathComponent(subject)); } */ URI contextURI = null; try { contextURI = context.toURI(); } catch (URISyntaxException e) { throw new DeveloperError(String.format("Cannot create URI from context URL \"%s\".", context), e); } URI relativizedURI = null; try { relativizedURI = contextURI.relativize(subject.toURI()); } catch (URISyntaxException e) { throw new DeveloperError(String.format("Cannot create URI from subject URL \"%s\".", subject), e); } URL relativizedURL = null; try { /* * Do this instead of URI.toURL(), since the latter will fail if the URI is not absolute! */ String path = relativizedURI.getPath(); if ("".equals(path)) { path = "."; } relativizedURL = new URL(context.getProtocol() + ':' + path); } catch (MalformedURLException e) { throw new DeveloperError(String.format("Cannot create URL from relativization result \"%s\".", relativizedURI), e); } String relativizedString = relativizedURL.toString(); URL parent = getParent(context); int levelUp = 0; if (probablyIsNotFile(subject)) { levelUp = 1; } do { relativizedString = relativizedString.replace(parent.toString(), upLevel(levelUp)); parent = getParent(parent); levelUp++; } while (!parent.equals(getParent(parent))); try { return new URL(relativizedString); } catch (MalformedURLException e) { try { return new URL(String.format("%s:%s", context.getProtocol(), relativizedString)); } catch (MalformedURLException ee) { throw new DeveloperError(String.format("Cannot create URL from relativization result \"%s\".", relativizedString), ee); } } } /** * Produces a string version of {@link #relativize(URL, URL)}. * * @param context * @param subject * @return a string version of {@link #relativize(URL, URL)} */ public static String relativizeToString(URL context, URL subject) { String result = relativize(context, subject).getPath(); if ("".equals(result)) { result = "."; } if (result.endsWith("/")) { return result.substring(0, result.length() - 1); } return result; } /** * Tests whether a resource seems to exist at the given URL * * @param url the URL to test * @return whether a resource seems to exist at the URL */ public static boolean seemsToExist(URL url) { if (url.getProtocol().equals(Filesystem.FILE)) { File file = new File(unescape(url.getFile())); return file.exists(); } InputStream test = null; try { test = url.openStream(); } catch (IOException e) { return false; } if (test != null) { try { test.close(); } catch (IOException e) { return false; } return true; } return false; } /** * Reverses {@link #escape(String)}. * * @param url the URL to unescape * @return the unescaped URL */ public static String unescape(String url) { return url.replace("%20", " "); } /** * A convenience method that calls toString() on the given URL, then returns the result of {@link #unescape(String)}. * * @param url * @return the unescaped URL */ public static String unescape(URL url) { return unescape(url.toString()); } private static String upLevel(int count) { if (count == 0) { return "."; } StringBuilder result = new StringBuilder(count * 3); for (int index = 0; index < count; index++) { if (result.length() > 0) { result.append('/'); } result.append(".."); } return result.toString(); } private URLTools() { // Nobody can instantiate this. } }