/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ package org.esigate.util; import java.net.URI; import java.net.URISyntaxException; import java.util.List; import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpHost; import org.apache.http.NameValuePair; import org.apache.http.client.utils.URIUtils; import org.apache.http.client.utils.URLEncodedUtils; import org.esigate.Parameters; /** * Utility class to manipulate {@link URI} represented as a {@link String} or as a {@link URI}. * * @author Francois-Xavier Bonnet * */ public final class UriUtils { private static final int CONVERSION_TABLE_SIZE = 128; private static final String RESERVED_CHARACTERS = ":/?&=#%"; private static final String[] CONVERSION_TABLE = new String[CONVERSION_TABLE_SIZE]; static { for (int i = 0; i < CONVERSION_TABLE_SIZE; i++) { char character = (char) i; String charString = Character.toString(character); if (RESERVED_CHARACTERS.indexOf(i) == -1) { charString = encode(charString); } CONVERSION_TABLE[i] = charString; } } private UriUtils() { // Do not instantiate } private static String encode(char character) { return Character.toString(character); } private static String encode(String charString) { try { return new URI(null, null, null, -1, charString, null, null).toASCIIString(); } catch (URISyntaxException e) { throw new InvalidUriException(e); } } /** * Fixes common mistakes in URI by replacing all illegal characters by their encoded value. * * @param uri * the URI to fix * @return the fixed URI */ public static String encodeIllegalCharacters(String uri) { StringBuilder result = new StringBuilder(); int length = uri.length(); for (int i = 0; i < length; i++) { char character = uri.charAt(i); if (character == '%') { // Encode invalid escape sequences if (i >= length - 2 || !isHex(uri.charAt(i + 1)) || !isHex(uri.charAt(i + 2))) { result.append("%25"); } else { result.append('%'); } } else { int j = (int) character; if (j >= CONVERSION_TABLE_SIZE || j < 0) { result.append(encode(character)); } else { result.append(CONVERSION_TABLE[j]); } } } return result.toString(); } private static boolean isHex(char character) { return character == '0' || character == '1' || character == '2' || character == '3' || character == '4' || character == '5' || character == '6' || character == '7' || character == '8' || character == '9' || character == 'a' || character == 'b' || character == 'c' || character == 'd' || character == 'e' || character == 'f' || character == 'A' || character == 'B' || character == 'C' || character == 'D' || character == 'E' || character == 'F'; } private static final class InvalidUriException extends RuntimeException { private static final long serialVersionUID = 7013885420191182730L; private InvalidUriException(URISyntaxException cause) { super(cause); } } /** * Creates an URI as a String. * * @param scheme * the scheme * @param host * the host * @param port * the port * @param path * the path * @param query * the query * @param fragment * the fragment * @return the uri */ public static String createURI(final String scheme, final String host, int port, final String path, final String query, final String fragment) { StringBuilder buffer = new StringBuilder(Parameters.SMALL_BUFFER_SIZE); if (host != null) { if (scheme != null) { buffer.append(scheme); buffer.append("://"); } buffer.append(host); if (port > 0) { buffer.append(':'); buffer.append(port); } } if (path == null || !path.startsWith("/")) { buffer.append('/'); } if (path != null) { buffer.append(path); } if (query != null) { buffer.append('?'); buffer.append(query); } if (fragment != null) { buffer.append('#'); buffer.append(fragment); } return buffer.toString(); } /** * Extracts the host name from a URI. * * @param uri * the uri * @return the host name */ public static String extractHostName(final String uri) { return extractHost(uri).getHostName(); } /** * Extracts the {@link HttpHost} from a URI. * * @param uri * the URI * @return the {@link HttpHost} */ public static HttpHost extractHost(final String uri) { return URIUtils.extractHost(createURI(uri)); } /** * Extracts the {@link HttpHost} from a URI. * * @param uri * the {@link URI} * @return the {@link HttpHost} */ public static HttpHost extractHost(final URI uri) { return URIUtils.extractHost(uri); } /** * Creates an {@link URI} after escaping some special characters in order to tolerate some incorrect URI types. If * the uri contains a server name but no path, the path is set to "/" as a browser would do. * * @param uri * the URI as a {@link String} * @return the URI as a {@link URI} object */ public static URI createURI(String uri) { uri = encodeIllegalCharacters(uri); URI result = URI.create(uri); if (result.getHost() != null && StringUtils.isEmpty(result.getPath())) { result = URI.create(createURI(result.getScheme(), result.getHost(), result.getPort(), "/", result.getRawQuery(), result.getRawFragment())); } return result; } /** * Replaces the scheme, host and port in a URI. * * @param uri * the URI * @param targetHost * the target host * @return the rewritten URI */ public static String rewriteURI(String uri, HttpHost targetHost) { try { return URIUtils.rewriteURI(createURI(uri), targetHost).toString(); } catch (URISyntaxException e) { throw new InvalidUriException(e); } } /** * Removes the jsessionid that may have been added to a URI on a java application server. * * @param sessionId * the value of the sessionId that can also be found in a JSESSIONID cookie * @param page * the html code of the page * @return the fixed html */ public static String removeSessionId(String sessionId, String page) { String regexp = ";?jsessionid=" + Pattern.quote(sessionId); return page.replaceAll(regexp, ""); } /** * Extracts the scheme of a URI. * * @param uri * the URI * @return the scheme */ public static String extractScheme(String uri) { return extractHost(uri).getSchemeName(); } /** * Returns the raw query component of this URI. The query component of a URI, if defined, only contains legal URI * characters. * * @param uri * the URI * @return The raw query component of this URI, or null if the query is undefined */ public static String getRawQuery(String uri) { return createURI(uri).getRawQuery(); } /** * Returns the decoded path component of this URI. The string returned by this method is equal to that returned by * the getRawPath method except that all sequences of escaped octets are decoded. * * @param uri * the uri to retrieve the path from * @return The decoded path component of this URI, or null if the path is undefined */ public static String getPath(String uri) { return createURI(uri).getPath(); } /** * Returns a list of {@link NameValuePair NameValuePairs} as built from the URI's query portion. For example, a URI * of http://example.org/path/to/file?a=1&b=2&c=3 would return a list of three NameValuePairs, one for a=1, one for * b=2, and one for c=3. By convention, {@code '&'} and {@code ';'} are accepted as parameter separators. * <p> * This is typically useful while parsing an HTTP PUT. * * This API is currently only used for testing. * * @param uri * URI to parse * @param charset * Charset name to use while parsing the query * @return a list of {@link NameValuePair} as built from the URI's query portion. */ public static List<NameValuePair> parse(final String uri, final String charset) { return URLEncodedUtils.parse(createURI(uri), charset); } /** * Checks if a URI starts with a protocol. * * @param uri * the URI * @return true if the URI starts with "http://" or "https://" */ public static boolean isAbsolute(String uri) { return (uri.startsWith("http://") || uri.startsWith("https://")); } /** * Concatenates 2 {@link URI} by taking the beginning of the first (up to the path) and the end of the other * (starting from the path). While concatenating, checks that there is no doubled "/" character between the path * fragments. * * @param base * the base uri * @param relPath * the path to concatenate with the base uri * @return the concatenated uri */ public static URI concatPath(URI base, String relPath) { String resultPath = base.getPath() + StringUtils.stripStart(relPath, "/"); try { return new URI(base.getScheme(), base.getUserInfo(), base.getHost(), base.getPort(), resultPath, null, null); } catch (URISyntaxException e) { throw new InvalidUriException(e); } } /** * Removes the server information frome a {@link URI}. * * @param uri * the {@link URI} * @return a new {@link URI} with no scheme, host and port */ public static URI removeServer(URI uri) { try { return new URI(null, null, null, -1, uri.getPath(), uri.getQuery(), uri.getFragment()); } catch (URISyntaxException e) { throw new InvalidUriException(e); } } /** * Interpret the url relatively to the request url (may be relative). Due to a bug in {@link URI} class when using a * relUri containing only a query string, we cannot use directly the method provided by {@link URI} class. * * @param relUri * the relative URI * @param base * the reference {@link URI} * @return the resolved {@link URI} */ public static URI resolve(String relUri, URI base) { URI uri = createURI(relUri); if (uri.getScheme() == null && uri.getUserInfo() == null && uri.getHost() == null && uri.getPort() == -1 && StringUtils.isEmpty(uri.getPath()) && uri.getQuery() != null) { try { return new URI(base.getScheme(), base.getUserInfo(), base.getHost(), base.getPort(), base.getPath(), uri.getQuery(), uri.getFragment()); } catch (URISyntaxException e) { throw new InvalidUriException(e); } } else { return base.resolve(uri); } } /** * Removes the query and fragment at the end of a URI. * * @param uriString * the original URI as a String * * @return the URI without querystring nor fragment */ public static String removeQuerystring(String uriString) { URI uri = createURI(uriString); try { return new URI(uri.getScheme(), uri.getUserInfo(), uri.getHost(), uri.getPort(), uri.getPath(), null, null) .toASCIIString(); } catch (URISyntaxException e) { throw new InvalidUriException(e); } } }