// ================================================================================================= // Copyright 2011 Twitter, Inc. // ------------------------------------------------------------------------------------------------- // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this work except in compliance with the License. // You may obtain a copy of the License in the LICENSE file, or at: // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // ================================================================================================= package com.twitter.common.net; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.util.Arrays; import java.util.List; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @author William Farner */ public class UrlHelper { private static final Logger LOG = Logger.getLogger(UrlHelper.class.getName()); /** * Gets the domain from {@code url}. * * @param url A url. * @return The domain portion of the URL, or {@code null} if the url is invalid. */ public static String getDomain(String url) { try { return getDomainChecked(url); } catch (URISyntaxException e) { LOG.finest("Malformed url: " + url); return null; } } /** * Gets the domain from {@code uri}, and throws an exception if it's not a valid uri. * * @param url A url. * @throws URISyntaxException if url is not a valid {@code URI} * @return The domain portion of the given url, or {@code null} if the host is undefined. */ public static String getDomainChecked(String url) throws URISyntaxException { Preconditions.checkNotNull(url); url = addProtocol(url); return new URI(url).getHost(); } /** * Gets the path from {@code url}. * * @param url A url. * @return The path portion of the URL, or {@code null} if the url is invalid. */ public static String getPath(String url) { Preconditions.checkNotNull(url); url = addProtocol(url); try { return new URI(url).getPath(); } catch (URISyntaxException e) { LOG.info("Malformed url: " + url); return null; } } /** * Strips URL parameters from a url. * This will remove anything after and including a question mark in the URL. * * @param url The URL to strip parameters from. * @return The original URL with parameters stripped, which will be the original URL if no * parameters were found. */ public static String stripUrlParameters(String url) { Preconditions.checkNotNull(url); int paramStartIndex = url.indexOf("?"); if (paramStartIndex == -1) { return url; } else { return url.substring(0, paramStartIndex); } } /** * Convenience method that calls #stripUrlParameters(String) for a URL. * * @param url The URL to strip parameters from. * @return The original URL with parameters stripped, which will be the original URL if no * parameters were found. */ public static String stripUrlParameters(URL url) { return stripUrlParameters(url.toString()); } private static final Pattern URL_PROTOCOL_REGEX = Pattern.compile("^https?://", Pattern.CASE_INSENSITIVE); /** * Checks whether a URL specifies its protocol, prepending http if it does not. * * @param url The URL to fix. * @return The URL with the http protocol specified if no protocol was already specified. */ public static String addProtocol(String url) { Preconditions.checkNotNull(url); Matcher matcher = URL_PROTOCOL_REGEX.matcher(url); if (!matcher.find()) { url = "http://" + url; } return url; } /** * Gets the domain levels for a host. * For example, sub1.sub2.domain.co.uk would return * [sub1.sub2.domain.co.uk, sub2.domain.co.uk, domain.co.uk, co.uk, uk]. * * * @param host The host to peel subdomains off from. * @return The domain levels in this host. */ public static List<String> getDomainLevels(String host) { Preconditions.checkNotNull(host); // Automatically include www prefix if not present. if (!host.startsWith("www")) { host = "www." + host; } Joiner joiner = Joiner.on("."); List<String> domainParts = Lists.newLinkedList(Arrays.asList(host.split("\\."))); List<String> levels = Lists.newLinkedList(); while (domainParts.size() > 1) { levels.add(joiner.join(domainParts)); domainParts.remove(0); } return levels; } }