package com.crawljax.util;
import java.net.URI;
import java.util.Locale;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helpers for resolving links, isolating the base of a URL, reading
 * query-string variables and comparing hosts. This class cannot be
 * instantiated.
 */
public class UrlUtils {

    /** Separator between the scheme and the authority of a hierarchical URL. */
    private static final String SCHEME_SEPARATOR = "://";

    /**
     * Matches a string that begins with a syntactically valid scheme followed
     * by "://". Anchoring at the start keeps a "://" that merely occurs later
     * in the string (for example inside a query value) from being mistaken
     * for the scheme of the link itself.
     */
    private static final Pattern ABSOLUTE_URL_PREFIX =
            Pattern.compile("^[A-Za-z][A-Za-z0-9+.\\-]*://");

    /**
     * Resolves a link found on a page into a URI.
     *
     * @param currentUrl
     *            The current url
     * @param href
     *            The target URL, relative or not
     * @return The new URL. An href that starts with "scheme://" is returned as
     *         is; anything else is resolved against currentUrl.
     * @throws IllegalArgumentException
     *             if href is null, is a "javascript:" or "mailto:" link, or is
     *             "about:blank" (all compared case-insensitively), or if
     *             either argument cannot be parsed as a URI.
     */
    public static URI extractNewUrl(String currentUrl, String href) {
        if (href == null || isJavascript(href)
                || startsWithIgnoreCase(href, "mailto:")
                || href.equalsIgnoreCase("about:blank")) {
            throw new IllegalArgumentException(String.format(
                    "%s is not a HTTP url", href));
        }
        if (ABSOLUTE_URL_PREFIX.matcher(href).find()) {
            return URI.create(href);
        }
        URI current = URI.create(currentUrl);
        // A base without any path (e.g. "http://example.com") needs the
        // leading slash added so the relative link lands under the root.
        if (current.getPath().isEmpty() && !href.startsWith("/")) {
            return current.resolve("/" + href);
        }
        return current.resolve(href);
    }

    /** Tells whether href uses the "javascript:" scheme, ignoring case. */
    private static boolean isJavascript(String href) {
        return startsWithIgnoreCase(href, "javascript:");
    }

    /** Case-insensitive variant of {@link String#startsWith(String)}. */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Strips everything after the authority part of a URL.
     *
     * @param url
     *            the URL string. It must contain with ":" e.g, http: or https:
     * @return the base part of the URL: the scheme, "://" and everything up to
     *         (not including) the first "/", "?" or "#" that follows. A URL
     *         whose scheme is not followed by "://", or that has nothing after
     *         the authority, is returned unchanged.
     * @throws IllegalArgumentException
     *             if url contains no ":" at all.
     */
    public static String getBaseUrl(String url) {
        int schemeEnd = url.indexOf(':');
        if (schemeEnd < 0) {
            throw new IllegalArgumentException(String.format(
                    "%s does not contain a scheme", url));
        }
        if (!url.startsWith(SCHEME_SEPARATOR, schemeEnd)) {
            // No authority component to isolate.
            return url;
        }
        int authorityStart = schemeEnd + SCHEME_SEPARATOR.length();
        for (int i = authorityStart; i < url.length(); i++) {
            char c = url.charAt(i);
            // The authority ends at the path, the query or the fragment,
            // whichever comes first; a "/" inside a query must not count.
            if (c == '/' || c == '?' || c == '#') {
                return url.substring(0, i);
            }
        }
        return url;
    }

    /**
     * Retrieve the var value for varName from a HTTP query string (format is
     * "var1=val1&amp;var2=val2"). A single leading "?" is ignored. Each pair is
     * split at its first "=" only, so values may themselves contain "=". The
     * value is returned exactly as it appears; it is not URL-decoded.
     *
     * @param varName
     *            the name.
     * @param haystack
     *            the haystack.
     * @return variable value for varName, or null if the haystack is null or
     *         empty, the variable is absent, or it has no (or an empty) value.
     */
    public static String getVarFromQueryString(String varName, String haystack) {
        if (haystack == null || haystack.length() == 0) {
            return null;
        }
        String query = haystack.charAt(0) == '?' ? haystack.substring(1) : haystack;
        for (String pair : query.split("&")) {
            int equalsAt = pair.indexOf('=');
            if (equalsAt < 0) {
                continue;
            }
            String value = pair.substring(equalsAt + 1);
            // Pairs with an empty value are skipped, so a later pair with the
            // same name can still provide the value.
            if (!value.isEmpty() && pair.substring(0, equalsAt).equals(varName)) {
                return value;
            }
        }
        return null;
    }

    /**
     * Checks if the given URL is part of the domain, or a subdomain of the
     * given {@link java.net.URI}. Hosts are compared case-insensitively, and a
     * subdomain must be separated from the base host by a dot
     * ("sub.example.com" matches "example.com", "evilexample.com" does not).
     *
     * @param currentUrl
     *            The url you want to check.
     * @param url
     *            The URL acting as the base.
     * @return If the URL is part of the domain. False when either URL has no
     *         host.
     */
    public static boolean isSameDomain(String currentUrl, URI url) {
        String current = lowerCaseHost(URI.create(getBaseUrl(currentUrl)));
        String original = lowerCaseHost(url);
        if (current == null || original == null) {
            return false;
        }
        return current.equals(original) || current.endsWith("." + original);
    }

    /**
     * Returns the host of uri in lower case, or null if it has none.
     * Locale.ROOT keeps the result independent of the default locale.
     */
    private static String lowerCaseHost(URI uri) {
        String host = uri.getHost();
        return host == null ? null : host.toLowerCase(Locale.ROOT);
    }

    private UrlUtils() {
    }
}