package com.netifera.platform.util.patternmatching;
import java.util.EnumMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// TODO rename WebServiceMatcher ?
// FIXME can this also support FTP, SVN, ... ?
// TODO see http://www.ietf.org/rfc/rfc3986.txt
// xml:urn?
/**
 * Matches a text against the syntax of an absolute <code>http://</code> or
 * <code>https://</code> URL and exposes the parsed components (scheme, user,
 * password, host, port, path).
 *
 * <p>The text is parsed once, in the constructor; the fields are final and
 * the internal map is never exposed or modified afterwards. When the text
 * does not match, every getter returns <code>null</code>.</p>
 */
public final class HttpUrlMatcher implements ITextMatcher {
    private enum KEY { SCHEME, HOSTNAME, PORT, USERNAME, PASSWORD, PATH }
    // FIXME vCard scheme?
    /*
     * RFC 1738:
     *
     * In general, URLs are written as follows:
     *
     * <scheme>:<scheme-specific-part>
     *
     * URL schemes that involve the direct use of an IP-based protocol to a
     * specified host on the Internet use a common syntax for the
     * scheme-specific data:
     *
     * //<user>:<password>@<host>:<port>/<url-path>
     *
     * Some or all of the parts "<user>:<password>@", ":<password>", ":<port>",
     * and "/<url-path>" may be excluded.
     *
     * Note that the "/" between the host (or port) and the url-path is NOT
     * part of the url-path.
     */
    /*
     * The user and password classes exclude '/', '?' and '#': these
     * characters end the authority part, so an '@' that appears later (in
     * the path, for instance "http://host/a@b") must not be mistaken for the
     * userinfo delimiter.
     */
    private static final String HTTPURL_REGEX =
        "^(https?):\\/\\/" // scheme
        + "(?:" // user(:pass)@
        + "([\\S&&[^:\\/?#]]*)" // user (chars but ':' '/' '?' '#')
        + "(?::([\\S&&[^\\/?#]]+))?" // ':' + pass (chars but '/' '?' '#')
        + "@" // '@'
        + ")?"
        + "(" // ip or hostname
        + "\\[[\\p{XDigit}:\\.%]*\\]" // IP
        + "|" // or
        + "[\\S&&[^:\\/]]+" // host
        + ")"
        + "(?::(\\d+))?" // tcp port
        + "(?:\\/(\\S*))?$"; // path

    /** Compiled representation of a regular expression matching a http URL. */
    private static final Pattern HTTPURL_PATTERN =
        Pattern.compile(HTTPURL_REGEX);

    /** Matches a text that already starts with an http(s) scheme prefix. */
    private static final Pattern SCHEME_PREFIX_PATTERN =
        Pattern.compile("^https?:\\/\\/.+");

    // Capture group indices of HTTPURL_REGEX.
    private static final int GROUP_SCHEME = 1;
    private static final int GROUP_USERNAME = 2;
    private static final int GROUP_PASSWORD = 3;
    private static final int GROUP_HOST = 4;
    private static final int GROUP_PORT = 5;
    private static final int GROUP_PATH = 6;

    /** <code>true</code> if the text given to the constructor matched. */
    private final boolean matched;

    /** Parsed URL components, or <code>null</code> when nothing matched. */
    private final Map<KEY, String> map;

    /**
     * Matches the text strictly, i.e. the scheme prefix is mandatory.
     *
     * @param text The text to match, may be <code>null</code> (no match).
     */
    public HttpUrlMatcher(final String text) {
        this(text, false);
    }

    /**
     * @param matchtext The text to match, may be <code>null</code> (no match).
     * @param extendedFlag If <code>true</code>, this class will match some
     * extended URL commonly used by web users (not strictly following the RFC):
     * a text without a <code>http://</code> or <code>https://</code> prefix
     * is matched as if it were prefixed with <code>http://</code>.
     */
    public HttpUrlMatcher(final String matchtext, final boolean extendedFlag) {
        map = parse(matchtext, extendedFlag);
        matched = map != null;
    }

    /**
     * Parses the text into its URL components.
     *
     * @return the components keyed by {@link KEY}, or <code>null</code> if
     * the text is <code>null</code>, does not match the URL syntax, or its
     * port or host is rejected by the dedicated matchers.
     */
    private static Map<KEY, String> parse(final String matchtext,
            final boolean extendedFlag) {
        if (matchtext == null) {
            return null;
        }
        String text = matchtext;
        // FIXME: lazy user
        if (extendedFlag && !SCHEME_PREFIX_PATTERN.matcher(matchtext).matches()) {
            text = "http://" + matchtext; // FIXME correct scheme?
        }
        final Matcher matcher = HTTPURL_PATTERN.matcher(text);
        if (!matcher.matches()) {
            return null;
        }
        final String scheme = matcher.group(GROUP_SCHEME);
        String port = matcher.group(GROUP_PORT);
        if (port == null) {
            // no explicit port: use the default of the scheme
            // (the regex only lets "http" or "https" through)
            port = "http".equals(scheme) ? "80" : "443";
        } else if (!TcpUdpPortMatcher.matches(port)) {
            return null;
        }
        String host = matcher.group(GROUP_HOST);
        if (!HostnameMatcher.matches(host)
                && !InternetAddressMatcher.matches(host)) {
            return null;
        }
        if (host.charAt(0) == '[') { // normalize ipv6
            host = host.substring(1, host.length() - 1); // ']' safely parsed
        }
        final Map<KEY, String> parts = new EnumMap<KEY, String>(KEY.class);
        parts.put(KEY.SCHEME, scheme);
        parts.put(KEY.USERNAME, matcher.group(GROUP_USERNAME));
        parts.put(KEY.PASSWORD, matcher.group(GROUP_PASSWORD));
        parts.put(KEY.HOSTNAME, host);
        parts.put(KEY.PORT, port);
        parts.put(KEY.PATH, matcher.group(GROUP_PATH));
        return parts;
    }

    /** @return the parsed components, or "No match". */
    @Override
    public String toString() {
        return matched ? map.toString() : "No match";
    }

    /** @return <code>true</code> if the text is a valid http(s) URL. */
    public boolean matches() {
        return matched;
    }

    /** @return "http" or "https", or <code>null</code> if no match. */
    public String getScheme() {
        return matched ? map.get(KEY.SCHEME) : null;
    }

    /**
     * @return the host name or address (an IPv6 literal is returned without
     * its enclosing brackets), or <code>null</code> if no match.
     */
    public String getHostname() {
        return matched ? map.get(KEY.HOSTNAME) : null;
    }

    /**
     * @return the path without its leading '/', or <code>null</code> if no
     * match or if the URL has no '/' after the host (or port).
     */
    public String getPath() {
        return matched ? map.get(KEY.PATH) : null;
    }

    /**
     * @return the explicit port, or the default one ("80" for http, "443"
     * for https) when the URL has none; <code>null</code> if no match.
     */
    public String getPort() {
        return matched ? map.get(KEY.PORT) : null;
    }

    /** @return the user name, or <code>null</code> if absent or no match. */
    public String getUsername() {
        return matched ? map.get(KEY.USERNAME) : null;
    }

    /** @return the password, or <code>null</code> if absent or no match. */
    public String getPassword() {
        return matched ? map.get(KEY.PASSWORD) : null;
    }

    /**
     * @param text The text to match.
     * @return <code>true</code> if the text strictly matches a http(s) URL.
     */
    public static boolean matches(final String text) {
        return new HttpUrlMatcher(text).matches();
    }

    /**
     * @param text The text to match.
     * @return <code>true</code> if the text matches a http(s) URL, the scheme
     * prefix being optional.
     */
    public static boolean matchesExtended(final String text) {
        return new HttpUrlMatcher(text, true).matches();
    }

    /*
     * Disabled alternative based on java.net.URI.
     * NOTE(review): as written, the return expression groups as
     * (path && hostname) || address since && binds tighter than ||; add
     * parentheses before enabling it.
     *
    public static boolean matchesFast(final String text) {
        URI uri;
        try {
            uri = new URI(text);
        } catch (URISyntaxException e) {
            return false;
        }
        if ("http".compareToIgnoreCase(uri.getScheme()) != 0) {
            return false;
        }
        return CommonMatcher.urlPathMatcher(uri.getPath())
            && HostnameMatcher.matches(uri.getHost())
            || InternetAddressMatcher.matches(uri.getHost());
    }
    */
}