/*
* The MIT License
*
* Copyright 2013 Tim Boudreau.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package com.mastfrog.url;
import com.mastfrog.util.Checks;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Splits the textual form of a URL into protocol, user info, host, port,
 * path, parameters and anchor using a sequence of regular expressions, and
 * assembles a {@link URL} from the pieces.
 *
 * @author tim
 */
final class URLParser {

    /** Lower-case prefix that marks a URL as using the file protocol. */
    private static final String FILE_PREFIX = "file:";

    private final CharSequence url;
    /** Everything before the first "://" (group 1) and everything after it (group 2). */
    private static final Pattern PROTOCOL_SPLIT = Pattern.compile("(.*?)://(.*)");
    /**
     * Protocol (group 1), a colon, then either three slashes or a single
     * slash, then the rest (group 2). The slash count is expressed as an
     * alternation; a character class such as {@code [/{1}/{3}]} would
     * instead match any one of the characters '/', '{', '1', '}' and '3'.
     */
    private static final Pattern FILE_PROTOCOL_SPLIT = Pattern.compile("(.*?):(?:/{3}|/)(.*)");
    /** Protocol (group 1), "://", host (group 2), "/", rest (group 3). */
    private static final Pattern FILE_PROTOCOL_WITH_HOST = Pattern.compile("(.*?)://(.*?)/(.*)");
    /** Text before the first slash (group 1) and text after it (group 2). */
    private static final Pattern SLASH_SPLIT = Pattern.compile("(.*?)/(.*)");
    /** Text before the first '?' (group 1) and text after it (group 2). */
    private static final Pattern PARAMS_SPLIT = Pattern.compile("(.*?)\\?(.*)");
    /** Text before the last '#' (group 1) and text after it (group 2). */
    private static final Pattern ANCHOR_SPLIT = Pattern.compile("(.*)\\#(.*)");
    /** Text before the last '@' (group 1) and text after it (group 2). */
    private static final Pattern USERINFO_SPLIT = Pattern.compile("(.*)\\@(.*)", Pattern.DOTALL);
    /** Text before the first ':' (group 1) and text after it (group 2). */
    private static final Pattern USER_PASSWORD_SPLIT = Pattern.compile("(.*?)\\:(.*)");
    /** Text before the first ':' (group 1) and the possibly empty run of digits after it (group 2). */
    private static final Pattern HOST_PORT_SPLIT = Pattern.compile("(.*?)\\:(\\d*)");
    /**
     * One parameter element (group 1) terminated by ';' or '&amp;'. No '$'
     * appears in the class: inside a character class '$' is a literal dollar
     * sign rather than an end-of-input anchor, and would split parameter
     * values that contain one. The final, unterminated element is picked up
     * explicitly in {@link #processParameters(List, String)}.
     */
    private static final Pattern PARAMETER_ELEMENT_SPLIT = Pattern.compile("(.*?)[;&]");
    /** A bracketed address (group 1) followed by ':' and a numeric port (group 2). */
    private static final Pattern IPV6_HOST_AND_PORT = Pattern.compile("^\\[([0-9A-Za-z\\:]+)\\]\\:(\\d+)$");

    /**
     * Creates a parser for the given text.
     *
     * @param url the text to parse; checked with {@code Checks.notNull}
     */
    URLParser(CharSequence url) {
        Checks.notNull("url", url);
        this.url = url;
    }

    /**
     * Parses the text passed to the constructor.
     *
     * @return a URL assembled from whichever components were found;
     * components that were not found are passed to the URL constructor as
     * null
     */
    public URL getURL() {
        String protocol = null;
        Matcher m = PROTOCOL_SPLIT.matcher(url);
        String remainder;
        String u = url.toString();
        String host = null;
        if (m.find()) {
            protocol = m.group(1);
            remainder = m.group(2);
        } else {
            remainder = u;
        }
        if (protocol != null && protocol.trim().length() == 0) {
            protocol = null;
        }
        // Locale.ROOT keeps the comparison independent of the default locale
        // (under a Turkish locale "FILE:" would lower-case to "f\u0131le:").
        boolean isFile = protocol == null && u.toLowerCase(Locale.ROOT).startsWith(FILE_PREFIX);
        if (isFile) {
            m = FILE_PROTOCOL_SPLIT.matcher(u);
            if (m.find()) {
                protocol = m.group(1);
                remainder = m.group(2);
                host = "";
            } else {
                m = FILE_PROTOCOL_WITH_HOST.matcher(u);
                // A matcher's groups may only be read after a successful
                // match; reading them unconditionally throws
                // IllegalStateException.
                if (m.find()) {
                    protocol = m.group(1);
                    host = m.group(2);
                    remainder = m.group(3);
                } else {
                    // e.g. "file:foo" - no slash follows the colon. Treat
                    // everything after the prefix as the path with an empty
                    // host, mirroring what the single-slash form produces.
                    protocol = u.substring(0, FILE_PREFIX.length() - 1);
                    host = "";
                    remainder = u.substring(FILE_PREFIX.length());
                }
            }
        }
        String port = null;
        boolean isIpV6 = false;
        if (host == null) {
            // The host is whatever precedes the first slash; with no slash
            // the whole remainder is the host and nothing is left to parse.
            m = SLASH_SPLIT.matcher(remainder);
            if (m.find()) {
                host = m.group(1);
                remainder = m.group(2);
            } else {
                host = remainder;
                remainder = null;
            }
        }
        String username = null;
        String password = null;
        Checks.notNull("host", host);
        m = USERINFO_SPLIT.matcher(host);
        String unpw = null;
        if (m.find()) {
            unpw = m.group(1);
            host = m.group(2);
        }
        if (unpw != null) {
            m = USER_PASSWORD_SPLIT.matcher(unpw);
            if (m.find()) {
                username = m.group(1);
                password = m.group(2);
            }
        }
        Matcher hm = IPV6_HOST_AND_PORT.matcher(host);
        if (hm.find()) {
            host = hm.group(1);
            port = hm.group(2);
            isIpV6 = true;
        } else {
            hm = Host.IPV6_REGEX.matcher(host);
            isIpV6 = hm.lookingAt();
        }
        // Colons inside an IPv6 address must not be mistaken for a port
        // separator, so host/port splitting is skipped for those.
        if (port == null && !isIpV6) {
            m = HOST_PORT_SPLIT.matcher(host);
            if (m.lookingAt()) {
                host = m.group(1);
                // May be the empty string when the colon has no digits
                // after it.
                port = m.group(2);
            }
        }
        if (remainder == null) {
            // NOTE(review): unlike the full path below, this shortcut falls
            // back to the protocol's default port, passes an empty port
            // string straight to Port and does not canonicalize the host -
            // confirm that the difference is intended.
            Protocol prot = protocol == null ? null : Protocols.forName(protocol);
            Port prt = port == null ? prot == null ? null : prot.getDefaultPort() : new Port(port);
            Host hst = host == null ? null : Host.parse(host);
            return new URL(username, password, prot, hst, prt, null, null, null);
        }
        String anchor = null;
        m = ANCHOR_SPLIT.matcher(remainder);
        if (m.find()) {
            anchor = m.group(2);
            remainder = m.group(1);
        }
        Parameters parameterSet = null;
        String path = null;
        String parameters = null;
        if (isFile) {
            // File URLs detected above get no parameter parsing and no
            // unescaping: what remains is used as the path as-is.
            path = remainder;
        } else {
            List<ParametersElement> params = new ArrayList<ParametersElement>();
            m = PARAMS_SPLIT.matcher(remainder);
            if (m.find()) {
                path = URLBuilder.unescape(m.group(1));
                parameters = m.group(2);
            } else {
                // No '?': text containing '=' is taken to be parameters
                // only, anything else to be a path only.
                if (remainder.contains("=")) {
                    parameters = remainder;
                } else {
                    path = URLBuilder.unescape(remainder);
                }
            }
            ParametersDelimiter delim = ParametersDelimiter.AMPERSAND;
            if (parameters != null) {
                delim = processParameters(params, parameters);
            }
            // When no individual elements were split out, hand the raw
            // text to ParsedParameters.parse instead.
            parameterSet = params.isEmpty() ? parameters == null ? null : ParsedParameters.parse(parameters)
                    : new ParsedParameters(delim, params.toArray(new ParametersElement[params.size()]));
        }
        Port prt = port == null ? null : port.trim().length() == 0 ? null : new Port(port);
        Host hst = host == null ? null : Host.parse(host);
        if (host != null && !host.isEmpty()) {
            hst = hst.canonicalize();
        }
        Protocol proto = protocol == null ? null : Protocols.forName(protocol);
        Path pth = path == null ? null : Path.parse(path);
        if (pth != null) {
            pth = pth.normalize();
        }
        Anchor anch = anchor == null ? null : new Anchor(anchor);
        URL result = new URL(username, password, proto, hst, prt, pth, parameterSet, anch);
        return result;
    }

    /**
     * Splits a parameter string into elements and appends them to a list.
     *
     * @param l the list that receives the parsed elements
     * @param parameters the raw parameter text
     * @return {@code AMPERSAND} if the text contains that delimiter's
     * character anywhere, otherwise {@code SEMICOLON}
     */
    private static ParametersDelimiter processParameters(List<ParametersElement> l, String parameters) {
        Matcher m = PARAMETER_ELEMENT_SPLIT.matcher(parameters);
        boolean match = m.find();
        ParametersDelimiter result = parameters.indexOf(ParametersDelimiter.AMPERSAND.charValue()) >= 0
                ? ParametersDelimiter.AMPERSAND : ParametersDelimiter.SEMICOLON;
        if (match) {
            do {
                l.add(ParametersElement.parse(m.group(1)));
                // Index of the delimiter that terminated this element.
                int end = m.end(1);
                match = m.find();
                if (!match && end != parameters.length() - 1) {
                    // Text after the last delimiter has no terminator of
                    // its own, so the pattern cannot match it; add it here.
                    String rem = parameters.substring(end + 1);
                    l.add(ParametersElement.parse(rem));
                }
            } while (match);
        }
        // Add one empty element per trailing delimiter character, scanning
        // backwards from the end (index 0 is not examined).
        for (int i = parameters.length() - 1; i > 0; i--) {
            if (result.charValue() == parameters.charAt(i)) {
                l.add(ParametersElement.EMPTY);
            } else {
                break;
            }
        }
        return result;
    }
}