package com.fsck.k9.message.html; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Parses and "linkifies" http links. * <p> * This class is in parts inspired by OkHttp's * <a href="https://github.com/square/okhttp/blob/master/okhttp/src/main/java/okhttp3/HttpUrl.java">HttpUrl</a>. * But much of the parsing parts have been left out. * </p> */ class HttpUriParser implements UriParser { // This string represent character group sub-delim as described in RFC 3986 private static final String SUB_DELIM = "!$&'()*+,;="; private static final Pattern DOMAIN_PATTERN = Pattern.compile("[\\da-z](?:[\\da-z-]*[\\da-z])*(?:\\.[\\da-z](?:[\\da-z-]*[\\da-z])*)*(?::(\\d{0,5}))?", Pattern.CASE_INSENSITIVE); private static final Pattern IPv4_PATTERN = Pattern.compile("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})(:(\\d{0,5}))?"); @Override public int linkifyUri(String text, int startPos, StringBuffer outputBuffer) { int currentPos = startPos; // Scheme String shortScheme = text.substring(currentPos, Math.min(currentPos + 7, text.length())); String longScheme = text.substring(currentPos, Math.min(currentPos + 8, text.length())); if (longScheme.equalsIgnoreCase("https://")) { currentPos += "https://".length(); } else if (shortScheme.equalsIgnoreCase("http://")) { currentPos += "http://".length(); } else if (shortScheme.equalsIgnoreCase("rtsp://")) { currentPos += "rtsp://".length(); } else { return startPos; } // Authority int matchedAuthorityEnd = tryMatchAuthority(text, currentPos); if (matchedAuthorityEnd == currentPos) { return startPos; } currentPos = matchedAuthorityEnd; // Path if (currentPos < text.length() && text.charAt(currentPos) == '/') { currentPos = matchUnreservedPCTEncodedSubDelimClassesGreedy(text, currentPos + 1, "/:@"); } // Query if (currentPos < text.length() && text.charAt(currentPos) == '?') { currentPos = matchUnreservedPCTEncodedSubDelimClassesGreedy(text, currentPos + 1, ":@/?"); } // Fragment if (currentPos < text.length() && text.charAt(currentPos) == '#') { currentPos = matchUnreservedPCTEncodedSubDelimClassesGreedy(text, currentPos + 1, ":@/?"); } String httpUri = text.substring(startPos, currentPos); outputBuffer.append("<a href=\"") .append(httpUri) .append("\">") .append(httpUri) .append("</a>"); return currentPos; } private int tryMatchAuthority(String text, int startPos) { int authorityLimit = text.indexOf('/', startPos); if (authorityLimit == -1) { authorityLimit = text.length(); } int authorityStart = tryMatchUserInfo(text, startPos, authorityLimit); int authorityEnd = tryMatchDomainName(text, authorityStart); if (authorityEnd != authorityStart) { return authorityEnd; } authorityEnd = tryMatchIpv4Address(text, authorityStart, true); if (authorityEnd != authorityStart) { return authorityEnd; } authorityEnd = tryMatchIpv6Address(text, authorityStart); if (authorityEnd != authorityStart) { return authorityEnd; } return startPos; } private int tryMatchUserInfo(String text, int startPos, int limit) { int userInfoEnd = text.indexOf('@', startPos); if (userInfoEnd != -1 && userInfoEnd < limit) { if (matchUnreservedPCTEncodedSubDelimClassesGreedy(text, startPos, ":") != userInfoEnd) { // Illegal character in user info return startPos; } return userInfoEnd + 1; } return startPos; } private int tryMatchDomainName(String text, int startPos) { try { Matcher matcher = DOMAIN_PATTERN.matcher(text); if (!matcher.find(startPos) || matcher.start() != startPos) { return startPos; } String portString = matcher.group(1); if (portString != null && !portString.isEmpty()) { int port = Integer.parseInt(portString); if (port > 65535) { return startPos; } } return matcher.end(); } catch (IllegalArgumentException e) { return startPos; } } private int tryMatchIpv4Address(String text, int startPos, boolean portAllowed) { Matcher matcher = IPv4_PATTERN.matcher(text); if (!matcher.find(startPos) || matcher.start() != startPos) { return startPos; } for (int i = 1; i <= 4; i++) { int segment = Integer.parseInt(matcher.group(1)); if (segment > 255) { return startPos; } } if (!portAllowed && matcher.group(5) != null) { return startPos; } String portString = matcher.group(6); if (portString != null && !portString.isEmpty()) { int port = Integer.parseInt(portString); if (port > 65535) { return startPos; } } return matcher.end(); } private int tryMatchIpv6Address(String text, int startPos) { if (startPos == text.length() || text.codePointAt(startPos) != '[') { return startPos; } int addressEnd = text.indexOf(']'); if (addressEnd == -1) { return startPos; } // Actual parsing int currentPos = startPos + 1; int beginSegmentsCount = 0; int endSegmentsCount = 0; // Handle :: separator and segments in front of it int compressionPos = text.indexOf("::"); boolean compressionEnabled = compressionPos != -1 && compressionPos < addressEnd; if (compressionEnabled) { while (currentPos < compressionPos) { // Check segment separator if (beginSegmentsCount > 0) { if (text.codePointAt(currentPos) != ':') { return startPos; } else { ++currentPos; } } // Parse segment int possibleSegmentEnd = parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, compressionPos)); if (possibleSegmentEnd == currentPos) { return startPos; } currentPos = possibleSegmentEnd; ++beginSegmentsCount; } currentPos += 2; // Skip :: separator } // Parse end segments while (currentPos < addressEnd && (beginSegmentsCount + endSegmentsCount) < 8) { // Check segment separator if (endSegmentsCount > 0) { if (text.codePointAt(currentPos) != ':') { return startPos; } else { ++currentPos; } } // Small look ahead, do not run into IPv4 tail (7 is IPv4 minimum length) int nextColon = text.indexOf(':', currentPos); if ((nextColon == -1 || nextColon > addressEnd) && (addressEnd - currentPos) >= 7) { break; } // Parse segment int possibleSegmentEnd = parse16BitHexSegment(text, currentPos, Math.min(currentPos + 4, addressEnd)); if (possibleSegmentEnd == currentPos) { return startPos; } currentPos = possibleSegmentEnd; ++endSegmentsCount; } // We have 3 valid cases here if (currentPos == addressEnd) { // 1) No compression and full address, everything fine // 2) Compression enabled and whole address parsed, everything fine as well if ((!compressionEnabled && beginSegmentsCount + endSegmentsCount == 8) || (compressionEnabled && beginSegmentsCount + endSegmentsCount < 8)) { // Only optional port left, skip address bracket ++currentPos; } else { return startPos; } } else { // 3) Still some stuff missing, check for IPv4 as tail necessary if (tryMatchIpv4Address(text, currentPos, false) != addressEnd) { return startPos; } currentPos = addressEnd + 1; } // Check optional port if (currentPos == text.length() || text.codePointAt(currentPos) != ':') { return currentPos; } ++currentPos; int port = 0; for (; currentPos < text.length(); currentPos++) { int c = text.codePointAt(currentPos); if (c < '0' || c > '9') { break; } port = port * 10 + c - '0'; } return (port <= 65535) ? currentPos : startPos; } private int parse16BitHexSegment(String text, int startPos, int endPos) { int currentPos = startPos; while (isHexDigit(text.codePointAt(currentPos)) && currentPos < endPos) { ++currentPos; } return currentPos; } private int matchUnreservedPCTEncodedSubDelimClassesGreedy(String text, int startPos, String additionalCharacters) { String allowedCharacters = SUB_DELIM + "-._~" + additionalCharacters; int currentPos; int shouldBeHex = 0; for (currentPos = startPos; currentPos < text.length(); currentPos++) { int c = text.codePointAt(currentPos); if (isHexDigit(c)) { shouldBeHex = Math.max(shouldBeHex - 1, 0); } else if (shouldBeHex == 0) { if (allowedCharacters.indexOf(c) != -1) { // Everything ok here :) } else if (c == '%') { shouldBeHex = 2; } else { break; } } else { break; } } return currentPos; } private boolean isHexDigit(int c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'); } }