package org.limewire.util; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class EmailAddressUtils { private static final int LABEL_LENGTH = 63; private static final int DOMAIN_LENGTH = 255; private static final int LOCAL_PART_LENGTH = 64; private static final int ADDRESS_LENGTH = 256; private static final String FOLDING_WHITE_SPACE = "(?:(?:(?:[ \\t]*(?:\\r\\n))?[ \\t]+)|(?:[ \\t]+(?:(?:\\r\\n)[ \\t]+)*))"; private static final String EMPTY_STR = ""; /* * Shared regular expression patterns by all class methods */ private static final Pattern LEAD_TRAIL_FOLDING_WHITE_SPACE_PATTERN = Pattern.compile("^" + FOLDING_WHITE_SPACE + "|" + FOLDING_WHITE_SPACE + "$"); private static final Pattern DOMAIN_PATTERN = Pattern .compile("[\\.|/](?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*(?![^\\\"]*\\\"))"); private static final Pattern ILLEGAL_COMMENT_CHAR_OPEN_P_PATTERN = Pattern .compile("(?<!\\\\)[\\(\\)]"); private static final Pattern ILLEGAL_COMMENT_CHAR_CLOSE_P_PATTERN = Pattern .compile("(?<!\\\\)(?:[\\(\\)])"); /** * Checks that address is a valid address based on <a * href="http://tools.ietf.org/html/rfc5322" * >http://tools.ietf.org/html/rfc532</a> * * @param address the email address to validate * @throws NullPointerException if the given <code>address</code> is * <code>null</code> */ public static boolean isValidAddress(String address) { if (!hasValidLengthAndParts(address)) return false; String sanitizedAaddress = escapeAndReplaceCharacters(address); if (!hasValidLocalPart(sanitizedAaddress)) return false; if (!hasValidDomainPart(sanitizedAaddress)) return false; return true; } /** * Checks the domain part of the email address. (Domain name can also be * replaced by an IP address in square brackets) */ private static boolean hasValidDomainPart(final String address) { final Pattern DOMAIN_ADDR_LITERAL_PATTERN = Pattern.compile("^\\[(.)+]$"); final int atIndex = address.lastIndexOf('@'); final String domain = address.substring(atIndex + 1); if (DOMAIN_ADDR_LITERAL_PATTERN.matcher(domain).find()) return hasValidDomainLiteral(domain); else return hasValidDomainName(domain); } /** * Checks to make sure the domain part of the address is valid. Domain is * not an address-literal here (not an IP address) */ private static boolean hasValidDomainName(String domain) { final Pattern ADDRESS_LABEL_PATTERN = Pattern .compile("[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\.\"]|^-|-$"); final Pattern DOMAIN_NUMERIC_PATTERN = Pattern.compile("^[0-9]+$"); String[] dotArray = DOMAIN_PATTERN.split(domain, -1); int partLength = 0; int indexBrace = -1; if (dotArray.length == 1) return false; String element = EMPTY_STR; for (final String element2 : dotArray) { element = LEAD_TRAIL_FOLDING_WHITE_SPACE_PATTERN.matcher(element2) .replaceAll(EMPTY_STR); if (element.length() == 0) return false; if (element.charAt(0) == '(') { indexBrace = element.indexOf(")"); if (indexBrace != -1) { if (ILLEGAL_COMMENT_CHAR_OPEN_P_PATTERN.matcher( element.substring(1, indexBrace - 1)).find()) return false; // Illegal characters in comment element = element.substring(indexBrace + 1, element.length()); } } if (element.charAt(element.length() - 1) == ')') { indexBrace = element.indexOf("("); if (indexBrace != -1) { if (ILLEGAL_COMMENT_CHAR_CLOSE_P_PATTERN.matcher( element.substring(indexBrace + 1, element.length() - 1)).find()) return false; // Illegal characters in comment element = element.substring(0, indexBrace); } } /* * Remove any leading or trailing FWS around the element (inside any * comments) */ element = LEAD_TRAIL_FOLDING_WHITE_SPACE_PATTERN.matcher(element).replaceAll(EMPTY_STR); // What's left counts towards the maximum length for this part if (partLength > 0) partLength++; // for the dot partLength += element.length(); // Label must be 63 characters or less if (element.length() > LABEL_LENGTH || ADDRESS_LABEL_PATTERN.matcher(element).find()) return false; } if (partLength > DOMAIN_LENGTH || DOMAIN_NUMERIC_PATTERN.matcher(element).find()) return false; return true; } /** * Checks to make sure the domain part of the address is valid. Domain is * not an address-literal here (an IP address) */ private static boolean hasValidDomainLiteral(String domain) { final Pattern ADDRESS_LITERAL_PATTERN = Pattern .compile("\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"); final Pattern SPLIT_BY_COLON_PATTERN = Pattern.compile(":"); final String addressLiteral = domain.substring(1, domain.length() - 1); final List<String> matchesIP = new ArrayList<String>(); /* * Extract IPv4 part from the end of the address-literal (if there is * one) */ final Matcher matcher = ADDRESS_LITERAL_PATTERN.matcher(addressLiteral); while (matcher.find()) matchesIP.add(matcher.group()); String ipv6 = null; int groupMax = 0; if (matchesIP.size() > 0) { final int index = addressLiteral.lastIndexOf(matchesIP.get(0)); if (index == 0) // Nothing there except a valid IPv4 address, so... return true; else { // Assume it's an attempt at a mixed address (IPv6 + IPv4) if (addressLiteral.charAt(index - 1) != ':') // character preceding IPv4 address must be ':' return false; if (!addressLiteral.substring(0, 5).equals("IPv6:")) return false; if (index == 7) ipv6 = addressLiteral.substring(5, index); else ipv6 = addressLiteral.substring(5, index - 1); groupMax = 6; } } else { // It must be an attempt at pure IPv6 if (!addressLiteral.substring(0, 5).equals("IPv6:")) return false; ipv6 = addressLiteral.substring(5); groupMax = 8; } matchesIP.clear(); final String[] matches = SPLIT_BY_COLON_PATTERN.split(ipv6); final int index = ipv6.indexOf("::"); if (index == -1) { // We need exactly the right number of groups if (matches.length != groupMax) return false; } else { if (index != ipv6.lastIndexOf("::")) return false; groupMax = index == 0 || index == ipv6.length() - 2 ? groupMax : groupMax - 1; if (matches.length > groupMax) return false; } for (final String string : matches) if (string.length() > 0) { if (string.length() > 4) return false; try { Long.parseLong(string); } catch (final NumberFormatException e) { return false; } } return true; } /** * Checks the local part of the email address. */ private static boolean hasValidLocalPart(final String address) { final Pattern ADDRESS_LOCAL_PATTERN = Pattern .compile("\\.(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*(?![^\\\"]*\\\"))"); final Pattern ESCAPE_CR_LF_NULL_PATTERN = Pattern .compile("(?<!\\\\|^)[\"\\r\\n\\x00](?!$)|\\\\\"$|\"\""); final Pattern CHARS_IN_QUOTED_STR_PATTERN = Pattern .compile("[\\x00-\\x20\\(\\)<>\\[\\]:;@\\\\,\\.\"]"); final Pattern QUOTED_STR_PATTERN = Pattern.compile("^\"(?:.)*\"$", Pattern.DOTALL); final Pattern DBL_BACK_SLASH_PATTERN = Pattern.compile("\\\\\\\\"); final Pattern QUOTED_STR_FOLDING_WHITE_SPACE_PATTERN = Pattern.compile("(?<!\\\\)" + FOLDING_WHITE_SPACE); final int atIndex = address.lastIndexOf('@'); final String localPart = address.substring(0, atIndex); String[] dotArray = ADDRESS_LOCAL_PATTERN.split(localPart, -1); int partLength = 0; for (String element : dotArray) { // Remove any leading or trailing folding white space element = LEAD_TRAIL_FOLDING_WHITE_SPACE_PATTERN.matcher(element).replaceAll(EMPTY_STR); if (element.length() == 0) return false; /* * Can't have empty element (consecutive dots or dots at the start * or end) We need to remove any valid comments (i.e. those at the * start or end of the element) */ if (element.charAt(0) == '(') { final int indexBrace = element.indexOf(')'); if (indexBrace != -1) { if (indexBrace > 1 && ILLEGAL_COMMENT_CHAR_OPEN_P_PATTERN.matcher( element.substring(1, indexBrace - 1)).find()) return false; // Illegal characters in comment element = element.substring(indexBrace + 1); } } if (element.charAt(element.length() - 1) == ')') { final int indexBrace = element.lastIndexOf('('); if (indexBrace != -1) { if (ILLEGAL_COMMENT_CHAR_CLOSE_P_PATTERN.matcher( element.substring(indexBrace + 1, element.length() - 1)).find()) return false; // Illegal characters in comment element = element.substring(0, indexBrace); } } /* * Remove any leading or trailing FWS around the element (inside any * comments) */ element = LEAD_TRAIL_FOLDING_WHITE_SPACE_PATTERN.matcher(element).replaceAll(EMPTY_STR); // What's left counts towards the maximum length for this part if (partLength > 0) partLength++; // for the dot // Each dot-delimited component can be an atom or a quoted string partLength += element.length(); if (QUOTED_STR_PATTERN.matcher(element).find()) { // Quoted-string tests: // // Remove any FWS element = QUOTED_STR_FOLDING_WHITE_SPACE_PATTERN.matcher(element).replaceAll( EMPTY_STR); // So remove all \\ from the string first... element = DBL_BACK_SLASH_PATTERN.matcher(element).replaceAll(" "); // ", CR, LF and NUL must be escaped, "" is too short if (ESCAPE_CR_LF_NULL_PATTERN.matcher(element).find()) return false; } else { /* * Period (".") may...appear, but may not be used to start or * end the local part, nor may two or more consecutive periods * appear. A zero-length element implies a period at the * beginning or end of the local part, or two periods together. * Either way it's not allowed. */ if (element.isEmpty()) return false; // Dots in wrong place /* * Any ASCII graphic (printing) character other than the at-sign * ("@"), backslash, double quote, comma, or square brackets may * appear without quoting. If any of that list of excluded * characters are to appear, they must be quoted Any excluded * characters? i.e. 0x00-0x20, (, ), <, >, [, ], :, ;, @, \, * comma, period, " */ if (CHARS_IN_QUOTED_STR_PATTERN.matcher(element).find()) return false; // Characters must be in a quoted string } } if (partLength > LOCAL_PART_LENGTH) return false; return true; } /** * Checks the given email address for the following: <br/> * <br/> * The maximum total length of a reverse-path or forward-path <br/> * Email addresses must consist of a "local part" separated from a * "domain part" (a fully-qualified domain name) by an at-sign ("@"). */ private static boolean hasValidLengthAndParts(String address) { boolean isValid = false; if (address.length() <= ADDRESS_LENGTH) { final int atIndex = address.lastIndexOf('@'); if (atIndex > 0 && atIndex != address.length() - 1) isValid = true; } return isValid; } /** * Sanitize comments: <br/> * <br/> * Removes nested comments, quotes and dots in comments <br/> * Removes parentheses and dots from quoted strings */ private static String escapeAndReplaceCharacters(String address) { int braceDepth = 0; boolean inQuote = false; boolean escapeThisChar = false; StringBuilder builder = new StringBuilder(address); for (int i = 0; i < address.length(); ++i) { final char chr = address.charAt(i); boolean replaceChar = false; // Escape the next character? if (chr == '\\') escapeThisChar = !escapeThisChar; else { switch (chr) { case '(': if (escapeThisChar) replaceChar = true; else if (inQuote) replaceChar = true; else // Increment brace depth if (braceDepth++ > 0) replaceChar = true; break; case ')': if (escapeThisChar) replaceChar = true; else if (inQuote) replaceChar = true; else { // Decrement brace depth if (--braceDepth > 0) replaceChar = true; if (braceDepth < 0) braceDepth = 0; } break; case '"': if (escapeThisChar) replaceChar = true; else if (braceDepth == 0) // Are we inside a quoted string? inQuote = !inQuote; else replaceChar = true; break; case '.': // Dots don't help either if (escapeThisChar) replaceChar = true; else if (braceDepth > 0) replaceChar = true; break; default: } escapeThisChar = false; if (replaceChar) // Replace the offending character with something harmless builder = builder.replace(i, i + 1, "x"); } } return builder.toString(); } }