package er.extensions.net; import java.io.Serializable; import java.util.Hashtable; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.regex.Pattern; import javax.naming.NameNotFoundException; import javax.naming.NamingException; import javax.naming.directory.Attribute; import javax.naming.directory.Attributes; import javax.naming.directory.DirContext; import javax.naming.directory.InitialDirContext; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.webobjects.foundation.NSForwardException; import er.extensions.foundation.ERXValueUtilities; /** * Email validation class inspired by <a * href="http://leshazlewood.com/2006/11/06/emailaddress-java-class/">Les * Hazlewood's email validator.</a> This class is immutable and thread safe. * * @author Les Hazlewood (regular expressions) * @author Ramsey Gurley (threaded domain validation) */ public final class ERXEmailValidator implements Serializable { /** * Do I need to update serialVersionUID? See section 5.6 <cite>Type Changes * Affecting Serialization</cite> on page 51 of the <a * href="http://java.sun.com/j2se/1.4/pdf/serial-spec.pdf">Java Object * Serialization Spec</a> */ private static final long serialVersionUID = 1L; private static final Logger log = LoggerFactory.getLogger(ERXEmailValidator.class); // RFC 2822 2.2.2 Structured Header Field Bodies private static final String wsp = "[ \\t]"; // space or tab private static final String fwsp = wsp + "*"; // RFC 2822 3.2.1 Primitive tokens private static final String dquote = "\\\""; // ASCII Control characters excluding white space: private static final String noWsCtl = "\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x7F"; // all ASCII characters except CR and LF: private static final String asciiText = "[\\x01-\\x09\\x0B\\x0C\\x0E-\\x7F]"; // RFC 2822 3.2.2 Quoted characters: // single backslash followed by a text char private static final String quotedPair = "(\\\\" + asciiText + ")"; // RFC 2822 3.2.4 Atom: private static final String atext = "[a-zA-Z0-9\\!\\#\\$\\%\\&\\'\\*\\+\\-\\/\\=\\?\\^\\_\\`\\{\\|\\}\\~]"; private static final String atom = fwsp + atext + "+" + fwsp; private static final String dotAtomText = atext + "+" + "(" + "\\." + atext + "+)*"; private static final String dotAtom = fwsp + "(" + dotAtomText + ")" + fwsp; // RFC 2822 3.2.5 Quoted strings: // noWsCtl and the rest of ASCII except the doublequote and backslash // characters: private static final String qtext = "[" + noWsCtl + "\\x21\\x23-\\x5B\\x5D-\\x7E]"; private static final String qcontent = "(" + qtext + "|" + quotedPair + ")"; private static final String quotedString = dquote + "(" + fwsp + qcontent + ")*" + fwsp + dquote; // RFC 2822 3.2.6 Miscellaneous tokens private static final String word = "((" + atom + ")|(" + quotedString + "))"; private static final String phrase = word + "+"; // one or more words. // RFC 1035 tokens for domain names: private static final String letter = "[a-zA-Z]"; private static final String letDig = "[a-zA-Z0-9]"; private static final String letDigHyp = "[a-zA-Z0-9-]"; private static final String rfcLabel = letDig + "(" + letDigHyp + "{0,61}" + letDig + ")?"; private static final String rfc1035DomainName = rfcLabel + "(\\." + rfcLabel + ")*\\." + letter + "{2,6}"; // RFC 2822 3.4 Address specification // domain text - non white space controls and the rest of ASCII chars not // including [, ], or \: private static final String dtext = "[" + noWsCtl + "\\x21-\\x5A\\x5E-\\x7E]"; private static final String dcontent = dtext + "|" + quotedPair; private static final String domainLiteral = "\\[" + "(" + fwsp + dcontent + "+)*" + fwsp + "\\]"; private static final String rfc2822Domain = "(" + dotAtom + "|" + domainLiteral + ")"; private static final String localPart = "((" + dotAtom + ")|(" + quotedString + "))"; private final String domain; private final String addrSpec; private final String angleAddr; private final String nameAddr; private final String mailbox; private final String patternString; private final Pattern validPattern; /** * This second validator exists because there is an issue with validating * addresses that allowQuotedIdentifiers that have no quoting and a long * mailbox name. Example: blahblahblahblahblahblahblah@blah.com * * It seems that after about 25 chars, the regular expression matching * takes exponentially longer to match the string. The same address with * quoting does not exhibit the problem. * Ex. "Blah blah" <blahblahblahblahblahblahblah@blah.com> * * Nor does using a validator that does not allow quoted identifiers. In * order to work around this problem, a second internal validator is * created when allowQuotedIdentifiers is true. This internal validator * does not allow quoted identifiers. It is tried first and only if it * returns false is the full regular expression used. */ private final ERXEmailValidator _internal; /** * * @param allowQuotedIdentifiers * if true, quoted identifiers are allowed (using quotes and * angle brackets around the raw address) are allowed, e.g.: * "John Smith" <john.smith@somewhere.com> The RFC says * this is a valid mailbox. If you don't want to allow this, * because for example, you only want users to enter in a raw * address (john.smith@somewhere.com - no quotes or angle * brackets), then set this to false. * * @param allowDomainLiterals * if true, domain literals are allowed in the email address, * e.g.: someone@[192.168.1.100] or john.doe@[23:33:A2:22:16:1F] * or me@[my computer] The RFC says these are valid email * addresses, but most people don't like allowing them. If you * don't want to allow them, and only want to allow valid domain * names (RFC 1035, x.y.z.com, etc), set this to false. */ public ERXEmailValidator(boolean allowQuotedIdentifiers, boolean allowDomainLiterals) { domain = allowDomainLiterals ? rfc2822Domain : rfc1035DomainName; addrSpec = localPart + "@" + domain; angleAddr = "<" + addrSpec + ">"; nameAddr = "(" + phrase + ")?" + fwsp + angleAddr; mailbox = nameAddr + "|" + addrSpec; patternString = allowQuotedIdentifiers ? mailbox : addrSpec; validPattern = Pattern.compile(patternString); /* * See javadoc for the _internal ivar */ _internal = allowQuotedIdentifiers?new ERXEmailValidator(false, allowDomainLiterals):null; } /** * Utility method that checks to see if the specified string is a valid * email address according to the * RFC 2822 specification. * * @param email * the email address string to test for validity. * @return true if the given text valid according to RFC 2822, false * otherwise. */ public boolean isValidEmailString(String email) { /* * See javadoc for the _internal ivar */ if(_internal != null && _internal.isValidEmailString(email)) { return true; } return email != null && validPattern.matcher(email).matches(); } /** * The thread pool */ private static final ExecutorService executorService = Executors.newCachedThreadPool(); /** * Callable to actually validate the email domain. */ private static class DomainValidator implements Callable<Boolean> { private final String _hostName; /** * @param hostName * the host name to validate */ DomainValidator(String hostName) { _hostName = hostName; } public Boolean call() { Hashtable env = new Hashtable(); env.put("java.naming.factory.initial", "com.sun.jndi.dns.DnsContextFactory"); try { DirContext ictx = new InitialDirContext(env); Attributes attrs = ictx.getAttributes(_hostName, new String[] { "MX" }); Attribute attr = attrs.get("MX"); return attr != null ? Boolean.TRUE : Boolean.FALSE; } catch (NameNotFoundException e) { return Boolean.FALSE; } catch (NamingException e) { throw NSForwardException._runtimeExceptionForThrowable(e); } } } /** * Checks to see if the hostName is a valid email domain. A timeout is * specified which limits the time spent waiting for the DNS lookup. If the * timeout is exceeded, the method returns null. * * @param hostName * the email hostName * @param timeout * the timeout in milliseconds * @return true if the hostName is valid, false if no hostName or MX record * is found, null if lookup times out * @throws NamingException * * @deprecated this method will throw mysterious NullPointerExceptions if used * in a loop. Evidently, something about the DirContext is not as thread safe * as the javadocs claim. Do not use it. */ public static Boolean isValidDomainString(String hostName, long timeout) { if (timeout < 1) { return null; } DomainValidator domainValidator = new DomainValidator(hostName); Future<Boolean> future = executorService.submit(domainValidator); try { Boolean result = future.get(timeout, TimeUnit.MILLISECONDS); return result; } catch (InterruptedException e) { // This really shouldn't happen log.info("Domain validation thread interrupted."); return null; } catch (ExecutionException e) { // Threw some naming exception? log.warn("Exception thrown validating domain.", e); return null; } catch (TimeoutException e) { // If the future timed out, return null. log.debug("Timeout validating email domain."); return null; } } /** * Convenience method to validate email address string and domain. If a * timeout occurs, the default boolean value is returned. * * @param email * the email string to test * @param timeout * the timeout in milliseconds * @param def * default value if timeout occurs * @return true if the email passes both validations * * @deprecated Deprecated because it relies on {@link ERXEmailValidator#isValidDomainString(String, long)} */ public boolean isValidEmailAddress(String email, long timeout, boolean def) { if (isValidEmailString(email)) { String hostName = hostNameForEmailString(email); Boolean value = ERXEmailValidator.isValidDomainString(hostName, timeout); return ERXValueUtilities.booleanValueWithDefault(value, def); } return false; } /** * Parses the host name from the email string * * @param email * the email address * @return the hostName portion of the email address */ public static String hostNameForEmailString(String email) { String hostName = StringUtils.substringAfterLast(email, "@"); // handle domain literals and quoted identifiers hostName = StringUtils.trimToEmpty(hostName); if(hostName.isEmpty()) { return hostName; } int lastIndex = hostName.length() - 1; if (hostName.lastIndexOf('>') == lastIndex) { hostName = hostName.substring(0, lastIndex); } hostName = StringUtils.trimToEmpty(hostName); lastIndex = hostName.length() - 1; if (hostName.indexOf('[') == 0 && hostName.lastIndexOf(']') == lastIndex) { hostName = hostName.substring(1, lastIndex); } hostName = StringUtils.trimToEmpty(hostName); return hostName; } }