// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.indicator.userdefine.email;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.Socket;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.naming.Context;
import javax.naming.NamingEnumeration;
import javax.naming.NamingException;
import javax.naming.directory.Attribute;
import javax.naming.directory.Attributes;
import javax.naming.directory.DirContext;
import javax.naming.directory.InitialDirContext;
import org.apache.log4j.Logger;
import org.eclipse.emf.common.util.EList;
import org.talend.dataquality.domain.Domain;
import org.talend.dataquality.domain.JavaUDIIndicatorParameter;
import org.talend.dataquality.indicators.IndicatorParameters;
import org.talend.dataquality.indicators.sql.impl.UserDefIndicatorImpl;
/**
* created by mzhao on 2012-8-27 JUDI for email validation.
*
*/
public class EMailValidationIndicator extends UserDefIndicatorImpl {
private static Logger log = Logger.getLogger(EMailValidationIndicator.class);
private static final String EMAIL_PARAM = "EMAIL"; //$NON-NLS-1$
private static final String INVALID_PARAM = "INVALID DATA FILE"; //$NON-NLS-1$
private static final String BUFFER_SIZE_PARAM = "BUFFER SIZE"; //$NON-NLS-1$
private static final String NAMING_PARAM = "java.naming.provider.url";//$NON-NLS-1$
private DirContext ictx = null;
private String emailAddress = null;
/**
* Sets the emailAddress. For test only!
*
* @param emailAddress the sender email address to set
*/
void setEmailAddress(String emailAddress) {
this.emailAddress = emailAddress;
}
private boolean storeInvalidData = false;
private FileOutputStream os = null;
private StringBuffer tempInvalidData = null;
private int buffSize = 200; // default value set to 200
private static final String HEADER = "Email Indicator - "; //$NON-NLS-1$
private static final Pattern EMAIL_PATTERN = java.util.regex.Pattern
.compile("^[a-zA-Z0-9._%-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,4}$"); //$NON-NLS-1$
/*
* (non-Javadoc)
*
* @see org.talend.dataquality.indicators.impl.IndicatorImpl#handle(java.lang.Object)
*/
@Override
public boolean handle(Object data) {
count++;
if (data == null || data.toString().trim().equals("")) { //$NON-NLS-1$
// Invalid email domain.
return false;
}
boolean isValid = isAddressValid(data.toString().trim());
if (isValid) {
matchingValueCount++;
} else if (storeInvalidData) {
storeDataInFile(data);
}
return true;
}
private void storeDataInFile(Object data) {
try {
this.tempInvalidData.append(data).append("\n"); //$NON-NLS-1$
// flush into file
if (count % this.buffSize == 0) {
this.os.write(this.tempInvalidData.toString().getBytes());
this.tempInvalidData = new StringBuffer();
}
} catch (IOException e) {
log.error(e, e);
}
}
/*
* (non-Javadoc)
*
* @see org.talend.dataquality.indicators.impl.IndicatorImpl#finalizeComputation()
*/
@Override
public boolean finalizeComputation() {
// compute non matching value
this.notMatchingValueCount = count - matchingValueCount;
if (this.os != null) {
try {
this.os.write(this.tempInvalidData.toString().getBytes());
this.os.close();
} catch (IOException e) {
log.error(e, e);
}
}
return true;
}
/*
* (non-Javadoc)
*
* @see org.talend.dataquality.indicators.impl.IndicatorImpl#reset()
*/
@Override
public boolean reset() {
boolean retValue = super.reset();
matchingValueCount = new Long(0L);
// Prepare naming directory context.
Hashtable<String, String> env = new Hashtable<String, String>();
env.put("java.naming.factory.initial", "com.sun.jndi.dns.DnsContextFactory"); //$NON-NLS-1$ //$NON-NLS-2$
// if the user add the paramter for: java.naming.provider.url, if has then add it to env
// Added TDQ-6918 Allow user add parameter: java.naming.provider.url
String dnsUrl = getDNSUrl();
if (dnsUrl != null) {
env.put(Context.PROVIDER_URL, dnsUrl);
}// ~
try {
ictx = new InitialDirContext(env);
} catch (NamingException e) {
log.error("Invalid DNS in the user defined indicator: " + this.getName(), e); //$NON-NLS-1$
retValue = false;
}
retValue = retValue && this.initParameters();
return retValue;
}
/**
* Check: if the user add the paramter for: java.naming.provider.url if the parameter with this name is added,
* return its value.
*
* @return string: if has the related parameter null: no such parameter
*/
private String getDNSUrl() {
IndicatorParameters param = this.getParameters();
if (param != null) {
Domain indicatorValidDomain = param.getIndicatorValidDomain();
if (indicatorValidDomain != null) {
EList<JavaUDIIndicatorParameter> javaUDIIndicatorParameter = indicatorValidDomain.getJavaUDIIndicatorParameter();
for (JavaUDIIndicatorParameter p : javaUDIIndicatorParameter) {
if (NAMING_PARAM.equalsIgnoreCase(p.getKey())) {
return p.getValue();
}
}
}
}
return null;
}
boolean initParameters() {
// Check prerequisite
IndicatorParameters param = this.getParameters();
if (param == null) {
log.error("No parameter set in the user defined indicator " + this.getName()); //$NON-NLS-1$
return false;
}
Domain indicatorValidDomain = param.getIndicatorValidDomain();
if (indicatorValidDomain == null) {
log.error("No parameter set in the user defined indicator " + this.getName()); //$NON-NLS-1$
return false;
}
// else retrieve email from parameter
EList<JavaUDIIndicatorParameter> javaUDIIndicatorParameter = indicatorValidDomain.getJavaUDIIndicatorParameter();
for (JavaUDIIndicatorParameter p : javaUDIIndicatorParameter) {
if (EMAIL_PARAM.equalsIgnoreCase(p.getKey())) {
this.emailAddress = p.getValue();
} else if (INVALID_PARAM.equalsIgnoreCase(p.getKey())) {
this.storeInvalidData = true;
// TODO add more checks on the file
try {
this.tempInvalidData = new StringBuffer();
this.os = new FileOutputStream(new File(p.getValue()));
} catch (FileNotFoundException e) {
log.error("Invalid file path in the user defined indicator: " + this.getName(), e); //$NON-NLS-1$
return false;
}
} else if (BUFFER_SIZE_PARAM.equalsIgnoreCase(p.getKey())) {
try {
this.buffSize = Integer.valueOf(p.getValue());
} catch (Exception e) {
log.error("Invalid buffer size: " + p.getValue(), e); //$NON-NLS-1$
return false;
}
} else {
// log warn but keep running (don't return false)
if (!NAMING_PARAM.equalsIgnoreCase(p.getKey())) {
log.warn("Unknown parameter given to UDI: " + this.getName() + ": " + p.getKey() + " = " + p.getValue()); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
}
}
}
if (!this.isAddressValid(emailAddress)) {
log.error("Invalid sender email set in parameters of the user defined indicator \"" + this.getName() + "\": " //$NON-NLS-1$ //$NON-NLS-2$
+ emailAddress);
return false;
}
return true;
}
/**
*
* Get response status's code, 250 means OK, queuing for node node started. Requested mail action okay, completed.
* See more details at http://email.about.com/cs/standards/a/smtp_error_code_2.htm
*
* @param in
* @return
* @throws IOException
*/
private int getResponse(BufferedReader in) throws IOException {
String line = null;
int res = 0;
while ((line = in.readLine()) != null) {
String pfx = line.substring(0, 3);
try {
res = Integer.parseInt(pfx);
} catch (Exception ex) {
res = -1;
}
if (line.charAt(3) != '-') {
break;
}
}
return res;
}
/**
*
* Write the text ot buffer.
*
* @param wr
* @param text
* @throws IOException
*/
private void write(BufferedWriter wr, String text) throws IOException {
wr.write(text + "\r\n"); //$NON-NLS-1$
wr.flush();
}
private List<String> getMX(String hostName) throws NamingException {
// Perform a DNS lookup for MX records in the domain
Attributes attrs = ictx.getAttributes(hostName, new String[] { "MX" }); //$NON-NLS-1$
Attribute attr = attrs.get("MX"); //$NON-NLS-1$
List<String> res = new ArrayList<String>();
// if we don't have an MX record, try the machine itself
if ((attr == null) || (attr.size() == 0)) {
attrs = ictx.getAttributes(hostName, new String[] { "A" }); //$NON-NLS-1$
attr = attrs.get("A"); //$NON-NLS-1$
if (attr == null) {
if (log.isInfoEnabled()) {
log.info(HEADER + "No match for hostname '" + hostName + "'"); //$NON-NLS-1$ //$NON-NLS-2$
}
return res;
}
}
// we have machines to try. Return them as an array list
NamingEnumeration<?> en = attr.getAll();
Map<Integer, String> map = new TreeMap<Integer, String>();
while (en.hasMore()) {
String mailhost;
String x = (String) en.next();
String f[] = x.split(" "); //$NON-NLS-1$
Integer key = 0;
if (f.length == 1) {
mailhost = f[0];
} else if (f[1].endsWith(".")) { //$NON-NLS-1$
mailhost = f[1].substring(0, (f[1].length() - 1));
key = Integer.valueOf(f[0]);
} else {
mailhost = f[1];
key = Integer.valueOf(f[0]);
}
map.put(key, mailhost);
}
// NOTE: We SHOULD take the preference into account to be absolutely
// correct.
Iterator<Integer> keyInterator = map.keySet().iterator();
while (keyInterator.hasNext()) {
res.add(map.get(keyInterator.next()));
}
return res;
}
boolean isAddressValid(String address) {
if (address == null) {
return false;
}
// Find the separator for the domain name
int pos = address.indexOf('@');
// If the address does not contain an '@', it's not valid
if (pos == -1) {
return false;
}
// check loose email regex
final Matcher matcher = EMAIL_PATTERN.matcher(address);
if (!matcher.find()) {
if (log.isInfoEnabled()) {
log.info(HEADER + "Invalid email syntax for " + address); //$NON-NLS-1$
}
return false;
}
// Isolate the domain/machine name and get a list of mail exchangers
String domain = address.substring(++pos);
List<String> mxList = null;
try {
mxList = getMX(domain);
} catch (NamingException ex) {
return false;
}
// Just because we can send mail to the domain, doesn't mean that the
// address is valid, but if we can't, it's a sure sign that it isn't
if (mxList.size() == 0) {
return false;
}
// Now, do the SMTP validation, try each mail exchanger until we get
// a positive acceptance. It *MAY* be possible for one MX to allow
// a message [store and forwarder for example] and another [like
// the actual mail server] to reject it. This is why we REALLY ought
// to take the preference into account.
for (int mx = 0; mx < mxList.size(); mx++) {
try {
int res;
Socket skt = new Socket(mxList.get(mx), 25);
BufferedReader rdr = new BufferedReader(new InputStreamReader(skt.getInputStream()));
BufferedWriter wtr = new BufferedWriter(new OutputStreamWriter(skt.getOutputStream()));
res = getResponse(rdr);
if (res != 220) { // SMTP Service ready.
if (log.isInfoEnabled()) {
log.info(HEADER + "Invalid header:" + mxList.get(mx)); //$NON-NLS-1$
}
return false;
}
write(wtr, "EHLO " + this.emailAddress.substring(emailAddress.indexOf("@") + 1)); //$NON-NLS-1$ //$NON-NLS-2$
res = getResponse(rdr);
if (res != 250) {
if (log.isInfoEnabled()) {
log.info(HEADER + "Not ESMTP: " + this.emailAddress.substring(emailAddress.indexOf("@") + 1)); //$NON-NLS-1$ //$NON-NLS-2$
}
return false;
}
// validate the sender address
write(wtr, "MAIL FROM: <" + this.emailAddress + ">"); //$NON-NLS-1$//$NON-NLS-2$
res = getResponse(rdr);
if (res != 250) {
if (log.isInfoEnabled()) {
log.info(HEADER + "Sender rejected: " + this.emailAddress); //$NON-NLS-1$
}
return false;
}
write(wtr, "RCPT TO: <" + address + ">"); //$NON-NLS-1$//$NON-NLS-2$
res = getResponse(rdr);
// be polite
write(wtr, "RSET"); //$NON-NLS-1$
getResponse(rdr);
write(wtr, "QUIT"); //$NON-NLS-1$
getResponse(rdr);
if (res != 250) {
if (log.isInfoEnabled()) {
log.info(HEADER + "Address is not valid: " + address); //$NON-NLS-1$
}
return false;
}
rdr.close();
wtr.close();
skt.close();
return true;
} catch (Throwable e) {
// Do nothing but try next host
if (log.isDebugEnabled()) {
log.debug("Connection to " + mxList.get(mx) + " failed.", e); //$NON-NLS-1$ //$NON-NLS-2$
}
continue;
}
}
return false;
}
}