/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.mailinput; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.Enumeration; import java.util.List; import javax.mail.Header; import javax.mail.Message; import org.apache.commons.collections.iterators.ArrayIterator; import org.apache.commons.lang.StringUtils; import org.pentaho.di.core.Const; import org.pentaho.di.core.util.Utils; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.row.RowDataUtil; import org.pentaho.di.core.row.RowMeta; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.job.entries.getpop.MailConnection; import org.pentaho.di.job.entries.getpop.MailConnectionMeta; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.step.BaseStep; import org.pentaho.di.trans.step.StepDataInterface; import org.pentaho.di.trans.step.StepInterface; import org.pentaho.di.trans.step.StepMeta; import org.pentaho.di.trans.step.StepMetaInterface; /** * Read data from POP3/IMAP server and input data to the next steps. * * @author Samatar * @since 21-08-2009 */ public class MailInput extends BaseStep implements StepInterface { private static Class<?> PKG = MailInputMeta.class; // for i18n purposes, needed by Translator2!! private MailInputMeta meta; private MailInputData data; private MessageParser instance = new MessageParser(); public MailInput( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, Trans trans ) { super( stepMeta, stepDataInterface, copyNr, transMeta, trans ); } public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException { meta = (MailInputMeta) smi; data = (MailInputData) sdi; Object[] outputRowData = getOneRow(); if ( outputRowData == null ) { // no more input to be expected... setOutputDone(); return false; } if ( isRowLevel() ) { log.logRowlevel( toString(), BaseMessages.getString( PKG, "MailInput.Log.OutputRow", data.outputRowMeta .getString( outputRowData ) ) ); } putRow( data.outputRowMeta, outputRowData ); // copy row to output rowset(s); if ( data.rowlimit > 0 && data.rownr >= data.rowlimit ) { // limit has been reached: stop now. setOutputDone(); return false; } return true; } public String[] getFolders( String realIMAPFolder ) throws KettleException { data.folderenr = 0; data.messagesCount = 0; data.rownr = 0; String[] folderslist = null; if ( meta.isIncludeSubFolders() ) { String[] folderslist0 = data.mailConn.returnAllFolders( realIMAPFolder ); if ( folderslist0 == null || folderslist0.length == 0 ) { // mstor's default folder has no name folderslist = data.mailConn.getProtocol() == MailConnectionMeta.PROTOCOL_MBOX ? new String[] { "" } : new String[] { Const.NVL( realIMAPFolder, MailConnectionMeta.INBOX_FOLDER ) }; } else { folderslist = new String[folderslist0.length + 1]; folderslist[0] = Const.NVL( realIMAPFolder, MailConnectionMeta.INBOX_FOLDER ); for ( int i = 0; i < folderslist0.length; i++ ) { folderslist[i + 1] = folderslist0[i]; } } } else { folderslist = data.mailConn.getProtocol() == MailConnectionMeta.PROTOCOL_MBOX ? new String[] { "" } : new String[] { Const.NVL( realIMAPFolder, MailConnectionMeta.INBOX_FOLDER ) }; } return folderslist; } private void applySearch( Date beginDate, Date endDate ) { // apply search term? String realSearchSender = environmentSubstitute( meta.getSenderSearchTerm() ); if ( !Utils.isEmpty( realSearchSender ) ) { // apply FROM data.mailConn.setSenderTerm( realSearchSender, meta.isNotTermSenderSearch() ); } String realSearchReceipient = environmentSubstitute( meta.getRecipientSearch() ); if ( !Utils.isEmpty( realSearchReceipient ) ) { // apply TO data.mailConn.setReceipientTerm( realSearchReceipient ); } String realSearchSubject = environmentSubstitute( meta.getSubjectSearch() ); if ( !Utils.isEmpty( realSearchSubject ) ) { // apply Subject data.mailConn.setSubjectTerm( realSearchSubject, meta.isNotTermSubjectSearch() ); } // Received Date switch ( meta.getConditionOnReceivedDate() ) { case MailConnectionMeta.CONDITION_DATE_EQUAL: data.mailConn.setReceivedDateTermEQ( beginDate ); break; case MailConnectionMeta.CONDITION_DATE_GREATER: data.mailConn.setReceivedDateTermGT( beginDate ); break; case MailConnectionMeta.CONDITION_DATE_SMALLER: data.mailConn.setReceivedDateTermLT( beginDate ); break; case MailConnectionMeta.CONDITION_DATE_BETWEEN: data.mailConn.setReceivedDateTermBetween( beginDate, endDate ); break; default: break; } // set FlagTerm? if ( !data.usePOP ) { //POP3 does not support any flags. //but still use ones for IMAP and maybe for MBOX? switch ( meta.getValueImapList() ) { case MailConnectionMeta.VALUE_IMAP_LIST_NEW: data.mailConn.setFlagTermNew(); break; case MailConnectionMeta.VALUE_IMAP_LIST_OLD: data.mailConn.setFlagTermOld(); break; case MailConnectionMeta.VALUE_IMAP_LIST_READ: data.mailConn.setFlagTermRead(); break; case MailConnectionMeta.VALUE_IMAP_LIST_UNREAD: data.mailConn.setFlagTermUnread(); break; case MailConnectionMeta.VALUE_IMAP_LIST_FLAGGED: data.mailConn.setFlagTermFlagged(); break; case MailConnectionMeta.VALUE_IMAP_LIST_NOT_FLAGGED: data.mailConn.setFlagTermNotFlagged(); break; case MailConnectionMeta.VALUE_IMAP_LIST_DRAFT: data.mailConn.setFlagTermDraft(); break; case MailConnectionMeta.VALUE_IMAP_LIST_NOT_DRAFT: data.mailConn.setFlagTermNotDraft(); break; default: break; } } } /** * Build an empty row based on the meta-data... * * @return */ private Object[] buildEmptyRow() { Object[] rowData = RowDataUtil.allocateRowData( data.outputRowMeta.size() ); return rowData; } private boolean isFolderExausted() { return data.folder == null || !data.folderIterator.hasNext(); } private Object[] getOneRow() throws KettleException { while ( isFolderExausted() ) { if ( !openNextFolder() ) { return null; } } Object[] r = buildEmptyRow(); if ( meta.isDynamicFolder() ) { System.arraycopy( data.readrow, 0, r, 0, data.readrow.length ); } try { Message message = data.folderIterator.next(); if ( isDebug() ) { logDebug( BaseMessages.getString( PKG, "MailInput.Log.FetchingMessage", message.getMessageNumber() ) ); } try { instance.parseToArray( r, message ); } catch ( Exception e ) { String msg = e.getMessage(); if ( meta.isStopOnError() ) { throw new KettleException( msg, e ); } else { logError( msg, e ); } } incrementLinesInput(); data.rownr++; } catch ( Exception e ) { throw new KettleException( "Error adding values to row!", e ); } return r; } @SuppressWarnings( "unchecked" ) private boolean openNextFolder() { try { if ( !meta.isDynamicFolder() ) { // static folders list // let's check if we fetched all values in list if ( data.folderenr >= data.folders.length ) { // We have fetched all folders if ( isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "MailInput.Log.FinishedProcessing" ) ); } return false; } } else { // dynamic folders if ( first ) { first = false; data.readrow = getRow(); // Get row from input rowset & set row busy! if ( data.readrow == null ) { if ( isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "MailInput.Log.FinishedProcessing" ) ); } return false; } data.inputRowMeta = getInputRowMeta(); data.outputRowMeta = data.inputRowMeta.clone(); meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore ); // Get total previous fields data.totalpreviousfields = data.inputRowMeta.size(); if ( Utils.isEmpty( meta.getFolderField() ) ) { logError( BaseMessages.getString( PKG, "MailInput.Error.DynamicFolderFieldMissing" ) ); stopAll(); setErrors( 1 ); return false; } data.indexOfFolderField = data.inputRowMeta.indexOfValue( meta.getFolderField() ); if ( data.indexOfFolderField < 0 ) { logError( BaseMessages.getString( PKG, "MailInput.Error.DynamicFolderUnreachable", meta .getFolderField() ) ); stopAll(); setErrors( 1 ); return false; } // get folder String foldername = data.inputRowMeta.getString( data.readrow, data.indexOfFolderField ); if ( isDebug() ) { logDebug( BaseMessages.getString( PKG, "MailInput.Log.FoldernameInStream", meta.getFolderField(), foldername ) ); } data.folders = getFolders( foldername ); } // end if first if ( data.folderenr >= data.folders.length ) { // we have fetched all values for input row // grab another row data.readrow = getRow(); // Get row from input rowset & set row busy! if ( data.readrow == null ) { if ( isDetailed() ) { logDetailed( BaseMessages.getString( PKG, "MailInput.Log.FinishedProcessing" ) ); } return false; } // get folder String foldername = data.inputRowMeta.getString( data.readrow, data.indexOfFolderField ); data.folders = getFolders( foldername ); } } data.start = parseIntWithSubstitute( meta.getStart() ); data.end = parseIntWithSubstitute( meta.getEnd() ); // Get the current folder data.folder = data.folders[data.folderenr]; // Move folder pointer ahead! data.folderenr++; // open folder if ( !data.usePOP && !Utils.isEmpty( data.folder ) ) { data.mailConn.openFolder( data.folder, false ); } else { data.mailConn.openFolder( false ); } if ( meta.useBatch() || ( !Utils.isEmpty( environmentSubstitute( meta.getFirstMails() ) ) && Integer.parseInt( environmentSubstitute( meta.getFirstMails() ) ) > 0 ) ) { // get data by pieces Integer batchSize = meta.useBatch() ? meta.getBatchSize() : Integer.parseInt( environmentSubstitute( meta.getFirstMails() ) ); Integer start = meta.useBatch() ? data.start : 1; Integer end = meta.useBatch() ? data.end : batchSize; data.folderIterator = new BatchFolderIterator( data.mailConn.getFolder(), batchSize, start, end ); // TODO:args if ( data.mailConn.getSearchTerm() != null ) { // add search filter data.folderIterator = new SearchEnabledFolderIterator( data.folderIterator, data.mailConn.getSearchTerm() ); } } else { // fetch all data.mailConn.retrieveMessages(); data.folderIterator = new ArrayIterator( data.mailConn.getMessages() ); } if ( isDebug() ) { logDebug( BaseMessages.getString( PKG, "MailInput.Log.MessagesInFolder", data.folder, data.messagesCount ) ); } } catch ( Exception e ) { logError( "Error opening folder " + data.folderenr + " " + data.folder + ": " + e.toString() ); logError( Const.getStackTracker( e ) ); stopAll(); setErrors( 1 ); return false; } return true; } public boolean init( StepMetaInterface smi, StepDataInterface sdi ) { meta = (MailInputMeta) smi; data = (MailInputData) sdi; if ( !super.init( smi, sdi ) ) { return false; } if ( !meta.isDynamicFolder() ) { try { // Create the output row meta-data data.outputRowMeta = new RowMeta(); meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore ); // get the // metadata // populated } catch ( Exception e ) { logError( BaseMessages.getString( PKG, "MailInput.ErrorInit", e.toString() ) ); logError( Const.getStackTracker( e ) ); return false; } } data.usePOP = meta.getProtocol().equals( MailConnectionMeta.PROTOCOL_STRING_POP3 ); String realserver = environmentSubstitute( meta.getServerName() ); if ( meta.getProtocol().equals( MailConnectionMeta.PROTOCOL_STRING_MBOX ) && StringUtils.startsWith( realserver, "file://" ) ) { realserver = StringUtils.remove( realserver, "file://" ); } String realusername = environmentSubstitute( meta.getUserName() ); String realpassword = Utils.resolvePassword( variables, meta.getPassword() ); int realport = Const.toInt( environmentSubstitute( meta.getPort() ), -1 ); String realProxyUsername = environmentSubstitute( meta.getProxyUsername() ); if ( !meta.isDynamicFolder() ) { //Limit field has absolute priority String reallimitrow = environmentSubstitute( meta.getRowLimit() ); int limit = Const.toInt( reallimitrow, 0 ); //Limit field has absolute priority if ( limit == 0 ) { limit = getReadFirst( meta.getProtocol() ); } data.rowlimit = limit; } Date beginDate = null; Date endDate = null; SimpleDateFormat df = new SimpleDateFormat( MailInputMeta.DATE_PATTERN ); // check search terms // Received Date try { switch ( meta.getConditionOnReceivedDate() ) { case MailConnectionMeta.CONDITION_DATE_EQUAL: case MailConnectionMeta.CONDITION_DATE_GREATER: case MailConnectionMeta.CONDITION_DATE_SMALLER: String realBeginDate = environmentSubstitute( meta.getReceivedDate1() ); if ( Utils.isEmpty( realBeginDate ) ) { throw new KettleException( BaseMessages.getString( PKG, "MailInput.Error.ReceivedDateSearchTermEmpty" ) ); } beginDate = df.parse( realBeginDate ); break; case MailConnectionMeta.CONDITION_DATE_BETWEEN: realBeginDate = environmentSubstitute( meta.getReceivedDate1() ); if ( Utils.isEmpty( realBeginDate ) ) { throw new KettleException( BaseMessages.getString( PKG, "MailInput.Error.ReceivedDatesSearchTermEmpty" ) ); } beginDate = df.parse( realBeginDate ); String realEndDate = environmentSubstitute( meta.getReceivedDate2() ); if ( Utils.isEmpty( realEndDate ) ) { throw new KettleException( BaseMessages.getString( PKG, "MailInput.Error.ReceivedDatesSearchTermEmpty" ) ); } endDate = df.parse( realEndDate ); break; default: break; } } catch ( Exception e ) { logError( BaseMessages.getString( PKG, "MailInput.Error.SettingSearchTerms", e.getMessage() ) ); setErrors( 1 ); stopAll(); } try { // create a mail connection object data.mailConn = new MailConnection( log, MailConnectionMeta.getProtocolFromString( meta.getProtocol(), MailConnectionMeta.PROTOCOL_IMAP ), realserver, realport, realusername, realpassword, meta.isUseSSL(), meta.isUseProxy(), realProxyUsername ); // connect data.mailConn.connect(); // Need to apply search filters? applySearch( beginDate, endDate ); if ( !meta.isDynamicFolder() ) { // pass static folder name String realIMAPFolder = environmentSubstitute( meta.getIMAPFolder() ); // return folders list // including sub folders if necessary data.folders = getFolders( realIMAPFolder ); } } catch ( Exception e ) { logError( BaseMessages.getString( PKG, "MailInput.Error.OpeningConnection", e.getMessage() ) ); setErrors( 1 ); stopAll(); } data.nrFields = meta.getInputFields() != null ? meta.getInputFields().length : 0; return true; } private int getReadFirst( String protocol ) { if ( protocol.equals( MailConnectionMeta.PROTOCOL_STRING_POP3 ) ) { return Const.toInt( meta.getFirstMails(), 0 ); } if ( protocol.equals( MailConnectionMeta.PROTOCOL_STRING_IMAP ) ) { return Const.toInt( meta.getFirstIMAPMails(), 0 ); } //and we do not have this option for MBOX on UI. return 0; } public void dispose( StepMetaInterface smi, StepDataInterface sdi ) { meta = (MailInputMeta) smi; data = (MailInputData) sdi; if ( data.mailConn != null ) { try { data.mailConn.disconnect(); data.mailConn = null; } catch ( Exception e ) { /* Ignore */ } } super.dispose( smi, sdi ); } private Integer parseIntWithSubstitute( String toParse ) { toParse = environmentSubstitute( toParse ); if ( !StringUtils.isEmpty( toParse ) ) { try { return Integer.parseInt( toParse ); } catch ( NumberFormatException e ) { log.logError( e.getLocalizedMessage() ); } } return null; } /** * Extracted message parse algorithm to be able to unit test separately * */ class MessageParser { Object[] parseToArray( Object[] r, Message message ) throws Exception { // Execute for each Input field... for ( int i = 0; i < data.nrFields; i++ ) { int index = data.totalpreviousfields + i; try { switch ( meta.getInputFields()[i].getColumn() ) { case MailInputField.COLUMN_MESSAGE_NR: r[index] = new Long( message.getMessageNumber() ); break; case MailInputField.COLUMN_SUBJECT: r[index] = message.getSubject(); break; case MailInputField.COLUMN_SENDER: r[index] = StringUtils.join( message.getFrom(), ";" ); break; case MailInputField.COLUMN_REPLY_TO: r[index] = StringUtils.join( message.getReplyTo(), ";" ); break; case MailInputField.COLUMN_RECIPIENTS: r[index] = StringUtils.join( message.getAllRecipients(), ";" ); break; case MailInputField.COLUMN_DESCRIPTION: r[index] = message.getDescription(); break; case MailInputField.COLUMN_BODY: r[index] = data.mailConn.getMessageBody( message ); break; case MailInputField.COLUMN_RECEIVED_DATE: Date receivedDate = message.getReceivedDate(); r[index] = receivedDate != null ? new Date( receivedDate.getTime() ) : null; break; case MailInputField.COLUMN_SENT_DATE: Date sentDate = message.getSentDate(); r[index] = sentDate != null ? new Date( sentDate.getTime() ) : null; break; case MailInputField.COLUMN_CONTENT_TYPE: r[index] = message.getContentType(); break; case MailInputField.COLUMN_FOLDER_NAME: r[index] = data.mailConn.getFolderName(); break; case MailInputField.COLUMN_SIZE: r[index] = new Long( message.getSize() ); break; case MailInputField.COLUMN_FLAG_DRAFT: r[index] = new Boolean( data.mailConn.isMessageDraft( message ) ); break; case MailInputField.COLUMN_FLAG_FLAGGED: r[index] = new Boolean( data.mailConn.isMessageFlagged( message ) ); break; case MailInputField.COLUMN_FLAG_NEW: r[index] = new Boolean( data.mailConn.isMessageNew( message ) ); break; case MailInputField.COLUMN_FLAG_READ: r[index] = new Boolean( data.mailConn.isMessageRead( message ) ); break; case MailInputField.COLUMN_FLAG_DELETED: r[index] = new Boolean( data.mailConn.isMessageDeleted( message ) ); break; case MailInputField.COLUMN_ATTACHED_FILES_COUNT: r[index] = new Long( data.mailConn.getAttachedFilesCount( message, null ) ); break; case MailInputField.COLUMN_HEADER: String name = meta.getInputFields()[i].getName(); // *only one name String[] arr = { name }; // this code was before generic epoch Enumeration<?> en = message.getMatchingHeaders( arr ); if ( en == null ) { r[index] = ""; break; } List<String> headers = new ArrayList<String>(); while ( en.hasMoreElements() ) { Header next = Header.class.cast( en.nextElement() ); headers.add( next.getValue() ); } // [PDI-6532] if there is no matching headers return empty String r[index] = headers.isEmpty() ? "" : StringUtils.join( headers, ";" ); break; case MailInputField.COLUMN_BODY_CONTENT_TYPE: r[index] = data.mailConn.getMessageBodyContentType( message ); break; default: break; } } catch ( Exception e ) { String errMsg = "Error adding value for field " + meta.getInputFields()[i].getName(); throw new Exception( errMsg, e ); } } return r; } } }