/* $Id$ */ /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.manifoldcf.crawler.connectors.email; import org.apache.commons.lang.StringUtils; import org.apache.manifoldcf.agents.interfaces.RepositoryDocument; import org.apache.manifoldcf.agents.interfaces.ServiceInterruption; import org.apache.manifoldcf.core.interfaces.*; import org.apache.manifoldcf.core.util.URLEncoder; import org.apache.manifoldcf.crawler.interfaces.IExistingVersions; import org.apache.manifoldcf.crawler.interfaces.IProcessActivity; import org.apache.manifoldcf.crawler.interfaces.ISeedingActivity; import org.apache.manifoldcf.crawler.system.Logging; import javax.mail.*; import javax.mail.internet.MimeMessage; import javax.mail.search.*; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InterruptedIOException; import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * This interface describes an instance of a connection between a repository and ManifoldCF's * standard "pull" ingestion agent. 
* <p/>
 * Each instance of this interface is used in only one thread at a time. Connection Pooling
 * on these kinds of objects is performed by the factory which instantiates repository connectors
 * from symbolic names and config parameters, and is pooled by these parameters. That is, a pooled connector
 * handle is used only if all the connection parameters for the handle match.
 * <p/>
 * Implementers of this interface should provide a default constructor which has this signature:
 * <p/>
 * xxx();
 * <p/>
 * Connectors are either configured or not. If configured, they will persist in a pool, and be
 * reused multiple times. Certain methods of a connector may be called before the connector is
 * configured. This includes basically all methods that permit inspection of the connector's
 * capabilities.
 * <p/>
 * The purpose of the repository connector is to allow documents to be fetched from the repository.
 * <p/>
 * Each repository connector describes a set of documents that are known only to that connector.
 * It therefore establishes a space of document identifiers. Each connector will only ever be
 * asked to deal with identifiers that have in some way originated from the connector.
 * <p/>
 * In this connector, addSeedDocuments() queues one document identifier per message found in the
 * configured folders, and processDocuments() fetches each message (or one of its attachments),
 * skipping documents for which the framework reports that no re-indexing is needed.
 * NOTE(review): the description this comment was copied from also mentions getDocuments() and
 * getDocumentVersions(); no such methods are visible in this part of the file - confirm before relying on it.
 */
public class EmailConnector extends org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector {

  /** How long (in milliseconds) a mail session may stay idle before poll() closes it; 300000 ms is five minutes. */
  protected final static long SESSION_EXPIRATION_MILLISECONDS = 300000L;

  // Local variables.
protected long sessionExpiration = -1L; // Parameters for establishing a session protected String server = null; protected String portString = null; protected String username = null; protected String password = null; protected String protocol = null; protected Properties properties = null; protected String urlTemplate = null; protected String attachmentUrlTemplate = null; // Local session handle protected EmailSession session = null; private static Map<String,String> providerMap; static { providerMap = new HashMap<String,String>(); providerMap.put(EmailConfig.PROTOCOL_POP3, EmailConfig.PROTOCOL_POP3_PROVIDER); providerMap.put(EmailConfig.PROTOCOL_POP3S, EmailConfig.PROTOCOL_POP3S_PROVIDER); providerMap.put(EmailConfig.PROTOCOL_IMAP, EmailConfig.PROTOCOL_IMAP_PROVIDER); providerMap.put(EmailConfig.PROTOCOL_IMAPS, EmailConfig.PROTOCOL_IMAPS_PROVIDER); } //////////////////////////////////Start of Basic Connector Methods///////////////////////// /** * Connect. * * @param configParameters is the set of configuration parameters, which * in this case describe the root directory. 
*/ @Override public void connect(ConfigParams configParameters) { super.connect(configParameters); this.server = configParameters.getParameter(EmailConfig.SERVER_PARAM); this.portString = configParameters.getParameter(EmailConfig.PORT_PARAM); this.protocol = configParameters.getParameter(EmailConfig.PROTOCOL_PARAM); this.username = configParameters.getParameter(EmailConfig.USERNAME_PARAM); this.password = configParameters.getObfuscatedParameter(EmailConfig.PASSWORD_PARAM); this.urlTemplate = configParameters.getParameter(EmailConfig.URL_PARAM); this.attachmentUrlTemplate = configParameters.getParameter(EmailConfig.ATTACHMENT_URL_PARAM); this.properties = new Properties(); int i = 0; while (i < configParameters.getChildCount()) //In post property set is added as a configuration node { ConfigNode cn = configParameters.getChild(i++); if (cn.getType().equals(EmailConfig.NODE_PROPERTIES)) { String findParameterName = cn.getAttributeValue(EmailConfig.ATTRIBUTE_NAME); String findParameterValue = cn.getAttributeValue(EmailConfig.ATTRIBUTE_VALUE); this.properties.setProperty(findParameterName, findParameterValue); } } } /** * Close the connection. Call this before discarding this instance of the * repository connector. */ @Override public void disconnect() throws ManifoldCFException { this.attachmentUrlTemplate = null; this.urlTemplate = null; this.server = null; this.portString = null; this.protocol = null; this.username = null; this.password = null; this.properties = null; finalizeConnection(); super.disconnect(); } /** * This method is periodically called for all connectors that are connected but not * in active use. */ @Override public void poll() throws ManifoldCFException { if (session != null) { if (System.currentTimeMillis() >= sessionExpiration) finalizeConnection(); } } /** * Test the connection. Returns a string describing the connection integrity. * * @return the connection's status as a displayable string. 
*/ @Override public String check() throws ManifoldCFException { try { checkConnection(); return super.check(); } catch (ServiceInterruption e) { return "Connection temporarily failed: " + e.getMessage(); } catch (ManifoldCFException e) { return "Connection failed: " + e.getMessage(); } } protected void checkConnection() throws ManifoldCFException, ServiceInterruption { // Force a re-connection finalizeConnection(); getSession(); try { CheckConnectionThread cct = new CheckConnectionThread(session); cct.start(); cct.finishUp(); } catch (InterruptedException e) { throw new ManifoldCFException(e.getMessage(),ManifoldCFException.INTERRUPTED); } catch (MessagingException e) { handleMessagingException(e,"checking the connection"); } } ///////////////////////////////End of Basic Connector Methods//////////////////////////////////////// //////////////////////////////Start of Repository Connector Method/////////////////////////////////// @Override public int getConnectorModel() { return MODEL_ADD; //Change is not applicable in context of email } /** * Return the list of activities that this connector supports (i.e. writes into the log). * * @return the list. */ @Override public String[] getActivitiesList() { return new String[]{EmailConfig.ACTIVITY_FETCH}; } /** * Get the bin name strings for a document identifier. The bin name describes the queue to which the * document will be assigned for throttling purposes. Throttling controls the rate at which items in a * given queue are fetched; it does not say anything about the overall fetch rate, which may operate on * multiple queues or bins. * For example, if you implement a web crawler, a good choice of bin name would be the server name, since * that is likely to correspond to a real resource that will need real throttle protection. * * @param documentIdentifier is the document identifier. * @return the set of bin names. 
If an empty array were returned, that would be equivalent to there being no request
   * rate throttling available for this identifier.
   */
  @Override
  public String[] getBinNames(String documentIdentifier) {
    // Every document of this connection shares one bin: the mail server.
    final String[] binNames = new String[1];
    binNames[0] = server;
    return binNames;
  }

  /**
   * Report how many documents may be handed to this connector in a single batch.
   *
   * @return the batch size limit; a value of 0 would mean "unlimited".
   */
  @Override
  public int getMaxDocumentRequest() {
    final int maxDocumentsPerBatch = 10;
    return maxDocumentsPerBatch;
  }

  /** Queue "seed" documents. Seed documents are the starting points of a crawl; this method
  * registers them by calling the appropriate methods of the supplied ISeedingActivity object.
  *
  * The method may restrict itself to repository changes that fall inside the given time interval.
  * How the framework treats the recorded seeds depends on the value returned by getConnectorModel().
  *
  * Producing more seeds than strictly necessary is not harmful; it only affects the total
  * amount of work to be done.
  *
  * The end time and the seeding version string may be interpreted in whatever way is most efficient.
  * For continuous crawling jobs this method is called once when the job starts and then again at
  * periodic intervals while the job runs.
  *
  * Whenever a job's specification changes, the framework resets the seeding version string to null.
  * Depending on the connector model returned by getConnectorModel(), the seeding version string may
  * also be null at the start of every job run.
  *
  * It is always acceptable to send MORE documents rather than fewer.
  * The connector will be connected before this method can be called.
  *@param activities is the interface this method should use to perform whatever framework actions are desired.
  *@param spec is a document specification (that comes from the job).
  *@param lastSeedVersion is the last seeding version string for this job, or null if the job has no previous seeding version string.
*@param seedTime is the end of the time range of documents to consider, exclusive. *@param jobMode is an integer describing how the job is being run, whether continuous or once-only. *@return an updated seeding version string, to be stored with the job. */ @Override public String addSeedDocuments(ISeedingActivity activities, Specification spec, String lastSeedVersion, long seedTime, int jobMode) throws ManifoldCFException, ServiceInterruption { long startTime; if (lastSeedVersion == null) startTime = 0L; else { // Unpack seed time from seed version string startTime = new Long(lastSeedVersion).longValue(); } getSession(); int i = 0; Map<String,String> findMap = new HashMap<String,String>(); List<String> folderNames = new ArrayList<String>(); while (i < spec.getChildCount()) { SpecificationNode sn = spec.getChild(i++); if (sn.getType().equals(EmailConfig.NODE_FOLDER)) { folderNames.add(sn.getAttributeValue(EmailConfig.ATTRIBUTE_NAME)); } else if (sn.getType().equals(EmailConfig.NODE_FILTER)) { String findParameterName, findParameterValue; findParameterName = sn.getAttributeValue(EmailConfig.ATTRIBUTE_NAME); findParameterValue = sn.getAttributeValue(EmailConfig.ATTRIBUTE_VALUE); findMap.put(findParameterName, findParameterValue); } } for (String folderName : folderNames) { try { OpenFolderThread oft = new OpenFolderThread(session, folderName); oft.start(); Folder folder = oft.finishUp(); try { Message[] messages = findMessages(folder, startTime, seedTime, findMap); for (Message message : messages) { String emailID = ((MimeMessage) message).getMessageID(); activities.addSeedDocument(createDocumentIdentifier(folderName,emailID)); } } finally { CloseFolderThread cft = new CloseFolderThread(session, folder); cft.start(); cft.finishUp(); } } catch (InterruptedException e) { throw new ManifoldCFException(e.getMessage(),ManifoldCFException.INTERRUPTED); } catch (MessagingException e) { handleMessagingException(e, "finding emails"); } } return new Long(seedTime).toString(); } 
/* This method will return the list of messages which matches the given criteria */ private Message[] findMessages(Folder folder, long startTime, long endTime, Map<String,String> findMap) throws MessagingException, InterruptedException { String findParameterName; String findParameterValue; SearchTerm searchTerm = null; Iterator<Map.Entry<String,String>> it = findMap.entrySet().iterator(); while (it.hasNext()) { Map.Entry<String,String> pair = it.next(); findParameterName = pair.getKey().toLowerCase(Locale.ROOT); findParameterValue = pair.getValue(); if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Email: Finding emails where '" + findParameterName + "' = '" + findParameterValue + "'"); SearchTerm searchClause = null; Integer comparisonTerm = null; if (findParameterName.equals(EmailConfig.EMAIL_SUBJECT)) { searchClause = new SubjectTerm(findParameterValue); } else if (findParameterName.equals(EmailConfig.EMAIL_FROM)) { searchClause = new FromStringTerm(findParameterValue); } else if (findParameterName.equals(EmailConfig.EMAIL_TO)) { searchClause = new RecipientStringTerm(Message.RecipientType.TO, findParameterValue); } else if (findParameterName.equals(EmailConfig.EMAIL_BODY)) { searchClause = new BodyTerm(findParameterValue); } else if (findParameterName.equals(EmailConfig.EMAIL_START_DATE)) { comparisonTerm = ComparisonTerm.GT; } else if (findParameterName.equals(EmailConfig.EMAIL_END_DATE)) { comparisonTerm = ComparisonTerm.LT; } if (comparisonTerm != null) { SimpleDateFormat date = new SimpleDateFormat(EmailConfig.EMAIL_FILTERING_DATE_FORMAT); try { searchClause = new ReceivedDateTerm(comparisonTerm, date.parse(findParameterValue)); } catch (ParseException e) { Logging.connectors.warn("Email: Unknown date format: '" + findParameterValue + "'for filter parameter name: '" + findParameterName + "'"); } } if (searchClause != null) { if (searchTerm == null) searchTerm = searchClause; else searchTerm = new AndTerm(searchTerm, searchClause); } else { 
Logging.connectors.warn("Email: Unknown filter parameter name: '"+findParameterName+"'"); } } Message[] result; if (searchTerm == null) { GetMessagesThread gmt = new GetMessagesThread(session, folder); gmt.start(); result = gmt.finishUp(); } else { SearchMessagesThread smt = new SearchMessagesThread(session, folder, searchTerm); smt.start(); result = smt.finishUp(); } return result; } protected void getSession() throws ManifoldCFException, ServiceInterruption { if (session == null) { // Check that all the required parameters are there. if (urlTemplate == null) throw new ManifoldCFException("Missing url parameter"); if (server == null) throw new ManifoldCFException("Missing server parameter"); if (properties == null) throw new ManifoldCFException("Missing server properties"); if (protocol == null) throw new ManifoldCFException("Missing protocol parameter"); // Create a session. int port; if (portString != null && portString.length() > 0) { try { port = Integer.parseInt(portString); } catch (NumberFormatException e) { throw new ManifoldCFException("Port number has bad format: "+e.getMessage(),e); } } else port = -1; try { ConnectThread connectThread = new ConnectThread(server, port, username, password, providerMap.get(protocol), properties); connectThread.start(); session = connectThread.finishUp(); } catch (InterruptedException e) { throw new ManifoldCFException(e.getMessage(),ManifoldCFException.INTERRUPTED); } catch (MessagingException e) { handleMessagingException(e, "connecting"); } } sessionExpiration = System.currentTimeMillis() + SESSION_EXPIRATION_MILLISECONDS; } protected void finalizeConnection() { if (session != null) { try { CloseSessionThread closeSessionThread = new CloseSessionThread(session); closeSessionThread.start(); closeSessionThread.finishUp(); } catch (InterruptedException e) { } catch (MessagingException e) { Logging.connectors.warn("Error while closing connection to server: " + e.getMessage(),e); } finally { session = null; } } } /** Process 
a set of documents. * This is the method that should cause each document to be fetched, processed, and the results either added * to the queue of documents for the current job, and/or entered into the incremental ingestion manager. * The document specification allows this class to filter what is done based on the job. * The connector will be connected before this method can be called. *@param documentIdentifiers is the set of document identifiers to process. *@param statuses are the currently-stored document versions for each document in the set of document identifiers * passed in above. *@param activities is the interface this method should use to queue up new document references * and ingest documents. *@param jobMode is an integer describing how the job is being run, whether continuous or once-only. *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one. */ @Override public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec, IProcessActivity activities, int jobMode, boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption { List<String> requiredMetadata = new ArrayList<String>(); boolean useEmailExtractor = false; for (int i = 0; i < spec.getChildCount(); i++) { SpecificationNode sn = spec.getChild(i); if (sn.getType().equals(EmailConfig.NODE_METADATA)) { String metadataAttribute = sn.getAttributeValue(EmailConfig.ATTRIBUTE_NAME); requiredMetadata.add(metadataAttribute); } if (sn.getType().equals(EmailConfig.NODE_EXTRACT_EMAIL)) { useEmailExtractor = true; } } // Keep a cached set of open folders Map<String,Folder> openFolders = new HashMap<String,Folder>(); try { for (String documentIdentifier : documentIdentifiers) { final Integer attachmentIndex = extractAttachmentNumberFromDocumentIdentifier(documentIdentifier); if (attachmentIndex == null) { // It's an email String versionString = "_" + urlTemplate; // NOT empty; we need to make 
ManifoldCF understand that this is a document that never will change. // Check if we need to index if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString)) continue; String compositeID = documentIdentifier; String version = versionString; String folderName = extractFolderNameFromDocumentIdentifier(compositeID); String id = extractEmailIDFromDocumentIdentifier(compositeID); String errorCode = null; String errorDesc = null; Long fileLengthLong = null; long startTime = System.currentTimeMillis(); try { try { Folder folder = openFolders.get(folderName); if (folder == null) { getSession(); OpenFolderThread oft = new OpenFolderThread(session, folderName); oft.start(); folder = oft.finishUp(); openFolders.put(folderName,folder); } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Email: Processing document identifier '" + compositeID + "'"); SearchTerm messageIDTerm = new MessageIDTerm(id); getSession(); SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm); smt.start(); Message[] message = smt.finishUp(); String msgURL = makeDocumentURI(urlTemplate, folderName, id); Message msg = null; for (Message msg2 : message) { msg = msg2; } if (msg == null) { // email was not found activities.deleteDocument(documentIdentifier); continue; } if (!activities.checkURLIndexable(msgURL)) { errorCode = activities.EXCLUDED_URL; errorDesc = "Excluded because of URL ('"+msgURL+"')"; activities.noDocument(documentIdentifier, version); continue; } long fileLength = msg.getSize(); if (!activities.checkLengthIndexable(fileLength)) { errorCode = activities.EXCLUDED_LENGTH; errorDesc = "Excluded because of length ("+fileLength+")"; activities.noDocument(documentIdentifier, version); continue; } Date sentDate = msg.getSentDate(); if (!activities.checkDateIndexable(sentDate)) { errorCode = activities.EXCLUDED_DATE; errorDesc = "Excluded because of date ("+sentDate+")"; activities.noDocument(documentIdentifier, version); continue; 
} String mimeType = "text/plain"; if (!activities.checkMimeTypeIndexable(mimeType)) { errorCode = activities.EXCLUDED_MIMETYPE; errorDesc = "Excluded because of mime type ('"+mimeType+"')"; activities.noDocument(documentIdentifier, version); continue; } RepositoryDocument rd = new RepositoryDocument(); rd.setFileName(msg.getFileName()); rd.setMimeType(mimeType); rd.setCreatedDate(sentDate); rd.setModifiedDate(sentDate); for (String metadata : requiredMetadata) { if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_TO)) { Address[] to = msg.getRecipients(Message.RecipientType.TO); if (to != null) { String[] toStr = new String[to.length]; int j = 0; for (Address address : to) { toStr[j] = useEmailExtractor ? extractEmailAddress(address.toString()) : address.toString(); j++; } rd.addField(EmailConfig.EMAIL_TO, toStr); } } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_FROM)) { Address[] from = msg.getFrom(); String[] fromStr = new String[from.length]; int j = 0; for (Address address : from) { fromStr[j] = useEmailExtractor ? 
extractEmailAddress(address.toString()) : address.toString(); j++; } rd.addField(EmailConfig.EMAIL_FROM, fromStr); } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_SUBJECT)) { String subject = msg.getSubject(); rd.addField(EmailConfig.EMAIL_SUBJECT, subject); } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_DATE)) { rd.addField(EmailConfig.EMAIL_DATE, sentDate.toString()); } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_ENCODING)) { Object o = msg.getContent(); if (o != null) { if (o instanceof Multipart) { Multipart mp = (Multipart) o; String[] encoding = new String[mp.getCount()]; for (int k = 0, n = mp.getCount(); k < n; k++) { Part part = mp.getBodyPart(k); if (isAttachment(part)) { final String[] fileSplit = part.getFileName().split("\\?"); if (fileSplit.length > 1) { encoding[k] = fileSplit[1]; } else { encoding[k] = ""; } } } rd.addField(EmailConfig.ENCODING_FIELD, encoding); } else if (o instanceof String) { rd.addField(EmailConfig.ENCODING_FIELD, ""); } } } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENT_MIMETYPE)) { Object o = msg.getContent(); if (o != null) { if (o instanceof Multipart) { Multipart mp = (Multipart) o; String[] MIMEType = new String[mp.getCount()]; for (int k = 0, n = mp.getCount(); k < n; k++) { Part part = mp.getBodyPart(k); if (isAttachment(part)) { MIMEType[k] = part.getContentType(); } } rd.addField(EmailConfig.MIMETYPE_FIELD, MIMEType); } else if (o instanceof String) { rd.addField(EmailConfig.MIMETYPE_FIELD, ""); } } } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_ATTACHMENTNAME)) { Object o = msg.getContent(); if (o != null) { if (o instanceof Multipart) { Multipart mp = (Multipart) o; String[] attachmentNames = new String[mp.getCount()]; for (int k = 0, n = mp.getCount(); k < n; k++) { Part part = mp.getBodyPart(k); if (isAttachment(part)) { attachmentNames[k] = part.getFileName(); } } 
rd.addField(EmailConfig.ATTACHMENTNAME_FIELD, attachmentNames); } else if (o instanceof String) { rd.addField(EmailConfig.ATTACHMENTNAME_FIELD, ""); } } } } //Content includes both body and attachments, //Body will be set as content and attachments will be indexed as separate documents. final EmailContent bodyContent = extractBodyContent(msg); if(bodyContent != null) { rd.setMimeType(bodyContent.getMimeType()); InputStream is = new ByteArrayInputStream(bodyContent.getContent().getBytes(StandardCharsets.UTF_8)); try { rd.setBinary(is, fileLength); activities.ingestDocumentWithException(documentIdentifier, version, msgURL, rd); errorCode = "OK"; fileLengthLong = new Long(fileLength); } finally { is.close(); } } // If we're supposed to deal with attachments, this is the time to queue them up if (attachmentUrlTemplate != null) { if (msg.getContent() != null && msg.getContent() instanceof Multipart) { final Multipart mp = (Multipart) msg.getContent(); final int numAttachments = mp.getCount(); for (int i = 0; i < numAttachments; i++) { if (isAttachment(mp.getBodyPart(i))) { activities.addDocumentReference(documentIdentifier + ":" + i); } } } } } catch (InterruptedException e) { throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED); } catch (MessagingException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); handleMessagingException(e, "processing email"); } catch (IOException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); handleIOException(e, "processing email"); throw new ManifoldCFException(e.getMessage(), e); } } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) errorCode = null; throw e; } finally { if (errorCode != null) activities.recordActivity(new Long(startTime),EmailConfig.ACTIVITY_FETCH, fileLengthLong,documentIdentifier,errorCode,errorDesc,null); } } else { // It's a specific attachment final 
int attachmentNumber = attachmentIndex; String versionString = "_" + attachmentUrlTemplate; // NOT empty; we need to make ManifoldCF understand that this is a document that never will change. // Check if we need to index if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString)) continue; String compositeID = documentIdentifier; String version = versionString; String folderName = extractFolderNameFromDocumentIdentifier(compositeID); String id = extractEmailIDFromDocumentIdentifier(compositeID); String errorCode = null; String errorDesc = null; Long fileLengthLong = null; long startTime = System.currentTimeMillis(); try { try { Folder folder = openFolders.get(folderName); if (folder == null) { getSession(); OpenFolderThread oft = new OpenFolderThread(session, folderName); oft.start(); folder = oft.finishUp(); openFolders.put(folderName,folder); } if (Logging.connectors.isDebugEnabled()) Logging.connectors.debug("Email: Processing document identifier '" + documentIdentifier + "'"); SearchTerm messageIDTerm = new MessageIDTerm(id); getSession(); SearchMessagesThread smt = new SearchMessagesThread(session, folder, messageIDTerm); smt.start(); Message[] message = smt.finishUp(); String msgURL = makeDocumentURI(attachmentUrlTemplate, folderName, id, attachmentNumber); Message msg = null; for (Message msg2 : message) { msg = msg2; } if (msg == null) { // email was not found activities.deleteDocument(documentIdentifier); continue; } if (!activities.checkURLIndexable(msgURL)) { errorCode = activities.EXCLUDED_URL; errorDesc = "Excluded because of URL ('"+msgURL+"')"; activities.noDocument(documentIdentifier, version); continue; } final Date sentDate = msg.getSentDate(); if (!activities.checkDateIndexable(sentDate)) { errorCode = activities.EXCLUDED_DATE; errorDesc = "Excluded because of date ("+sentDate+")"; activities.noDocument(documentIdentifier, version); continue; } final Multipart mp = (Multipart) msg.getContent(); if (mp.getCount() <= 
attachmentNumber) { activities.deleteDocument(documentIdentifier); continue; } final Part part = mp.getBodyPart(attachmentNumber); final long fileLength = part.getSize(); if (!activities.checkLengthIndexable(fileLength)) { errorCode = activities.EXCLUDED_LENGTH; errorDesc = "Excluded because of length ("+fileLength+")"; activities.noDocument(documentIdentifier, version); continue; } final String origMimeType = part.getContentType(); final String mimeType; //MSExchange puts crap after the mime type so it has to be munged. // Example: "application/msword; name=SampleDOCFile_100kb.doc" if (origMimeType == null || origMimeType.indexOf(";") == -1) { mimeType = origMimeType; } else { mimeType = origMimeType.substring(0, origMimeType.indexOf(";")); } if (!activities.checkMimeTypeIndexable(mimeType)) { errorCode = activities.EXCLUDED_MIMETYPE; errorDesc = "Excluded because of mime type ('"+mimeType+"')"; activities.noDocument(documentIdentifier, version); continue; } RepositoryDocument rd = new RepositoryDocument(); rd.setFileName(part.getFileName()); rd.setMimeType(mimeType); rd.setCreatedDate(sentDate); rd.setModifiedDate(sentDate); for (String metadata : requiredMetadata) { if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_TO)) { Address[] to = msg.getRecipients(Message.RecipientType.TO); if (to != null) { String[] toStr = new String[to.length]; int j = 0; for (Address address : to) { toStr[j] = useEmailExtractor ? extractEmailAddress(address.toString()) : address.toString(); j++; } rd.addField(EmailConfig.EMAIL_TO, toStr); } } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_FROM)) { Address[] from = msg.getFrom(); String[] fromStr = new String[from.length]; int j = 0; for (Address address : from) { fromStr[j] = useEmailExtractor ? 
extractEmailAddress(address.toString()) : address.toString(); j++; } rd.addField(EmailConfig.EMAIL_FROM, fromStr); } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_SUBJECT)) { String subject = msg.getSubject(); //Attachments may have a field named "subject". So, different field name is used not to clash. rd.addField(EmailConfig.MAILSUBJECT_FIELD, subject); } else if (metadata.toLowerCase(Locale.ROOT).equals(EmailConfig.EMAIL_DATE)) { rd.addField(EmailConfig.EMAIL_DATE, sentDate.toString()); } } final InputStream is = part.getInputStream(); try { rd.setBinary(is, fileLength); activities.ingestDocumentWithException(documentIdentifier, version, msgURL, rd); errorCode = "OK"; fileLengthLong = new Long(fileLength); } finally { is.close(); } } catch (InterruptedException e) { throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED); } catch (MessagingException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); handleMessagingException(e, "processing email attachment"); } catch (IOException e) { errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT); errorDesc = e.getMessage(); handleIOException(e, "processing email attachment"); throw new ManifoldCFException(e.getMessage(), e); } } catch (ManifoldCFException e) { if (e.getErrorCode() == ManifoldCFException.INTERRUPTED) errorCode = null; throw e; } finally { if (errorCode != null) activities.recordActivity(new Long(startTime),EmailConfig.ACTIVITY_FETCH, fileLengthLong,documentIdentifier,errorCode,errorDesc,null); } } } } finally { for (Folder f : openFolders.values()) { try { CloseFolderThread cft = new CloseFolderThread(session, f); cft.start(); cft.finishUp(); } catch (InterruptedException e) { throw new ManifoldCFException(e.getMessage(),ManifoldCFException.INTERRUPTED); } catch (MessagingException e) { handleMessagingException(e, "closing folders"); } } } } private EmailContent getContent(Part part) throws 
MessagingException, IOException { if (part.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) { return new EmailContent(part.getContent().toString()); } else if(part.isMimeType(EmailConfig.MIMETYPE_HTML)) { return new EmailContent(part.getContent().toString(), EmailConfig.MIMETYPE_HTML); } if (part.isMimeType(EmailConfig.MIMETYPE_MULTIPART_ALTERNATIVE)) { // prefer html text over plain text Multipart mp = (Multipart) part.getContent(); EmailContent emailContent = null; for (int i = 0; i < mp.getCount(); i++) { Part bodyPart = mp.getBodyPart(i); if (bodyPart.isMimeType(EmailConfig.MIMETYPE_TEXT_PLAIN)) { if (emailContent == null) { emailContent = getContent(bodyPart); } continue; } else if (bodyPart.isMimeType(EmailConfig.MIMETYPE_HTML)) { emailContent = getContent(bodyPart); if (emailContent != null) { return emailContent; } } else { return getContent(bodyPart); } } return emailContent; } else if (part.isMimeType(EmailConfig.MIMETYPE_MULTIPART_GENERIC)) { Multipart mp = (Multipart) part.getContent(); for (int i = 0; i < mp.getCount(); i++) { EmailContent emailContent = getContent(mp.getBodyPart(i)); if (emailContent != null) { return emailContent; } } } return null; } private EmailContent extractBodyContent(Message msg) throws MessagingException, IOException { EmailContent emailContent = null; Object o = msg.getContent(); if (o instanceof Multipart) { Multipart mp = (Multipart) msg.getContent(); for (int k = 0, n = mp.getCount(); k < n; k++) { Part part = mp.getBodyPart(k); String disposition = part.getDisposition(); if (disposition == null) { EmailContent content = getContent(part); if (content != null) { emailContent = content; } } } } else if (o instanceof String) { emailContent = new EmailContent((String)o); } return emailContent; } /** * Checks whether a Part is an attachment or not * @param part Part to check * @return is attachment or not */ private boolean isAttachment(Part part) throws MessagingException { String disposition = part.getDisposition(); return 
((disposition != null) && ((disposition.toLowerCase(Locale.ROOT).equals(Part.ATTACHMENT) || (disposition.toLowerCase(Locale.ROOT).equals(Part.INLINE))))); } /** * Extracts e-mail address within < and > characters if any. * If not, returns passed raw mail address. * * @param rawEmailAddress e-mail address to be extracted * @return Extracted e-mail address */ private String extractEmailAddress(String rawEmailAddress) { Pattern pattern = Pattern.compile("<(.+?@.+?)>"); Matcher matcher = pattern.matcher(rawEmailAddress); return matcher.find() ? matcher.group(1) : rawEmailAddress; } //////////////////////////////End of Repository Connector Methods/////////////////////////////////// ///////////////////////////////////////Start of Configuration UI///////////////////////////////////// /** * Output the configuration header section. * This method is called in the head section of the connector's configuration page. Its purpose is to * add the required tabs to the list, and to output any javascript methods that might be needed by * the configuration editing HTML. * The connector does not need to be connected for this method to be called. * * @param threadContext is the local thread context. * @param out is the output to which any HTML should be sent. * @param locale is the desired locale. * @param parameters are the configuration parameters, as they currently exist, for this connection being configured. * @param tabsArray is an array of tab names. Add to this array any tab names that are specific to the connector. 
*/ @Override public void outputConfigurationHeader(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters, List<String> tabsArray) throws ManifoldCFException, IOException { tabsArray.add(Messages.getString(locale, "EmailConnector.Server")); tabsArray.add(Messages.getString(locale, "EmailConnector.URL")); // Map the parameters Map<String, Object> paramMap = new HashMap<String, Object>(); // Fill in the parameters from each tab fillInServerConfigurationMap(paramMap, out, parameters); fillInURLConfigurationMap(paramMap, out, parameters); // Output the Javascript - only one Velocity template for all tabs Messages.outputResourceWithVelocity(out, locale, "ConfigurationHeader.js", paramMap); } @Override public void outputConfigurationBody(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters, String tabName) throws ManifoldCFException, IOException { // Output the Server tab Map<String, Object> paramMap = new HashMap<String, Object>(); // Set the tab name paramMap.put("TabName", tabName); // Fill in the parameters fillInServerConfigurationMap(paramMap, out, parameters); fillInURLConfigurationMap(paramMap, out, parameters); Messages.outputResourceWithVelocity(out, locale, "Configuration_Server.html", paramMap); Messages.outputResourceWithVelocity(out, locale, "Configuration_URL.html", paramMap); } private static void fillInServerConfigurationMap(Map<String, Object> paramMap, IPasswordMapperActivity mapper, ConfigParams parameters) { int i = 0; String username = parameters.getParameter(EmailConfig.USERNAME_PARAM); String password = parameters.getObfuscatedParameter(EmailConfig.PASSWORD_PARAM); String protocol = parameters.getParameter(EmailConfig.PROTOCOL_PARAM); String server = parameters.getParameter(EmailConfig.SERVER_PARAM); String port = parameters.getParameter(EmailConfig.PORT_PARAM); List<Map<String, String>> list = new ArrayList<Map<String, String>>(); while (i < parameters.getChildCount()) //In post 
property set is added as a configuration node { ConfigNode cn = parameters.getChild(i++); if (cn.getType().equals(EmailConfig.NODE_PROPERTIES)) { String findParameterName = cn.getAttributeValue(EmailConfig.ATTRIBUTE_NAME); String findParameterValue = cn.getAttributeValue(EmailConfig.ATTRIBUTE_VALUE); Map<String, String> row = new HashMap<String, String>(); row.put("name", findParameterName); row.put("value", findParameterValue); list.add(row); } } if (username == null) username = StringUtils.EMPTY; if (password == null) password = StringUtils.EMPTY; else password = mapper.mapPasswordToKey(password); if (protocol == null) protocol = EmailConfig.PROTOCOL_DEFAULT_VALUE; if (server == null) server = StringUtils.EMPTY; if (port == null) port = EmailConfig.PORT_DEFAULT_VALUE; paramMap.put("USERNAME", username); paramMap.put("PASSWORD", password); paramMap.put("PROTOCOL", protocol); paramMap.put("SERVER", server); paramMap.put("PORT", port); paramMap.put("PROPERTIES", list); } private static void fillInURLConfigurationMap(Map<String, Object> paramMap, IPasswordMapperActivity mapper, ConfigParams parameters) { String urlTemplate = parameters.getParameter(EmailConfig.URL_PARAM); if (urlTemplate == null) { urlTemplate = "http://sampleserver/$(FOLDERNAME)?id=$(MESSAGEID)"; } paramMap.put("URL", urlTemplate); String attachmentUrlTemplate = parameters.getParameter(EmailConfig.ATTACHMENT_URL_PARAM); if (attachmentUrlTemplate == null) { attachmentUrlTemplate = "http://sampleserver/$(FOLDERNAME)?id=$(MESSAGEID)&attach=$(ATTACHMENTNUMBER)"; } paramMap.put("ATTACHMENTURL", attachmentUrlTemplate); } /** * Process a configuration post. * This method is called at the start of the connector's configuration page, whenever there is a possibility * that form data for a connection has been posted. Its purpose is to gather form information and modify * the configuration parameters accordingly. * The name of the posted form is always "editconnection". 
* The connector does not need to be connected for this method to be called. * * @param threadContext is the local thread context. * @param variableContext is the set of variables available from the post, including binary file post information. * @param parameters are the configuration parameters, as they currently exist, for this connection being configured. * @return null if all is well, or a string error message if there is an error that should prevent saving of the * connection (and cause a redirection to an error page). */ @Override public String processConfigurationPost(IThreadContext threadContext, IPostParameters variableContext, ConfigParams parameters) throws ManifoldCFException { String urlTemplate = variableContext.getParameter("url"); if (urlTemplate != null) parameters.setParameter(EmailConfig.URL_PARAM, urlTemplate); String attachmentUrlTemplate = variableContext.getParameter("attachmenturl"); if (attachmentUrlTemplate != null) parameters.setParameter(EmailConfig.ATTACHMENT_URL_PARAM, attachmentUrlTemplate); String userName = variableContext.getParameter("username"); if (userName != null) parameters.setParameter(EmailConfig.USERNAME_PARAM, userName); String password = variableContext.getParameter("password"); if (password != null) parameters.setObfuscatedParameter(EmailConfig.PASSWORD_PARAM, variableContext.mapKeyToPassword(password)); String protocol = variableContext.getParameter("protocol"); if (protocol != null) parameters.setParameter(EmailConfig.PROTOCOL_PARAM, protocol); String server = variableContext.getParameter("server"); if (server != null) parameters.setParameter(EmailConfig.SERVER_PARAM, server); String port = variableContext.getParameter("port"); if (port != null) parameters.setParameter(EmailConfig.PORT_PARAM, port); // Remove old find parameter document specification information removeNodes(parameters, EmailConfig.NODE_PROPERTIES); // Parse the number of records that were posted String findCountString = 
variableContext.getParameter("findcount"); if (findCountString != null) { int findCount = Integer.parseInt(findCountString); // Loop throught them and add new server properties int i = 0; while (i < findCount) { String suffix = "_" + Integer.toString(i++); // Only add the name/value if the item was not deleted. String findParameterOp = variableContext.getParameter("findop" + suffix); if (findParameterOp == null || !findParameterOp.equals("Delete")) { String findParameterName = variableContext.getParameter("findname" + suffix); String findParameterValue = variableContext.getParameter("findvalue" + suffix); addFindParameterNode(parameters, findParameterName, findParameterValue); } } } // Now, look for a global "Add" operation String operation = variableContext.getParameter("findop"); if (operation != null && operation.equals("Add")) { // Pick up the global parameter name and value String findParameterName = variableContext.getParameter("findname"); String findParameterValue = variableContext.getParameter("findvalue"); addFindParameterNode(parameters, findParameterName, findParameterValue); } return null; } /** * View configuration. This method is called in the body section of the * connector's view configuration page. Its purpose is to present the * connection information to the user. The coder can presume that the HTML that * is output from this configuration will be within appropriate <html> and * <body> tags. * * @param threadContext is the local thread context. * @param out is the output to which any HTML should be sent. * @param parameters are the configuration parameters, as they currently exist, for * this connection being configured. 
*/ @Override public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters) throws ManifoldCFException, IOException { Map<String, Object> paramMap = new HashMap<String, Object>(); // Fill in map from each tab fillInServerConfigurationMap(paramMap, out, parameters); fillInURLConfigurationMap(paramMap, out, parameters); Messages.outputResourceWithVelocity(out, locale, "ConfigurationView.html", paramMap); } /////////////////////////////////End of configuration UI//////////////////////////////////////////////////// /////////////////////////////////Start of Specification UI////////////////////////////////////////////////// /** Output the specification header section. * This method is called in the head section of a job page which has selected a repository connection of the * current type. Its purpose is to add the required tabs to the list, and to output any javascript methods * that might be needed by the job editing HTML. * The connector will be connected before this method can be called. *@param out is the output to which any HTML should be sent. *@param locale is the locale the output is preferred to be in. *@param ds is the current document specification for this job. *@param connectionSequenceNumber is the unique number of this connection within the job. *@param tabsArray is an array of tab names. Add to this array any tab names that are specific to the connector. 
*/ @Override public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification ds, int connectionSequenceNumber, List<String> tabsArray) throws ManifoldCFException, IOException { Map<String, Object> paramMap = new HashMap<String, Object>(); paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber)); // Add the tabs tabsArray.add(Messages.getString(locale, "EmailConnector.Metadata")); tabsArray.add(Messages.getString(locale, "EmailConnector.Filter")); Messages.outputResourceWithVelocity(out, locale, "SpecificationHeader.js", paramMap); } /** Output the specification body section. * This method is called in the body section of a job page which has selected a repository connection of the * current type. Its purpose is to present the required form elements for editing. * The coder can presume that the HTML that is output from this configuration will be within appropriate * <html>, <body>, and <form> tags. The name of the form is always "editjob". * The connector will be connected before this method can be called. *@param out is the output to which any HTML should be sent. *@param locale is the locale the output is preferred to be in. *@param ds is the current document specification for this job. *@param connectionSequenceNumber is the unique number of this connection within the job. *@param actualSequenceNumber is the connection within the job that has currently been selected. *@param tabName is the current tab name. (actualSequenceNumber, tabName) form a unique tuple within * the job. */ @Override public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification ds, int connectionSequenceNumber, int actualSequenceNumber, String tabName) throws ManifoldCFException, IOException { outputFilterTab(out, locale, ds, tabName, connectionSequenceNumber, actualSequenceNumber); outputMetadataTab(out, locale, ds, tabName, connectionSequenceNumber, actualSequenceNumber); } /** * Take care of "Metadata" tab. 
*/ protected void outputMetadataTab(IHTTPOutput out, Locale locale, Specification ds, String tabName, int connectionSequenceNumber, int actualSequenceNumber) throws ManifoldCFException, IOException { Map<String, Object> paramMap = new HashMap<String, Object>(); paramMap.put("TabName", tabName); paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber)); paramMap.put("SelectedNum", Integer.toString(actualSequenceNumber)); fillInMetadataTab(paramMap, ds); fillInMetadataAttributes(paramMap); Messages.outputResourceWithVelocity(out, locale, "Specification_Metadata.html", paramMap); } /** * Fill in Velocity context for Metadata tab. */ protected static void fillInMetadataTab(Map<String, Object> paramMap, Specification ds) { Set<String> metadataSelections = new HashSet<String>(); String extractEmailSelection = null; int i = 0; while (i < ds.getChildCount()) { SpecificationNode sn = ds.getChild(i++); if (sn.getType().equals(EmailConfig.NODE_METADATA)) { String metadataName = sn.getAttributeValue(EmailConfig.ATTRIBUTE_NAME); metadataSelections.add(metadataName); } else if (sn.getType().equals(EmailConfig.NODE_EXTRACT_EMAIL)) { extractEmailSelection = sn.getAttributeValue(EmailConfig.ATTRIBUTE_NAME); } } paramMap.put("METADATASELECTIONS", metadataSelections); paramMap.put("EXTRACTEMAILSELECTION", extractEmailSelection); } /** * Fill in Velocity context with data to permit attribute selection. 
*/ protected void fillInMetadataAttributes(Map<String, Object> paramMap) { String[] matchNames = EmailConfig.BASIC_METADATA; paramMap.put("METADATAATTRIBUTES", matchNames); String extractEmailAttribute = EmailConfig.BASIC_EXTRACT_EMAIL; paramMap.put("EXTRACTEMAILATTRIBUTE", extractEmailAttribute); } protected void outputFilterTab(IHTTPOutput out, Locale locale, Specification ds, String tabName, int connectionSequenceNumber, int actualSequenceNumber) throws ManifoldCFException, IOException { Map<String, Object> paramMap = new HashMap<String, Object>(); paramMap.put("TabName", tabName); paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber)); paramMap.put("SelectedNum", Integer.toString(actualSequenceNumber)); fillInFilterTab(paramMap, ds); if (tabName.equals(Messages.getString(locale, "EmailConnector.Filter"))) fillInSearchableAttributes(paramMap); Messages.outputResourceWithVelocity(out, locale, "Specification_Filter.html", paramMap); } private void fillInSearchableAttributes(Map<String, Object> paramMap) { String[] attributes = EmailConfig.BASIC_SEARCHABLE_ATTRIBUTES; paramMap.put("SEARCHABLEATTRIBUTES", attributes); try { String[] folderNames = getFolderNames(); paramMap.put("FOLDERNAMES", folderNames); paramMap.put("EXCEPTION", ""); } catch (ManifoldCFException e) { paramMap.put("EXCEPTION", e.getMessage()); } catch (ServiceInterruption e) { paramMap.put("EXCEPTION", e.getMessage()); } } protected static void fillInFilterTab(Map<String, Object> paramMap, Specification ds) { List<Map<String, String>> filterList = new ArrayList<Map<String, String>>(); Set<String> folders = new HashSet<String>(); int i = 0; while (i < ds.getChildCount()) { SpecificationNode sn = ds.getChild(i++); if (sn.getType().equals(EmailConfig.NODE_FILTER)) { String findParameterName = sn.getAttributeValue(EmailConfig.ATTRIBUTE_NAME); String findParameterValue = sn.getAttributeValue(EmailConfig.ATTRIBUTE_VALUE); Map<String, String> row = new HashMap<String, String>(); 
row.put("name", findParameterName); row.put("value", findParameterValue); filterList.add(row); } else if (sn.getType().equals(EmailConfig.NODE_FOLDER)) { String folderName = sn.getAttributeValue(EmailConfig.ATTRIBUTE_NAME); folders.add(folderName); } } paramMap.put("MATCHES", filterList); paramMap.put("FOLDERS", folders); } /** Process a specification post. * This method is called at the start of job's edit or view page, whenever there is a possibility that form * data for a connection has been posted. Its purpose is to gather form information and modify the * document specification accordingly. The name of the posted form is always "editjob". * The connector will be connected before this method can be called. *@param variableContext contains the post data, including binary file-upload information. *@param locale is the locale the output is preferred to be in. *@param ds is the current document specification for this job. *@param connectionSequenceNumber is the unique number of this connection within the job. *@return null if all is well, or a string error message if there is an error that should prevent saving of * the job (and cause a redirection to an error page). 
*/ @Override public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification ds, int connectionSequenceNumber) throws ManifoldCFException { String result = processFilterTab(variableContext, ds, connectionSequenceNumber); if (result != null) return result; result = processMetadataTab(variableContext, ds, connectionSequenceNumber); return result; } protected String processFilterTab(IPostParameters variableContext, Specification ds, int connectionSequenceNumber) throws ManifoldCFException { String seqPrefix = "s"+connectionSequenceNumber+"_"; String findCountString = variableContext.getParameter(seqPrefix + "findcount"); if (findCountString != null) { int findCount = Integer.parseInt(findCountString); // Remove old find parameter document specification information removeNodes(ds, EmailConfig.NODE_FILTER); int i = 0; while (i < findCount) { String suffix = "_" + Integer.toString(i++); // Only add the name/value if the item was not deleted. String findParameterOp = variableContext.getParameter(seqPrefix + "findop" + suffix); if (findParameterOp == null || !findParameterOp.equals("Delete")) { String findParameterName = variableContext.getParameter(seqPrefix + "findname" + suffix); String findParameterValue = variableContext.getParameter(seqPrefix + "findvalue" + suffix); addFindParameterNode(ds, findParameterName, findParameterValue); } } String operation = variableContext.getParameter(seqPrefix + "findop"); if (operation != null && operation.equals("Add")) { String findParameterName = variableContext.getParameter(seqPrefix + "findname"); String findParameterValue = variableContext.getParameter(seqPrefix + "findvalue"); addFindParameterNode(ds, findParameterName, findParameterValue); } } String[] folders = variableContext.getParameterValues(seqPrefix + "folders"); if (folders != null) { removeNodes(ds, EmailConfig.NODE_FOLDER); for (String folder : folders) { addFolderNode(ds, folder); } } return null; } protected String 
processMetadataTab(IPostParameters variableContext, Specification ds, int connectionSequenceNumber) throws ManifoldCFException { String result = processMetadataAttributes(variableContext, ds, connectionSequenceNumber); if (result != null) return result; result = processExtractEmail(variableContext, ds, connectionSequenceNumber); return result; } protected String processMetadataAttributes(IPostParameters variableContext, Specification ds, int connectionSequenceNumber) throws ManifoldCFException { String seqPrefix = "s"+connectionSequenceNumber+"_"; // Remove old included metadata nodes removeNodes(ds, EmailConfig.NODE_METADATA); // Get the posted metadata values String[] metadataNames = variableContext.getParameterValues(seqPrefix + "metadata"); if (metadataNames != null) { // Add each metadata name as a node to the document specification int i = 0; while (i < metadataNames.length) { String metadataName = metadataNames[i++]; addIncludedMetadataNode(ds, metadataName); } } return null; } protected String processExtractEmail(IPostParameters variableContext, Specification ds, int connectionSequenceNumber) throws ManifoldCFException { String seqPrefix = "s"+connectionSequenceNumber+"_"; // Remove old included extract email nodes removeNodes(ds, EmailConfig.NODE_EXTRACT_EMAIL); // Get the posted extract email value String extractEmail = variableContext.getParameter(seqPrefix + "extractemail"); if (extractEmail == null) { return null; } // Gather the extract email parameter to be the last one SpecificationNode sn = new SpecificationNode(EmailConfig.NODE_EXTRACT_EMAIL); sn.setAttribute(EmailConfig.ATTRIBUTE_NAME, extractEmail); // Add the new extract email parameter ds.addChild(ds.getChildCount(), sn); return null; } /** View specification. * This method is called in the body section of a job's view page. Its purpose is to present the document * specification information to the user. 
The coder can presume that the HTML that is output from * this configuration will be within appropriate <html> and <body> tags. * The connector will be connected before this method can be called. *@param out is the output to which any HTML should be sent. *@param locale is the locale the output is preferred to be in. *@param ds is the current document specification for this job. *@param connectionSequenceNumber is the unique number of this connection within the job. */ @Override public void viewSpecification(IHTTPOutput out, Locale locale, Specification ds, int connectionSequenceNumber) throws ManifoldCFException, IOException { Map<String, Object> paramMap = new HashMap<String, Object>(); paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber)); fillInFilterTab(paramMap, ds); fillInMetadataTab(paramMap, ds); Messages.outputResourceWithVelocity(out, locale, "SpecificationView.html", paramMap); } ///////////////////////////////////////End of specification UI/////////////////////////////////////////////// /** Get a sorted list of folder names */ protected String[] getFolderNames() throws ManifoldCFException, ServiceInterruption { getSession(); try { ListFoldersThread lft = new ListFoldersThread(session); lft.start(); return lft.finishUp(); } catch (InterruptedException e) { throw new ManifoldCFException(e.getMessage(),ManifoldCFException.INTERRUPTED); } catch (MessagingException e) { handleMessagingException(e,"getting folder list"); return null; } } /** Create a document's URI given a template, a folder name, and a message ID */ protected static String makeDocumentURI(String urlTemplate, String folderName, String id) { // First, URL encode folder name and id String encodedFolderName = URLEncoder.encode(folderName); String encodedId = URLEncoder.encode(id); // The template is already URL encoded, except for the substitution points Map<String,String> subsMap = new HashMap<String,String>(); subsMap.put("FOLDERNAME", encodedFolderName); subsMap.put("MESSAGEID", 
encodedId); return substitute(urlTemplate, subsMap); } /** Create a document's URI given a template, a folder name, a message ID, and an attachment number */ protected static String makeDocumentURI(String urlTemplate, String folderName, String id, int attachmentNumber) { // First, URL encode folder name and id String encodedFolderName = URLEncoder.encode(folderName); String encodedId = URLEncoder.encode(id); // The template is already URL encoded, except for the substitution points Map<String,String> subsMap = new HashMap<String,String>(); subsMap.put("FOLDERNAME", encodedFolderName); subsMap.put("MESSAGEID", encodedId); subsMap.put("ATTACHMENTNUMBER", Integer.toString(attachmentNumber)); return substitute(urlTemplate, subsMap); } protected static String substitute(String template, Map<String,String> map) { StringBuilder sb = new StringBuilder(); int index = 0; while (true) { int newIndex = template.indexOf("$(",index); if (newIndex == -1) { sb.append(template.substring(index)); break; } sb.append(template.substring(index, newIndex)); int endIndex = template.indexOf(")",newIndex+2); String varName; if (endIndex == -1) varName = template.substring(newIndex + 2); else varName = template.substring(newIndex + 2, endIndex); String subsValue = map.get(varName); if (subsValue == null) subsValue = ""; sb.append(subsValue); if (endIndex == -1) break; index = endIndex+1; } return sb.toString(); } protected static void addFindParameterNode(ConfigParams parameters, String findParameterName, String findParameterValue) { ConfigNode cn = new ConfigNode(EmailConfig.NODE_PROPERTIES); cn.setAttribute(EmailConfig.ATTRIBUTE_NAME, findParameterName); cn.setAttribute(EmailConfig.ATTRIBUTE_VALUE, findParameterValue); // Add to the end parameters.addChild(parameters.getChildCount(), cn); } protected static void removeNodes(ConfigParams parameters, String nodeTypeName) { int i = 0; while (i < parameters.getChildCount()) { ConfigNode cn = parameters.getChild(i); if 
(cn.getType().equals(nodeTypeName)) parameters.removeChild(i); else i++; } } protected static void removeNodes(Specification ds, String nodeTypeName) { int i = 0; while (i < ds.getChildCount()) { SpecificationNode sn = ds.getChild(i); if (sn.getType().equals(nodeTypeName)) ds.removeChild(i); else i++; } } protected static void addIncludedMetadataNode(Specification ds, String metadataName) { // Build the proper node SpecificationNode sn = new SpecificationNode(EmailConfig.NODE_METADATA); sn.setAttribute(EmailConfig.ATTRIBUTE_NAME, metadataName); // Add to the end ds.addChild(ds.getChildCount(), sn); } protected static void addFindParameterNode(Specification ds, String findParameterName, String findParameterValue) { SpecificationNode sn = new SpecificationNode(EmailConfig.NODE_FILTER); sn.setAttribute(EmailConfig.ATTRIBUTE_NAME, findParameterName); sn.setAttribute(EmailConfig.ATTRIBUTE_VALUE, findParameterValue); // Add to the end ds.addChild(ds.getChildCount(), sn); } protected static void addFolderNode(Specification ds, String folderName) { SpecificationNode sn = new SpecificationNode(EmailConfig.NODE_FOLDER); sn.setAttribute(EmailConfig.ATTRIBUTE_NAME, folderName); ds.addChild(ds.getChildCount(), sn); } /** Create a document identifier from a folder name and an email ID */ protected static String createDocumentIdentifier(String folderName, String emailID) { return makeSafeFolderName(folderName) + ":" + emailID; } /** Find an attachment number in a document identifier */ protected static Integer extractAttachmentNumberFromDocumentIdentifier(String di) { int index1 = di.indexOf(":"); if (index1 == -1) throw new RuntimeException("Bad document identifier: '"+di+"'"); int index2 = di.indexOf(":", index1 + 1); if (index2 == -1) return null; return new Integer(di.substring(index2 + 1)); } /** Find a folder name in a document identifier */ protected static String extractFolderNameFromDocumentIdentifier(String di) { int index = di.indexOf(":"); if (index == -1) throw new 
RuntimeException("Bad document identifier: '"+di+"'"); return di.substring(0,index); } /** Find an email ID in a document identifier */ protected static String extractEmailIDFromDocumentIdentifier(String di) { int index1 = di.indexOf(":"); if (index1 == -1) throw new RuntimeException("Bad document identifier: '"+di+"'"); int index2 = di.indexOf(":", index1 + 1); if (index2 == -1) return di.substring(index1+1); return di.substring(index1 + 1, index2); } /** Create a safe folder name (which doesn't contain colons) */ protected static String makeSafeFolderName(String folderName) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < folderName.length(); i++) { char x = folderName.charAt(i); if (x == '\\') sb.append('\\').append('\\'); else if (x == ':') sb.append('\\').append('0'); else sb.append(x); } return sb.toString(); } /** Unpack a safe folder name */ protected static String unpackSafeFolderName(String packedFolderName) { StringBuilder sb = new StringBuilder(); int i = 0; while (i < packedFolderName.length()) { char x = packedFolderName.charAt(i++); if (x == '\\') { if (i == packedFolderName.length()) throw new RuntimeException("Illegal packed folder name: '"+packedFolderName+"'"); x = packedFolderName.charAt(i++); if (x == '\\') sb.append('\\'); else if (x == '0') sb.append(':'); else throw new RuntimeException("Illegal packed folder name: '"+packedFolderName+"'"); } else sb.append(x); } return sb.toString(); } /** Handle Messaging exceptions in a consistent global manner */ protected static void handleMessagingException(MessagingException e, String context) throws ManifoldCFException, ServiceInterruption { if (e.getMessage().indexOf("Connection dropped by server?") != -1) { final long currentTime = System.currentTimeMillis(); throw new ServiceInterruption("Email server is down, retrying: "+e.getMessage(),e,currentTime + 300000L, currentTime + 12 * 60 * 60000L,-1,true); } else { Logging.connectors.error("Email: Error "+context+": "+e.getMessage(),e); 
throw new ManifoldCFException("Error "+context+": "+e.getMessage(),e); } } /** Handle IO Exception */ protected static void handleIOException(IOException e, String context) throws ManifoldCFException, ServiceInterruption { if (e instanceof java.net.SocketTimeoutException) { Logging.connectors.error("Email: Socket timeout "+context+": "+e.getMessage(),e); throw new ManifoldCFException("Socket timeout: "+e.getMessage(),e); } else if (e instanceof InterruptedIOException) { throw new ManifoldCFException("Interrupted: "+e.getMessage(),ManifoldCFException.INTERRUPTED); } else { Logging.connectors.error("Email: IO error "+context+": "+e.getMessage(),e); throw new ManifoldCFException("IO error "+context+": "+e.getMessage(),e); } } /** Class to set up connection. */ protected static class ConnectThread extends Thread { protected final String server; protected final int port; protected final String username; protected final String password; protected final String protocol; protected final Properties properties; // Local session handle protected EmailSession session = null; protected Throwable exception = null; public ConnectThread(String server, int port, String username, String password, String protocol, Properties properties) { this.server = server; this.port = port; this.username = username; this.password = password; this.protocol = protocol; this.properties = properties; setDaemon(true); } public void run() { try { session = new EmailSession(server, port, username, password, protocol, properties); } catch (Throwable e) { exception = e; } } public EmailSession finishUp() throws MessagingException, InterruptedException { try { join(); if (exception != null) { if (exception instanceof RuntimeException) throw (RuntimeException)exception; else if (exception instanceof Error) throw (Error)exception; else if (exception instanceof MessagingException) throw (MessagingException)exception; else throw new RuntimeException("Unknown exception type: "+exception.getClass().getName()+": 
"+exception.getMessage(),exception); } return session; } catch (InterruptedException e) { this.interrupt(); throw e; } } } /** Class to close the session. */ protected static class CloseSessionThread extends Thread { protected final EmailSession session; protected Throwable exception = null; public CloseSessionThread(EmailSession session) { this.session = session; setDaemon(true); } public void run() { try { session.close(); } catch (Throwable e) { exception = e; } } public void finishUp() throws MessagingException, InterruptedException { try { join(); if (exception != null) { if (exception instanceof RuntimeException) throw (RuntimeException)exception; else if (exception instanceof Error) throw (Error)exception; else if (exception instanceof MessagingException) throw (MessagingException)exception; else throw new RuntimeException("Unknown exception type: "+exception.getClass().getName()+": "+exception.getMessage(),exception); } } catch (InterruptedException e) { this.interrupt(); throw e; } } } /** Class to list all folders. */ protected static class ListFoldersThread extends Thread { protected final EmailSession session; protected String[] rval = null; protected Throwable exception = null; public ListFoldersThread(EmailSession session) { this.session = session; setDaemon(true); } public void run() { try { rval = session.listFolders(); } catch (Throwable e) { exception = e; } } public String[] finishUp() throws MessagingException, InterruptedException { try { join(); if (exception != null) { if (exception instanceof RuntimeException) throw (RuntimeException)exception; else if (exception instanceof Error) throw (Error)exception; else if (exception instanceof MessagingException) throw (MessagingException)exception; else throw new RuntimeException("Unknown exception type: "+exception.getClass().getName()+": "+exception.getMessage(),exception); } return rval; } catch (InterruptedException e) { this.interrupt(); throw e; } } } /** Class to check the connection. 
*/ protected static class CheckConnectionThread extends Thread { protected final EmailSession session; protected Throwable exception = null; public CheckConnectionThread(EmailSession session) { this.session = session; setDaemon(true); } public void run() { try { session.checkConnection(); } catch (Throwable e) { exception = e; } } public void finishUp() throws MessagingException, InterruptedException { try { join(); if (exception != null) { if (exception instanceof RuntimeException) throw (RuntimeException)exception; else if (exception instanceof Error) throw (Error)exception; else if (exception instanceof MessagingException) throw (MessagingException)exception; else throw new RuntimeException("Unknown exception type: "+exception.getClass().getName()+": "+exception.getMessage(),exception); } } catch (InterruptedException e) { this.interrupt(); throw e; } } } /** Class to open a folder. */ protected static class OpenFolderThread extends Thread { protected final EmailSession session; protected final String folderName; // Local folder protected Folder folder = null; protected Throwable exception = null; public OpenFolderThread(EmailSession session, String folderName) { this.session = session; this.folderName = folderName; setDaemon(true); } public void run() { try { folder = session.openFolder(folderName); } catch (Throwable e) { exception = e; } } public Folder finishUp() throws MessagingException, InterruptedException { try { join(); if (exception != null) { if (exception instanceof RuntimeException) throw (RuntimeException)exception; else if (exception instanceof Error) throw (Error)exception; else if (exception instanceof MessagingException) throw (MessagingException)exception; else throw new RuntimeException("Unknown exception type: "+exception.getClass().getName()+": "+exception.getMessage(),exception); } return folder; } catch (InterruptedException e) { this.interrupt(); throw e; } } } /** Class to close a folder. 
*/ protected static class CloseFolderThread extends Thread { protected final EmailSession session; protected final Folder folder; // Local folder protected Throwable exception = null; public CloseFolderThread(EmailSession session, Folder folder) { this.session = session; this.folder = folder; setDaemon(true); } public void run() { try { session.closeFolder(folder); } catch (Throwable e) { exception = e; } } public void finishUp() throws MessagingException, InterruptedException { try { join(); if (exception != null) { if (exception instanceof RuntimeException) throw (RuntimeException)exception; else if (exception instanceof Error) throw (Error)exception; else if (exception instanceof MessagingException) throw (MessagingException)exception; else throw new RuntimeException("Unknown exception type: "+exception.getClass().getName()+": "+exception.getMessage(),exception); } } catch (InterruptedException e) { this.interrupt(); throw e; } } } /** Class to get all messages from a folder. */ protected static class GetMessagesThread extends Thread { protected final EmailSession session; protected final Folder folder; // Local messages protected Message[] messages = null; protected Throwable exception = null; public GetMessagesThread(EmailSession session, Folder folder) { this.session = session; this.folder = folder; setDaemon(true); } public void run() { try { messages = session.getMessages(folder); } catch (Throwable e) { exception = e; } } public Message[] finishUp() throws MessagingException, InterruptedException { try { join(); if (exception != null) { if (exception instanceof RuntimeException) throw (RuntimeException)exception; else if (exception instanceof Error) throw (Error)exception; else if (exception instanceof MessagingException) throw (MessagingException)exception; else throw new RuntimeException("Unknown exception type: "+exception.getClass().getName()+": "+exception.getMessage(),exception); } return messages; } catch (InterruptedException e) { this.interrupt(); 
throw e; } } } /** Class to search for messages in a folder. */ protected static class SearchMessagesThread extends Thread { protected final EmailSession session; protected final Folder folder; protected final SearchTerm searchTerm; // Local messages protected Message[] messages = null; protected Throwable exception = null; public SearchMessagesThread(EmailSession session, Folder folder, SearchTerm searchTerm) { this.session = session; this.folder = folder; this.searchTerm = searchTerm; setDaemon(true); } public void run() { try { messages = session.search(folder, searchTerm); } catch (Throwable e) { exception = e; } } public Message[] finishUp() throws MessagingException, InterruptedException { try { join(); if (exception != null) { if (exception instanceof RuntimeException) throw (RuntimeException)exception; else if (exception instanceof Error) throw (Error)exception; else if (exception instanceof MessagingException) throw (MessagingException)exception; else throw new RuntimeException("Unknown exception type: "+exception.getClass().getName()+": "+exception.getMessage(),exception); } return messages; } catch (InterruptedException e) { this.interrupt(); throw e; } } } private static class EmailContent { private final String content; private final String mimeType; public EmailContent(final String content) { this.content = content; this.mimeType = EmailConfig.MIMETYPE_TEXT_PLAIN; } public EmailContent(final String content, final String mimetype) { this.content = content; this.mimeType = mimetype; } public String getContent() { return content; } public String getMimeType() { return mimeType; } } }