/**
*
* Copyright 2013-2014 OpenSextant.org
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.opensextant.xtext.collectors.mailbox;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import javax.mail.Flags;
import javax.mail.Message;
import javax.mail.MessagingException;
import org.opensextant.ConfigException;
import org.opensextant.util.TextUtils;
import org.opensextant.xtext.ConversionListener;
import org.opensextant.xtext.ConvertedDocument;
import org.opensextant.xtext.XText;
import org.opensextant.xtext.collectors.CollectionListener;
import org.opensextant.xtext.collectors.Collector;
import org.opensextant.xtext.converters.MessageConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The Class DefaultMailCrawl.
*/
public class DefaultMailCrawl extends MailClient implements ConversionListener, Collector {
/**
* A collection listener to consult as far as how to record the found & converted content
* as well as to determine what is worth saving.
*
*/
protected CollectionListener listener = null;
private final Logger log = LoggerFactory.getLogger(getClass());
/**
* Instantiates a new default mail crawl.
*
* @param cfg the cfg
* @param archive the archive
*/
public DefaultMailCrawl(MailConfig cfg, String archive) {
super(cfg, archive);
}
/** The Constant dateKeyFormat. */
final static SimpleDateFormat dateKeyFormat = new SimpleDateFormat("yyyyMMdd");
/**
* Creates the date folder.
*
* @param d date
* @return folder representing the date (e.g., collection date)
*/
protected File createDateFolder(Date d) {
String dateKey = dateKeyFormat.format(d.getTime());
String path = String.format("%s%s%s", archiveRoot, Collector.PATH_SEP, dateKey);
File dateFolder = new File(path);
if (!dateFolder.exists()) {
if (!dateFolder.mkdir()) {
return null;
}
}
return dateFolder;
}
/**
* Creates the message folder.
*
* @param parent parent container
* @param msgid message ID
* @return created folder that will contain the message and any related attachments.
*/
protected File createMessageFolder(File parent, String msgid) {
String path = String.format("%s%s%s", parent.getAbsolutePath(), Collector.PATH_SEP, msgid);
File msgFolder = new File(path);
if (!msgFolder.exists()) {
if (!msgFolder.mkdir()) {
return null;
}
}
return msgFolder;
}
/**
* Important that you set a listener if you want to see what was captured.
* As well as optimize future harvests. Listener tells the collector if the item in question was harvested or not.
* @param l listener to use
*/
public void setListener(CollectionListener l) {
listener = l;
}
/* (non-Javadoc)
* @see org.opensextant.xtext.collectors.mailbox.MailClient#setConverter(org.opensextant.xtext.XText)
*/
@Override
public void setConverter(XText conversionManager) {
converter = conversionManager;
if (converter != null) {
converter.setConversionListener(this);
}
}
/**
* Email parser, converter, recorder. This routine handles one message that
* may have a number of attachments (children)
*
* IOException is logged if handling of children documents+conversions fails.
* TODO: handleConversion should throw IOException or use listener to report errors for this document
*
* @param doc the doc
* @param filepath the filepath
*/
@Override
public void handleConversion(ConvertedDocument doc, String filepath) {
if (listener == null) {
// nothing to do.
return;
}
if (doc == null) {
log.debug("Item was not converted, FILE={}", filepath);
return;
}
try {
// Converted document is discovered, then enters this interface method.
//
// Parent doc will be ./A.eml
// Child Attachments will be ./A_eml/b.doc
//
listener.collected(doc, filepath);
if (doc.hasChildren()) {
// NOTE: our internal ID for children documents may not match what is preserved on disk in XText metadata.
//
for (ConvertedDocument child : doc.getChildren()) {
// This creates a new ID out of the parent doc id and the attachment filename.
String uniqueValue = String.format("%s,%s", doc.id, child.filename);
String _id = uniqueValue;
try {
_id = TextUtils.text_id(uniqueValue);
} catch (Exception err) {
log.error("hashing Error", err);
}
child.setId(_id);
// Record the child attachment.
listener.collected(child, child.filepath);
}
}
} catch (IOException err) {
log.error(
"Failed to record or manage the email message and/or its attachments, FILE={}",
filepath);
}
}
/**
* TODO:
*
* pull all mail messages,
* - create reasonable FILE.msg file name
* - use XText to iterate over each msg file for conversion
* - reimplement
*
* @throws IOException on failure to connect or collect.
*/
@Override
public void collect() throws IOException, ConfigException {
File dateFolder = createDateFolder(new Date());
if (dateFolder == null) {
log.error("Unable to create directory: " + dateFolder);
return;
}
Message[] messages = null;
try {
connect();
messages = getMessages();
if (messages == null) {
log.info("No messages available - Exiting MailClient now");
disconnect();
return;
}
} catch (MessagingException javaMailErr) {
throw new IOException("Unable to connect or get messages", javaMailErr);
}
int readCount = 0;
int totalCount = 0;
int available = messages.length;
int errCount = 0;
// Loop through all available messages
// Exit early if 10 'serious' errors are encountered.
//
for (Message message : messages) {
++totalCount;
// Exit if too many errors.
if (errCount > 10) {
break;
}
try {
if (config.doneReading(messages.length, readCount)) {
// Done here.
break;
}
/**
* Silently ignore deleted messages; items deleted while we were
* in session
*/
if (message.isExpunged()) {
log.info("Message deleted during session; Unable to collect. Mail Subj: {}",
message.getSubject());
continue;
}
boolean newMessage = !message.isSet(Flags.Flag.SEEN);
log.debug("Message Subject: " + message.getSubject() + " new?: " + newMessage);
boolean setForDeleteNow = false;
String subject = message.getSubject();
if (message.getSubject() == null) {
log.info("Empty message title MSG number=" + message.getMessageNumber());
//continue;
subject = "No_Subject";
}
if ((!config.isReadNewMessagesOnly() || newMessage)) {
// 1. Identify the email message.
// and determine if you need to capture it again.
//
String messageFilename = MessageConverter.createSafeFilename(subject);
if (messageFilename.length() > 60) {
messageFilename = messageFilename.substring(0, 60);
}
String msgId = MessageConverter.getMessageID(message);
if (msgId == null) {
log.error("How can a message ID be null? SUBJ={}", message.getSubject());
continue;
}
msgId = MessageConverter.getShorterMessageID(msgId);
try {
if (listener != null && listener.exists(msgId)) {
// You already collected this item. Ignoring.
//
continue;
}
} catch (Exception err1) {
log.error("Collection error with mail.", err1);
continue;
}
readCount++;
if (log.isDebugEnabled()) {
log.debug("Message: {}", message.getSubject());
String msg = String.format("Processing message: %s / %s of available: %s",
readCount, totalCount, available);
log.debug(msg);
}
// Save file in archive, Convert it, etc.
int status = saveMessageToFile(dateFolder, message, msgId, messageFilename);
if (status < 0) {
++errCount;
}
if (!config.isReadOnly() && config.isDeleteOnRead()) {
message.setFlag(Flags.Flag.DELETED, true);
String dbg = String.format("Processing message: %d / %d of available:%d",
readCount, totalCount, available);
log.debug(dbg);
setForDeleteNow = true;
}
}
// NOT a new message and we want to delete old
//
if (!newMessage && config.isDeleteOld() && !setForDeleteNow && !config.isReadOnly()) {
message.setFlag(Flags.Flag.DELETED, true);
log.debug("Deleting message: #{}", totalCount);
}
} catch (javax.mail.FolderClosedException connErr) {
++errCount;
} catch (MessagingException me) {
log.error("Failed to read messsage #{}", totalCount, me);
++errCount;
} catch (IOException writeErr) {
log.error("Failed to write msg.eml #{}", totalCount, writeErr);
++errCount;
}
}
// Well, if work was actually done but you fail to close the connection
// Its not a failure ... just make sure you figure out how to close cleanly.
// Error on close is likely rare.
try {
disconnect();
} catch (Exception javaMailErrOnClose) {
log.error("Unkosher disconnect", javaMailErrOnClose);
}
}
/**
* A very specific MESSAGE ->> FILE archiving method.
* Mail item will end up in:
*
* YYYYMMDD/MSGID/SUBJ.eml .. the original email.
* YYYYMMDD/MSGID/SUBJ_eml/ .. attachments here..
*
* @param dateFolder the date folder
* @param msg javamail message
* @param oid message ID
* @param fname file name to save message
* @return 0 on success, -1 on error
* @throws IOException unknown I/O error.
* @throws MessagingException the messaging exception
*/
protected int saveMessageToFile(File dateFolder, Message msg, String oid, String fname)
throws IOException, MessagingException {
OutputStream msgIO = null;
try {
File msgFolder = createMessageFolder(dateFolder, oid);
// Save the file and do the conversion
//
String msgFilepath = String.format("%s/%s.eml", msgFolder, fname);
File msgFile = new File(msgFilepath);
msgIO = new FileOutputStream(msgFile);
// Requirement: Write data to disk first, saving a ".eml" file.
msg.writeTo(msgIO);
// NOTE: here the act of converting the ".eml" file now invokes
// the default MessageConverter logic and finally calls this as the ConversionListener
//
converter.convertFile(msgFile);
return 0;
} catch (Exception msgErr) {
log.error("Failed reading, saving document", msgErr);
return -1;
} finally {
msgIO.close();
}
}
}