/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.contrib.mail.internal;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import javax.inject.Inject;
import javax.inject.Named;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.internet.MailDateFormat;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeUtility;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.xwiki.component.annotation.Component;
import org.xwiki.contrib.mail.MailContent;
import org.xwiki.contrib.mail.MailItem;
import org.xwiki.contrib.mail.internal.util.GMailMailDateFormat;
import org.xwiki.contrib.mail.internal.util.Utils;
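/**
* Message parser for Javamail {@link Part} objects: reads mail headers into a {@link MailItem} and extracts the
* text, html and attachments of a mail into a {@link MailContent}.
*
* @version $Id$
*/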
@Component
@Named("javamail")
public class JavamailMessageParser implements IMessageParser<Part>
{
/** Subject assigned by {@code parseHeaders} to mails whose Subject header is missing or blank. */
public static final String DEFAULT_SUBJECT = "[no subject]";
/** Injected logger used by the parsing methods of this component. */
@Inject
private Logger logger;
/**
 * {@inheritDoc}
 * <p>
 * Reads the identification, threading, subject, address, locale, date, sensitivity and importance headers of the
 * given part into a new {@link MailItem}. Textual headers that are absent are stored as empty strings, a blank
 * subject is replaced by {@link #DEFAULT_SUBJECT}, and a date that cannot be parsed is replaced by the current
 * time.
 *
 * @param mail the part to read; it is cast to {@link Message} in order to be stored as the original message
 * @return the populated mail item
 * @throws MessagingException if a header or the content type cannot be read
 * @throws IOException if the content of the part cannot be read
 * @see org.xwiki.contrib.mail.internal.IMessageParser#parseHeaders(java.lang.Object)
 */
@Override
public MailItem parseHeaders(Part mail) throws MessagingException, IOException
{
    MailItem m = new MailItem();

    m.setMessageId(Utils.cropId(extractSingleHeader(mail, "Message-ID")));
    m.setReplyToId(Utils.cropId(extractSingleHeader(mail, "In-Reply-To")));
    m.setRefs(extractSingleHeader(mail, "References"));

    String subject = extractSingleHeader(mail, "Subject");
    if (StringUtils.isBlank(subject)) {
        subject = DEFAULT_SUBJECT;
    }
    // NOTE(review): this statement used to also replace ">" by ">" and "<" by "<", which changes nothing and has
    // been dropped. It may originally have been an HTML escaping of the subject - confirm whether it is needed.
    m.setSubject(subject.replaceAll("[\n\r]", ""));

    // If topic is not provided, we use message subject without the beginning junk
    String topic = extractSingleHeader(mail, "Thread-Topic");
    if (StringUtils.isBlank(topic)) {
        topic = m.getSubject().replaceAll("(?mi)([\\[\\(] *)?(RE|FWD?) *([-:;)\\]][ :;\\])-]*|$)|\\]+ *$", "");
    } else {
        topic = Utils.removeCRLF(topic);
    }
    m.setTopic(topic);

    // Topic id comes from Thread-Index. No fallback is applied here: without that header it stays empty.
    String topicId = extractSingleHeader(mail, "Thread-Index");
    if (!StringUtils.isBlank(topicId)) {
        topicId = Utils.cropId(topicId);
    }
    m.setTopicId(topicId);

    m.setFrom(extractAddressHeader(mail, "From"));
    m.setSender(extractAddressHeader(mail, "Sender"));
    m.setTo(extractAddressHeader(mail, "To"));
    m.setCc(extractAddressHeader(mail, "CC"));

    // process the locale, if any provided
    m.setLocale(parseContentLanguage(mail.getHeader("Content-Language")));

    String[] dateHeaders = mail.getHeader("Date");
    String date = "";
    if (dateHeaders != null && dateHeaders.length > 0 && dateHeaders[0] != null) {
        date = dateHeaders[0];
    }
    m.setDate(parseDate(date));

    // A mail without In-Reply-To starts its topic
    m.setFirstInTopic("".equals(m.getReplyToId()));

    m.setOriginalMessage((Message) mail);
    m.setBodypart(mail.getContent());
    m.setContentType(mail.getContentType().toLowerCase(Locale.ENGLISH));

    m.setSensitivity(firstHeaderOrDefault(mail, "Sensitivity", "normal").toLowerCase(Locale.ENGLISH));
    m.setImportance(firstHeaderOrDefault(mail, "Importance", "normal").toLowerCase(Locale.ENGLISH));

    // type
    m.setBuiltinType("mail");
    return m;
}

/**
 * Reads an address-like header (decoded first value) and strips double quotes and line breaks from it.
 *
 * @param mail the part holding the header
 * @param name the header name
 * @return the cleaned header value, or an empty string if the header is absent
 * @throws MessagingException if the header cannot be read
 */
private static String extractAddressHeader(Part mail, String name) throws MessagingException
{
    return extractSingleHeader(mail, name).replaceAll("\"", "").replaceAll("[\n\r]", "");
}

/**
 * Returns the trimmed first value of a header, or a default when the header is absent or blank.
 *
 * @param mail the part holding the header
 * @param name the header name
 * @param defaultValue the value returned when there is no usable header value
 * @return the trimmed first header value, or {@code defaultValue}
 * @throws MessagingException if the header cannot be read
 */
private static String firstHeaderOrDefault(Part mail, String name, String defaultValue)
    throws MessagingException
{
    String[] headers = mail.getHeader(name);
    if (headers != null && headers.length > 0 && !StringUtils.isBlank(headers[0])) {
        return headers[0].trim();
    }
    return defaultValue;
}

/**
 * Builds a locale from the values of a Content-Language header. Only the first tag of the first value is used,
 * and only when it has the simple form language + separator + country, the separator being a hyphen, an
 * underscore or a dot. In every other case the locale defaults to en_US.
 *
 * @param headers the raw Content-Language header values, may be null
 * @return the locale described by the header, or en_US
 */
private static Locale parseContentLanguage(String[] headers)
{
    String locLang = "en";
    String locCountry = "US";
    if (headers != null && headers.length > 0 && headers[0] != null) {
        String tag = headers[0];
        // The header may list several languages separated by commas: keep the first one
        int comma = tag.indexOf(',');
        if (comma != -1) {
            tag = tag.substring(0, comma);
        }
        tag = tag.trim();
        int separator = -1;
        for (int i = 0; i < tag.length() && separator == -1; i++) {
            char c = tag.charAt(i);
            if (c == '-' || c == '_' || c == '.') {
                separator = i;
            }
        }
        // Both sides of the separator must be non-empty
        if (separator > 0 && separator < tag.length() - 1) {
            locLang = tag.substring(0, separator);
            locCountry = tag.substring(separator + 1);
        }
    }
    return new Locale(locLang, locCountry);
}

/**
 * Decodes a Date header value, first with the Javamail date format, then with the GMail one. Falls back to the
 * current time when neither succeeds.
 *
 * @param date the raw header value, possibly empty
 * @return the decoded date, never null
 */
private Date parseDate(String date)
{
    Date decodedDate = null;
    try {
        logger.debug("Parsing date [{}] with Javamail MailDateFormat", date);
        decodedDate = new MailDateFormat().parse(date);
    } catch (ParseException e) {
        logger.debug("Could not parse date header {}", ExceptionUtils.getRootCauseMessage(e));
    }
    if (decodedDate == null) {
        try {
            logger.debug("Parsing date [{}] with GMail parser", date);
            decodedDate = new GMailMailDateFormat().parse(date);
        } catch (ParseException e) {
            logger.info("Could not parse date header with GMail parser {}",
                ExceptionUtils.getRootCauseMessage(e));
        }
    }
    if (decodedDate == null) {
        decodedDate = new Date();
        logger.info("Using 'now' as date as date could not be parsed");
    }
    return decodedDate;
}
/**
 * Reads a single value of a mail header. When the header occurs several times only its first occurrence is
 * used. The value is Mime decoded; if its encoding is not supported the raw value is returned instead.
 *
 * @param part the part holding the headers
 * @param name Header identifier.
 * @return First header value, or empty string if the header is absent.
 * @throws MessagingException if the header cannot be read from the part
 */
public static String extractSingleHeader(final Part part, final String name) throws MessagingException
{
    final String[] rawValues = part.getHeader(name);
    if (rawValues == null || rawValues.length == 0) {
        return "";
    }
    final String first = rawValues[0];
    try {
        return MimeUtility.decodeText(first);
    } catch (UnsupportedEncodingException e) {
        // Unknown charset in the encoded word: hand back the undecoded value
        return first;
    }
}
/**
 * Extracts mail content, and manages attachments.
 * <p>
 * Encrypted parts (application/pkcs7-mime or multipart/encrypted) are not parsed: a placeholder content flagged
 * as encrypted is returned instead. For other parts the content is extracted with
 * {@link #extractPartsContent(Part)}, then every attachment found is loaded in memory and registered on the
 * returned content, except the files named oledata.mso or ending with .wmz or .emz, which are skipped. An
 * attachment that cannot be read is logged and skipped, it does not abort the extraction.
 *
 * @param part the part to extract, may be null
 * @return the extracted content, or null if {@code part} is null
 * @throws MessagingException if the structure of the part cannot be read
 * @throws IOException if the content of the part cannot be read
 */
public MailContent extractMailContent(Part part) throws MessagingException, IOException
{
    logger.debug("extractMailContent...");
    if (part == null) {
        return null;
    }
    if (part.isMimeType("application/pkcs7-mime") || part.isMimeType("multipart/encrypted")) {
        logger.debug("Mail content is ENCRYPTED");
        MailContent encryptedContent = new MailContent();
        encryptedContent
            .setText("<<<This e-mail part is encrypted. Text Content and attachments of encrypted e-mails are not published in Mail Archiver to avoid disclosure of restricted or confidential information.>>>");
        encryptedContent
            .setHtml("<i><<<This e-mail is encrypted. Text Content and attachments of encrypted e-mails are not published in Mail Archiver to avoid disclosure of restricted or confidential information.>>></i>");
        encryptedContent.setEncrypted(true);
        return encryptedContent;
    }

    MailContent mailContent = extractPartsContent(part);
    // TODO : filling attachment cids and creating xwiki attachments should be done in same method
    HashMap<String, String> attachmentsMap = fillAttachmentContentIds(mailContent.getAttachments());
    for (MimeBodyPart currentbodypart : mailContent.getAttachments()) {
        // Reset for each attachment so that a failure never reports the name of the previous one
        String fileName = "";
        try {
            String cid = currentbodypart.getContentID();
            fileName = currentbodypart.getFileName();
            // replace by correct name if filename was renamed (multiple attachments with same name)
            if (attachmentsMap.containsKey(cid)) {
                fileName = attachmentsMap.get(cid);
            }
            logger.debug("Treating attachment: {} with contentid {}", fileName, cid);
            if (fileName == null) {
                fileName = "file.ext";
            }
            if (fileName.equals("oledata.mso") || fileName.endsWith(".wmz") || fileName.endsWith(".emz")) {
                logger.debug("Garbaging Microsoft crap !");
                continue;
            }
            logger.debug("Treating attachment of type: {}", currentbodypart.getContentType().toLowerCase());

            MailAttachment wikiAttachment = new MailAttachment();
            wikiAttachment.setCid(cid);
            wikiAttachment.setFilename(fileName);
            // The stream is ours to release once its bytes have been copied
            InputStream data = currentbodypart.getInputStream();
            try {
                wikiAttachment.setData(IOUtils.toByteArray(data));
            } finally {
                IOUtils.closeQuietly(data);
            }
            mailContent.addWikiAttachment(cid, wikiAttachment);
        } catch (Exception e) {
            // Best effort: a broken attachment must not prevent the others from being loaded
            logger.warn("Attachment " + fileName + " could not be treated", e);
        }
    }
    return mailContent;
}
/**
 * Recursively extracts content of an email. Every Part that has a file name, or is neither multipart, plain text or
 * html, is considered an attachment. Plain text and html parts are appended to the text and html of the result.
 * Multiparts of subtype signed, related, mixed and alternative are walked recursively (for signed ones only the
 * first body part is used); other multipart subtypes are logged and ignored. Inside related, mixed and
 * alternative multiparts, a body part that fails to be extracted is logged and skipped.
 *
 * @param part the part to extract; attachments are cast to {@link MimeBodyPart}
 * @return the content found in the part and its sub-parts
 * @throws MessagingException if the structure of the part cannot be read
 * @throws IOException if the content of the part cannot be read
 */
public MailContent extractPartsContent(Part part) throws MessagingException, IOException
{
    MailContent mailContent = new MailContent();
    // Locale-independent lower casing, so that the prefix tests below do not depend on the default locale
    String contentType = part.getContentType().toLowerCase(Locale.ENGLISH);
    boolean isMultipart = contentType.startsWith("multipart/");
    if (!StringUtils.isBlank(part.getFileName())
        || (!isMultipart && !part.isMimeType("text/plain") && !part.isMimeType("text/html"))) {
        mailContent.addAttachment((MimeBodyPart) part);
    } else if (part.isMimeType("text/plain")) {
        logger.debug("Extracting part PLAIN TEXT");
        mailContent.appendText(MimeUtility.decodeText((String) part.getContent()));
    } else if (part.isMimeType("text/html")) {
        logger.debug("Extracting part HTML");
        mailContent.appendHtml(MimeUtility.decodeText((String) part.getContent()));
    } else if (part.isMimeType("message/rfc822")) {
        // NOTE(review): this branch cannot be reached at the moment, because a message/rfc822 part is neither
        // multipart, text/plain nor text/html and is therefore already handled as an attachment above.
        logger.debug("Extracting part message/rfc822");
        Message innerMessage = (Message) part.getContent();
        mailContent.addAttachedMail(innerMessage);
        // FIXME attached mails should be loaded previously to their container
    } else if (isMultipart) {
        logger.debug("Extracting MULTIPART");
        Multipart multipart = (Multipart) part.getContent();
        if (contentType.startsWith("multipart/signed")) {
            // Signed multiparts contain 2 parts: first is the content, second is the control information
            // We just ignore the control information
            logger.debug("Extracting SIGNED MULTIPART");
            if (multipart.getCount() > 0) {
                mailContent.append(extractPartsContent(multipart.getBodyPart(0)));
            } else {
                logger.debug("SIGNED MULTIPART has no body part");
            }
        } else if (part.isMimeType("multipart/related") || part.isMimeType("multipart/mixed")
            || part.isMimeType("multipart/alternative")) {
            logger.debug("Extracting multipart / related or mixed or alternative");
            // FIXME multipart/alternative should be treated differently than other parts, though the same treatment
            // should be ok most of the time
            // (multipart/alternative is usually one part text/plain and the alternative text/html, so as text and
            // html are always considered alternates by this algorithm, it's ok)
            int mcount = multipart.getCount();
            for (int i = 0; i < mcount; i++) {
                logger.debug("Adding MULTIPART #{}", i);
                try {
                    mailContent.append(extractPartsContent(multipart.getBodyPart(i)));
                } catch (Exception e) {
                    // getRootCauseMessage still describes the exception itself when it has no cause
                    logger.warn("Could not add MULTIPART #{} because of {}", i,
                        ExceptionUtils.getRootCauseMessage(e));
                }
            }
        } else {
            logger.info("Multipart subtype {} not managed", baseContentType(contentType));
        }
    } else {
        // NOTE(review): not reachable either with the conditions above; kept as a safety net
        logger.info("Message Type {} not managed", baseContentType(contentType));
    }
    return mailContent;
}

/**
 * Returns the type/subtype portion of a Content-Type value, that is everything before the first semicolon or
 * whitespace, for logging purposes.
 *
 * @param contentType the full Content-Type value, not null
 * @return the value without its parameters, or the trimmed value itself when it has none
 */
private static String baseContentType(String contentType)
{
    String trimmed = contentType.trim();
    for (int i = 0; i < trimmed.length(); i++) {
        char c = trimmed.charAt(i);
        if (c == ';' || Character.isWhitespace(c)) {
            return trimmed.substring(0, i);
        }
    }
    return trimmed;
}
/**
 * Fills a map with key=contentId, value=filename of attachment. Only body parts having both a non-blank content
 * id and a file name are registered. The file name is cleaned with {@link #getAttachmentValidName(String)},
 * gets the ".ext" extension when it contains no dot, and gets a numeric suffix before its extension while the
 * name is already used by a previously registered attachment.
 *
 * @param bodyparts the attachments to index
 * @return the attachment file names, by content id
 */
public HashMap<String, String> fillAttachmentContentIds(ArrayList<MimeBodyPart> bodyparts)
{
    final HashMap<String, String> namesByCid = new HashMap<String, String>();
    for (final MimeBodyPart attachment : bodyparts) {
        String originalName = null;
        String contentId = null;
        try {
            originalName = attachment.getFileName();
            contentId = attachment.getContentID();
        } catch (MessagingException e) {
            logger.warn("Failed to retrieve attachment information", e);
        }

        if (StringUtils.isBlank(contentId) || originalName == null) {
            logger.debug("fillAttachmentContentIds: content ID is null, nothing to do");
            continue;
        }

        logger.debug("fillAttachmentContentIds: Treating attachment: {} with contentid {}", originalName,
            contentId);
        String baseName = getAttachmentValidName(originalName);
        if (baseName.indexOf('.') < 0) {
            baseName += ".ext";
        }

        // Try baseName, then baseName with -1, -2, ... inserted before the extension, until the name is free
        String uniqueName = baseName;
        for (int suffix = 1; namesByCid.containsValue(uniqueName); suffix++) {
            final String candidate = baseName.replaceAll("(.*)\\.([^.]*)", "$1-" + suffix + ".$2");
            logger.debug("fillAttachmentContentIds: " + uniqueName + " attachment already exists, renaming to "
                + candidate);
            uniqueName = candidate;
        }
        namesByCid.put(contentId, uniqueName);
    }
    return namesByCid;
}
/**
 * Returns a valid name for an attachment from its original name: any leading directory path is removed, whether
 * it uses backslashes, forward slashes or a mix of both, and every plus sign is replaced by a space.
 *
 * @param afilename the original file name of the attachment, must not be null
 * @return the file name without its path and with plus signs turned into spaces
 */
public String getAttachmentValidName(String afilename)
{
    // Cut after the last separator of either kind, so that mixed paths do not leave a directory prefix behind
    int lastSeparator = Math.max(afilename.lastIndexOf('\\'), afilename.lastIndexOf('/'));
    return afilename.substring(lastSeparator + 1).replace('+', ' ');
}
}