/** * Copyright (c) 2011-2012 Optimax Software Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of Optimax Software, ElasticInbox, nor the names * of its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ package com.elasticinbox.core.message; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Iterator; import java.util.Properties; import javax.mail.MessagingException; import javax.mail.Part; import javax.mail.Session; import javax.mail.internet.InternetAddress; import javax.mail.internet.MimeMessage; import javax.mail.internet.MimeMessage.RecipientType; import javax.mail.internet.MimeMultipart; import javax.mail.internet.ParseException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.elasticinbox.common.utils.Assert; import com.elasticinbox.core.model.Address; import com.elasticinbox.core.model.AddressList; import com.elasticinbox.core.model.Message; import com.elasticinbox.core.model.MimePart; import com.google.common.io.CharStreams; import com.google.common.io.LimitInputStream; public final class MimeParser { public final static String MIME_HEADER_SPAM = "X-Spam-Flag"; /** Read 100K if cannot parse body */ private final static int MAX_UNPARSED_PART_SIZE = 50 * 1024; /** Used when encoding is unknown */ private final static String DEFAULT_ENCODING = "ISO-8859-1"; private static Properties props = new Properties(); private Message message; private MimeMessage mimeMessage; private StringBuilder textBody = new StringBuilder(); private StringBuilder htmlBody = new StringBuilder(); private static final Logger logger = LoggerFactory .getLogger(MimeParser.class); static { // Make JavaMail parser more error tolerant // see http://javamail.kenai.com/nonav/javadocs/javax/mail/internet/package-summary.html#package_description props.setProperty("mail.mime.address.strict", "false"); props.setProperty("mail.mime.decodetext.strict", "false"); props.setProperty("mail.mime.decodefilename", "true"); props.setProperty("mail.mime.decodeparameters", "true"); props.setProperty("mail.mime.charset", "utf-8"); props.setProperty("mail.mime.parameters.strict", "false"); props.setProperty("mail.mime.base64.ignoreerrors", "true"); props.setProperty("mail.mime.uudecode. ignoreerrors", "true"); props.setProperty("mail.mime.uudecode.ignoremissingbeginend", "true"); props.setProperty("mail.mime.multipart.allowempty", "true"); props.setProperty("mail.mime.ignoreunknownencoding", "true"); props.setProperty("mail.mime.ignoremultipartencoding", "false"); props.setProperty("mail.mime.allowencodedmessages", "true"); // Some of JavaMail properties should be set on System level for (Iterator<Object> iter = props.keySet().iterator(); iter.hasNext();) { String key = (String) iter.next(); System.setProperty(key, (String) props.get(key)); } } public MimeParser() { // } public MimeParser(InputStream in) throws IOException, MimeParserException { parse(in); } /** * Parse {@link InputStream} into {@link Message} structure * * @param in * @throws IOException * @throws MimeParserException */ public void parse(InputStream in) throws IOException, MimeParserException { this.message = new Message(); Session session = Session.getDefaultInstance(props); try { this.mimeMessage = new MimeMessage(session, in); this.message.setFrom(getAddressList(mimeMessage.getFrom())); this.message.setTo(getAddressList(mimeMessage.getRecipients(RecipientType.TO))); this.message.setCc(getAddressList(mimeMessage.getRecipients(RecipientType.CC))); this.message.setBcc(getAddressList(mimeMessage.getRecipients(RecipientType.BCC))); this.message.setSubject(mimeMessage.getSubject()); this.message.setMessageId(mimeMessage.getMessageID()); this.message.setReplyTo(getAddressList(mimeMessage.getReplyTo())); this.message.setDate(mimeMessage.getSentDate()); //this.message.setSize((long) mimeMessage.getSize()); // extract necessary minor headers // TODO: This should be replaced by filters in future message.addMinorHeader(MIME_HEADER_SPAM, mimeMessage.getHeader(MIME_HEADER_SPAM, null)); // extract mime parts and body parseMessagePart(mimeMessage, ""); } catch (MessagingException e) { logger.error("Unable to parse MIME message: ", e); throw new MimeParserException(e.getMessage()); } if (this.htmlBody.length() > 0) { message.setHtmlBody(this.htmlBody.toString()); } if (this.textBody.length() > 0) { message.setPlainBody(this.textBody.toString()); } } public Message getMessage() throws IOException { return message; } /** * Get InputStream of MIME part identified by Part ID * * @param contentId * @return * @throws MimeParserException */ public InputStream getInputStreamByPartId(String partId) throws MimeParserException { Assert.notNull(this.mimeMessage, "No message was processed. Initialize first."); message.getPart(partId); // make sure that part exists, otherwise IAE will be thrown MimeMultipart mp; Object content; InputStream in = null; // find based on Part ID eg. 1.2.3 try { mp = (MimeMultipart) this.mimeMessage.getContent(); String[] partNums = partId.split("\\."); // loop through parts to reach the final part for (int i = 0; i < partNums.length; i++) { int localPartId = Integer.parseInt(partNums[i]) - 1; content = mp.getBodyPart(localPartId).getContent(); if (content instanceof MimeMultipart) { mp = (MimeMultipart) content; } else if ((content instanceof String) || (content instanceof InputStream) || (content instanceof MimeMessage)) { in = mp.getBodyPart(localPartId).getInputStream(); } else { // normally, we should never get here // perhaps bad Part ID throw new MessagingException("MIME part not found"); } } } catch (IOException e) { throw new MimeParserException("Unable to extract attachment from the message: " + e.getMessage()); } catch (MessagingException e) { throw new IllegalArgumentException("Message does not contain part with ID " + partId); } return in; } /** * Get InputStream of MIME part identified by Content-ID * * @param contentId * @return * @throws MimeParserException */ public InputStream getInputStreamByContentId(String contentId) throws MimeParserException { Assert.notNull(this.mimeMessage, "No message was processed. Initialize first."); // lookup part ID and make sure that part exists. IAE will be thrown otherwise. String partId = message.getPartByContentId(contentId).getPartId(); return getInputStreamByPartId(partId); } /** * Recursively walk through parsed MIME message and extract parts info * * @throws IOException * @throws MessagingException */ private void parseMessagePart(Part part, String partId) throws IOException, MessagingException { Object content = null; // decode part try { content = part.getContent(); } catch (UnsupportedEncodingException uee) { // TODO: make better handling of unsupported encodings, perhaps using jcharset detector if (part.isMimeType("text/*")) { // decode text using default encoding for all unknown encodings logger.warn("Parser detected unsupported encoding: {}. Will try decoding with {}", uee.getMessage(), DEFAULT_ENCODING); InputStream in = part.getInputStream(); content = CharStreams.toString(new InputStreamReader(in, DEFAULT_ENCODING)); } else { logger.error("Parser detected unsupported encoding: {}. Unparsed part will be used.", uee.getMessage()); content = readUnparsedPart(part.getInputStream()); } } catch (Exception e) { // Content-Type is malformed if we got here logger.warn("Unable to parse Content-Type. Thrown by {}: {}. Unparsed part will be used.", e.getClass(), e.getMessage()); content = readUnparsedPart(part.getInputStream()); } logger.debug("Parsing part {} with mime type {}.", (partId.isEmpty()) ? "message" : partId, part.getContentType()); if (content instanceof String) { // simple part with text String dis = null; try { dis = part.getDisposition(); } catch (ParseException e) { // if parsing of disposition string failed, assume part an attachment dis = Part.ATTACHMENT; } logger.trace("MIME parser extracted TEXT part: {}", (String) content); if ((dis != null) && dis.equals(Part.ATTACHMENT)) { // add text part as attachment message.addPart(partId, new MimePart(part)); } else { // if no disposition, then add to message body if(part.isMimeType("text/html")) { htmlBody.append((String) content); } else { textBody.append((String) content); } } } else if (content instanceof MimeMultipart) { MimeMultipart multipart = (MimeMultipart) content; for (int i = 0; i < multipart.getCount(); i++) { // build next part id StringBuilder nextPartId = new StringBuilder(partId); // add period if not at root level if (!partId.isEmpty()) nextPartId.append("."); int localPartId = i+1; // IMAPv4 MIME part counter starts from 1 nextPartId.append(localPartId); Part nextPart = multipart.getBodyPart(i); parseMessagePart(nextPart, nextPartId.toString()); } } else if ((content instanceof InputStream) || (content instanceof MimeMessage)) { // binary, message/rfc822 or text attachment message.addPart(partId, new MimePart(part)); } else { throw new MessagingException("Unkonwn message part type " + content.getClass().getName()); } } /** * Get AddressList from JavaMail Address array * * @param mailboxes MailboxList * @return AddressList * @throws IllegalArgumentException */ private static AddressList getAddressList(javax.mail.Address[] al) throws IllegalArgumentException { if (al == null) return null; ArrayList<Address> addresses = new ArrayList<Address>(); for (int i = 0; i < al.length; i++) { InternetAddress ia = (InternetAddress) al[i]; Address a = new Address(ia.getPersonal(), ia.getAddress()); addresses.add(a); } return new AddressList(addresses); } /** * Read unparsed part. * * @param in * @return * @throws UnsupportedEncodingException * @throws IOException */ private static String readUnparsedPart(InputStream in) throws UnsupportedEncodingException, IOException { return CharStreams.toString(new InputStreamReader(new LimitInputStream( in, MAX_UNPARSED_PART_SIZE), DEFAULT_ENCODING)); } }