/* * The contents of this file are subject to the Mozilla Public License * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See * the License for the specific language governing rights and limitations * under the License. * * The Original Code is the Kowari Metadata Store. * * The Initial Developer of the Original Code is Plugged In Software Pty * Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions * created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002 * Plugged In Software Pty Ltd. All Rights Reserved. * * Contributor(s): N/A. * * [NOTE: The text of this Exhibit A may differ slightly from the text * of the notices in the Source Code files of the Original Code. You * should use the text of this Exhibit A rather than the text found in the * Original Code Source Code for Your Modifications.] * */ package org.mulgara.content.mbox.parser; // Java standard packages import java.io.*; import java.util.*; import java.util.regex.*; import java.net.URI; import java.net.MalformedURLException; // Java enterprise packages import javax.mail.*; import javax.mail.internet.*; // Third party packages import org.apache.log4j.*; // Other quoll tool packages import org.mulgara.content.mbox.parser.exception.*; import org.mulgara.util.TempDir; /** * A tool for splitting an internet <acronym title="Multipurpose Internet Mail * Extensions">MIME</acronym> into its parts (body and attachments, not * headers). * * @created 2003-03-07 * * @author <a href="http://staff.pisoftware.com/raboczi"/>Simon Raboczi</a> * @author Mark Ludlow * * @version $Revision: 1.8 $ * * @modified $Date: 2005/01/05 04:57:41 $ * * @maintenanceAuthor $Author: newmana $ * * @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A> * * @copyright © 2003 * <A href="http://www.PIsoftware.com/">Plugged In Software Pty Ltd</A> * * @licence <A href="{@docRoot}/LICENCE">License description</A> */ public final class MimeMessageToPart { /** The category to log to. */ private static final Logger logger = Logger.getLogger(MimeMessageToPart.class); /** The mbox splitter's session */ private Session session; /** The mail message store */ private Store store; /** The main folder of the store */ private Folder folder; /** Flag to indicate whether we are using a chached file or not */ private boolean cachedFile; /** Pointer to the message we are currently reading */ private int messagePointer; /** URI pointing to the content file */ private URI contentURI; /** Number of messages in the mbox */ private int messageCount; /** * Constructor. */ public MimeMessageToPart() { } /** * Initialises the mail store and associated folders using the given uri and * content stream. * * @param uri The uri of the mbox * @param contentStream A stream which contains the actual mbox data * * @throws IOProcessingException * @throws MimeMessageProcessException */ public void initialise(URI uri, InputStream contentStream) throws IOProcessingException, MimeMessageProcessException { // Validate arguments if (uri == null) { throw new IllegalArgumentException("Null input uri"); } // Boolean to flag if we used a cached file cachedFile = false; // Store the uri contentURI = uri; if (!uri.getScheme().equals("file")) { // If we have a non-file protocol then we need to cache the content before // parsing it into messages uri = cacheFile(contentStream, uri); // Set that we are using a cached file cachedFile = true; } // Create the session to our mbox session = Session.getDefaultInstance(new Properties()); try { // Obtain the store representing our mbox store = new MboxStoreImpl(session, new URLName(uri.toURL())); } catch (MalformedURLException malformedURLException) { throw new IOProcessingException("Could not create URL from mbox uri: " + contentURI, malformedURLException); } try { // Obtain the inbox folder folder = store.getDefaultFolder(); } catch (MessagingException messagingException) { throw new MimeMessageProcessException("Failed to obtain default folder " + "from mbox: " + contentURI, messagingException); } try { // store the number of messages in the mbox messageCount = folder.getMessageCount(); } catch (MessagingException messagingException) { throw new MimeMessageProcessException("Failed to obtain message count " + "for mbox: " + contentURI, messagingException); } if (logger.isDebugEnabled()) { logger.debug("Found " + messageCount + " messages in folder"); } // Reset the current message pointer messagePointer = 0; } /** * Retrieves the next message from the mbox. * * @return The next message in the mbox's bean * * @throws UnsupportedEncodingProcessingException * @throws MimeMessageProcessException * @throws IOProcessingException */ public MimeMessageToPartBean processNextMessage() throws UnsupportedEncodingProcessingException, MimeMessageProcessException, IOProcessingException { if (messagePointer >= messageCount) { // If we have reached the end of the available messages then stop reading return null; } // Create a bean to store the information from the message MimeMessageToPartBean bean = new MimeMessageToPartBean(); // Container for our message Part part = null; try { // Get the single message and write it out part = folder.getMessage(messagePointer); } catch (MessagingException messagingException) { throw new MimeMessageProcessException("Failed to find next message in " + "mbox", messagingException); } try { if (part instanceof MimeMessage) { if (logger.isDebugEnabled()) { logger.debug("Part was a mime message. Now processing: " + part.getDescription()); } MimeMessage mimeMessage = (MimeMessage) part; // BCC recipients try { Address[] recipients = mimeMessage.getRecipients(Message.RecipientType.BCC); if (recipients != null) { for (int i = 0; i < recipients.length; i++) { bean.addBCCAddress(makeQuollEmailAddress(recipients[i])); } } } catch (AddressException e) { // discard misformatted header and press on } // CC recipients try { Address[] recipients = mimeMessage.getRecipients(Message.RecipientType.CC); if (recipients != null) { for (int i = 0; i < recipients.length; i++) { bean.addCCAddress(makeQuollEmailAddress(recipients[i])); } } } catch (AddressException e) { // discard misformatted header and press on } // Date bean.setDate(mimeMessage.getSentDate()); // From try { Address[] fromAddresses = mimeMessage.getFrom(); if (fromAddresses != null) { switch (fromAddresses.length) { case 1: // No @ symbol found - probably exchange alias if (fromAddresses[0].toString().indexOf('@') == -1) { /* * All the code in this conditional block is kludgery to handle * the compounded effects of previous kludgery elsewhere in the * system. Uniaccess's output of Lotus Notes address headers * conflicts with the output for internet headers, particularly * with regard to the separator character. The unit test data * had quotes manually inserted around the addresses in which * this occurred, to avoid confusing JavaMail with embedded * commas. */ String[] rawFromAddresses = mimeMessage.getHeader("From"); if (rawFromAddresses.length == 1) { // Strip quotes Matcher matcher = Pattern.compile("\"([^\"]*)\"").matcher( rawFromAddresses[0]); if (matcher.matches()) { rawFromAddresses[0] = matcher.group(1); } bean.setFromAddress(new QuollEmailAddress( rawFromAddresses[0])); } else { StringBuffer buffer = new StringBuffer(); if (rawFromAddresses.length > 0) { buffer.append(rawFromAddresses[0]); for (int i = 1; i < rawFromAddresses.length; i++) { buffer.append(" ").append(rawFromAddresses[i]); } } logger.debug( "Message with " + rawFromAddresses.length + " FROM headers: " + buffer ); bean.setFromAddress(new QuollEmailAddress(buffer. toString())); } } else { bean.setFromAddress(makeQuollEmailAddress(fromAddresses[0])); } case 0: break; default: StringBuffer buffer = new StringBuffer(); if (fromAddresses.length > 0) { buffer.append(fromAddresses[0]); for (int i = 1; i < fromAddresses.length; i++) { buffer.append(" ").append(fromAddresses[i]); } } logger.debug( "Message with " + fromAddresses.length + " senders: " + buffer ); bean.setFromAddress(new QuollEmailAddress(buffer.toString())); } } } catch (AddressException e) { // discard misformatted header and press on } // Message ID bean.setMessageID(mimeMessage.getMessageID()); // Subject bean.setSubject(mimeMessage.getSubject()); // Reference ID's String[] refs = mimeMessage.getHeader("references"); if (refs != null) { StringTokenizer refsTok = new StringTokenizer(refs[0], "<> \t", false); int count = refsTok.countTokens(); for (int i = 1; i <= count; i++) { String ref = refsTok.nextToken(); if (ref != null) { bean.addReference(ref); } } } // To recipients try { Address[] recipients = mimeMessage.getRecipients(Message.RecipientType.TO); if (recipients != null) { for (int i = 0; i < recipients.length; i++) { bean.addToAddress(makeQuollEmailAddress(recipients[i])); } } } catch (AddressException e) { // discard misformatted header and press on } } else { throw new MimeMessageProcessException("Can't process " + part.getClass()); } // Container for our content Object content = null; try { // Try to get the content of the message. This is prone to fail if // the Content-Type header's charset value ends with a semi-colon so // we need to catch runtime exceptions in order to avoid crashing on // such messages content = part.getContent(); if (logger.isDebugEnabled()) { logger.debug("Type of content is: " + content.getClass()); } } catch (RuntimeException runtimeException) { // We do not want to halt progress of the processing so just warn and // ignore logger.warn("Failed to get content of part '" + bean.getMessageID() + "' due to bad header formatting", runtimeException); } // Populate content if (content instanceof Multipart) { if (logger.isDebugEnabled()) { logger.debug("Getting parts of multipart object."); } // Split into subparts Multipart multipart; multipart = (Multipart) part.getContent(); try { for (int i = 0; i < multipart.getCount(); i++) { BodyPart bodyPart = multipart.getBodyPart(i); // This is a kludge to handle spurious empty parts caused by // trailing blank lines; any part lacking a Content-Type header // is assumed to be spurious String[] rawContentType = bodyPart.getHeader("Content-Type"); if (rawContentType == null) { continue; } // Get the details of the attachment findAttachmentDetails(bean, bodyPart); } } catch (javax.mail.MessagingException e) { // This is a kludge to handle the case where a multipart/* message // has zero attachments; Uniaccess tends to generate such mboxes // from Lotus Notes calendars. if (e.getMessage().equals("Missing start boundary")) { // We ignore this problem and just go to the next message } else throw e; } } else { // Get the details of the attachment //findAttachmentDetails(beans[j], part); } if (logger.isDebugEnabled()) { logger.debug("Created a message bean: " + bean); } // Increment our message pointer to the next message messagePointer++; // Return the list of generated mime message beans return bean; } catch (javax.mail.MessagingException me) { throw new MimeMessageProcessException( "Couldn't process the message into " + "parts", me); } catch (java.io.UnsupportedEncodingException uee) { throw new UnsupportedEncodingProcessingException("Could decode message", uee); } catch (IOException e) { throw new IOProcessingException("Couldn't read/write the message", e); } } /** * Closes all associated services to the message splitter and releases any * resources. * * @throws MimeMessageProcessException */ public void close() throws MimeMessageProcessException { if (cachedFile) { // If we used a cached file then clean it up tidyCachedFile(contentURI); } // Clear global variables session = null; store = null; try { // Close the folder.close(true); } catch (MessagingException messagingException) { throw new MimeMessageProcessException("Failed to close default folder " + "for mbox: " + contentURI, messagingException); } // Clear the folder object folder = null; } /** * Calculate the size, mime type, and filename of the given attachment. * * @param bean The container of information about a particular message * @param part The part we are finding the details of */ private void findAttachmentDetails(MimeMessageToPartBean bean, Part part) throws IOException, javax.mail.MessagingException { // Create an attachment to store our part data Attachment attachment = new Attachment(); // Set the filename attachment.setFilename(part.getFileName()); // Set the size attachment.setSize(part.getSize()); // Set the mime type attachment.setMimeType(part.getContentType()); if (logger.isDebugEnabled()) { logger.debug("Added attachment: [" + attachment.getFilename() + ", " + attachment.getMimeType() + ", " + attachment.getSize() + "]"); } // record the details of the file bean.addPart(attachment); } /** * Make a quoll email address from a javax.mail.Address. * * @param address The original address. * @return The quoll email address. */ private QuollEmailAddress makeQuollEmailAddress(Address address) { // Set address and personal name seperately if possible if (address instanceof InternetAddress) { InternetAddress internetAddress = (InternetAddress) address; QuollEmailAddress result = new QuollEmailAddress(internetAddress. getAddress()); result.setPersonalName(internetAddress.getPersonal()); return result; } else { return new QuollEmailAddress(address.toString()); } } /** * Caches the content of the input stream locally and returns the URI pointer * to the cached file. * * @param contentStream The stream that contains the file to cache * @param originalUri The original uri of the file * * @return The URI to the cached file * * @throws IOProcessingException */ private URI cacheFile(InputStream contentStream, URI originalUri) throws IOProcessingException { // Create a cache directory in the temporary directory File cacheDir = new File(TempDir.getTempDir(), "mbox-cache"); // Container for the original file name String filename = null; try { // Obtain the original file name filename = originalUri.toURL().getFile(); } catch (MalformedURLException malformedURLException) { throw new IOProcessingException("Failed to obtain filename from URI: " + originalUri.toString(), malformedURLException); } // Remove the leading slash filename = filename.substring(1, filename.length()); // Create the directory cacheDir.mkdirs(); // Create a cached version of the message as a local file File cacheFile = new File(cacheDir, "mbox-message-" + filename + ".mbox"); // Container for our print writer PrintWriter writer = null; try { // Open a writer to the cached file writer = new PrintWriter(new FileWriter(cacheFile)); } catch (IOException ioException) { throw new IOProcessingException("Failed to open writer to " + "temporary cache file: " + cacheFile.getAbsolutePath(), ioException); } // Create a reader for the original stream BufferedReader reader = new BufferedReader(new InputStreamReader( contentStream)); // Container for our current line String line = null; try { // Get the first line of the stream line = reader.readLine(); } catch (IOException ioException) { throw new IOProcessingException("Could not read from source stream: " + originalUri.toString(), ioException); } // Read the stream line by line until we encounter its end while (line != null) { // Write out the next line of the stream writer.println(line); try { // Get the next line of the stream line = reader.readLine(); } catch (IOException ioException) { throw new IOProcessingException("Could not read from source stream: " + originalUri.toString(), ioException); } } // Close the writer writer.flush(); writer.close(); try { // Close the stream reader.close(); } catch (IOException ioException) { logger.warn("Ignoring failure to close input stream from mbox: " + originalUri.toString(), ioException); // We can't do much about streams that refuse to close, so as long as we // have our data we can safely ignore the problem. } return cacheFile.toURI(); } /** * Removes a cached file from the given URI. * * @param uri The URI of the cached file to remove */ public void tidyCachedFile(URI uri) { // Create a file from the URI File cacheFile = new File(uri); // Assume we can delete the file boolean couldDelete = true; try { // Remove the file couldDelete = cacheFile.delete(); } catch (SecurityException securityException) { // Log the problem but ignore it as we can't do anything about it logger.warn("Unable to gain access for deletion of " + cacheFile.getAbsolutePath(), securityException); } if (!couldDelete) { // Log the problem but ignore it as we can't do anything about it logger.warn("Failed to delete [" + cacheFile.getAbsolutePath() + "] for an unknown reason."); } } }