MimeMessageToPart.java example

Explorer
mulgara-master
- src
  - jar
  - war
    - server-http
      - java
        HttpServer.java
        HttpServerServlet.java
- tools
  - src
    - org
      - mulgara
        tools
        Sparql.java
        Tql.java
/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is the Kowari Metadata Store.
 *
 * The Initial Developer of the Original Code is Plugged In Software Pty
 * Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
 * created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
 * Plugged In Software Pty Ltd. All Rights Reserved.
 *
 * Contributor(s): N/A.
 *
 * [NOTE: The text of this Exhibit A may differ slightly from the text
 * of the notices in the Source Code files of the Original Code. You
 * should use the text of this Exhibit A rather than the text found in the
 * Original Code Source Code for Your Modifications.]
 *
 */

package org.mulgara.content.mbox.parser;

// Java standard packages
import java.io.*;
import java.util.*;
import java.util.regex.*;
import java.net.URI;
import java.net.MalformedURLException;

// Java enterprise packages
import javax.mail.*;
import javax.mail.internet.*;

// Third party packages
import org.apache.log4j.*;

// Other quoll tool packages
import org.mulgara.content.mbox.parser.exception.*;
import org.mulgara.util.TempDir;

/**
 * A tool for splitting an internet <acronym title="Multipurpose Internet Mail
 * Extensions">MIME</acronym> message into its parts (body and attachments, not
 * headers).
 *
 * @created 2003-03-07
 *
 * @author <a href="http://staff.pisoftware.com/raboczi">Simon Raboczi</a>
 * @author Mark Ludlow
 *
 * @version $Revision: 1.8 $
 *
 * @modified $Date: 2005/01/05 04:57:41 $
 *
 * @maintenanceAuthor $Author: newmana $
 *
 * @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A>
 *
 * @copyright © 2003
 *   <A href="http://www.PIsoftware.com/">Plugged In Software Pty Ltd</A>
 *
 * @licence <A href="{@docRoot}/LICENCE">License description</A>
 */

public final class MimeMessageToPart {

  /** The category to log to. */
  private static final Logger logger = Logger.getLogger(MimeMessageToPart.class);

  /** The mbox splitter's session; created by initialise and cleared by close */
  private Session session;

  /** The mail message store; created by initialise and cleared by close */
  private Store store;

  /** The main (default) folder of the store, from which messages are read */
  private Folder folder;

  /**
   * Flag to indicate whether we are using a cached file or not.  Set by
   * initialise when the content had to be copied to a local file, and checked
   * by close to decide whether a cached file has to be removed.
   */
  private boolean cachedFile;

  /**
   * Pointer to the message we are currently reading.  Reset to zero by
   * initialise and advanced by one after each successfully processed message.
   */
  private int messagePointer;

  /** URI pointing to the content file */
  private URI contentURI;

  /** Number of messages in the mbox, as reported by the folder in initialise */
  private int messageCount;

  /**
   * Creates a splitter that is not yet attached to any mbox.  All working
   * state (session, store, folder and message counters) is established by
   * {@link #initialise(URI, InputStream)}.
   */
  public MimeMessageToPart() {
    // Intentionally empty: nothing to set up until initialise() is called
  }

  /**
   * Initialises the mail store and associated folders using the given uri and
   * content stream.
   *
   * A <code>file</code> uri is opened in place and the content stream is not
   * read.  Content addressed by any other scheme is first copied from the
   * stream into a local cache file; that cache file is what gets parsed and
   * what {@link #close()} later removes.
   *
   * @param uri The absolute uri of the mbox
   * @param contentStream A stream which contains the actual mbox data; only
   *   required when the uri does not use the <code>file</code> scheme
   *
   * @throws IllegalArgumentException if the uri is null or not absolute, or if
   *   a non-file uri is given without a content stream
   * @throws IOProcessingException if the content could not be cached or the
   *   uri could not be converted into a URL
   * @throws MimeMessageProcessException if the default folder or its message
   *   count could not be obtained
   */
  public void initialise(URI uri, InputStream contentStream) throws
      IOProcessingException, MimeMessageProcessException {

    // Validate arguments
    if (uri == null) {

      throw new IllegalArgumentException("Null input uri");
    }

    if (!uri.isAbsolute()) {

      // A scheme-less uri can neither be opened as a file nor turned into a
      // URL, so reject it up front instead of failing with a null pointer
      throw new IllegalArgumentException("Input uri is not absolute: " + uri);
    }

    // Keep the caller's uri for diagnostics; the local may be redirected to
    // the cache file below
    final URI originalURI = uri;

    // Boolean to flag if we used a cached file
    cachedFile = false;

    // Store the uri
    contentURI = originalURI;

    // URI schemes are case-insensitive, and comparing constant-first avoids
    // dereferencing the scheme
    if (!"file".equalsIgnoreCase(originalURI.getScheme())) {

      if (contentStream == null) {

        throw new IllegalArgumentException("Null content stream for " +
                                           "non-file uri: " + originalURI);
      }

      // If we have a non-file protocol then we need to cache the content before
      // parsing it into messages
      uri = cacheFile(contentStream, originalURI);

      // close() hands contentURI to tidyCachedFile(), which can only delete a
      // local file, so it must refer to the cached copy rather than the
      // remote original
      contentURI = uri;

      // Set that we are using a cached file
      cachedFile = true;
    }

    // Create the session to our mbox
    session = Session.getDefaultInstance(new Properties());

    try {

      // Obtain the store representing our mbox
      store = new MboxStoreImpl(session, new URLName(uri.toURL()));
    } catch (MalformedURLException malformedURLException) {

      throw new IOProcessingException("Could not create URL from mbox uri: " +
                                      originalURI, malformedURLException);
    }

    try {

      // Obtain the inbox folder
      folder = store.getDefaultFolder();
    } catch (MessagingException messagingException) {

      throw new MimeMessageProcessException("Failed to obtain default folder " +
                                            "from mbox: " + originalURI,
                                            messagingException);
    }

    try {

      // store the number of messages in the mbox
      messageCount = folder.getMessageCount();
    } catch (MessagingException messagingException) {

      throw new MimeMessageProcessException("Failed to obtain message count " +
                                            "for mbox: " + originalURI,
                                            messagingException);
    }

    if (logger.isDebugEnabled()) {

      logger.debug("Found " + messageCount + " messages in folder");
    }

    // Reset the current message pointer
    messagePointer = 0;
  }

  /**
   * Retrieves the next message from the mbox and summarises it as a bean.
   *
   * The bean receives the BCC, CC and To recipients, the sent date, the From
   * address, the message ID, the subject, the referenced message IDs and, for
   * multipart messages, the details of every body part that carries a
   * Content-Type header.  Address headers that cannot be parsed are skipped.
   * The message pointer only advances when the message was processed
   * successfully.
   *
   * @return The next message in the mbox's bean, or <code>null</code> when
   *   all messages have been read
   *
   * @throws UnsupportedEncodingProcessingException if the message uses an
   *   encoding that cannot be decoded
   * @throws MimeMessageProcessException if the message cannot be retrieved,
   *   is not a MIME message, or cannot be split into parts
   * @throws IOProcessingException if the message content cannot be read
   */
  public MimeMessageToPartBean processNextMessage() throws
      UnsupportedEncodingProcessingException, MimeMessageProcessException,
      IOProcessingException {

    if (messagePointer >= messageCount) {

      // If we have reached the end of the available messages then stop reading
      return null;
    }

    // Create a bean to store the information from the message
    MimeMessageToPartBean bean = new MimeMessageToPartBean();

    // Container for our message
    Part part = null;

    try {

      // Get the single message
      part = folder.getMessage(messagePointer);
    } catch (MessagingException messagingException) {

      throw new MimeMessageProcessException("Failed to find next message in " +
                                            "mbox", messagingException);
    }

    try {

      if (!(part instanceof MimeMessage)) {

        // Also covers a null part, which must not be dereferenced here
        Object partType = (part == null) ? null : part.getClass();
        throw new MimeMessageProcessException("Can't process " + partType);
      }

      MimeMessage mimeMessage = (MimeMessage) part;

      if (logger.isDebugEnabled()) {

        logger.debug("Part was a mime message.  Now processing: " +
                     part.getDescription());
      }

      // BCC recipients
      QuollEmailAddress[] bccAddresses =
          readRecipients(mimeMessage, Message.RecipientType.BCC);

      for (int i = 0; i < bccAddresses.length; i++) {

        bean.addBCCAddress(bccAddresses[i]);
      }

      // CC recipients
      QuollEmailAddress[] ccAddresses =
          readRecipients(mimeMessage, Message.RecipientType.CC);

      for (int i = 0; i < ccAddresses.length; i++) {

        bean.addCCAddress(ccAddresses[i]);
      }

      // Date
      bean.setDate(mimeMessage.getSentDate());

      // From
      populateFromAddress(bean, mimeMessage);

      // Message ID
      bean.setMessageID(mimeMessage.getMessageID());

      // Subject
      bean.setSubject(mimeMessage.getSubject());

      // Reference ID's
      populateReferences(bean, mimeMessage);

      // To recipients
      QuollEmailAddress[] toAddresses =
          readRecipients(mimeMessage, Message.RecipientType.TO);

      for (int i = 0; i < toAddresses.length; i++) {

        bean.addToAddress(toAddresses[i]);
      }

      // Container for our content
      Object content = null;

      try {

        // Try to get the content of the message.  This is prone to fail if
        // the Content-Type header's charset value ends with a semi-colon so
        // we need to catch runtime exceptions in order to avoid crashing on
        // such messages
        content = part.getContent();

        if (logger.isDebugEnabled()) {

          logger.debug("Type of content is: " +
                       ((content == null) ? null : content.getClass()));
        }
      } catch (RuntimeException runtimeException) {

        // We do not want to halt progress of the processing so just warn and
        // ignore
        logger.warn("Failed to get content of part '" + bean.getMessageID() +
                    "' due to bad header formatting", runtimeException);
      }

      // Populate content.  The content fetched above is reused rather than
      // fetched a second time outside the runtime exception guard.
      if (content instanceof Multipart) {

        if (logger.isDebugEnabled()) {

          logger.debug("Getting parts of multipart object.");
        }

        populateParts(bean, (Multipart) content);
      }

      if (logger.isDebugEnabled()) {

        logger.debug("Created a message bean: " + bean);
      }

      // Increment our message pointer to the next message
      messagePointer++;

      // Return the generated mime message bean
      return bean;
    } catch (javax.mail.MessagingException me) {

      throw new MimeMessageProcessException(
          "Couldn't process the message into " +
          "parts", me);
    } catch (java.io.UnsupportedEncodingException uee) {

      throw new UnsupportedEncodingProcessingException(
          "Could not decode message", uee);
    } catch (IOException e) {

      throw new IOProcessingException("Couldn't read/write the message", e);
    }
  }

  /** Matches a header value that is entirely wrapped in double quotes. */
  private static final Pattern QUOTED_VALUE = Pattern.compile("\"([^\"]*)\"");

  /**
   * Reads one class of recipients from a message and converts each to a quoll
   * address; a missing or misformatted header yields an empty array.
   *
   * @param mimeMessage The message to read from
   * @param type The recipient type (TO, CC or BCC) to read
   * @return The converted recipients, never <code>null</code>
   * @throws MessagingException on any failure other than a malformed address
   */
  private QuollEmailAddress[] readRecipients(MimeMessage mimeMessage,
      Message.RecipientType type) throws MessagingException {

    Address[] recipients = null;

    try {

      recipients = mimeMessage.getRecipients(type);
    } catch (AddressException e) {

      // discard misformatted header and press on
      if (logger.isDebugEnabled()) {

        logger.debug("Ignoring misformatted " + type + " header", e);
      }
    }

    if (recipients == null) {

      return new QuollEmailAddress[0];
    }

    QuollEmailAddress[] converted = new QuollEmailAddress[recipients.length];

    for (int i = 0; i < recipients.length; i++) {

      converted[i] = makeQuollEmailAddress(recipients[i]);
    }

    return converted;
  }

  /**
   * Sets the bean's From address from the message, if it has one.
   *
   * @param bean The bean to populate
   * @param mimeMessage The message to read from
   * @throws MessagingException on any failure other than a malformed address
   */
  private void populateFromAddress(MimeMessageToPartBean bean,
      MimeMessage mimeMessage) throws MessagingException {

    Address[] fromAddresses = null;

    try {

      fromAddresses = mimeMessage.getFrom();
    } catch (AddressException e) {

      // discard misformatted header and press on
      if (logger.isDebugEnabled()) {

        logger.debug("Ignoring misformatted From header", e);
      }
    }

    if (fromAddresses == null || fromAddresses.length == 0) {

      return;
    }

    if (fromAddresses.length > 1) {

      // Several senders are recorded as one space-separated address
      String senders = joinWithSpaces(fromAddresses);
      logger.debug(
          "Message with " + fromAddresses.length + " senders: " +
          senders
          );

      bean.setFromAddress(new QuollEmailAddress(senders));
      return;
    }

    if (fromAddresses[0].toString().indexOf('@') != -1) {

      bean.setFromAddress(makeQuollEmailAddress(fromAddresses[0]));
      return;
    }

    /*
     * No @ symbol found - probably exchange alias.
     *
     * The rest of this method is kludgery to handle the compounded effects
     * of previous kludgery elsewhere in the system.  Uniaccess's output of
     * Lotus Notes address headers conflicts with the output for internet
     * headers, particularly with regard to the separator character.  The
     * unit test data had quotes manually inserted around the addresses in
     * which this occurred, to avoid confusing JavaMail with embedded commas.
     */
    String[] rawFromAddresses = mimeMessage.getHeader("From");

    if (rawFromAddresses == null) {

      // getFrom() falls back to the Sender header when there is no From
      // header, so there is no raw value to work with; use the parsed address
      bean.setFromAddress(makeQuollEmailAddress(fromAddresses[0]));
      return;
    }

    if (rawFromAddresses.length == 1) {

      // Strip quotes
      String rawAddress = rawFromAddresses[0];
      Matcher matcher = QUOTED_VALUE.matcher(rawAddress);

      if (matcher.matches()) {

        rawAddress = matcher.group(1);
      }

      bean.setFromAddress(new QuollEmailAddress(rawAddress));
      return;
    }

    String rawSenders = joinWithSpaces(rawFromAddresses);
    logger.debug(
        "Message with " + rawFromAddresses.length +
        " FROM headers: " +
        rawSenders
        );

    bean.setFromAddress(new QuollEmailAddress(rawSenders));
  }

  /**
   * Adds every message ID found in the message's References header(s) to the
   * bean.
   *
   * @param bean The bean to populate
   * @param mimeMessage The message to read from
   * @throws MessagingException if the header cannot be read
   */
  private void populateReferences(MimeMessageToPartBean bean,
      MimeMessage mimeMessage) throws MessagingException {

    String[] refs = mimeMessage.getHeader("references");

    if (refs == null) {

      return;
    }

    for (int i = 0; i < refs.length; i++) {

      if (refs[i] == null) {

        continue;
      }

      // Raw header values keep their folding, so line breaks have to be
      // treated as separators along with blanks and angle brackets
      StringTokenizer refsTok =
          new StringTokenizer(refs[i], "<> \t\r\n", false);

      while (refsTok.hasMoreTokens()) {

        bean.addReference(refsTok.nextToken());
      }
    }
  }

  /**
   * Records the details of every genuine body part of a multipart message.
   *
   * @param bean The bean to populate
   * @param multipart The multipart content of the message
   * @throws IOException if a part cannot be read
   * @throws MessagingException if a part cannot be parsed
   */
  private void populateParts(MimeMessageToPartBean bean, Multipart multipart)
      throws IOException, MessagingException {

    try {

      int partCount = multipart.getCount();

      for (int i = 0; i < partCount; i++) {

        BodyPart bodyPart = multipart.getBodyPart(i);

        // This is a kludge to handle spurious empty parts caused by
        // trailing blank lines; any part lacking a Content-Type header
        // is assumed to be spurious
        if (bodyPart.getHeader("Content-Type") == null) {

          continue;
        }

        // Get the details of the attachment
        findAttachmentDetails(bean, bodyPart);
      }
    } catch (MessagingException e) {

      // This is a kludge to handle the case where a multipart/* message
      // has zero attachments; Uniaccess tends to generate such mboxes
      // from Lotus Notes calendars.  Compared constant-first because an
      // exception's message may be null.
      if (!"Missing start boundary".equals(e.getMessage())) {

        throw e;
      }
    }
  }

  /**
   * Joins the string forms of the given items with single spaces.
   *
   * @param items The items to join
   * @return The joined text; empty when there are no items
   */
  private static String joinWithSpaces(Object[] items) {

    StringBuffer buffer = new StringBuffer();

    for (int i = 0; i < items.length; i++) {

      if (i > 0) {

        buffer.append(" ");
      }

      buffer.append(items[i]);
    }

    return buffer.toString();
  }

  /**
   * Closes all associated services to the message splitter and releases any
   * resources.
   *
   * The folder is closed first and any cached copy of the mbox is removed
   * afterwards, so the file is not deleted while the folder may still be
   * using it.  The cached copy is removed and the references to the session,
   * store and folder are dropped even if closing the folder fails.  Calling
   * this method on a splitter that was never (successfully) initialised is
   * harmless.
   *
   * @throws MimeMessageProcessException if the folder could not be closed
   */
  public void close() throws MimeMessageProcessException {

    try {

      if (folder != null) {

        // Close the folder, expunging deleted messages
        folder.close(true);
      }
    } catch (MessagingException messagingException) {

      throw new MimeMessageProcessException("Failed to close default folder " +
                                            "for mbox: " + contentURI,
                                            messagingException);
    } finally {

      if (cachedFile) {

        try {

          // If we used a cached file then clean it up
          tidyCachedFile(contentURI);
        } catch (IllegalArgumentException illegalArgumentException) {

          // Cleanup is best effort; a URI that cannot be mapped to a local
          // file must not prevent the splitter from being closed
          logger.warn("Unable to remove cached copy of mbox: " + contentURI,
                      illegalArgumentException);
        }

        cachedFile = false;
      }

      // Clear global variables
      session = null;
      store = null;
      folder = null;
    }
  }

  /**
   * Records the filename, size and mime type of the given part as an
   * attachment on the bean.
   *
   * @param  bean  The container of information about a particular message
   * @param  part  The part whose details are being recorded
   *
   * @throws IOException declared for callers; see the part accessors
   * @throws javax.mail.MessagingException if the part's details can't be read
   */
  private void findAttachmentDetails(MimeMessageToPartBean bean, Part part) throws
      IOException, javax.mail.MessagingException {

    // Gather the three details we keep about a part
    Attachment details = new Attachment();
    details.setFilename(part.getFileName());
    details.setSize(part.getSize());
    details.setMimeType(part.getContentType());

    if (logger.isDebugEnabled()) {

      String summary = "Added attachment: [" + details.getFilename() + ", " +
          details.getMimeType() + ", " + details.getSize() + "]";
      logger.debug(summary);
    }

    // Hand the finished attachment over to the message bean
    bean.addPart(details);
  }

  /**
   * Converts a javax.mail.Address into a quoll email address.
   *
   * An internet address is split into its address and personal name; any
   * other kind of address is carried over as its string form.
   *
   * @param address The original address.
   * @return The quoll email address.
   */
  private QuollEmailAddress makeQuollEmailAddress(Address address) {

    // Anything that isn't an internet address has no separate personal name
    if (!(address instanceof InternetAddress)) {
      return new QuollEmailAddress(address.toString());
    }

    InternetAddress source = (InternetAddress) address;
    String mailbox = source.getAddress();
    String displayName = source.getPersonal();

    QuollEmailAddress converted = new QuollEmailAddress(mailbox);
    converted.setPersonalName(displayName);
    return converted;
  }

  /**
   * Caches the content of the input stream locally and returns the URI pointer
   * to the cached file.
   *
   * The content is copied byte for byte, so the cached file is identical to
   * what the stream delivered regardless of the platform's character set or
   * line separator.  The cache file lives in the "mbox-cache" directory below
   * the temporary directory and is named after the file part of the original
   * uri, with characters that are unsafe in file names replaced by
   * underscores.  The content stream is always closed before this method
   * returns, and an incompletely written cache file is removed.
   *
   * @param contentStream The stream that contains the file to cache
   * @param originalUri The original uri of the file
   *
   * @return The URI to the cached file
   *
   * @throws IllegalArgumentException if the original uri is not absolute
   * @throws IOProcessingException if the uri cannot be converted to a URL, or
   *   if the content cannot be read from the stream or written to the cache
   */
  private URI cacheFile(InputStream contentStream, URI originalUri) throws
      IOProcessingException {

    // Create a cache directory in the temporary directory
    File cacheDir = new File(TempDir.getTempDir(), "mbox-cache");

    // Container for the original file name
    String filename = null;

    try {

      // Obtain the original file name
      filename = originalUri.toURL().getFile();
    } catch (MalformedURLException malformedURLException) {

      throw new IOProcessingException("Failed to obtain filename from URI: " +
                                      originalUri.toString(),
                                      malformedURLException);
    }

    // Remove the leading slash, if there is one (the file part may be empty)
    if (filename.startsWith("/")) {

      filename = filename.substring(1);
    }

    // The file part may contain further path segments or a query string;
    // flatten it so the cache file always lands directly inside the cache
    // directory
    filename = filename.replaceAll("[/\\\\:*?\"<>|\\p{Cntrl}]", "_");

    // Create the directory
    cacheDir.mkdirs();

    // Create a cached version of the message as a local file
    File cacheFile = new File(cacheDir, "mbox-message-" +
                              filename + ".mbox");

    // Container for the stream to the cached file
    OutputStream cacheStream = null;

    // Whether the whole content made it into the cache file
    boolean complete = false;

    try {

      try {

        // Open a stream to the cached file
        cacheStream = new BufferedOutputStream(new FileOutputStream(cacheFile));
      } catch (IOException ioException) {

        throw new IOProcessingException("Failed to open writer to " +
                                        "temporary cache file: " +
                                        cacheFile.getAbsolutePath(),
                                        ioException);
      }

      byte[] buffer = new byte[8192];

      // Copy the stream until we encounter its end
      while (true) {

        int count;

        try {

          count = contentStream.read(buffer);
        } catch (IOException ioException) {

          throw new IOProcessingException("Could not read from source " +
                                          "stream: " + originalUri.toString(),
                                          ioException);
        }

        if (count < 0) {

          break;
        }

        try {

          cacheStream.write(buffer, 0, count);
        } catch (IOException ioException) {

          throw new IOProcessingException("Failed to write to temporary " +
                                          "cache file: " +
                                          cacheFile.getAbsolutePath(),
                                          ioException);
        }
      }

      try {

        // Closing flushes the remaining buffered bytes, so a failure here
        // means the cache file is incomplete
        cacheStream.close();
      } catch (IOException ioException) {

        throw new IOProcessingException("Failed to write to temporary " +
                                        "cache file: " +
                                        cacheFile.getAbsolutePath(),
                                        ioException);
      }

      cacheStream = null;
      complete = true;
    } finally {

      if (cacheStream != null) {

        try {

          cacheStream.close();
        } catch (IOException ioException) {

          // The original failure is already on its way to the caller
          logger.warn("Ignoring failure to close temporary cache file: " +
                      cacheFile.getAbsolutePath(), ioException);
        }
      }

      try {

        // Close the stream
        contentStream.close();
      } catch (IOException ioException) {

        // We can't do much about streams that refuse to close, so as long as
        // we have our data we can safely ignore the problem.
        logger.warn("Ignoring failure to close input stream from mbox: " +
                    originalUri.toString(), ioException);
      }

      if (!complete && cacheFile.exists() && !cacheFile.delete()) {

        logger.warn("Failed to remove incomplete cache file: " +
                    cacheFile.getAbsolutePath());
      }
    }

    return cacheFile.toURI();
  }

  /**
   * Removes a cached file from the given URI.
   *
   * Removal is best effort: every failure is logged as a warning and then
   * ignored, including a URI that is null or that does not identify a local
   * file.  A file that no longer exists is not treated as a failure.
   *
   * @param uri The URI of the cached file to remove
   */
  public void tidyCachedFile(URI uri) {

    // Only local files can be removed
    if (uri == null || !"file".equalsIgnoreCase(uri.getScheme())) {

      logger.warn("Unable to remove cached file for non-file URI: " + uri);
      return;
    }

    // Container for the file identified by the URI
    File cacheFile = null;

    try {

      // Create a file from the URI
      cacheFile = new File(uri);
    } catch (IllegalArgumentException illegalArgumentException) {

      // File URIs carrying an authority, query or fragment are rejected
      logger.warn("Unable to remove cached file for unsupported URI: " + uri,
                  illegalArgumentException);
      return;
    }

    try {

      // Remove the file; only complain when it is still there afterwards
      if (!cacheFile.delete() && cacheFile.exists()) {

        // Log the problem but ignore it as we can't do anything about it
        logger.warn("Failed to delete [" + cacheFile.getAbsolutePath() +
                    "] for an unknown reason.");
      }
    } catch (SecurityException securityException) {

      // Log the problem but ignore it as we can't do anything about it
      logger.warn("Unable to gain access for deletion of " +
                  cacheFile.getAbsolutePath(), securityException);
    }
  }
}