/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s): N/A.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.content.mbox.parser;
import java.io.*;
import java.util.*;
import java.util.zip.*;
import javax.mail.*;
// logging
import org.apache.log4j.*;
/*
* MboxFolder.java
* Copyright (C) 1999 dog <dog@dog.net.uk>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* You also have permission to link it with the Sun Microsystems, Inc.
* JavaMail(tm) extension and run that combination.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* You may retrieve the latest version of this library from
* http://www.dog.net.uk/knife/
*
* Contributor(s): Daniel Thor Kristjan <danielk@cat.nyu.edu> close and expunge
* clarification.
* Sverre Huseby <sverrehu@online.no> gzipped mailboxes
*/
/**
* This is a refactoring of the original MBoxFolderImpl: its private methods
* have been moved into a class of their own, so that reading the folder and
* indexing it are handled by a separate class. Direct access to the mailbox
* file and its messages is thereby kept apart from the implementation of the
* Java Mail API.
* <P/>
*
* This code is derived from the 'knife' mail and news client
* at http://www.dog.net.uk/knife.
* <P/>
*
* @created 2001-8-21
*
* @author dog@dog.net.uk
* @author Andrew Newman
* @author Mark Ludlow
*
* @version $Revision: 1.8 $
*
* @modified $Date: 2005/01/05 04:57:41 $
*
* @maintenanceAuthor $Author: newmana $
*
* @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A>
*
* @copyright © 2001
* <A href="http://www.PIsoftware.com/">Plugged In Software Pty Ltd</A>
*
* @licence <A href="{@docRoot}/../../LICENCE_LGPL.txt">Licence description</A>
*/
public class MboxReader {
/** Suffix appended to the mailbox file name to form the index file name. */
private static final String INDEX_EXTENSION = ".idx";
/** Log category */
private static final Logger log = Logger.getLogger(MboxReader.class);
/** The mbox file this reader works on. */
private File file;
/** The index file holding the message count and the message offsets. */
private File indexFile;
/** Set to true once the mailbox is known to have an index (see indexed()). */
private boolean indexExists = false;
/** True when writes are not allowed on this mailbox. */
private boolean readOnly = false;
/** The folder implementation handed to every message created by this reader. */
private MboxFolderImpl myFolder;
/** Cached number of messages; -1 means the count has not been read yet. */
private int messageCount = -1;
/**
* Constructor.
*/
protected MboxReader(MboxFolderImpl newFolder, String filename) {
file = new File(filename);
indexFile = new File(filename + INDEX_EXTENSION);
if ((indexFile.canRead()) && (indexFile.exists())) {
indexed();
}
readOnly = !this.canWrite();
myFolder = newFolder;
}
/**
 * Gives access to the mbox file this reader was created for.
 *
 * @return the mail box file.
 */
public File getFile() {
  return this.file;
}
/**
 * Tells whether the mail box is flagged as having an index.
 *
 * @return true if the mail box has been indexed, false otherwise.
 */
public boolean isIndexed() {
  return this.indexExists;
}
/**
 * Flags the mail box as indexed; {@link #isIndexed()} returns true from
 * now on.
 */
public void indexed() {
  this.indexExists = true;
}
/**
 * Gives the name of the mail box file, without its directory.
 *
 * @return the mail box file's name.
 */
public String getName() {
  String simpleName = this.file.getName();
  return simpleName;
}
/**
 * Gives the absolute path of the mail box file.
 *
 * @return the absolute path of the mail box file.
 */
public String getFullName() {
  String absolutePath = this.file.getAbsolutePath();
  return absolutePath;
}
/**
 * Tells whether the mail box file backing this folder exists.
 *
 * @return true if the mail box file exists.
 * @exception MessagingException declared for callers; this implementation
 *            only queries the file system.
 */
public boolean exists() throws MessagingException {
  boolean present = this.file.exists();
  return present;
}
/**
 * Tells whether the mail box file can be written to.
 *
 * @return true if the mail box file is writable.
 */
public boolean canWrite() {
  boolean writable = this.file.canWrite();
  return writable;
}
/**
 * Tells whether the mail box file can be read.
 *
 * @return true if the mail box file is readable.
 */
public boolean canRead() {
  boolean readable = this.file.canRead();
  return readable;
}
/**
 * Switches this mail box to read-only. There is no way to switch it back
 * through this method; it only ever sets the flag to true.
 */
public void setReadOnly() {
  this.readOnly = true;
}
/**
 * Reports the read-only flag of this mail box.
 *
 * @return true if this mail box is flagged read-only.
 */
public boolean isReadOnly() {
  return this.readOnly;
}
/**
 * Does a simple check to see whether the file is of mbox format: the first
 * line of the file must start with "from " (compared case-insensitively).
 * An empty file is not considered to be of mbox format.
 *
 * The streams opened for the check are always closed, including when reading
 * the first line fails.
 *
 * @return if the file is of mbox format.
 * @throws MessagingException if the file could not be opened or read.
 */
public boolean isMboxFormat() throws MessagingException {
  InputStream is = null;
  BufferedReader reader = null;
  try {
    is = getInputStream();
    reader = new BufferedReader(new InputStreamReader(new CRLFInputStream(is)));
    String line = reader.readLine();
    return (line != null) && line.toLowerCase().startsWith("from ");
  } catch (IOException e) {
    throw new MessagingException("Unable to open folder: " +
                                 file.getAbsolutePath(), e);
  } finally {
    // The answer (or the failure) is already decided at this point, so a
    // problem while closing is only logged and never replaces it.
    if (reader != null) {
      try {
        reader.close();
      } catch (IOException closeFailure) {
        log.warn("Unable to close reader for " + file.getAbsolutePath(),
                 closeFailure);
      }
    }
    if (is != null) {
      try {
        is.close();
      } catch (IOException closeFailure) {
        log.warn("Unable to close stream for " + file.getAbsolutePath(),
                 closeFailure);
      }
    }
  }
}
/**
 * Creates an index of the mail box file, unless it is already flagged as
 * indexed. The index simply holds the offset of each message in the raw mbox
 * file: an int with the number of messages, followed by one long per message
 * giving the position of its "From " line. Eventually, it should contain the
 * last time the mail box file was modified so that it can be reindexed as
 * required.
 *
 * A message start is recognised as a line starting with "from " (any case)
 * that is either the first line of the file or preceded by a blank line, and
 * that is followed by a line looking like a header (see
 * {@link #isHeaderLine(String)}).
 *
 * NOTE(review): the scan reads the file directly through RandomAccessFile, so
 * a gzip-compressed mailbox is presumably not handled here - confirm.
 *
 * @throws MessagingException if the mail box cannot be read or the index
 *         file cannot be written.
 */
public void indexMbox() throws MessagingException {
  synchronized (this) {
    if (isIndexed()) {
      return;
    }
    List<Long> offsets = scanMessageOffsets();
    writeOffsetIndex(offsets);
    // Only flag the mail box once the index has been written completely.
    indexed();
  }
}

/**
 * Scans the mail box file and collects the offset of every message start.
 *
 * @return the offsets of the "From " lines, in file order.
 * @throws MessagingException if the mail box cannot be read.
 */
private List<Long> scanMessageOffsets() throws MessagingException {
  List<Long> offsets = new ArrayList<Long>();
  RandomAccessFile mailBox = null;
  try {
    // RandomAccessFile is used instead of a buffered variant because the
    // latter stopped reading when it encountered a null character in a
    // file - which is NOT the EOF.
    mailBox = new RandomAccessFile(file, "r");

    // The position is taken BEFORE reading a candidate "From " line. This
    // gives the exact message start whatever the line terminator is
    // (LF, CRLF, or a last line without terminator), which subtracting line
    // lengths from the current position cannot do.
    long candidateOffset = mailBox.getFilePointer();
    String line = mailBox.readLine();

    // Process the initial line of the file.
    if (isFromLine(line)) {
      if (log.isDebugEnabled()) {
        log.debug("Found initial from");
      }
      line = mailBox.readLine();
      if (log.isDebugEnabled()) {
        log.debug("Checking for header line in " + line);
      }
      if (isHeaderLine(line)) {
        offsets.add(Long.valueOf(candidateOffset));
      }
    }

    while (line != null) {
      if (line.trim().equals("")) {
        // First check: a blank line. The next line is the candidate.
        candidateOffset = mailBox.getFilePointer();
        line = mailBox.readLine();
        if (log.isDebugEnabled()) {
          log.debug("Checking for message in " + line);
        }
        // Second check: the candidate starts with 'from'.
        if (isFromLine(line)) {
          line = mailBox.readLine();
          if (log.isDebugEnabled()) {
            log.debug("Checking for header line in " + line);
          }
          // Third check: the line after it is a header of some sort. There
          // is a 99.99% certainty this is always the case as there should
          // be at least a "From:" header.
          if (isHeaderLine(line)) {
            offsets.add(Long.valueOf(candidateOffset));
          }
        }
        // Whatever line was read last is examined by the next iteration, so
        // a blank line directly after a rejected candidate is not missed.
      } else {
        line = mailBox.readLine();
      }
    }

    log.info("Indexed " + offsets.size() + " messages in MBOX file " +
             file.getCanonicalPath());
    return offsets;
  } catch (IOException e) {
    throw new MessagingException("I/O error reading mailbox", e);
  } finally {
    if (mailBox != null) {
      try {
        mailBox.close();
      } catch (IOException closeFailure) {
        // The file was only read; a failing close must not hide the result
        // or the original error.
        log.warn("Unable to close mailbox " + file.getAbsolutePath(),
                 closeFailure);
      }
    }
  }
}

/**
 * Writes the index file: the number of offsets followed by each offset.
 *
 * @param offsets the message offsets to store.
 * @throws MessagingException if the index file cannot be created or written.
 */
private void writeOffsetIndex(List<Long> offsets) throws MessagingException {
  FileOutputStream fos = null;
  ObjectOutputStream index = null;
  try {
    if (!indexFile.exists()) {
      indexFile.createNewFile();
    }
    if (!indexFile.canWrite()) {
      throw new MessagingException("Cannot write index file: " +
                                   indexFile.getName());
    }
    fos = new FileOutputStream(indexFile);
    index = new ObjectOutputStream(new BufferedOutputStream(fos));
    // Write number of message/offsets.
    index.writeInt(offsets.size());
    // Write offsets
    for (Long offset : offsets) {
      index.writeLong(offset.longValue());
    }
    // Close on the normal path inside the try block, so that a failure to
    // flush the buffered data is reported as an indexing error.
    index.close();
    index = null;
    fos = null;
  } catch (IOException ioe) {
    throw new MessagingException("Error indexing file: " +
                                 indexFile.getName() + ", " +
                                 ioe.getMessage(), ioe);
  } finally {
    // Still set only when something went wrong above: release the handle.
    OutputStream leftover = (index != null) ? (OutputStream) index : fos;
    if (leftover != null) {
      try {
        leftover.close();
      } catch (IOException closeFailure) {
        log.warn("Unable to close index file " + indexFile.getName(),
                 closeFailure);
      }
    }
  }
}

/**
 * Tests for the mbox message separator, compared case-insensitively.
 *
 * @param line the line to test, may be null (end of file).
 * @return true if the line starts with "from ".
 */
private static boolean isFromLine(String line) {
  return (line != null) && line.toLowerCase().startsWith("from ");
}

/**
 * Tests whether a line looks like a message header: its first
 * space-delimited token must contain a colon. Null, empty and
 * whitespace-only lines are not headers.
 *
 * @param line the line to test, may be null (end of file).
 * @return true if the line looks like a header.
 */
private static boolean isHeaderLine(String line) {
  if ((line == null) || (line.length() == 0)) {
    return false;
  }
  int firstSpace = line.indexOf(' ');
  String firstToken = (firstSpace < 0) ? line : line.substring(0, firstSpace);
  return firstToken.indexOf(':') >= 0;
}
/**
 * Returns the number of messages in this folder. On the first call the mail
 * box is indexed if necessary and the count is read from the index file; the
 * value is then cached for later calls.
 *
 * @return the number of messages in this folder.
 * @exception MessagingException if the mail box could not be indexed or the
 *            index file could not be read.
 */
public int getMessageCount() throws MessagingException {
  // The first entry of the index file is an int: the number of messages.
  if (messageCount == -1) {
    this.indexMbox();
    InputStream in = null;
    try {
      in = new FileInputStream(indexFile);
      ObjectInputStream index = new ObjectInputStream(in);
      // From here on closing the object stream closes the file stream too.
      in = index;
      messageCount = index.readInt();
    } catch (IOException ioe) {
      throw new MessagingException("Error indexing file: " +
                                   indexFile.getName() + ", " +
                                   ioe.getMessage(), ioe);
    } finally {
      if (in != null) {
        try {
          in.close();
        } catch (IOException closeFailure) {
          // Nothing was written; a failing close must not hide the count or
          // the original error.
          log.warn("Unable to close index file " + indexFile.getName(),
                   closeFailure);
        }
      }
    }
  }
  return messageCount;
}
/**
 * Fetches a single message from this folder by asking
 * getMessages(msgnum, msgnum) for a one-element range.
 *
 * @param msgnum the index to the message in the store.
 * @return the message found at that index.
 * @exception MessagingException if a messaging error occurred
 */
public Message getMessage(int msgnum) throws MessagingException {
  Message[] singleMessage = getMessages(msgnum, msgnum);
  return singleMessage[0];
}
/**
 * Returns all the messages in this folder. Message indexes are zero based
 * and the range form of getMessages takes an inclusive end index, so this
 * asks for the range from 0 to getMessageCount() - 1.
 *
 * @return all the messages in this folder; an empty array if the folder
 *         holds no message.
 * @exception MessagingException if a messaging error occurred
 */
public Message[] getMessages() throws MessagingException {
  int count = getMessageCount();
  if (count == 0) {
    // There is no valid index to ask for in an empty mail box.
    return new Message[0];
  }
  // The end index is inclusive: passing the count itself would request one
  // entry more than the index file contains.
  return getMessages(0, count - 1);
}
/**
 * Returns all the messages within the given range, as an array. The range is
 * resolved by getMessagesAsArrayList.
 *
 * @param msgStart the starting index of the messages.
 * @param msgEnd the finishing index of the messages.
 * @return the messages in the given range.
 * @exception MessagingException if a messaging error occurred.
 */
public Message[] getMessages(int msgStart, int msgEnd) throws
    MessagingException {
  ArrayList<Message> inRange = getMessagesAsArrayList(msgStart, msgEnd);
  return collectionToMessageArray(inRange);
}
/**
 * Returns the specified messages in a given range. Indexes are zero based
 * and both ends of the range are inclusive, so valid indexes run from 0 to
 * getMessageCount() - 1. A range whose end lies before its start yields an
 * empty list.
 *
 * @param msgStart the starting index of the messages.
 * @param msgEnd the finishing index of the messages (inclusive).
 * @return the specified messages in a given range.
 * @exception MessagingException if an index is out of range or the index
 *            file or mail box could not be read.
 */
public ArrayList<Message> getMessagesAsArrayList(int msgStart, int msgEnd) throws
    MessagingException {
  int numberOfMessages = getMessageCount();
  if (msgStart < 0) {
    throw new MessagingException("Message number lower than zero: " +
                                 msgStart);
  }
  // The end index is inclusive, so it must stay strictly below the count;
  // otherwise the read below would run past the end of the index file.
  if ((msgStart > numberOfMessages) || (msgEnd >= numberOfMessages)) {
    throw new MessagingException(
        "Message number higher than maximum number " +
        "of messages.");
  }

  ArrayList<Message> messages = new ArrayList<Message>();
  InputStream in = null;
  try {
    in = new FileInputStream(indexFile);
    ObjectInputStream index = new ObjectInputStream(in);
    // From here on closing the object stream closes the file stream too.
    in = index;

    // Skip the first entry which is the number of messages
    index.readInt();
    // Skip the offsets of the messages in front of the range
    for (int skipped = 0; skipped < msgStart; skipped++) {
      index.readLong();
    }
    // Read each message of the range at the offset stored for it
    for (int msgnum = msgStart; msgnum <= msgEnd; msgnum++) {
      long offset = index.readLong();
      messages.add(getMessage(offset, msgnum));
    }
    return messages;
  } catch (IOException ioe) {
    throw new MessagingException("I/O error reading index ", ioe);
  } finally {
    if (in != null) {
      try {
        in.close();
      } catch (IOException closeFailure) {
        // Throwing from here would replace the result or the original
        // error; the index was only read, so logging is enough.
        log.warn("Unable to close index file " + indexFile.getName(),
                 closeFailure);
      }
    }
  }
}
/**
 * Reads one message from the mail box file, starting at the given offset.
 * The file handle opened for the read is always released, including when
 * positioning or parsing fails.
 *
 * @param fileOffset the position in the mail box file at which the message
 *        starts.
 * @param msgnum the number of the message.
 * @return the full formed message.
 * @throws MessagingException if the message was of the incorrect format or
 *         the mail box could not be read.
 */
private Message getMessage(long fileOffset, int msgnum) throws
    MessagingException {
  try {
    RandomAccessFile raf = new RandomAccessFile(file, "r");
    try {
      raf.seek(fileOffset);
      return new MboxMessage(myFolder, raf, msgnum);
    } finally {
      // Runs for both the normal return and any exception from seek or from
      // building the message.
      raf.close();
    }
  } catch (IOException e) {
    throw new MessagingException("I/O error reading mailbox", e);
  }
}
/**
 * Decides from the file name alone whether the mail box is, or is supposed
 * to be, compressed: the name has to end in ".gz", in any letter case.
 *
 * @return true if the file name denotes a gzip file.
 */
private boolean isGzip() {
  String lowerCaseName = file.getName().toLowerCase();
  return lowerCaseName.endsWith(".gz");
}
/**
 * Creates an output stream onto the mail box file. When the file name
 * denotes a gzip file, whatever is sent to the stream is compressed.
 *
 * The caller is responsible for closing the returned stream.
 *
 * @return a stream writing to the mail box file.
 * @throws IOException if the file cannot be opened for writing or the
 *         compressing stream cannot be set up.
 */
public OutputStream getOutputStream() throws IOException {
  OutputStream out = new FileOutputStream(file);
  if (!isGzip()) {
    return out;
  }
  try {
    return new GZIPOutputStream(out);
  } catch (IOException e) {
    // Nobody else holds the file stream yet: release it before reporting.
    try {
      out.close();
    } catch (IOException closeFailure) {
      log.warn("Unable to close " + file.getAbsolutePath(), closeFailure);
    }
    throw e;
  }
}
/**
 * Creates an input stream onto the mail box file. When the file name denotes
 * a gzip file, the contents are decompressed while reading.
 *
 * The caller is responsible for closing the returned stream.
 *
 * @return a stream reading from the mail box file.
 * @throws IOException if the file cannot be opened, or if it is named as a
 *         gzip file but the decompressing stream cannot be set up.
 */
public InputStream getInputStream() throws IOException {
  InputStream in = new FileInputStream(file);
  if (!isGzip()) {
    return in;
  }
  try {
    return new GZIPInputStream(in);
  } catch (IOException e) {
    // Nobody else holds the file stream yet: release it before reporting.
    try {
      in.close();
    } catch (IOException closeFailure) {
      log.warn("Unable to close " + file.getAbsolutePath(), closeFailure);
    }
    throw e;
  }
}
/**
 * Helper that copies a collection of messages into a message array of the
 * same size, in the collection's iteration order.
 *
 * @param source the source collection (array list usually) of messages.
 * @return an array holding the messages contained in the collection.
 */
private Message[] collectionToMessageArray(Collection<Message> source) {
  Message[] target = new Message[source.size()];
  return source.toArray(target);
}
}