/****************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one   *
 * or more contributor license agreements.  See the NOTICE file *
 * distributed with this work for additional information        *
 * regarding copyright ownership.  The ASF licenses this file   *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License.  You may obtain a copy of the License at   *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied.  See the License for the    *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/

package org.apache.james.mime4j;

import java.io.IOException;
import java.io.InputStream;
import java.util.BitSet;
import java.util.LinkedList;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.james.mime4j.decoder.Base64InputStream;
import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;

/**
 * <p>
 * Parses MIME (or RFC822) message streams of bytes or characters and reports
 * parsing events to a <code>ContentHandler</code> instance.
 * </p>
 * <p>
 * Typical usage:<br/>
 * <pre>
 *      ContentHandler handler = new MyHandler();
 *      MimeStreamParser parser = new MimeStreamParser();
 *      parser.setContentHandler(handler);
 *      parser.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
 * </pre>
 * <strong>NOTE:</strong> All lines must end with CRLF
 * (<code>\r\n</code>). If you are unsure of the line endings in your stream
 * you should wrap it in a
 * {@link org.apache.james.mime4j.EOLConvertingInputStream} instance.
 * </p>
 * <p>
 * A content handler must be installed with
 * {@link #setContentHandler(ContentHandler)} before {@link #parse(InputStream)}
 * is called; the parser invokes the handler without checking it for
 * <code>null</code>.
 * </p>
 *
 * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $
 */
public class MimeStreamParser {
    private static final Log log = LogFactory.getLog(MimeStreamParser.class);

    /**
     * Characters accepted in a header field name: 0x21-0x7e except
     * 0x3a (<code>':'</code>). The same set is used to tell the start of a
     * new field from a folded continuation line, since a continuation starts
     * with a character outside this set (e.g. space or tab).
     */
    private static final BitSet fieldChars = new BitSet();

    /** Stream handed to {@link #parse(InputStream)}; <code>null</code> until then. */
    private RootInputStream rootStream = null;

    /**
     * Stack of the descriptors of the entities currently being parsed,
     * innermost first. Holds <code>BodyDescriptor</code> instances.
     */
    private LinkedList bodyDescriptors = new LinkedList();

    private ContentHandler handler = null;
    private boolean raw = false;

    static {
        for (int i = 0x21; i <= 0x39; i++) {
            fieldChars.set(i);
        }
        /* 0x3a (':') is deliberately skipped. */
        for (int i = 0x3b; i <= 0x7e; i++) {
            fieldChars.set(i);
        }
    }

    /**
     * Creates a new <code>MimeStreamParser</code> instance.
     */
    public MimeStreamParser() {
    }

    /**
     * Parses a stream of bytes containing a MIME message.
     *
     * @param is the stream to parse.
     * @throws IOException on I/O errors.
     */
    public void parse(InputStream is) throws IOException {
        rootStream = new RootInputStream(is);
        parseMessage(rootStream);
    }

    /**
     * Determines if this parser is currently in raw mode.
     *
     * @return <code>true</code> if in raw mode, <code>false</code>
     *         otherwise.
     * @see #setRaw(boolean)
     */
    public boolean isRaw() {
        return raw;
    }

    /**
     * Enables or disables raw mode. In raw mode all future entities
     * (messages or body parts) in the stream will be reported to the
     * {@link ContentHandler#raw(InputStream)} handler method only.
     * The stream will contain the entire unparsed entity contents
     * including header fields and whatever is in the body.
     *
     * @param raw <code>true</code> enables raw mode, <code>false</code>
     *        disables it.
     */
    public void setRaw(boolean raw) {
        this.raw = raw;
    }

    /**
     * Finishes the parsing and stops reading lines.
     * NOTE: No more lines will be parsed but the parser
     * will still call
     * {@link ContentHandler#endMultipart()},
     * {@link ContentHandler#endBodyPart()},
     * {@link ContentHandler#endMessage()}, etc to match previous calls
     * to
     * {@link ContentHandler#startMultipart(BodyDescriptor)},
     * {@link ContentHandler#startBodyPart()},
     * {@link ContentHandler#startMessage()}, etc.
     * <p>
     * Calling this method before {@link #parse(InputStream)} has been
     * invoked has no effect.
     * </p>
     */
    public void stop() {
        /* rootStream is only assigned in parse(); nothing to stop before that. */
        if (rootStream != null) {
            rootStream.truncate();
        }
    }

    /**
     * Parses an entity which consists of a header followed by a body containing
     * arbitrary data, body parts or an embedded message.
     *
     * @param is the stream to parse.
     * @throws IOException on I/O errors.
     */
    private void parseEntity(InputStream is) throws IOException {
        BodyDescriptor bd = parseHeader(is);

        if (bd.isMultipart()) {
            bodyDescriptors.addFirst(bd);
            handler.startMultipart(bd);

            /* Everything up to the first boundary is reported as preamble. */
            MimeBoundaryInputStream tempIs =
                new MimeBoundaryInputStream(is, bd.getBoundary());
            handler.preamble(new CloseShieldInputStream(tempIs));
            tempIs.consume();

            while (tempIs.hasMoreParts()) {
                /* A fresh boundary-delimited view is created for each part. */
                tempIs = new MimeBoundaryInputStream(is, bd.getBoundary());
                parseBodyPart(tempIs);
                tempIs.consume();
                if (tempIs.parentEOF()) {
                    if (log.isWarnEnabled()) {
                        log.warn("Line " + rootStream.getLineNumber()
                                + ": Body part ended prematurely. "
                                + "Higher level boundary detected or "
                                + "EOF reached.");
                    }
                    break;
                }
            }

            /* Whatever is left of this entity's stream is the epilogue. */
            handler.epilogue(new CloseShieldInputStream(is));
            handler.endMultipart();
            bodyDescriptors.removeFirst();

        } else if (bd.isMessage()) {
            /* Decode the embedded message first if it carries a transfer encoding. */
            if (bd.isBase64Encoded()) {
                log.warn("base64 encoded message/rfc822 detected");
                is = new EOLConvertingInputStream(
                        new Base64InputStream(is));
            } else if (bd.isQuotedPrintableEncoded()) {
                log.warn("quoted-printable encoded message/rfc822 detected");
                is = new EOLConvertingInputStream(
                        new QuotedPrintableInputStream(is));
            }
            bodyDescriptors.addFirst(bd);
            parseMessage(is);
            bodyDescriptors.removeFirst();
        } else {
            handler.body(bd, new CloseShieldInputStream(is));
        }

        /*
         * Make sure the stream has been consumed.
         */
        while (is.read() != -1) {
        }
    }

    /**
     * Parses a message. In raw mode the stream is passed unparsed to
     * {@link ContentHandler#raw(InputStream)}; otherwise the entity is
     * parsed between calls to {@link ContentHandler#startMessage()} and
     * {@link ContentHandler#endMessage()}.
     *
     * @param is the stream to parse.
     * @throws IOException on I/O errors.
     */
    private void parseMessage(InputStream is) throws IOException {
        if (raw) {
            handler.raw(new CloseShieldInputStream(is));
        } else {
            handler.startMessage();
            parseEntity(is);
            handler.endMessage();
        }
    }

    /**
     * Parses a body part. In raw mode the stream is passed unparsed to
     * {@link ContentHandler#raw(InputStream)}; otherwise the entity is
     * parsed between calls to {@link ContentHandler#startBodyPart()} and
     * {@link ContentHandler#endBodyPart()}.
     *
     * @param is the stream to parse.
     * @throws IOException on I/O errors.
     */
    private void parseBodyPart(InputStream is) throws IOException {
        if (raw) {
            handler.raw(new CloseShieldInputStream(is));
        } else {
            handler.startBodyPart();
            parseEntity(is);
            handler.endBodyPart();
        }
    }

    /**
     * Parses a header. The raw header is first read up to the empty line
     * that terminates it (or end of stream), then split into fields. Each
     * syntactically valid field is reported through
     * {@link ContentHandler#field(String)} and added to the returned
     * descriptor; invalid fields are logged and skipped.
     *
     * @param is the stream to parse.
     * @return a <code>BodyDescriptor</code> describing the body following
     *         the header.
     * @throws IOException on I/O errors.
     */
    private BodyDescriptor parseHeader(InputStream is) throws IOException {
        BodyDescriptor bd = new BodyDescriptor(bodyDescriptors.isEmpty()
                        ? null : (BodyDescriptor) bodyDescriptors.getFirst());

        handler.startHeader();

        int lineNumber = rootStream.getLineNumber();

        /*
         * Phase 1: read the raw header. 'prev' remembers the last character
         * that was not a '\r', so that "\n\r\n" and "\n\n" both count as an
         * empty line; prev == 0 means only '\r' (or nothing) has been read.
         */
        StringBuffer sb = new StringBuffer();
        int curr = 0;
        int prev = 0;
        while ((curr = is.read()) != -1) {
            if (curr == '\n' && (prev == '\n' || prev == 0)) {
                /*
                 * [\r]\n[\r]\n or an immediate \r\n have been seen.
                 * Drop the character buffered just before this '\n'. The
                 * buffer is empty when the stream starts with a bare '\n';
                 * there is nothing to drop in that case.
                 */
                if (sb.length() > 0) {
                    sb.deleteCharAt(sb.length() - 1);
                }
                break;
            }
            sb.append((char) curr);
            prev = curr == '\r' ? prev : curr;
        }

        if (curr == -1 && log.isWarnEnabled()) {
            log.warn("Line " + rootStream.getLineNumber()
                    + ": Unexpected end of headers detected. "
                    + "Boundary detected in header or EOF reached.");
        }

        /*
         * Phase 2: split the buffer into fields. 'start' is the offset of
         * the current field, 'pos' the scan position, 'lineNumber' the line
         * 'pos' is on and 'startLineNumber' the line the current field
         * started on (used in warnings only).
         */
        int start = 0;
        int pos = 0;
        int startLineNumber = lineNumber;
        while (pos < sb.length()) {
            while (pos < sb.length() && sb.charAt(pos) != '\r') {
                pos++;
            }
            if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') {
                /* A lone '\r' is not a line end; keep scanning. */
                pos++;
                continue;
            }

            /*
             * The field ends here if this is the last line of the header or
             * the next line starts with a field-name character (i.e. it is
             * not a folded continuation line).
             */
            if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) {

                /*
                 * field should be the complete field data excluding the
                 * trailing \r\n.
                 */
                String field = sb.substring(start, pos);
                start = pos + 2;

                /*
                 * Check for a valid field.
                 */
                int index = field.indexOf(':');
                boolean valid = false;
                if (index != -1 && fieldChars.get(field.charAt(0))) {
                    valid = true;
                    String fieldName = field.substring(0, index).trim();
                    for (int i = 0; i < fieldName.length(); i++) {
                        if (!fieldChars.get(fieldName.charAt(i))) {
                            valid = false;
                            break;
                        }
                    }

                    if (valid) {
                        handler.field(field);
                        bd.addField(fieldName, field.substring(index + 1));
                    }
                }

                if (!valid && log.isWarnEnabled()) {
                    log.warn("Line " + startLineNumber
                            + ": Ignoring invalid field: '" + field.trim() + "'");
                }

                /*
                 * The next field starts on the line following the one that
                 * just ended (lineNumber itself is advanced below).
                 */
                startLineNumber = lineNumber + 1;
            }

            pos += 2;
            lineNumber++;
        }

        handler.endHeader();

        return bd;
    }

    /**
     * Sets the <code>ContentHandler</code> to use when reporting
     * parsing events.
     *
     * @param h the <code>ContentHandler</code>.
     */
    public void setContentHandler(ContentHandler h) {
        this.handler = h;
    }

}