MimeTokenStream.java example

Explorer

apache-james-mime4j-groundhog-master
- benchmarks
  - src
    - org
      - apache
        james
        mime4j
        Base64InputStreamBench.java
        Base64OutputStreamBench.java
        LongMultipartReadBench.java
- examples
  - src
    - java
      - org
        apache
        james
        mime4j
        samples
        dom
        MultipartMessage.java
        TextPlainMessage.java
        transform
        TransformMessage.java
        tree
        MessageTree.java
- src
  - main
  - test
    - java
      - org
        apache
        james
        mime4j
        EncodeUtils.java
        ExampleMail.java
        TestUtil.java
        codec
        Base64InputStreamTest.java
        Base64OutputStreamTest.java
        CodecUtilTest.java
        DecoderUtilTest.java
        EncoderUtilTest.java
        QuotedPrintableEncodeTest.java
        QuotedPrintableInputStreamTest.java
        QuotedPrintableOutputStreamTest.java
        QuotedPrintableTextEncodeTest.java
        descriptor
        BaseTestForBodyDescriptors.java
        DefaultBodyDescriptorTest.java
        MaximalBodyDescriptorTest.java
        field
        ContentDispositionFieldTest.java
        ContentTransferEncodingFieldTest.java
        ContentTypeFieldTest.java
        FieldTest.java
        FieldsTest.java
        UnstructuredFieldTest.java
        address
        AddressTest.java
        contentdisposition
        ContentDispositionTest.java
        contenttype
        ContentTypeTest.java
        datetime
        DateTimeTest.java
        mimeversion
        MimeVersionParserTest.java
        structured
        StructuredFieldParserTest.java
        io
        BufferedLineReaderInputStreamBufferTest.java
        BufferedLineReaderInputStreamTest.java
        EOLConvertingInputStreamTest.java
        LimitedInputStreamTest.java
        LineNumberInputStreamTest.java
        LineReaderInputStreamAdaptorTest.java
        MimeBoundaryInputStreamTest.java
        PositionInputStreamTest.java
        message
        CopyConstructorTest.java
        EntityTest.java
        ExampleMessagesRoundtripTest.java
        HeaderTest.java
        MessageCompleteMailTest.java
        MessageParserTest.java
        MessageTest.java
        MessageWriteToTest.java
        MultipartFormTest.java
        SingleBodyCopyTest.java
        parser
        MimeEntityTest.java
        MimeStreamParserExampleMessagesTest.java
        MimeStreamParserTest.java
        MimeStreamTokenMessageRfc822Test.java
        MimeTokenEmbeddedMessageTest.java
        MimeTokenNoRecurseTest.java
        MimeTokenStreamBodyDescriptorTest.java
        MimeTokenStreamReaderTest.java
        MimeTokenStreamTest.java
        MultipartStreamTest.java
        MultipartTokensTest.java
        StrictMimeTokenStreamTest.java
        TestHandler.java
        storage
        DefaultStorageProviderTest.java
        MultiReferenceStorageTest.java
        StorageProviderTest.java
        util
        CharsetUtilTest.java
        MimeUtilTest.java
        StringArrayMapTest.java
        TestByteArrayBuffer.java

/****************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one   *
 * or more contributor license agreements.  See the NOTICE file *
 * distributed with this work for additional information        *
 * regarding copyright ownership.  The ASF licenses this file   *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License.  You may obtain a copy of the License at   *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied.  See the License for the    *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/

package org.apache.james.mime4j.parser;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.LinkedList;

import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.codec.Base64InputStream;
import org.apache.james.mime4j.codec.QuotedPrintableInputStream;
import org.apache.james.mime4j.descriptor.BodyDescriptor;
import org.apache.james.mime4j.io.BufferedLineReaderInputStream;
import org.apache.james.mime4j.io.LineNumberInputStream;
import org.apache.james.mime4j.io.LineNumberSource;
import org.apache.james.mime4j.util.CharsetUtil;
import org.apache.james.mime4j.util.MimeUtil;

/**
 * <p>
 * Parses MIME (or RFC822) message streams of bytes or characters.
 * The stream is converted into an event stream.
 * </p>
 * <p>
 * Typical usage:
 * </p>
 * <pre>
 *      MimeTokenStream stream = new MimeTokenStream();
 *      stream.parse(new FileInputStream("mime.msg"));
 *      for (int state = stream.getState();
 *           state != MimeTokenStream.T_END_OF_STREAM;
 *           state = stream.next()) {
 *          switch (state) {
 *            case MimeTokenStream.T_BODY:
 *              System.out.println("Body detected, contents = "
 *                + stream.getInputStream() + ", header data = "
 *                + stream.getBodyDescriptor());
 *              break;
 *            case MimeTokenStream.T_FIELD:
 *              System.out.println("Header field detected: "
 *                + stream.getField());
 *              break;
 *            case MimeTokenStream.T_START_MULTIPART:
 *              System.out.println("Multipart message detected,"
 *                + " header data = "
 *                + stream.getBodyDescriptor());
 *            ...
 *          }
 *      }
 * </pre>
 * <p>Instances of {@link MimeTokenStream} are reusable: Invoking the
 * method {@link #parse(InputStream)} resets the token stream's internal
 * state. However, they are definitely <em>not</em> thread safe. If you
 * have a multi-threaded application, then the suggested use is to have
 * one instance per thread.</p>
 */
public class MimeTokenStream implements EntityStates, RecursionMode {

    /**
     * Creates a stream that creates a more detailed body descriptor.
     * @return <code>MimeTokenStream</code>, not null
     */
    public static final MimeTokenStream createMaximalDescriptorStream() {
        MimeEntityConfig config = new MimeEntityConfig();
        config.setMaximalBodyDescriptor(true);
        return new MimeTokenStream(config);
    }

    /**
     * Creates a stream that strictly validates the input.
     * @return <code>MimeTokenStream</code> which throws a
     * <code>MimeException</code> whenever possible issues
     * are detected in the input
     */
    public static final MimeTokenStream createStrictValidationStream() {
        MimeEntityConfig config = new MimeEntityConfig();
        config.setStrictParsing(true);
        return new MimeTokenStream(config);
    }

    /** Parser configuration shared with every entity created by this stream. */
    private final MimeEntityConfig config;

    /**
     * Stack of nested state machines. The last element is the machine that
     * is currently being advanced; machines are pushed and popped by
     * {@link #next()}.
     */
    private final LinkedList<EntityStateMachine> entities = new LinkedList<EntityStateMachine>();

    /** Most recently reported state; {@link #T_END_OF_STREAM} until a parse is started. */
    private int state = T_END_OF_STREAM;

    /** Machine on top of {@link #entities}; null before a parse and after the stream ends. */
    private EntityStateMachine currentStateMachine;

    /** Recursion mode applied to the root entity and re-applied to resumed parents. */
    private int recursionMode = M_RECURSE;

    /** Buffered view of the input of the current parse; null until a parse is started. */
    private BufferedLineReaderInputStream inbuffer;

    /**
     * Constructs a standard (lax) stream.
     * Optional validation events will be logged only.
     * Use {@link #createStrictValidationStream()} to create
     * a stream that strictly validates the input.
     */
    public MimeTokenStream() {
        this(new MimeEntityConfig());
    }

    /**
     * Constructs a stream that uses the given configuration.
     *
     * @param config configuration consulted for line counting, the maximum
     *        line length and entity creation
     */
    protected MimeTokenStream(final MimeEntityConfig config) {
        super();
        this.config = config;
    }

    /**
     * Instructs the {@code MimeTokenStream} to parse the given stream's contents.
     * If the {@code MimeTokenStream} has already been in use, resets the stream's
     * internal state.
     *
     * @param stream message data to parse
     * @throws IllegalStateException if the configured recursion mode is not
     *         one of the supported modes
     */
    public void parse(InputStream stream) {
        doParse(stream, null);
    }

    /**
     * Instructs the {@code MimeTokenStream} to parse the given content with
     * the content type. The message stream is assumed to have no message header
     * and is expected to begin with a message body. This can be the case when
     * the message content is transmitted using a different transport protocol
     * such as HTTP.
     * <p>
     * If the {@code MimeTokenStream} has already been in use, resets the stream's
     * internal state.
     *
     * @param stream message body data to parse
     * @param contentType content type of the body, must not be null
     * @throws IllegalArgumentException if {@code contentType} is null
     * @throws IllegalStateException if the configured recursion mode is not
     *         one of the supported modes
     */
    public void parseHeadless(InputStream stream, String contentType) {
        if (contentType == null) {
            throw new IllegalArgumentException("Content type may not be null");
        }
        doParse(stream, contentType);
    }

    /**
     * Builds the root state machine for a new parse and installs it as the
     * only element of the entity stack.
     * <p>
     * The new buffer and root machine are built in local variables first and
     * only committed to the fields once construction succeeded, so a failure
     * (for instance an unsupported recursion mode) leaves the previous state
     * of this stream untouched.
     *
     * @param stream input to parse
     * @param contentType content type for headless parsing, or null when the
     *        input starts with a regular message header
     * @throws IllegalStateException if the recursion mode is not one of
     *         {@link #M_RAW}, {@link #M_NO_RECURSE}, {@link #M_FLAT} or
     *         {@link #M_RECURSE}
     */
    private void doParse(InputStream stream, String contentType) {
        LineNumberSource lineSource = null;
        if (config.isCountLineNumbers()) {
            // Wrap the input so that the entity can query line numbers.
            LineNumberInputStream lineInput = new LineNumberInputStream(stream);
            lineSource = lineInput;
            stream = lineInput;
        }

        BufferedLineReaderInputStream buffer = new BufferedLineReaderInputStream(
                stream,
                4 * 1024,
                config.getMaxLineLen());

        EntityStateMachine root;
        switch (recursionMode) {
        case M_RAW:
            root = new RawEntity(buffer);
            break;
        case M_NO_RECURSE:
        case M_FLAT:
            // expected to be called only at start of parsing
        case M_RECURSE:
            MimeEntity mimeentity = new MimeEntity(
                    lineSource,
                    buffer,
                    null,
                    T_START_MESSAGE,
                    T_END_MESSAGE,
                    config);
            mimeentity.setRecursionMode(recursionMode);
            if (contentType != null) {
                mimeentity.skipHeader(contentType);
            }
            root = mimeentity;
            break;
        default:
            // Without this branch an unknown mode would silently reuse the
            // state machine of a previous parse (or fail with an NPE).
            throw new IllegalStateException(
                    "Unsupported recursion mode: " + recursionMode);
        }

        // Commit the new parse state.
        entities.clear();
        inbuffer = buffer;
        currentStateMachine = root;
        entities.add(root);
        state = root.getState();
    }

    /**
     * Determines if this parser is currently in raw mode.
     *
     * @return <code>true</code> if in raw mode, <code>false</code>
     *         otherwise.
     * @see #setRecursionMode(int)
     */
    public boolean isRaw() {
        return recursionMode == M_RAW;
    }

    /**
     * Gets the current recursion mode.
     * The recursion mode specifies the approach taken to parsing parts.
     * {@link #M_RAW}  mode does not parse the part at all.
     * {@link #M_RECURSE} mode recursively parses each mail
     * when a <code>message/rfc822</code> part is encountered;
     * {@link #M_NO_RECURSE} does not.
     * @return the mode last set; {@link #M_RECURSE} unless changed through
     *         {@link #setRecursionMode(int)}
     */
    public int getRecursionMode() {
        return recursionMode;
    }

    /**
     * Sets the current recursion mode.
     * The recursion mode specifies the approach taken to parsing parts.
     * {@link #M_RAW}  mode does not parse the part at all.
     * {@link #M_RECURSE} mode recursively parses each mail
     * when a <code>message/rfc822</code> part is encountered;
     * {@link #M_NO_RECURSE} does not.
     * <p>
     * The mode is stored for subsequent parses and, if an entity is currently
     * being parsed, is forwarded to it immediately. The value is not
     * validated here; starting a parse with an unsupported mode fails.
     *
     * @param mode {@link #M_RECURSE}, {@link #M_RAW}, {@link #M_NO_RECURSE}
     *        or {@link #M_FLAT}
     */
    public void setRecursionMode(int mode) {
        recursionMode = mode;
        if (currentStateMachine != null) {
            currentStateMachine.setRecursionMode(mode);
        }
    }

    /**
     * Finishes the parsing and stops reading lines.
     * NOTE: No more lines will be parsed but the parser
     * will still call
     * {@link ContentHandler#endMultipart()},
     * {@link ContentHandler#endBodyPart()},
     * {@link ContentHandler#endMessage()}, etc to match previous calls
     * to
     * {@link ContentHandler#startMultipart(BodyDescriptor)},
     * {@link ContentHandler#startBodyPart()},
     * {@link ContentHandler#startMessage()}, etc.
     * <p>
     * Does nothing if no parse has been started yet.
     */
    public void stop() {
        // inbuffer is only assigned once a parse has been started.
        if (inbuffer != null) {
            inbuffer.truncate();
        }
    }

    /**
     * Returns the current state.
     *
     * @return the state reported by the most recent call to
     *         {@link #parse(InputStream)},
     *         {@link #parseHeadless(InputStream, String)} or {@link #next()};
     *         {@link #T_END_OF_STREAM} if nothing has been parsed yet
     */
    public int getState() {
        return state;
    }

    /**
     * This method returns the raw entity, preamble, or epilogue contents.
     * <p>
     * This method is valid, if {@link #getState()} returns either of
     * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}.
     * The class-level usage example also reads the stream in
     * {@link #T_BODY}.
     *
     * @return Data stream, depending on the current state.
     * @throws IllegalStateException {@link #getState()} returns an
     *   invalid value, or no entity is currently being parsed.
     */
    public InputStream getInputStream() {
        return requireStateMachine().getContentStream();
    }

    /**
     * This method returns a transfer decoded stream based on the MIME
     * fields with the standard defaults.
     * <p>
     * The transfer encoding is taken from {@link #getBodyDescriptor()}, so
     * this method is only meaningful in states for which that method is
     * valid. Base64 and quoted-printable content is wrapped in the matching
     * decoder; any other encoding is returned undecoded.
     *
     * @return Data stream, depending on the current state.
     * @throws IllegalStateException {@link #getState()} returns an
     *   invalid value, or no entity is currently being parsed.
     */
    public InputStream getDecodedInputStream() {
        BodyDescriptor bodyDescriptor = getBodyDescriptor();
        String transferEncoding = bodyDescriptor.getTransferEncoding();
        InputStream dataStream = requireStateMachine().getContentStream();
        if (MimeUtil.isBase64Encoding(transferEncoding)) {
            dataStream = new Base64InputStream(dataStream);
        } else if (MimeUtil.isQuotedPrintableEncoded(transferEncoding)) {
            dataStream = new QuotedPrintableInputStream(dataStream);
        }
        return dataStream;
    }

    /**
     * Gets a reader configured for the current body or body part.
     * The reader will return a transfer and charset decoded
     * stream of characters based on the MIME fields with the standard
     * defaults. A missing or empty charset is treated as US-ASCII.
     * This is a convenience method and relies on
     * {@link #getDecodedInputStream()}.
     * Consult the javadoc for that method for known limitations.
     *
     * @return <code>Reader</code>, not null
     * @see #getInputStream
     * @see #getDecodedInputStream
     * @throws IllegalStateException {@link #getState()} returns an
     *   invalid value, or no entity is currently being parsed
     * @throws UnsupportedCharsetException if there is no JVM support
     * for decoding the charset
     * @throws IllegalCharsetNameException if the charset name specified
     * in the mime type is illegal
     */
    public Reader getReader() {
        final BodyDescriptor bodyDescriptor = getBodyDescriptor();
        final String mimeCharset = bodyDescriptor.getCharset();
        final Charset charset;
        if (mimeCharset == null || "".equals(mimeCharset)) {
            charset = CharsetUtil.US_ASCII;
        } else {
            charset = Charset.forName(mimeCharset);
        }
        final InputStream instream = getDecodedInputStream();
        return new InputStreamReader(instream, charset);
    }

    /**
     * <p>Gets a descriptor for the current entity.
     * This method is valid if {@link #getState()} returns:</p>
     * <ul>
     * <li>{@link #T_BODY}</li>
     * <li>{@link #T_START_MULTIPART}</li>
     * <li>{@link #T_EPILOGUE}</li>
     * <li>{@link #T_PREAMBLE}</li>
     * </ul>
     * @return <code>BodyDescriptor</code>, not null
     * @throws IllegalStateException if no entity is currently being parsed
     */
    public BodyDescriptor getBodyDescriptor() {
        return requireStateMachine().getBodyDescriptor();
    }

    /**
     * This method is valid, if {@link #getState()} returns {@link #T_FIELD}.
     * @return the current header field.
     * @throws IllegalStateException {@link #getState()} returns another
     *   value than {@link #T_FIELD}, or no entity is currently being parsed.
     */
    public Field getField() {
        return requireStateMachine().getField();
    }

    /**
     * This method advances the token stream to the next token.
     *
     * @return the new state, which is also available from {@link #getState()}
     * @throws IllegalStateException The method has been called, although
     *   {@link #getState()} was already {@link #T_END_OF_STREAM}.
     * @throws IOException propagated from advancing the current entity
     * @throws MimeException propagated from advancing the current entity
     */
    public int next() throws IOException, MimeException {
        if (state == T_END_OF_STREAM  ||  currentStateMachine == null) {
            throw new IllegalStateException("No more tokens are available.");
        }
        while (currentStateMachine != null) {
            // advance() hands back a nested state machine when one has to be
            // processed next; it then becomes the top of the stack.
            EntityStateMachine next = currentStateMachine.advance();
            if (next != null) {
                entities.add(next);
                currentStateMachine = next;
            }
            state = currentStateMachine.getState();
            if (state != T_END_OF_STREAM) {
                return state;
            }
            // The machine on top of the stack is exhausted: drop it and
            // resume its parent, if there is one.
            entities.removeLast();
            if (entities.isEmpty()) {
                currentStateMachine = null;
            } else {
                currentStateMachine = entities.getLast();
                // The mode may have been changed while the child was active.
                currentStateMachine.setRecursionMode(recursionMode);
            }
        }
        state = T_END_OF_STREAM;
        return state;
    }

    /**
     * Renders a state as a string suitable for logging.
     * @param state state value to render, as returned by {@link #getState()}
     * @return rendered as string, not null
     */
    public static final String stateToString(int state) {
        return AbstractEntity.stateToString(state);
    }

    /**
     * Returns the state machine currently being advanced, turning the
     * "nothing is being parsed" situation into the documented
     * {@link IllegalStateException} instead of a
     * {@link NullPointerException}.
     *
     * @return the current state machine, never null
     * @throws IllegalStateException if no parse has been started or the
     *         stream has already been read to its end
     */
    private EntityStateMachine requireStateMachine() {
        if (currentStateMachine == null) {
            throw new IllegalStateException("No entity is currently being parsed.");
        }
        return currentStateMachine;
    }
}