MimeMultipartParser.java example

Explorer
ManagedRuntimeInitiative-master
- MRI-J
/*
 * Copyright 2005-2006 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */

package com.sun.xml.internal.ws.encoding;


import com.sun.istack.internal.NotNull;
import com.sun.istack.internal.Nullable;
import com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException;
import com.sun.xml.internal.messaging.saaj.packaging.mime.internet.ContentType;
import com.sun.xml.internal.messaging.saaj.packaging.mime.internet.InternetHeaders;
import com.sun.xml.internal.messaging.saaj.packaging.mime.internet.ParseException;
import com.sun.xml.internal.ws.message.stream.StreamAttachment;
import com.sun.xml.internal.ws.util.ASCIIUtility;
import com.sun.xml.internal.ws.util.ByteArrayBuffer;

import javax.xml.ws.WebServiceException;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;

/**
 * Parses a MIME multipart message into a primary (root) part and attachment
 * parts. The stream is parsed lazily, as and when parts are requested.
 *
 * TODO: keep a list of all the attachments so that a part is accounted for
 * even when it has no Content-Id.
 *
 * @author Vivek Pandey
 * @author Jitendra Kotamraju
 */
public final class MimeMultipartParser {

    // Underlying stream; the constructor guarantees it supports mark()/reset().
    private final InputStream in;
    // Value of the Content-Type "start" parameter (null when absent); compared
    // against the raw Content-ID header value to identify the root part.
    private final String start;
    // ASCII bytes of "--" + boundary, i.e. the delimiter searched for in the stream.
    private final byte[] boundaryBytes;

    // Bit 0 is turned on once the closing delimiter ("--boundary--") has been read.
    private final BitSet lastPartFound = new BitSet(1);
    // last byte read while consuming the remainder of a boundary line,
    // set to -1 on EOF
    private int b = 0;
    // Bad Character Shift table for the boundary search, indexed by (byte & 0x7f).
    private final int[] bcs = new int[256];
    // Good Suffix Shift table for the boundary search; allocated by compileBoundaryPattern().
    private int[] gss;
    private static final int BUFFER_SIZE = 4096;
    // Window of bytes most recently read from the stream while scanning.
    private byte[] buffer = new byte[BUFFER_SIZE];
    // Window read on the previous scan step; swapped with buffer in findMimeBody().
    private byte[] prevBuffer = new byte[BUFFER_SIZE];
    // True until the first MIME part has been parsed.
    private boolean firstPart = true;

    // Parsed non-root parts, keyed by Content-ID or, when a part has none,
    // by a generated counter value.
    private final Map<String, StreamAttachment> attachments = new HashMap<String, StreamAttachment>();
    // Root part once it has been identified, otherwise null.
    private StreamAttachment root;
    
    // Source of generated keys for parts that carry no Content-ID header.
    private int cidCounter = 0;

    /**
     * Creates a parser for a MIME multipart stream. Nothing is read from the
     * stream here; parsing happens lazily when parts are requested.
     *
     * @param in stream holding the multipart package; it is wrapped in a
     *        {@link BufferedInputStream} when it does not support
     *        mark()/reset(), which the boundary scan relies on
     * @param contentType value of the Content-Type header; must carry a
     *        non-empty "boundary" parameter and may carry a "start" parameter
     * @throws WebServiceException if the content type cannot be parsed or has
     *         no boundary parameter
     */
    public MimeMultipartParser(InputStream in, String contentType) {
        try {
            ContentType ct = new ContentType(contentType);
            String boundary = ct.getParameter("boundary");
            if (boundary == null || boundary.equals("")) {
                throw new WebServiceException("MIME boundary parameter not found: " + contentType);
            }
            // The delimiter that appears in the stream is the boundary prefixed with "--"
            boundaryBytes = ASCIIUtility.getBytes("--" + boundary);
            start = ct.getParameter("start");
        } catch (ParseException e) {
            throw new WebServiceException(e);
        }

        // The scan reads one delimiter-sized window at a time into buffer/prevBuffer.
        // Grow them when the delimiter is longer than the default size so that the
        // read cannot run past the end of the array.
        if (boundaryBytes.length > BUFFER_SIZE) {
            buffer = new byte[boundaryBytes.length];
            prevBuffer = new byte[boundaryBytes.length];
        }

        //InputStream MUST support mark()
        if (!in.markSupported()) {
            this.in = new BufferedInputStream(in);
        } else {
            this.in = in;
        }
    }

    /**
     * Returns the root part, parsing as much of the stream as is needed to
     * find it. When the Content-Type carries a start parameter, the part it
     * identifies is the root; otherwise the first part is the root.
     *
     * @return StreamAttachment for the root part, or
     *         null if no root part could be found
     */
    public @Nullable StreamAttachment getRootPart() {
        // Pull parts until the root shows up, the closing delimiter has been
        // seen, or the stream is exhausted.
        while (root == null) {
            if (lastBodyPartFound() || b == -1) {
                break;
            }
            getNextPart();
        }
        return root;
    }

    /**
     * Parses whatever remains of the stream and returns every MIME part other
     * than the root part.
     *
     * @return the map of attachment parts collected by this parser
     */
    public @NotNull Map<String, StreamAttachment> getAttachmentParts() {
        for (;;) {
            boolean exhausted = lastBodyPartFound() || b == -1;
            if (exhausted) {
                break;
            }
            getNextPart();
        }
        return attachments;
    }

    /**
     * Looks up the MIME part with the given contentId. Parts that have already
     * been parsed are served from the attachment map; otherwise the stream is
     * parsed further until a part with a matching Content-ID is produced.
     *
     * @param contentId Content-ID to look for
     * @return StreamAttachment for contentId, or
     *         null if the stream holds no such part
     */
    public @Nullable StreamAttachment getAttachmentPart(String contentId) throws IOException {
        // Already parsed and cached?
        StreamAttachment cached = attachments.get(contentId);
        if (cached != null) {
            return cached;
        }

        // Otherwise keep parsing until a match turns up or nothing is left
        while (true) {
            if (lastBodyPartFound() || b == -1) {
                return null;            // Attachment is not found
            }
            StreamAttachment parsed = getNextPart();
            String parsedId = parsed.getContentId();
            if (parsedId != null && parsedId.equals(contentId)) {
                return parsed;
            }
        }
    }

    /**
     * Parses the stream and returns the next available MIME part. This must
     * not be called once the closing delimiter has been seen.
     * <p>
     * On the first call the boundary search tables are built and the preamble
     * is skipped. The part is then classified: it becomes the root part when
     * there is no start parameter and it is the first part, or when its raw
     * Content-ID header equals the start parameter. Any other part is cached
     * in the attachment map, keyed by its Content-ID with the surrounding
     * angle brackets removed, or by a generated counter value when it has no
     * Content-ID header.
     *
     * @return StreamAttachment next available MIME part
     * @throws WebServiceException if the opening boundary is missing, or if
     *         reading the headers or the body fails
     */
    private StreamAttachment getNextPart() {
        assert !lastBodyPartFound();

        try {
            if (firstPart) {
                compileBoundaryPattern();
                // skip the first boundary of the MIME package
                if (!skipPreamble()) {
                    throw new WebServiceException("Missing Start Boundary, or boundary does not start on a new line");
                }
            }
            InternetHeaders ih = new InternetHeaders(in);
            String[] contentTypes = ih.getHeader("content-type");
            String contentType = (contentTypes != null) ? contentTypes[0] : "application/octet-stream";
            String[] contentIds = ih.getHeader("content-id");
            String mimeContentId = (contentIds != null) ? contentIds[0] : null;

            // Remove the enclosing angle brackets, but only when both are present,
            // so that a value without a closing '>' does not lose its last character.
            String contentId = mimeContentId;
            if (mimeContentId != null && mimeContentId.length() > 2
                    && mimeContentId.charAt(0) == '<'
                    && mimeContentId.charAt(mimeContentId.length() - 1) == '>') {
                contentId = mimeContentId.substring(1, mimeContentId.length() - 1);
            }

            ByteArrayBuffer bos = new ByteArrayBuffer();
            b = readBody(bos);
            StreamAttachment as = new StreamAttachment(bos, contentId, contentType);
            if (start == null && firstPart) {
                root = as;      // Taking first part as root part
            } else if (mimeContentId != null && start != null && start.equals(mimeContentId)) {
                root = as;      // root part as identified by start parameter
            } else if (contentId != null) {
                attachments.put(contentId, as);     // Attachment part
            } else {
                // No Content-ID header: file the part under a generated key
                ++cidCounter;
                attachments.put("" + cidCounter, as);
            }
            firstPart = false;
            return as;
        } catch (IOException ioe) {
            throw new WebServiceException(ioe);
        } catch (MessagingException me) {
            throw new WebServiceException(me);
        }
    }

    /**
     * Reads the body of the current part into {@code baos}.
     *
     * @param baos receives the bytes of the part body
     * @return the value of {@code b} after the body has been read
     * @throws WebServiceException if findMimeBody reports that no body was found
     */
    private int readBody(ByteArrayBuffer baos) throws IOException {
        boolean bodyFound = findMimeBody(baos);
        if (bodyFound) {
            return b;
        }
        //TODO: i18n
        throw new WebServiceException("Missing boundary delimitier ");
    }

    /**
     * Copies the body of the current MIME part into {@code out}, scanning the
     * stream for the boundary delimiter one delimiter-sized window at a time
     * and advancing by the shift taken from the bad-character and good-suffix
     * tables.
     * <p>
     * When the delimiter is found, the line break (LF or CRLF) in front of it
     * is not copied and the rest of the delimiter line is consumed. When the
     * stream ends before a delimiter is found, all remaining bytes are copied
     * and {@code b} is set to -1.
     *
     * @param out receives the bytes of the part body
     * @return always true
     * @throws WebServiceException if the delimiter is found but the bytes in
     *         front of it do not end with a line break
     */
    private boolean findMimeBody(ByteArrayBuffer out) throws IOException {
        int i;
        int l = boundaryBytes.length;
        int lx = l - 1;
        int bufferLength;
        // number of bytes at the start of prevBuffer that were shifted past on
        // the previous step and have not been written to out yet
        int s = 0;
        byte[] tmp;

        boolean first = true;
        BitSet eof = new BitSet(1);

        while (true) {
            in.mark(l);
            if (!first) {
                // keep the previous window around; its first s bytes are still pending
                tmp = prevBuffer;
                prevBuffer = buffer;
                buffer = tmp;
            }
            bufferLength = readNext(in, l, eof);

            if (bufferLength == -1) {
                // nothing left to read: flush the pending window, if any
                b = -1;
                if ((s == l)) {
                    out.write(prevBuffer, 0, s);
                }
                return true;
            }

            if (bufferLength < l) {
                // Stream ended inside this window, so it cannot hold a delimiter.
                // Flush the bytes still pending from the previous window first;
                // otherwise they would be dropped from the body.
                if (s > 0) {
                    out.write(prevBuffer, 0, s);
                }
                out.write(buffer, 0, bufferLength);
                b = -1;
                return true;
            }

            // compare the window with the delimiter, right to left
            for (i = lx; i >= 0; i--) {
                if (buffer[i] != boundaryBytes[i]) {
                    break;
                }
            }

            if (i < 0) {
                if (s > 0) {
                    // so if s == 1 : it must be an LF
                    // if s == 2 : it must be a CR LF
                    if (s <= 2) {
                        String crlf = new String(prevBuffer, 0, s);
                        if (!"\n".equals(crlf) && !"\r\n".equals(crlf)) {
                            throw new WebServiceException(
                                    "Boundary characters encountered in part Body " +
                                            "without a preceeding CRLF");
                        }
                    } else {
                        // write the pending bytes minus the trailing line break
                        if ((prevBuffer[s - 2] == '\r') && (prevBuffer[s - 1] == '\n')) {
                            out.write(prevBuffer, 0, s - 2);
                        } else if (prevBuffer[s - 1] == '\n') {
                            out.write(prevBuffer, 0, s - 1);
                        } else {
                            throw new WebServiceException(
                                    "Boundary characters encountered in part Body " +
                                            "without a preceeding CRLF");
                        }
                    }
                }
                // found the boundary, skip *LWSP-char and CRLF
                if (!skipLWSPAndCRLF(in)) {
                    //throw new Exception(
                    //   "Boundary does not terminate with CRLF");
                }
                return true;
            }

            if ((s > 0)) {
                if (prevBuffer[s - 1] == (byte) 13) {
                    // if buffer[0] == (byte)10
                    if (buffer[0] == (byte) 10) {
                        int j;
                        for (j = lx - 1; j > 0; j--) {
                            if (buffer[j + 1] != boundaryBytes[j]) {
                                break;
                            }
                        }
                        if (j == 0) {
                            // matched the boundaryBytes excluding the last char of the boundaryBytes
                            // so dont write the CR into stream
                            out.write(prevBuffer, 0, s - 1);
                        } else {
                            out.write(prevBuffer, 0, s);
                        }
                    } else {
                        out.write(prevBuffer, 0, s);
                    }
                } else {
                    out.write(prevBuffer, 0, s);
                }
            }

            // advance by the larger of the bad-character and good-suffix shifts
            s = Math.max(i + 1 - bcs[buffer[i] & 0x7f], gss[i]);
            in.reset();
            in.skip(s);
            first = false;
        }
    }


    /**
     * Tells whether the closing delimiter of the MIME package has been read.
     *
     * @return the state of bit 0 of lastPartFound
     */
    private boolean lastBodyPartFound() {
        final boolean closingDelimiterSeen = lastPartFound.get(0);
        return closingDelimiterSeen;
    }

    /**
     * Builds the two shift tables used when scanning for the boundary
     * delimiter: the Bad Character Shift table {@code bcs} and the Good Suffix
     * Shift table {@code gss}.
     */
    private void compileBoundaryPattern() {
        int l = boundaryBytes.length;

        int i;
        int j;

        // Copied from J2SE 1.4 regex code
        // java.util.regex.Pattern.java

        // Initialise Bad Character Shift table.
        // The index is masked to 7 bits, exactly as the lookups in the scan
        // loops do; an unmasked byte >= 0x80 is negative and would index
        // outside the table.
        for (i = 0; i < l; i++) {
            bcs[boundaryBytes[i] & 0x7f] = i + 1;
        }

        // Initialise Good Suffix Shift table
        gss = new int[l];

        NEXT:
        for (i = l; i > 0; i--) {
            // j is the beginning index of suffix being considered
            for (j = l - 1; j >= i; j--) {
                // Testing for good suffix
                if (boundaryBytes[j] == boundaryBytes[j - i]) {
                    // boundaryBytes[j..len] is a good suffix
                    gss[j - 1] = i;
                } else {
                    // No match. The array has already been
                    // filled up with correct values before.
                    continue NEXT;
                }
            }
            while (j > 0) {
                gss[--j] = i;
            }
        }
        gss[l - 1] = 1;
    }

    /**
     * Consumes everything up to and including the first boundary line.
     *
     * @return true if a boundary was found, false if the stream ended first
     * @throws WebServiceException if the boundary found is the closing delimiter
     */
    private boolean skipPreamble() throws IOException {
        boolean boundarySeen = findBoundary();
        if (boundarySeen && lastPartFound.get(0)) {
            throw new WebServiceException("Found closing boundary delimiter while trying to skip preamble");
        }
        return boundarySeen;
    }

    /**
     * Scans forward for the boundary delimiter, discarding everything in front
     * of it, and consumes the remainder of the delimiter line.
     *
     * @return true once a boundary line has been consumed, false if the end
     *         of the stream is reached first
     * @throws WebServiceException if the delimiter is found but its line is
     *         not terminated as skipLWSPAndCRLF expects
     */
    private boolean findBoundary() throws IOException {
        final int patternLength = boundaryBytes.length;
        final BitSet endOfStream = new BitSet(1);

        for (;;) {
            in.mark(patternLength);
            readNext(in, patternLength, endOfStream);
            if (endOfStream.get(0)) {
                // End of stream
                return false;
            }

            // Walk backwards over the window while it agrees with the delimiter
            int pos = patternLength - 1;
            while (pos >= 0 && buffer[pos] == boundaryBytes[pos]) {
                pos--;
            }

            if (pos < 0) {
                // found the boundary, skip *LWSP-char and CRLF
                if (skipLWSPAndCRLF(in)) {
                    return true;
                }
                throw new WebServiceException("Boundary does not terminate with CRLF");
            }

            // Mismatch at pos: rewind to the mark and move on by the larger shift
            int shift = Math.max(pos + 1 - bcs[buffer[pos] & 0x7f], gss[pos]);
            in.reset();
            in.skip(shift);
        }
    }

    /**
     * Consumes what follows a boundary delimiter on its line: an optional
     * "--" (which marks the closing delimiter), optional spaces and tabs, and
     * the terminating line break. A bare LF is accepted as well as CRLF, both
     * directly after the delimiter and after the "--" or the padding. The last
     * byte read is left in {@code b}.
     *
     * @param is stream positioned immediately after the delimiter bytes
     * @return true if the line was terminated by a line break, or if the
     *         stream ended after the closing delimiter; false if some other
     *         character was found
     * @throws WebServiceException if a CR directly after the delimiter is not
     *         followed by LF, if a single '-' follows the delimiter, or if
     *         the stream ends before the closing delimiter has been seen
     */
    private boolean skipLWSPAndCRLF(InputStream is) throws IOException {

        b = is.read();
        //looks like old impl allowed just a \n as well
        if (b == '\n') {
            return true;
        }

        if (b == '\r') {
            b = is.read();
            if (b == '\n') {
                return true;
            } else {
                throw new WebServiceException(
                        "transport padding after a Mime Boundary  should end in a CRLF, found CR only");
            }
        }

        if (b == '-') {
            b = is.read();
            if (b != '-') {
                throw new WebServiceException(
                        "Unexpected singular '-' character after Mime Boundary");
            } else {
                // set, rather than flip, so the flag can never be toggled back off
                lastPartFound.set(0);
                // read the next char
                b = is.read();
            }
        }

        // Skip the transport padding. The line break is checked before each
        // padding character so that it is also recognised when it directly
        // follows the closing "--", and so that a bare LF ends the line here
        // just as it does right after the delimiter.
        while (true) {
            if (b == '\n') {
                return true;
            }
            if (b == '\r') {
                b = is.read();
                if (b == '\n') {
                    return true;
                }
            }
            if (b != ' ' && b != '\t') {
                break;
            }
            b = is.read();
        }

        if (b == -1) {
            // the last boundary need not have CRLF
            if (!lastPartFound.get(0)) {
                throw new WebServiceException(
                        "End of Multipart Stream before encountering  closing boundary delimiter");
            }
            return true;
        }
        return false;
    }


    /**
     * Fills the start of {@code buffer} with up to {@code patternLength} bytes
     * from the stream. One bulk read is attempted first; if it comes back
     * short, the rest is read one byte at a time.
     *
     * @param is stream to read from
     * @param patternLength number of bytes wanted
     * @param eof bit 0 is flipped when the end of the stream is reached
     * @return the number of bytes placed in {@code buffer}, or -1 if the
     *         stream was already at its end
     */
    private int readNext(InputStream is, int patternLength, BitSet eof) throws IOException {
        int filled = is.read(buffer, 0, patternLength);
        if (filled == -1) {
            eof.flip(0);
            return filled;
        }

        // Top up a short read byte by byte until the window is full or the stream ends
        while (filled < patternLength) {
            int next = is.read();
            if (next == -1) {
                eof.flip(0);
                break;
            }
            buffer[filled++] = (byte) next;
        }
        return filled;
    }
}