/*
 * Copyright 2005-2006 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
package com.sun.xml.internal.ws.encoding;

import com.sun.istack.internal.NotNull;
import com.sun.istack.internal.Nullable;
import com.sun.xml.internal.messaging.saaj.packaging.mime.MessagingException;
import com.sun.xml.internal.messaging.saaj.packaging.mime.internet.ContentType;
import com.sun.xml.internal.messaging.saaj.packaging.mime.internet.InternetHeaders;
import com.sun.xml.internal.messaging.saaj.packaging.mime.internet.ParseException;
import com.sun.xml.internal.ws.message.stream.StreamAttachment;
import com.sun.xml.internal.ws.util.ASCIIUtility;
import com.sun.xml.internal.ws.util.ByteArrayBuffer;

import javax.xml.ws.WebServiceException;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;

/**
 * Parses Mime multipart message into primary part and attachment parts. It
 * parses the stream lazily as and when required.
 *
 * <p>The boundary delimiter ({@code "--" + boundary}) is located with a
 * Boyer-Moore style search (bad-character and good-suffix shift tables) over
 * a window of {@code boundaryBytes.length} bytes, using
 * {@link InputStream#mark(int)} / {@link InputStream#reset()} to re-position
 * the stream after each non-matching window.
 *
 * <p>This class performs no synchronization of its own.
 *
 * TODO need a list to keep all the attachments so that even if Content-Id is
 * not there it is accounted
 *
 * @author Vivek Pandey
 * @author Jitendra Kotamraju
 */
public final class MimeMultipartParser {

    private final InputStream in;
    /** Value of the "start" Content-Type parameter, or null when absent. */
    private final String start;
    /** Bytes of the boundary delimiter, i.e. "--" followed by the boundary parameter. */
    private final byte[] boundaryBytes;
    /** Bit 0 is set once the closing boundary delimiter ("--boundary--") has been seen. */
    private final BitSet lastPartFound = new BitSet(1);

    // last byte read while skipping what follows a boundary, set to -1 on EOF
    private int b = 0;

    /** Bad-character shift table, indexed by {@code byte & 0x7f}. */
    private final int[] bcs = new int[256];
    /** Good-suffix shift table, built by {@link #compileBoundaryPattern()}. */
    private int[] gss;

    private static final int BUFFER_SIZE = 4096;
    /** Current search window; swapped with {@link #prevBuffer} on every iteration. */
    private byte[] buffer = new byte[BUFFER_SIZE];
    /** Previous search window; its leading bytes are body content not yet written out. */
    private byte[] prevBuffer = new byte[BUFFER_SIZE];

    private boolean firstPart = true;

    /** Parsed non-root parts keyed by Content-ID (or by a generated counter value). */
    private final Map<String, StreamAttachment> attachments = new HashMap<String, StreamAttachment>();
    private StreamAttachment root;

    /** Source of generated keys for parts that carry no Content-ID header. */
    private int cidCounter = 0;

    /**
     * Creates a parser for the given multipart stream.
     *
     * @param in          stream positioned at the start of the MIME package; it is
     *                    wrapped in a {@link BufferedInputStream} when it does not
     *                    support mark/reset
     * @param contentType value of the Content-Type header; must carry a non-empty
     *                    "boundary" parameter and may carry a "start" parameter
     * @throws WebServiceException if the content type cannot be parsed or has no
     *                             boundary parameter
     */
    public MimeMultipartParser(InputStream in, String contentType) {
        try {
            ContentType ct = new ContentType(contentType);
            String boundary = ct.getParameter("boundary");
            if (boundary == null || boundary.equals("")) {
                throw new WebServiceException("MIME boundary parameter not found: " + contentType);
            }
            String bnd = "--" + boundary;
            boundaryBytes = ASCIIUtility.getBytes(bnd);
            start = ct.getParameter("start");
        } catch (ParseException e) {
            throw new WebServiceException(e);
        }

        // The search window is exactly boundaryBytes.length bytes long and the
        // boundary comes from a header we do not control, so make sure both
        // window buffers can hold it.
        if (boundaryBytes.length > BUFFER_SIZE) {
            buffer = new byte[boundaryBytes.length];
            prevBuffer = new byte[boundaryBytes.length];
        }

        //InputStream MUST support mark()
        if (!in.markSupported()) {
            this.in = new BufferedInputStream(in);
        } else {
            this.in = in;
        }
    }

    /**
     * Parses the stream and returns the root part. If start parameter is
     * present in Content-Type, it is used to determine the root part, otherwise
     * root part is the first part.
     *
     * @return StreamAttachment for root part
     *         null if root part cannot be found
     */
    public @Nullable StreamAttachment getRootPart() {
        if (root != null) {
            return root;
        }
        while (!lastBodyPartFound() && (b != -1) && root == null) {
            getNextPart();
        }
        return root;
    }

    /**
     * Parses the entire stream and returns all MIME parts except root MIME part.
     *
     * @return {@code Map<String, StreamAttachment>} for all attachment parts
     */
    public @NotNull Map<String, StreamAttachment> getAttachmentParts() {
        while (!lastBodyPartFound() && (b != -1)) {
            getNextPart();
        }
        return attachments;
    }

    /**
     * This method can be called to get a matching MIME attachment part for the
     * given contentId. It parses the stream until it finds a matching part.
     *
     * @param contentId Content-ID to look for
     * @return StreamAttachment attachment for contentId
     *         null if there is no attachment for contentId
     * @throws IOException declared for callers; parsing failures inside this
     *                     class surface as {@link WebServiceException}
     */
    public @Nullable StreamAttachment getAttachmentPart(String contentId) throws IOException {
        //first see if this attachment is already parsed, if so return it
        StreamAttachment streamAttach = attachments.get(contentId);
        if (streamAttach != null) {
            return streamAttach;
        }
        //else parse the MIME parts till we get what we want
        while (!lastBodyPartFound() && (b != -1)) {
            streamAttach = getNextPart();
            String newContentId = streamAttach.getContentId();
            if (newContentId != null && newContentId.equals(contentId)) {
                return streamAttach;
            }
        }
        return null; // Attachment is not found
    }

    /**
     * Parses the stream and returns next available MIME part. This shouldn't
     * be called if there are no MIME parts in the stream. Attachment
     * part(not root part) is cached in the {@link Map}&lt;{@link String},{@link StreamAttachment}&gt;
     * before returning the MIME part. It also finds the root part of the MIME
     * package and assigns root variable.
     *
     * @return StreamAttachment next available MIME part
     * @throws WebServiceException if the start boundary is missing, or if reading
     *                             or header parsing fails
     */
    private StreamAttachment getNextPart() {
        assert !lastBodyPartFound();

        try {
            if (firstPart) {
                compileBoundaryPattern();
                // skip the first boundary of the MIME package
                if (!skipPreamble()) {
                    throw new WebServiceException("Missing Start Boundary, or boundary does not start on a new line");
                }
            }
            InternetHeaders ih = new InternetHeaders(in);
            String[] contentTypes = ih.getHeader("content-type");
            String contentType = (contentTypes != null) ? contentTypes[0] : "application/octet-stream";
            String[] contentIds = ih.getHeader("content-id");
            String mimeContentId = (contentIds != null) ? contentIds[0] : null;

            // When the header value starts with '<', drop its first and last
            // character to obtain the bare content id.
            String contentId = mimeContentId;
            if (mimeContentId != null && mimeContentId.length() > 2) {
                if (contentId.charAt(0) == '<') {
                    contentId = mimeContentId.substring(1, mimeContentId.length() - 1);
                }
            }

            ByteArrayBuffer bos = new ByteArrayBuffer();
            b = readBody(bos);
            StreamAttachment as = new StreamAttachment(bos, contentId, contentType);

            if (start == null && firstPart) {
                root = as;      // Taking first part as root part
            } else if (mimeContentId != null && start != null && start.equals(mimeContentId)) {
                // compared against the raw header value, brackets included
                root = as;      // root part as identified by start parameter
            } else if (contentId != null) {
                attachments.put(contentId, as);     // Attachment part
            } else {
                // no Content-ID header: register under a generated key
                ++cidCounter;
                attachments.put("" + cidCounter, as);
            }
            firstPart = false;
            return as;
        } catch (IOException ioe) {
            throw new WebServiceException(ioe);
        } catch (MessagingException me) {
            throw new WebServiceException(me);
        }
    }

    /**
     * Copies the body of the current part into {@code baos}.
     *
     * @return the value of {@link #b} after the body has been consumed
     */
    private int readBody(ByteArrayBuffer baos) throws IOException {
        if (!findMimeBody(baos)) {
            //TODO: i18n
            throw new WebServiceException("Missing boundary delimitier ");
        }
        return b;
    }

    /**
     * Copies bytes from the stream into {@code out} until the next boundary
     * delimiter or the end of the stream is reached. The line break (LF or
     * CR LF) immediately preceding the boundary is not part of the body and is
     * not written.
     *
     * @param out receives the part body
     * @return always {@code true}; end of stream without a boundary is tolerated
     *         and signalled by setting {@link #b} to -1
     * @throws WebServiceException if boundary bytes appear in the body without a
     *                             preceding line break
     */
    private boolean findMimeBody(ByteArrayBuffer out) throws IOException {
        int i;
        int l = boundaryBytes.length;
        int lx = l - 1;
        int bufferLength;
        // Shift computed for the previous window. The first s bytes of
        // prevBuffer are body bytes that were skipped over but not yet written.
        int s = 0;
        byte[] tmp;
        boolean first = true;
        BitSet eof = new BitSet(1);

        while (true) {
            in.mark(l);
            if (!first) {
                tmp = prevBuffer;
                prevBuffer = buffer;
                buffer = tmp;
            }
            bufferLength = readNext(in, l, eof);

            if (bufferLength == -1) {
                b = -1;
                if (s == l) {
                    out.write(prevBuffer, 0, s);
                }
                return true;
            }

            if (bufferLength < l) {
                // Stream ended inside this window. Flush the bytes still pending
                // from the previous window first, otherwise the body would be
                // missing them.
                if (s > 0) {
                    out.write(prevBuffer, 0, s);
                }
                out.write(buffer, 0, bufferLength);
                b = -1;
                return true;
            }

            // compare the window with the boundary, right to left
            for (i = lx; i >= 0; i--) {
                if (buffer[i] != boundaryBytes[i]) {
                    break;
                }
            }

            if (i < 0) {
                if (s > 0) {
                    // so if s == 1 : it must be an LF
                    // if s == 2 : it must be a CR LF
                    if (s <= 2) {
                        boolean lfOnly = (s == 1) && (prevBuffer[0] == '\n');
                        boolean crLf = (s == 2) && (prevBuffer[0] == '\r') && (prevBuffer[1] == '\n');
                        if (!lfOnly && !crLf) {
                            throw new WebServiceException(
                                "Boundary characters encountered in part Body " +
                                "without a preceeding CRLF");
                        }
                    } else {
                        if ((prevBuffer[s - 2] == '\r') && (prevBuffer[s - 1] == '\n')) {
                            out.write(prevBuffer, 0, s - 2);
                        } else if (prevBuffer[s - 1] == '\n') {
                            out.write(prevBuffer, 0, s - 1);
                        } else {
                            throw new WebServiceException(
                                "Boundary characters encountered in part Body " +
                                "without a preceeding CRLF");
                        }
                    }
                }
                // found the boundary, skip *LWSP-char and CRLF.
                // Deliberately lenient: a boundary line that does not end in
                // CRLF is accepted here. The call is still required because it
                // consumes the rest of the line and updates b / lastPartFound.
                skipLWSPAndCRLF(in);
                return true;
            }

            if (s > 0) {
                int pending = s;
                if (prevBuffer[s - 1] == (byte) 13 && buffer[0] == (byte) 10) {
                    // Pending bytes end in CR and this window starts with LF:
                    // check whether the rest of the window lines up with the
                    // boundary (its first and last characters are not compared).
                    int j;
                    for (j = lx - 1; j > 0; j--) {
                        if (buffer[j + 1] != boundaryBytes[j]) {
                            break;
                        }
                    }
                    if (j == 0) {
                        // matched the boundaryBytes excluding the last char of the boundaryBytes
                        // so dont write the CR into stream
                        // NOTE(review): if the next window turns out not to be
                        // the boundary, this CR looks lost from the body - confirm.
                        pending = s - 1;
                    }
                }
                out.write(prevBuffer, 0, pending);
            }

            s = Math.max(i + 1 - bcs[buffer[i] & 0x7f], gss[i]);
            in.reset();
            skipFully(in, s);
            if (first) {
                first = false;
            }
        }
    }

    /** Returns true once the closing boundary delimiter has been encountered. */
    private boolean lastBodyPartFound() {
        return lastPartFound.get(0);
    }

    /**
     * Builds the bad-character ({@link #bcs}) and good-suffix ({@link #gss})
     * shift tables for {@link #boundaryBytes}.
     */
    private void compileBoundaryPattern() {
        int l = boundaryBytes.length;
        int i;
        int j;

        // Copied from J2SE 1.4 regex code
        // java.util.regex.Pattern.java

        // Initialise Bad Character Shift table.
        // The index is masked exactly like the lookups (buffer[i] & 0x7f) so
        // that a boundary byte outside 0..127 cannot produce a negative index.
        for (i = 0; i < l; i++) {
            bcs[boundaryBytes[i] & 0x7f] = i + 1;
        }

        // Initialise Good Suffix Shift table
        gss = new int[l];
  NEXT: for (i = l; i > 0; i--) {
            // j is the beginning index of suffix being considered
            for (j = l - 1; j >= i; j--) {
                // Testing for good suffix
                if (boundaryBytes[j] == boundaryBytes[j - i]) {
                    // boundaryBytes[j..len] is a good suffix
                    gss[j - 1] = i;
                } else {
                    // No match. The array has already been
                    // filled up with correct values before.
                    continue NEXT;
                }
            }
            while (j > 0) {
                gss[--j] = i;
            }
        }
        gss[l - 1] = 1;
    }

    /**
     * Skips everything up to and including the first boundary line.
     *
     * @return false if the stream ended before a boundary was found
     * @throws WebServiceException if the first boundary found is the closing one
     */
    private boolean skipPreamble() throws IOException {
        if (!findBoundary()) {
            return false;
        }
        if (lastPartFound.get(0)) {
            throw new WebServiceException("Found closing boundary delimiter while trying to skip preamble");
        }
        return true;
    }

    /**
     * Advances the stream past the next boundary line, discarding everything
     * read on the way.
     *
     * @return true when a boundary was found and its line end consumed,
     *         false when the end of the stream was reached first
     * @throws WebServiceException if the boundary line is not properly terminated
     */
    private boolean findBoundary() throws IOException {
        int i;
        int l = boundaryBytes.length;
        int lx = l - 1;
        BitSet eof = new BitSet(1);

        while (true) {
            in.mark(l);
            readNext(in, l, eof);
            if (eof.get(0)) {
                // End of stream
                return false;
            }
            for (i = lx; i >= 0; i--) {
                if (buffer[i] != boundaryBytes[i]) {
                    break;
                }
            }
            if (i < 0) {
                // found the boundary, skip *LWSP-char and CRLF
                if (!skipLWSPAndCRLF(in)) {
                    throw new WebServiceException("Boundary does not terminate with CRLF");
                }
                return true;
            }
            int s = Math.max(i + 1 - bcs[buffer[i] & 0x7f], gss[i]);
            in.reset();
            skipFully(in, s);
        }
    }

    /**
     * Consumes what follows a boundary delimiter: an optional closing "--",
     * optional spaces/tabs and the line terminator. Updates {@link #b} with the
     * last byte read and marks {@link #lastPartFound} when "--" is seen.
     *
     * @return true if the line ended with LF or CR LF, or if the stream ended
     *         after the closing delimiter; false otherwise
     * @throws WebServiceException on a lone CR, a single '-', or end of stream
     *                             before the closing delimiter
     */
    private boolean skipLWSPAndCRLF(InputStream is) throws IOException {
        b = is.read();
        //looks like old impl allowed just a \n as well
        if (b == '\n') {
            return true;
        }

        if (b == '\r') {
            b = is.read();
            if (b == '\n') {
                return true;
            } else {
                throw new WebServiceException(
                    "transport padding after a Mime Boundary should end in a CRLF, found CR only");
            }
        }

        if (b == '-') {
            b = is.read();
            if (b != '-') {
                throw new WebServiceException(
                    "Unexpected singular '-' character after Mime Boundary");
            } else {
                // set (not flip) so the flag can never be toggled back off
                lastPartFound.set(0);
                // read the next char
                b = is.read();
            }
        }

        while ((b != -1) && ((b == ' ') || (b == '\t'))) {
            b = is.read();
            if (b == '\r') {
                b = is.read();
                if (b == '\n') {
                    return true;
                }
            }
        }

        if (b == -1) {
            // the last boundary need not have CRLF
            if (!lastPartFound.get(0)) {
                throw new WebServiceException(
                    "End of Multipart Stream before encountering closing boundary delimiter");
            }
            return true;
        }
        return false;
    }

    /**
     * Fills {@link #buffer} with up to {@code patternLength} bytes, topping up a
     * short bulk read one byte at a time.
     *
     * @param eof bit 0 is set when the end of the stream is hit
     * @return number of bytes placed in {@link #buffer}, or -1 if the stream was
     *         already at its end
     */
    private int readNext(InputStream is, int patternLength, BitSet eof) throws IOException {
        int bufferLength = is.read(buffer, 0, patternLength);
        if (bufferLength == -1) {
            eof.set(0);
        } else if (bufferLength < patternLength) {
            //repeatedly read patternLength - bufferLength
            int temp;
            int i = bufferLength;
            for (; i < patternLength; i++) {
                temp = is.read();
                if (temp == -1) {
                    eof.set(0);
                    break;
                }
                buffer[i] = (byte) temp;
            }
            bufferLength = i;
        }
        return bufferLength;
    }

    /**
     * Skips exactly {@code count} bytes unless the stream ends first.
     * {@link InputStream#skip(long)} may skip fewer bytes than requested, which
     * would desynchronise the search window, so fall back to single-byte reads
     * whenever it makes no progress.
     */
    private static void skipFully(InputStream is, int count) throws IOException {
        int remaining = count;
        while (remaining > 0) {
            long skipped = is.skip(remaining);
            if (skipped > 0) {
                remaining -= (int) skipped;
            } else if (is.read() == -1) {
                return;     // end of stream
            } else {
                remaining--;
            }
        }
    }
}