/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * * Contributors: * Original contributors from geronimo-javamail_1.4_spec-1.7.1 * Florent Guillaume */ package org.apache.chemistry.opencmis.commons.impl; import java.io.ByteArrayOutputStream; import java.io.UnsupportedEncodingException; import java.util.HashMap; import java.util.Locale; import java.util.Map; import org.apache.chemistry.opencmis.commons.exceptions.CmisRuntimeException; /** * MIME helper class. */ public final class MimeHelper { public static final String CONTENT_DISPOSITION = "Content-Disposition"; public static final String DISPOSITION_ATTACHMENT = "attachment"; public static final String DISPOSITION_INLINE = "inline"; public static final String DISPOSITION_FILENAME = "filename"; public static final String DISPOSITION_NAME = "name"; public static final String DISPOSITION_FORM_DATA_CONTENT = "form-data; " + DISPOSITION_NAME + "=\"content\""; // RFC 2045 private static final String MIME_SPECIALS = "()<>@,;:\\\"/[]?=" + "\t "; private static final String RFC2231_SPECIALS = "*'%" + MIME_SPECIALS; private static final String WHITE = " \t\n\r"; private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray(); private static final byte[] HEX_DECODE = new byte[0x80]; static { for (int i = 0; i < HEX_DIGITS.length; i++) { HEX_DECODE[HEX_DIGITS[i]] = (byte) i; HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i; } } private MimeHelper() { } /** * Encodes a value per RFC 2231. * <p> * This is used to pass non-ASCII parameters to MIME parameter lists. * <p> * This implementation always uses UTF-8 and no language. * <p> * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for * details. * * @param value * the value to encode * @param buf * the buffer to fill * @return {@code true} if an encoding was needed, or {@code false} if no * encoding was actually needed */ protected static boolean encodeRFC2231value(String value, StringBuilder buf) { assert value != null; assert buf != null; String charset = IOUtils.UTF8; buf.append(charset); buf.append("''"); // no language byte[] bytes = IOUtils.toUTF8Bytes(value); boolean encoded = false; for (int i = 0; i < bytes.length; i++) { int ch = bytes[i] & 0xff; if (ch <= 32 || ch >= 127 || RFC2231_SPECIALS.indexOf(ch) != -1) { buf.append('%'); buf.append(HEX_DIGITS[ch >> 4]); buf.append(HEX_DIGITS[ch & 0xf]); encoded = true; } else { buf.append((char) ch); } } return encoded; } /** * Encodes a MIME parameter per RFC 2231. * <p> * This implementation always uses UTF-8 and no language. * <p> * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for * details. * * @param value * the string to encode * @return the encoded string */ protected static String encodeRFC2231(String key, String value) { StringBuilder buf = new StringBuilder(32); boolean encoded = encodeRFC2231value(value, buf); if (encoded) { return "; " + key + "*=" + buf.toString(); } else { return "; " + key + "=" + value; } } /** * Encodes the Content-Disposition header value according to RFC 2183 and * RFC 2231. * <p> * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for * details. * * @param disposition * the disposition * @param filename * the file name * @return the encoded header value */ public static String encodeContentDisposition(String disposition, String filename) { if (disposition == null) { disposition = DISPOSITION_ATTACHMENT; } return disposition + encodeRFC2231(DISPOSITION_FILENAME, filename); } /** * Decodes a filename from the Content-Disposition header value according to * RFC 2183 and RFC 2231. * <p> * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for * details. * * @param value * the header value to decode * @return the filename */ public static String decodeContentDispositionFilename(String value) { Map<String, String> params = new HashMap<String, String>(); decodeContentDisposition(value, params); return params.get(DISPOSITION_FILENAME); } /** * Decodes the Content-Disposition header value according to RFC 2183 and * RFC 2231. * <p> * Does not deal with continuation lines. * <p> * See <a href="http://tools.ietf.org/html/rfc2231">RFC 2231</a> for * details. * * @param value * the header value to decode * @param params * the map of parameters to fill * @return the disposition * */ public static String decodeContentDisposition(String value, Map<String, String> params) { try { HeaderTokenizer tokenizer = new HeaderTokenizer(value); // get the first token, which must be an ATOM Token token = tokenizer.next(); if (token.getType() != Token.ATOM) { return null; } String disposition = token.getValue(); // value ignored in this method // the remainder is the parameters String remainder = tokenizer.getRemainder(); if (remainder != null) { getParameters(remainder, params); } return disposition; } catch (ParseException e) { return null; } } /** * Gets charset from a content type header. * * @param value * the header value to decode * @return the charset or <code>null</code> if no valid boundary available */ public static String getCharsetFromContentType(String value) { try { HeaderTokenizer tokenizer = new HeaderTokenizer(value, ";", true); // get the first token, which must be an ATOM Token token = tokenizer.next(); if (token.getType() != Token.ATOM) { return null; } // the remainder is the parameters String remainder = tokenizer.getRemainder(); Map<String, String> params; if (remainder != null) { params = new HashMap<String, String>(); getParameters(remainder, params); return params.get("charset"); } } catch (ParseException e) { return null; } return null; } /** * Parses a WWW-Authenticate header value. * * @param value * the header value to parse * * @return a map with the (lower case) challenge name as key and as the * value a sub-map with parameters of the challenge */ public static Map<String, Map<String, String>> getChallengesFromAuthenticateHeader(String value) { if (value == null || value.length() == 0) { return null; } final String trimValue = value.trim(); Map<String, Map<String, String>> result = new HashMap<String, Map<String, String>>(); boolean inQuotes = false; boolean inName = true; String challenge = null; String paramName = ""; StringBuilder sb = new StringBuilder(64); for (int i = 0; i < trimValue.length(); i++) { char c = trimValue.charAt(i); if (c == '\\') { if (!inQuotes) { return null; } if (trimValue.length() > i && trimValue.charAt(i + 1) == '\\') { sb.append('\\'); i++; } else if (trimValue.length() > i && trimValue.charAt(i + 1) == '"') { sb.append('"'); i++; } else { return null; } } else if (c == '"') { if (inName) { return null; } if (inQuotes) { Map<String, String> authMap = result.get(challenge); if (authMap == null) { return null; } authMap.put(paramName, sb.toString()); } sb.setLength(0); inQuotes = !inQuotes; } else if (c == '=') { if (inName) { paramName = sb.toString().trim(); int spcIdx = paramName.indexOf(' '); if (spcIdx > -1) { challenge = paramName.substring(0, spcIdx).toLowerCase(Locale.ENGLISH); result.put(challenge, new HashMap<String, String>()); paramName = paramName.substring(spcIdx).trim(); } sb.setLength(0); inName = false; } else if (!inQuotes) { return null; } } else if (c == ',') { if (inName) { challenge = sb.toString().trim().toLowerCase(Locale.ENGLISH); result.put(challenge, new HashMap<String, String>()); sb.setLength(0); } else { if (inQuotes) { sb.append(c); } else { Map<String, String> authMap = result.get(challenge); if (authMap == null) { return null; } if (!authMap.containsKey(paramName)) { authMap.put(paramName, sb.toString().trim()); } sb.setLength(0); inName = true; } } } else { sb.append(c); } } if (inQuotes) { return null; } if (inName) { challenge = sb.toString().trim().toLowerCase(Locale.ENGLISH); result.put(challenge, new HashMap<String, String>()); } else { Map<String, String> authMap = result.get(challenge); if (authMap == null) { return null; } if (!authMap.containsKey(paramName)) { authMap.put(paramName, sb.toString().trim()); } } return result; } /** * Gets the boundary from a <code>multipart/formdata</code> content type * header. * * @param value * the header value to decode * @return the boundary as a byte array or <code>null</code> if no valid * boundary available */ public static byte[] getBoundaryFromMultiPart(String value) { try { HeaderTokenizer tokenizer = new HeaderTokenizer(value, ";", true); // get the first token, which must be an ATOM Token token = tokenizer.next(); if (token.getType() != Token.ATOM) { return null; } // check content type String multipartContentType = token.getValue(); if (multipartContentType == null || !(multipartContentType.equalsIgnoreCase("multipart/form-data") || multipartContentType .equalsIgnoreCase("multipart/related"))) { return null; } // the remainder is the parameters String remainder = tokenizer.getRemainder(); if (remainder != null) { Map<String, String> params = new HashMap<String, String>(); getParameters(remainder, params); String boundaryStr = params.get("boundary"); if (boundaryStr != null && boundaryStr.length() > 0) { try { return boundaryStr.getBytes(IOUtils.ISO_8859_1); } catch (UnsupportedEncodingException e) { // shouldn't happen... throw new CmisRuntimeException("Unsupported encoding 'ISO-8859-1'", e); } } } } catch (ParseException e) { return null; } return null; } protected static class ParseException extends Exception { private static final long serialVersionUID = 1L; public ParseException() { super(); } public ParseException(String message) { super(message); } } /* * From geronimo-javamail_1.4_spec-1.7.1. Token */ protected static class Token { // Constant values from J2SE 1.4 API Docs (Constant values) public static final int ATOM = -1; public static final int COMMENT = -3; public static final int EOF = -4; public static final int QUOTEDSTRING = -2; private final int type; private final String value; public Token(int type, String value) { this.type = type; this.value = value; } public int getType() { return type; } public String getValue() { return value; } } /* * Tweaked from geronimo-javamail_1.4_spec-1.7.1. HeaderTokenizer */ protected static class HeaderTokenizer { private static final Token EOF = new Token(Token.EOF, null); private final String header; private final String delimiters; private final boolean skipComments; private int pos; public HeaderTokenizer(String header) { this(header, MIME_SPECIALS, true); } protected HeaderTokenizer(String header, String delimiters, boolean skipComments) { this.header = header; this.delimiters = delimiters; this.skipComments = skipComments; } public String getRemainder() { return header.substring(pos); } public Token next() throws ParseException { return readToken(); } /** * Read an ATOM token from the parsed header. * * @return A token containing the value of the atom token. */ private Token readAtomicToken() { // skip to next delimiter int start = pos; while (++pos < header.length()) { // break on the first non-atom character. char ch = header.charAt(pos); if (delimiters.indexOf(header.charAt(pos)) != -1 || ch < 32 || ch >= 127) { break; } } return new Token(Token.ATOM, header.substring(start, pos)); } /** * Read the next token from the header. * * @return The next token from the header. White space is skipped, and * comment tokens are also skipped if indicated. */ private Token readToken() throws ParseException { if (pos >= header.length()) { return EOF; } else { char c = header.charAt(pos); // comment token...read and skip over this if (c == '(') { Token comment = readComment(); if (skipComments) { return readToken(); } else { return comment; } // quoted literal } else if (c == '\"') { return readQuotedString(); // white space, eat this and find a real token. } else if (WHITE.indexOf(c) != -1) { eatWhiteSpace(); return readToken(); // either a CTL or special. These characters have a // self-defining token type. } else if (c < 32 || c >= 127 || delimiters.indexOf(c) != -1) { pos++; return new Token(c, String.valueOf(c)); } else { // start of an atom, parse it off. return readAtomicToken(); } } } /** * Extract a substring from the header string and apply any * escaping/folding rules to the string. * * @param start * The starting offset in the header. * @param end * The header end offset + 1. * @return The processed string value. */ private String getEscapedValue(int start, int end) throws ParseException { StringBuilder value = new StringBuilder(32); for (int i = start; i < end; i++) { char ch = header.charAt(i); // is this an escape character? if (ch == '\\') { i++; if (i == end) { throw new ParseException("Invalid escape character"); } value.append(header.charAt(i)); } else if (ch == '\r') { // line breaks are ignored, except for naked '\n' // characters, which are consider parts of linear // whitespace. // see if this is a CRLF sequence, and skip the second if it // is. if (i < end - 1 && header.charAt(i + 1) == '\n') { i++; } } else { // just append the ch value. value.append(ch); } } return value.toString(); } /** * Read a comment from the header, applying nesting and escape rules to * the content. * * @return A comment token with the token value. */ private Token readComment() throws ParseException { int start = pos + 1; int nesting = 1; boolean requiresEscaping = false; // skip to end of comment/string while (++pos < header.length()) { char ch = header.charAt(pos); if (ch == ')') { nesting--; if (nesting == 0) { break; } } else if (ch == '(') { nesting++; } else if (ch == '\\') { pos++; requiresEscaping = true; } else if (ch == '\r') { // we need to process line breaks also requiresEscaping = true; } } if (nesting != 0) { throw new ParseException("Unbalanced comments"); } String value; if (requiresEscaping) { value = getEscapedValue(start, pos); } else { value = header.substring(start, pos++); } return new Token(Token.COMMENT, value); } /** * Parse out a quoted string from the header, applying escaping rules to * the value. * * @return The QUOTEDSTRING token with the value. * @exception ParseException */ private Token readQuotedString() throws ParseException { int start = pos + 1; boolean requiresEscaping = false; // skip to end of comment/string while (++pos < header.length()) { char ch = header.charAt(pos); if (ch == '"') { String value; if (requiresEscaping) { value = getEscapedValue(start, pos++); } else { value = header.substring(start, pos++); } return new Token(Token.QUOTEDSTRING, value); } else if (ch == '\\') { pos++; requiresEscaping = true; } else if (ch == '\r') { // we need to process line breaks also requiresEscaping = true; } } throw new ParseException("Missing '\"'"); } /** * Skip white space in the token string. */ private void eatWhiteSpace() { // skip to end of whitespace while (++pos < header.length() && WHITE.indexOf(header.charAt(pos)) != -1) { // just read } } } /* * Tweaked from geronimo-javamail_1.4_spec-1.7.1. ParameterList */ protected static Map<String, String> getParameters(String list, Map<String, String> params) throws ParseException { HeaderTokenizer tokenizer = new HeaderTokenizer(list); while (true) { Token token = tokenizer.next(); switch (token.getType()) { case Token.EOF: // the EOF token terminates parsing. return params; case ';': // each new parameter is separated by a semicolon, including // the first, which separates // the parameters from the main part of the header. // the next token needs to be a parameter name token = tokenizer.next(); // allow a trailing semicolon on the parameters. if (token.getType() == Token.EOF) { return params; } if (token.getType() != Token.ATOM) { throw new ParseException("Invalid parameter name: " + token.getValue()); } // get the parameter name as a lower case version for better // mapping. String name = token.getValue().toLowerCase(Locale.ENGLISH); token = tokenizer.next(); // parameters are name=value, so we must have the "=" here. if (token.getType() != '=') { throw new ParseException("Missing '='"); } // now the value, which may be an atom or a literal token = tokenizer.next(); if (token.getType() != Token.ATOM && token.getType() != Token.QUOTEDSTRING) { throw new ParseException("Invalid parameter value: " + token.getValue()); } String value = token.getValue(); // we might have to do some additional decoding. A name that // ends with "*" is marked as being encoded, so if requested, we // decode the value. if (name.endsWith("*")) { name = name.substring(0, name.length() - 1); value = decodeRFC2231value(value); } params.put(name, value); break; default: throw new ParseException("Missing ';'"); } } } protected static String decodeRFC2231value(String value) { int q1 = value.indexOf('\''); if (q1 == -1) { // missing charset return value; } String mimeCharset = value.substring(0, q1); int q2 = value.indexOf('\'', q1 + 1); if (q2 == -1) { // missing language return value; } byte[] bytes = fromHex(value.substring(q2 + 1)); try { return new String(bytes, getJavaCharset(mimeCharset)); } catch (UnsupportedEncodingException e) { // incorrect encoding return value; } } protected static byte[] fromHex(String data) { ByteArrayOutputStream out = new ByteArrayOutputStream(data.length()); for (int i = 0; i < data.length();) { char c = data.charAt(i++); if (c == '%') { if (i > data.length() - 2) { break; // unterminated sequence } byte b1 = HEX_DECODE[data.charAt(i++) & 0x7f]; byte b2 = HEX_DECODE[data.charAt(i++) & 0x7f]; out.write((b1 << 4) | b2); } else { out.write((byte) c); } } return out.toByteArray(); } protected static String getJavaCharset(String mimeCharset) { // good enough for standard values return mimeCharset; } }