/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
****************************************************************/
package org.apache.james.mime4j.codec;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.james.mime4j.util.CharsetUtil;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Static methods for decoding strings, byte arrays and encoded words.
*/
public class DecoderUtil {
    private static final Log log = LogFactory.getLog(DecoderUtil.class);

    /**
     * Decodes a string containing quoted-printable encoded data.
     *
     * @param s the string to decode.
     * @return the decoded bytes.
     */
    public static byte[] decodeQuotedPrintable(String s) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        try {
            byte[] bytes = s.getBytes("US-ASCII");

            QuotedPrintableInputStream is = new QuotedPrintableInputStream(
                    new ByteArrayInputStream(bytes));

            int b;
            while ((b = is.read()) != -1) {
                baos.write(b);
            }
        } catch (IOException e) {
            // The source is an in-memory stream, so this should never happen!
            log.error(e);
            throw new IllegalStateException(e);
        }

        return baos.toByteArray();
    }

    /**
     * Decodes a string containing base64 encoded data.
     *
     * @param s the string to decode.
     * @return the decoded bytes.
     */
    public static byte[] decodeBase64(String s) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        try {
            byte[] bytes = s.getBytes("US-ASCII");

            Base64InputStream is = new Base64InputStream(
                    new ByteArrayInputStream(bytes));

            int b;
            while ((b = is.read()) != -1) {
                baos.write(b);
            }
        } catch (IOException e) {
            // The source is an in-memory stream, so this should never happen!
            log.error(e);
            throw new IllegalStateException(e);
        }

        return baos.toByteArray();
    }

    /**
     * Decodes an encoded text encoded with the 'B' encoding (described in
     * RFC 2047) found in a header field body.
     *
     * @param encodedText the encoded text to decode.
     * @param charset the Java charset to use.
     * @return the decoded string.
     * @throws UnsupportedEncodingException if the given Java charset isn't
     *         supported.
     */
    public static String decodeB(String encodedText, String charset)
            throws UnsupportedEncodingException {
        byte[] decodedBytes = decodeBase64(encodedText);
        return new String(decodedBytes, charset);
    }

    /**
     * Decodes an encoded text encoded with the 'Q' encoding (described in
     * RFC 2047) found in a header field body.
     *
     * @param encodedText the encoded text to decode.
     * @param charset the Java charset to use.
     * @return the decoded string.
     * @throws UnsupportedEncodingException if the given Java charset isn't
     *         supported.
     */
    public static String decodeQ(String encodedText, String charset)
            throws UnsupportedEncodingException {
        // The 'Q' encoding writes a space as '_'; turn it back into the
        // plain quoted-printable form before decoding.
        encodedText = replaceUnderscores(encodedText);

        byte[] decodedBytes = decodeQuotedPrintable(encodedText);
        return new String(decodedBytes, charset);
    }

    /**
     * Matches one encoded word together with the text that precedes it:
     * group 1 is the preceding text (separator), group 2 the MIME charset,
     * group 3 the encoding and group 4 the encoded text.
     */
    final static Pattern parseEncodedWords = Pattern.compile("(.*?)=\\?([^\\?]*?)\\?([^\\?]*?)\\?([^\\?]*?)\\?=", Pattern.DOTALL);

    /**
     * Decodes a string containing encoded words as defined by RFC 2047.
     * Encoded words have the form
     * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
     * quoted-printable and 'B' or 'b' for Base64.
     * <p>
     * Encoded words that cannot be decoded are copied to the result
     * unchanged. Whitespace between two successfully decoded encoded words
     * is dropped.
     *
     * @param body the string to decode.
     * @return the decoded string.
     */
    public static String decodeEncodedWords(String body) {
        StringBuilder sb = new StringBuilder();
        boolean previousWasEncoded = false;

        // End of the last match; everything from here on that is not matched
        // again is copied verbatim after the loop.
        int tailIndex = 0;

        final Matcher matcher = parseEncodedWords.matcher(body);
        while (matcher.find()) {
            String separator = matcher.group(1);
            String mimeCharset = matcher.group(2);
            String encoding = matcher.group(3);
            String encodedText = matcher.group(4);

            final String decoded = decodeEncodedWord(mimeCharset, encoding, encodedText);

            // The text is appended literally on purpose. Going through
            // Matcher.appendReplacement would interpret '$' and '\' inside
            // the decoded (or raw) text as group references / escapes and
            // corrupt the output or throw. Because the pattern starts with a
            // lazy catch-all group, every match begins exactly where the
            // previous one ended, so no input is skipped between matches.
            if (decoded == null) {
                sb.append(matcher.group(0));
                previousWasEncoded = false;
            } else {
                if (!previousWasEncoded || !CharsetUtil.isWhitespace(separator)) {
                    sb.append(separator);
                }
                sb.append(decoded);
                previousWasEncoded = true;
            }

            tailIndex = matcher.end();
        }

        sb.append(body, tailIndex, body.length());
        return sb.toString();
    }

    /**
     * Decodes one encoded word in a string as defined by RFC 2047.
     * Encoded words have the form
     * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
     * quoted-printable and 'B' or 'b' for Base64.
     *
     * @param body the string to decode.
     * @param begin the index of the first char of the encoded word to decode
     *        (the '=' of the leading "=?").
     * @param end the index just past the last char of the encoded word to
     *        decode; the closing "?=" is expected at <code>end - 2</code>.
     * @return the decoded string or null on error, including the case where
     *         the charset and encoding separators cannot be found before the
     *         closing "?=".
     */
    public static String decodeEncodedWord(String body, int begin, int end) {
        // Position of the '?' that belongs to the closing "?=".
        final int closingMark = end - 2;

        int qm1 = body.indexOf('?', begin + 2);
        if (qm1 < 0 || qm1 >= closingMark) {
            return null;
        }

        int qm2 = body.indexOf('?', qm1 + 1);
        if (qm2 < 0 || qm2 >= closingMark) {
            return null;
        }

        String mimeCharset = body.substring(begin + 2, qm1);
        String encoding = body.substring(qm1 + 1, qm2);
        String encodedText = body.substring(qm2 + 1, closingMark);

        return decodeEncodedWord(mimeCharset, encoding, encodedText);
    }

    /**
     * Decodes an encoded string with a given charset and encoding.
     * The encoding is either 'Q' or 'q' for quoted-printable and
     * 'B' or 'b' for Base64.
     *
     * @param mimeCharset the charset, e.g. "UTF-8" or "ISO-8859-1"
     * @param encoding 'Q','q' for quoted-printable or 'B','b' for Base64
     * @param encodedText the encoded text
     * @return the decoded string or null on error (unknown or unsupported
     *         charset, empty encoded text, unknown encoding, or a runtime
     *         failure while decoding).
     */
    private static String decodeEncodedWord(String mimeCharset, String encoding, String encodedText) {
        String charset = CharsetUtil.toJavaCharset(mimeCharset);
        if (charset == null) {
            if (log.isWarnEnabled()) {
                log.warn("MIME charset '" + mimeCharset + "' in encoded word '" +
                        recombineEncodedWord(mimeCharset, encoding, encodedText) + "' doesn't have a "
                        + "corresponding Java charset");
            }
            return null;
        } else if (!CharsetUtil.isDecodingSupported(charset)) {
            if (log.isWarnEnabled()) {
                log.warn("Current JDK doesn't support decoding of charset '"
                        + charset + "' (MIME charset '" + mimeCharset
                        + "' in encoded word '" + recombineEncodedWord(mimeCharset, encoding, encodedText)
                        + "')");
            }
            return null;
        }

        if (encodedText.length() == 0) {
            if (log.isWarnEnabled()) {
                log.warn("Missing encoded text in encoded word: '" + recombineEncodedWord(mimeCharset, encoding, encodedText) + "'");
            }
            return null;
        }

        try {
            if (encoding.equalsIgnoreCase("Q")) {
                return DecoderUtil.decodeQ(encodedText, charset);
            } else if (encoding.equalsIgnoreCase("B")) {
                return DecoderUtil.decodeB(encodedText, charset);
            } else {
                if (log.isWarnEnabled()) {
                    log.warn("Warning: Unknown encoding in encoded word '" + recombineEncodedWord(mimeCharset, encoding, encodedText) + "'");
                }
                return null;
            }
        } catch (UnsupportedEncodingException e) {
            // should not happen because of isDecodingSupported check above
            if (log.isWarnEnabled()) {
                log.warn("Unsupported encoding in encoded word '" + recombineEncodedWord(mimeCharset, encoding, encodedText) + "'", e);
            }
            return null;
        } catch (RuntimeException e) {
            // Best effort: a word that cannot be decoded is reported as
            // an error (null) instead of aborting the caller.
            if (log.isWarnEnabled()) {
                log.warn("Could not decode encoded word '" + recombineEncodedWord(mimeCharset, encoding, encodedText) + "'", e);
            }
            return null;
        }
    }

    /**
     * Helper method to recombine mimeCharset, encoding and encodedText to
     * an encoded word in the form =?mimeCharset?encoding?encodedText?=
     * (used for log messages).
     *
     * @param mimeCharset the charset, e.g. "UTF-8" or "ISO-8859-1"
     * @param encoding 'Q','q' for quoted-printable or 'B','b' for Base64
     * @param encodedText the encoded text
     * @return the recombined encoded word.
     */
    private static String recombineEncodedWord(String mimeCharset, String encoding, String encodedText) {
        return "=?" + mimeCharset + "?" + encoding + "?" + encodedText + "?=";
    }

    /**
     * Replaces every '_' in the given string with "=20".
     *
     * @param str the string to process.
     * @return the string with all underscores replaced; the same instance
     *         if it contains no underscore.
     */
    private static String replaceUnderscores(String str) {
        // Nothing to replace: avoid copying the string.
        if (str.indexOf('_') < 0) {
            return str;
        }

        // probably faster than String#replace(CharSequence, CharSequence)
        StringBuilder sb = new StringBuilder(128);

        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c == '_') {
                sb.append("=20");
            } else {
                sb.append(c);
            }
        }

        return sb.toString();
    }
}