/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
****************************************************************/
package org.apache.james.mime4j.codec;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.james.mime4j.util.CharsetUtil;
/**
* Static methods for decoding strings, byte arrays and encoded words.
*/
public class DecoderUtil {
private static final Pattern PATTERN_ENCODED_WORD = Pattern.compile(
"(.*?)=\\?([^\\?]+?)\\?(\\w)\\?([^\\?]+?)\\?=", Pattern.DOTALL);
/**
* Decodes a string containing quoted-printable encoded data.
*
* @param s the string to decode.
* @return the decoded bytes.
*/
private static byte[] decodeQuotedPrintable(String s, DecodeMonitor monitor) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
byte[] bytes = s.getBytes("US-ASCII");
QuotedPrintableInputStream is = new QuotedPrintableInputStream(
new ByteArrayInputStream(bytes), monitor);
int b = 0;
while ((b = is.read()) != -1) {
baos.write(b);
}
} catch (IOException e) {
// This should never happen!
throw new IllegalStateException(e);
}
return baos.toByteArray();
}
/**
* Decodes a string containing base64 encoded data.
*
* @param s the string to decode.
* @param monitor
* @return the decoded bytes.
*/
private static byte[] decodeBase64(String s, DecodeMonitor monitor) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
byte[] bytes = s.getBytes("US-ASCII");
Base64InputStream is = new Base64InputStream(
new ByteArrayInputStream(bytes), monitor);
int b = 0;
while ((b = is.read()) != -1) {
baos.write(b);
}
} catch (IOException e) {
// This should never happen!
throw new IllegalStateException(e);
}
return baos.toByteArray();
}
/**
* Decodes an encoded text encoded with the 'B' encoding (described in
* RFC 2047) found in a header field body.
*
* @param encodedText the encoded text to decode.
* @param charset the Java charset to use.
* @param monitor
* @return the decoded string.
* @throws UnsupportedEncodingException if the given Java charset isn't
* supported.
*/
static String decodeB(String encodedText, String charset, DecodeMonitor monitor)
throws UnsupportedEncodingException {
byte[] decodedBytes = decodeBase64(encodedText, monitor);
return new String(decodedBytes, charset);
}
/**
* Decodes an encoded text encoded with the 'Q' encoding (described in
* RFC 2047) found in a header field body.
*
* @param encodedText the encoded text to decode.
* @param charset the Java charset to use.
* @return the decoded string.
* @throws UnsupportedEncodingException if the given Java charset isn't
* supported.
*/
static String decodeQ(String encodedText, String charset, DecodeMonitor monitor)
throws UnsupportedEncodingException {
encodedText = replaceUnderscores(encodedText);
byte[] decodedBytes = decodeQuotedPrintable(encodedText, monitor);
return new String(decodedBytes, charset);
}
static String decodeEncodedWords(String body) {
return decodeEncodedWords(body, DecodeMonitor.SILENT);
}
/**
* Decodes a string containing encoded words as defined by RFC 2047. Encoded
* words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
* or 'q' for quoted-printable and 'B' or 'b' for base64.
*
* @param body the string to decode
* @param monitor the DecodeMonitor to be used.
* @return the decoded string.
* @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing)
*/
public static String decodeEncodedWords(String body, DecodeMonitor monitor) throws IllegalArgumentException {
int tailIndex = 0;
boolean lastMatchValid = false;
StringBuilder sb = new StringBuilder();
for (Matcher matcher = PATTERN_ENCODED_WORD.matcher(body); matcher.find();) {
String separator = matcher.group(1);
String mimeCharset = matcher.group(2);
String encoding = matcher.group(3);
String encodedText = matcher.group(4);
String decoded = null;
decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor);
if (decoded == null) {
sb.append(matcher.group(0));
} else {
if (!lastMatchValid || !CharsetUtil.isWhitespace(separator)) {
sb.append(separator);
}
sb.append(decoded);
}
tailIndex = matcher.end();
lastMatchValid = decoded != null;
}
if (tailIndex == 0) {
return body;
} else {
sb.append(body.substring(tailIndex));
return sb.toString();
}
}
// return null on error
private static String tryDecodeEncodedWord(final String mimeCharset,
final String encoding, final String encodedText, DecodeMonitor monitor) throws IllegalArgumentException {
String charset = CharsetUtil.toJavaCharset(mimeCharset);
if (charset == null) {
monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
"Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset");
return null;
} else if (!CharsetUtil.isDecodingSupported(charset)) {
monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
"Current JDK doesn't support decoding of charset '", charset,
"' - MIME charset '", mimeCharset, "' in encoded word");
return null;
}
if (encodedText.length() == 0) {
monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
"Missing encoded text in encoded word");
return null;
}
try {
if (encoding.equalsIgnoreCase("Q")) {
return DecoderUtil.decodeQ(encodedText, charset, monitor);
} else if (encoding.equalsIgnoreCase("B")) {
return DecoderUtil.decodeB(encodedText, charset, monitor);
} else {
monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
"Warning: Unknown encoding in encoded word");
return null;
}
} catch (UnsupportedEncodingException e) {
// should not happen because of isDecodingSupported check above
monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
"Unsupported encoding (", e.getMessage(), ") in encoded word");
return null;
} catch (RuntimeException e) {
monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
"Could not decode (", e.getMessage(), ") encoded word");
return null;
}
}
private static void monitor(DecodeMonitor monitor, String mimeCharset, String encoding,
String encodedText, String dropDesc, String... strings) throws IllegalArgumentException {
if (monitor.isListening()) {
String encodedWord = recombine(mimeCharset, encoding, encodedText);
StringBuilder text = new StringBuilder();
for (String str : strings) {
text.append(str);
}
text.append(" (");
text.append(encodedWord);
text.append(")");
String exceptionDesc = text.toString();
if (monitor.warn(exceptionDesc, dropDesc))
throw new IllegalArgumentException(text.toString());
}
}
private static String recombine(final String mimeCharset,
final String encoding, final String encodedText) {
return "=?" + mimeCharset + "?" + encoding + "?" + encodedText + "?=";
}
// Replace _ with =20
private static String replaceUnderscores(String str) {
// probably faster than String#replace(CharSequence, CharSequence)
StringBuilder sb = new StringBuilder(128);
for (int i = 0; i < str.length(); i++) {
char c = str.charAt(i);
if (c == '_') {
sb.append("=20");
} else {
sb.append(c);
}
}
return sb.toString();
}
}