package org.apache.james.mime4j.codec;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.james.mime4j.codec.DecoderUtil;
import org.apache.james.mime4j.util.CharsetUtil;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class RegexDecoderUtil {
final static Log log = LogFactory.getLog(RegexDecoderUtil.class);
final static Pattern regex = Pattern.compile("(.*?)=\\?(.*?)\\?(.*?)\\?(.*?)\\?=", Pattern.DOTALL);
public static String decodeEncodedWords(String body) {
StringBuffer sb = new StringBuffer();
boolean previousWasEncoded = false;
final Matcher matcher = regex.matcher(body);
while (matcher.find()) {
String separator = matcher.group(1);
String mimeCharset = matcher.group(2);
String encoding = matcher.group(3);
String encodedText = matcher.group(4);
final String decoded = decodeEncodedWord(mimeCharset, encoding, encodedText);
if (decoded == null) {
matcher.appendReplacement(sb, matcher.group(0));
previousWasEncoded = false;
} else {
if ((!previousWasEncoded) || (!CharsetUtil.isWhitespace(separator)))
sb.append(separator);
matcher.appendReplacement(sb, decoded);
previousWasEncoded = true;
}
}
matcher.appendTail(sb);
return sb.toString();
}
public static String decodeEncodedWord(String body, int begin, int end) {
int qm1 = body.indexOf('?', begin + 2);
if (qm1 == end - 2)
return null;
int qm2 = body.indexOf('?', qm1 + 1);
if (qm2 == end - 2)
return null;
String mimeCharset = body.substring(begin + 2, qm1);
String encoding = body.substring(qm1 + 1, qm2);
String encodedText = body.substring(qm2 + 1, end - 2);
return decodeEncodedWord(mimeCharset, encoding, encodedText);
}
private static String decodeEncodedWord(String mimeCharset, String encoding, String encodedText) {
String charset = CharsetUtil.toJavaCharset(mimeCharset);
if (charset == null) {
if (log.isWarnEnabled()) {
log.warn("MIME charset '" + mimeCharset + "' doesn't have a "
+ "corresponding Java charset");
}
return null;
} else if (!CharsetUtil.isDecodingSupported(charset)) {
if (log.isWarnEnabled()) {
log.warn("Current JDK doesn't support decoding of charset '"
+ charset + "' (MIME charset '" + mimeCharset
+ "')");
}
return null;
}
if (encodedText.length() == 0) {
if (log.isWarnEnabled()) {
log.warn("Missing encoded text in encoded word: ");
}
return null;
}
try {
if (encoding.equalsIgnoreCase("Q")) {
return DecoderUtil.decodeQ(encodedText, charset);
} else if (encoding.equalsIgnoreCase("B")) {
return DecoderUtil.decodeB(encodedText, charset);
} else {
if (log.isWarnEnabled()) {
log.warn("Warning: Unknown encoding in encoded word ");
}
return null;
}
} catch (UnsupportedEncodingException e) {
// should not happen because of isDecodingSupported check above
if (log.isWarnEnabled()) {
log.warn("Unsupported encoding in encoded word", e);
}
return null;
} catch (RuntimeException e) {
if (log.isWarnEnabled()) {
log.warn("Could not decode encoded word", e);
}
return null;
}
}
}