/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.geronimo.mail.util; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; import java.io.PushbackInputStream; import java.io.UnsupportedEncodingException; public class QuotedPrintableEncoder implements Encoder { static protected final byte[] encodingTable = { (byte)'0', (byte)'1', (byte)'2', (byte)'3', (byte)'4', (byte)'5', (byte)'6', (byte)'7', (byte)'8', (byte)'9', (byte)'A', (byte)'B', (byte)'C', (byte)'D', (byte)'E', (byte)'F' }; /* * set up the decoding table. */ static protected final byte[] decodingTable = new byte[128]; static { // initialize the decoding table for (int i = 0; i < encodingTable.length; i++) { decodingTable[encodingTable[i]] = (byte)i; } } // default number of characters we will write per line. static private final int DEFAULT_CHARS_PER_LINE = 76; // the output stream we're wrapped around protected OutputStream out; // the number of bytes written; protected int bytesWritten = 0; // number of bytes written on the current line protected int lineCount = 0; // line length we're dealing with protected int lineLength; // number of deferred whitespace characters in decode mode. protected int deferredWhitespace = 0; protected int cachedCharacter = -1; // indicates whether the last character was a '\r', potentially part of a CRLF sequence. protected boolean lastCR = false; // remember whether last character was a white space. protected boolean lastWhitespace = false; public QuotedPrintableEncoder() { this(null, DEFAULT_CHARS_PER_LINE); } public QuotedPrintableEncoder(final OutputStream out) { this(out, DEFAULT_CHARS_PER_LINE); } public QuotedPrintableEncoder(final OutputStream out, final int lineLength) { this.out = out; this.lineLength = lineLength; } private void checkDeferred(final int ch) throws IOException { // was the last character we looked at a whitespace? Try to decide what to do with it now. if (lastWhitespace) { // if this whitespace is at the end of the line, write it out encoded if (ch == '\r' || ch == '\n') { writeEncodedCharacter(' '); } else { // we can write this out without encoding. writeCharacter(' '); } // we always turn this off. lastWhitespace = false; } // deferred carriage return? else if (lastCR) { // if the char following the CR was not a new line, write an EOL now. if (ch != '\n') { writeEOL(); } // we always turn this off too lastCR = false; } } /** * encode the input data producing a UUEncoded output stream. * * @param data The array of byte data. * @param off The starting offset within the data. * @param length Length of the data to encode. * * @return the number of bytes produced. */ public int encode(final byte[] data, int off, final int length) throws IOException { final int endOffset = off + length; while (off < endOffset) { // get the character final byte ch = data[off++]; // handle the encoding of this character. encode(ch); } return bytesWritten; } public void encode(int ch) throws IOException { // make sure this is just a single byte value. ch = ch &0xFF; // see if we had to defer handling of a whitespace or '\r' character, and handle it if necessary. checkDeferred(ch); // different characters require special handling. switch (ch) { // spaces require special handling. If the next character is a line terminator, then // the space needs to be encoded. case ' ': { // at this point, we don't know whether this needs encoding or not. If the next // character is a linend, it gets encoded. If anything else, we just write it as is. lastWhitespace = true; // turn off any CR flags. lastCR = false; break; } // carriage return, which may be part of a CRLF sequence. case '\r': { // just flag this until we see the next character. lastCR = true; break; } // a new line character...we need to check to see if it was paired up with a '\r' char. case '\n': { // we always write this out for a newline. We defer CRs until we see if the LF follows. writeEOL(); break; } // an '=' is the escape character for an encoded character, so it must also // be written encoded. case '=': { writeEncodedCharacter(ch); break; } // all other characters. If outside the printable character range, write it encoded. default: { if (ch < 32 || ch >= 127) { writeEncodedCharacter(ch); } else { writeCharacter(ch); } break; } } } /** * encode the input data producing a UUEncoded output stream. * * @param data The array of byte data. * @param off The starting offset within the data. * @param length Length of the data to encode. * * @return the number of bytes produced. */ public int encode(final byte[] data, int off, final int length, final String specials) throws IOException { final int endOffset = off + length; while (off < endOffset) { // get the character final byte ch = data[off++]; // handle the encoding of this character. encode(ch, specials); } return bytesWritten; } /** * encode the input data producing a UUEncoded output stream. * * @param data The array of byte data. * @param off The starting offset within the data. * @param length Length of the data to encode. * * @return the number of bytes produced. */ public int encode(final PushbackInputStream in, final StringBuffer out, final String specials, final int limit) throws IOException { int count = 0; while (count < limit) { int ch = in.read(); if (ch == -1) { return count; } // make sure this is just a single byte value. ch = ch &0xFF; // spaces require special handling. If the next character is a line terminator, then // the space needs to be encoded. if (ch == ' ') { // blanks get translated into underscores, because the encoded tokens can't have embedded blanks. out.append('_'); count++; } // non-ascii chars and the designated specials all get encoded. else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) { // we need at least 3 characters to write this out, so we need to // forget we saw this one and try in the next segment. if (count + 3 > limit) { in.unread(ch); return count; } out.append('='); out.append((char)encodingTable[ch >> 4]); out.append((char)encodingTable[ch & 0x0F]); count += 3; } else { // good character, just use unchanged. out.append((char)ch); count++; } } return count; } /** * Specialized version of the decoder that handles encoding of * RFC 2047 encoded word values. This has special handling for * certain characters, but less special handling for blanks and * linebreaks. * * @param ch * @param specials * * @exception IOException */ public void encode(int ch, final String specials) throws IOException { // make sure this is just a single byte value. ch = ch &0xFF; // spaces require special handling. If the next character is a line terminator, then // the space needs to be encoded. if (ch == ' ') { // blanks get translated into underscores, because the encoded tokens can't have embedded blanks. writeCharacter('_'); } // non-ascii chars and the designated specials all get encoded. else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) { writeEncodedCharacter(ch); } else { // good character, just use unchanged. writeCharacter(ch); } } /** * encode the input data producing a UUEncoded output stream. * * @param data The array of byte data. * @param off The starting offset within the data. * @param length Length of the data to encode. * @param out The output stream the encoded data is written to. * * @return the number of bytes produced. */ public int encode(final byte[] data, final int off, final int length, final OutputStream out) throws IOException { // make sure we're writing to the correct stream this.out = out; bytesWritten = 0; // do the actual encoding return encode(data, off, length); } /** * decode the uuencoded byte data writing it to the given output stream * * @param data The array of byte data to decode. * @param off Starting offset within the array. * @param length The length of data to encode. * @param out The output stream used to return the decoded data. * * @return the number of bytes produced. * @exception IOException */ public int decode(final byte[] data, int off, final int length, final OutputStream out) throws IOException { // make sure we're writing to the correct stream this.out = out; final int endOffset = off + length; int bytesWritten = 0; while (off < endOffset) { final byte ch = data[off++]; // space characters are a pain. We need to scan ahead until we find a non-space character. // if the character is a line terminator, we need to discard the blanks. if (ch == ' ') { int trailingSpaces = 1; // scan forward, counting the characters. while (off < endOffset && data[off] == ' ') { // step forward and count this. off++; trailingSpaces++; } // is this a lineend at the current location? if (off >= endOffset || data[off] == '\r' || data[off] == '\n') { // go to the next one continue; } else { // make sure we account for the spaces in the output count. bytesWritten += trailingSpaces; // write out the blank characters we counted and continue with the non-blank. while (trailingSpaces-- > 0) { out.write(' '); } } } else if (ch == '=') { // we found an encoded character. Reduce the 3 char sequence to one. // but first, make sure we have two characters to work with. if (off + 1 >= endOffset) { throw new IOException("Invalid quoted printable encoding"); } // convert the two bytes back from hex. byte b1 = data[off++]; byte b2 = data[off++]; // we've found an encoded carriage return. The next char needs to be a newline if (b1 == '\r') { if (b2 != '\n') { throw new IOException("Invalid quoted printable encoding"); } // this was a soft linebreak inserted by the encoding. We just toss this away // on decode. } else { // this is a hex pair we need to convert back to a single byte. b1 = decodingTable[b1]; b2 = decodingTable[b2]; out.write((b1 << 4) | b2); // 3 bytes in, one byte out bytesWritten++; } } else { // simple character, just write it out. out.write(ch); bytesWritten++; } } return bytesWritten; } /** * Decode a byte array of data. * * @param data The data array. * @param out The output stream target for the decoded data. * * @return The number of bytes written to the stream. * @exception IOException */ public int decodeWord(final byte[] data, final OutputStream out) throws IOException { return decodeWord(data, 0, data.length, out); } /** * decode the uuencoded byte data writing it to the given output stream * * @param data The array of byte data to decode. * @param off Starting offset within the array. * @param length The length of data to encode. * @param out The output stream used to return the decoded data. * * @return the number of bytes produced. * @exception IOException */ public int decodeWord(final byte[] data, int off, final int length, final OutputStream out) throws IOException { // make sure we're writing to the correct stream this.out = out; final int endOffset = off + length; int bytesWritten = 0; while (off < endOffset) { final byte ch = data[off++]; // space characters were translated to '_' on encode, so we need to translate them back. if (ch == '_') { out.write(' '); } else if (ch == '=') { // we found an encoded character. Reduce the 3 char sequence to one. // but first, make sure we have two characters to work with. if (off + 1 >= endOffset) { throw new IOException("Invalid quoted printable encoding"); } // convert the two bytes back from hex. final byte b1 = data[off++]; final byte b2 = data[off++]; // we've found an encoded carriage return. The next char needs to be a newline if (b1 == '\r') { if (b2 != '\n') { throw new IOException("Invalid quoted printable encoding"); } // this was a soft linebreak inserted by the encoding. We just toss this away // on decode. } else { // this is a hex pair we need to convert back to a single byte. final byte c1 = decodingTable[b1]; final byte c2 = decodingTable[b2]; out.write((c1 << 4) | c2); // 3 bytes in, one byte out bytesWritten++; } } else { // simple character, just write it out. out.write(ch); bytesWritten++; } } return bytesWritten; } /** * decode the UUEncoded String data writing it to the given output stream. * * @param data The String data to decode. * @param out The output stream to write the decoded data to. * * @return the number of bytes produced. * @exception IOException */ public int decode(final String data, final OutputStream out) throws IOException { try { // just get the byte data and decode. final byte[] bytes = data.getBytes("US-ASCII"); return decode(bytes, 0, bytes.length, out); } catch (final UnsupportedEncodingException e) { throw new IOException("Invalid UUEncoding"); } } private void checkLineLength(final int required) throws IOException { // if we're at our line length limit, write out a soft line break and reset. if ((lineCount + required) >= lineLength ) { out.write('='); out.write('\r'); out.write('\n'); bytesWritten += 3; lineCount = 0; } } public void writeEncodedCharacter(final int ch) throws IOException { // we need 3 characters for an encoded value checkLineLength(3); out.write('='); out.write(encodingTable[ch >> 4]); out.write(encodingTable[ch & 0x0F]); lineCount += 3; bytesWritten += 3; } public void writeCharacter(final int ch) throws IOException { // we need 3 characters for an encoded value checkLineLength(1); out.write(ch); lineCount++; bytesWritten++; } public void writeEOL() throws IOException { out.write('\r'); out.write('\n'); lineCount = 0; bytesWritten += 3; } public int decode(final InputStream in) throws IOException { // we potentially need to scan over spans of whitespace characters to determine if they're real // we just return blanks until the count goes to zero. if (deferredWhitespace > 0) { deferredWhitespace--; return ' '; } // we may have needed to scan ahead to find the first non-blank character, which we would store here. // hand that back once we're done with the blanks. if (cachedCharacter != -1) { final int result = cachedCharacter; cachedCharacter = -1; return result; } int ch = in.read(); // reflect back an EOF condition. if (ch == -1) { return -1; } // space characters are a pain. We need to scan ahead until we find a non-space character. // if the character is a line terminator, we need to discard the blanks. if (ch == ' ') { // scan forward, counting the characters. while ((ch = in.read()) == ' ') { deferredWhitespace++; } // is this a lineend at the current location? if (ch == -1 || ch == '\r' || ch == '\n') { // those blanks we so zealously counted up don't really exist. Clear out the counter. deferredWhitespace = 0; // return the real significant character now. return ch; } // remember this character for later, after we've used up the deferred blanks. cachedCharacter = decodeNonspaceChar(in, ch); // return this space. We did not include this one in the deferred count, so we're right in sync. return ' '; } return decodeNonspaceChar(in, ch); } private int decodeNonspaceChar(final InputStream in, final int ch) throws IOException { if (ch == '=') { int b1 = in.read(); // we need to get two characters after the quotation marker if (b1 == -1) { throw new IOException("Truncated quoted printable data"); } int b2 = in.read(); // we need to get two characters after the quotation marker if (b2 == -1) { throw new IOException("Truncated quoted printable data"); } // we've found an encoded carriage return. The next char needs to be a newline if (b1 == '\r') { if (b2 != '\n') { throw new IOException("Invalid quoted printable encoding"); } // this was a soft linebreak inserted by the encoding. We just toss this away // on decode. We need to return something, so recurse and decode the next. return decode(in); } else { // this is a hex pair we need to convert back to a single byte. b1 = decodingTable[b1]; b2 = decodingTable[b2]; return (b1 << 4) | b2; } } else { return ch; } } /** * Perform RFC-2047 word encoding using Q-P data encoding. * * @param in The source for the encoded data. * @param charset The charset tag to be added to each encoded data section. * @param specials The set of special characters that we require to encoded. * @param out The output stream where the encoded data is to be written. * @param fold Controls whether separate sections of encoded data are separated by * linebreaks or whitespace. * * @exception IOException */ public void encodeWord(final InputStream in, final String charset, final String specials, final OutputStream out, final boolean fold) throws IOException { // we need to scan ahead in a few places, which may require pushing characters back on to the stream. // make sure we have a stream where this is possible. final PushbackInputStream inStream = new PushbackInputStream(in); final PrintStream writer = new PrintStream(out); // segments of encoded data are limited to 75 byes, including the control sections. final int limit = 75 - 7 - charset.length(); boolean firstLine = true; final StringBuffer encodedString = new StringBuffer(76); while (true) { // encode another segment of data. encode(inStream, encodedString, specials, limit); // nothing encoded means we've hit the end of the data. if (encodedString.length() == 0) { break; } // if we have more than one segment, we need to insert separators. Depending on whether folding // was requested, this is either a blank or a linebreak. if (!firstLine) { if (fold) { writer.print("\r\n"); } else { writer.print(" "); } } // add the encoded word header writer.print("=?"); writer.print(charset); writer.print("?Q?"); // the data writer.print(encodedString.toString()); // and the terminator mark writer.print("?="); writer.flush(); // we reset the string buffer and reuse it. encodedString.setLength(0); // we need a delimiter between sections from this point on. firstLine = false; } } /** * Perform RFC-2047 word encoding using Base64 data encoding. * * @param in The source for the encoded data. * @param charset The charset tag to be added to each encoded data section. * @param out The output stream where the encoded data is to be written. * @param fold Controls whether separate sections of encoded data are separated by * linebreaks or whitespace. * * @exception IOException */ public void encodeWord(final byte[] data, final StringBuffer out, final String charset, final String specials) throws IOException { // append the word header out.append("=?"); out.append(charset); out.append("?Q?"); // add on the encodeded data encodeWordData(data, out, specials); // the end of the encoding marker out.append("?="); } /** * Perform RFC-2047 word encoding using Q-P data encoding. * * @param in The source for the encoded data. * @param charset The charset tag to be added to each encoded data section. * @param specials The set of special characters that we require to encoded. * @param out The output stream where the encoded data is to be written. * @param fold Controls whether separate sections of encoded data are separated by * linebreaks or whitespace. * * @exception IOException */ public void encodeWordData(final byte[] data, final StringBuffer out, final String specials) throws IOException { for (int i = 0; i < data.length; i++) { final int ch = data[i] & 0xff; ; // spaces require special handling. If the next character is a line terminator, then // the space needs to be encoded. if (ch == ' ') { // blanks get translated into underscores, because the encoded tokens can't have embedded blanks. out.append('_'); } // non-ascii chars and the designated specials all get encoded. else if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) { out.append('='); out.append((char)encodingTable[ch >> 4]); out.append((char)encodingTable[ch & 0x0F]); } else { // good character, just use unchanged. out.append((char)ch); } } } /** * Estimate the final encoded size of a segment of data. * This is used to ensure that the encoded blocks do * not get split across a unicode character boundary and * that the encoding will fit within the bounds of * a mail header line. * * @param data The data we're anticipating encoding. * * @return The size of the byte data in encoded form. */ public int estimateEncodedLength(final byte[] data, final String specials) { int count = 0; for (int i = 0; i < data.length; i++) { // make sure this is just a single byte value. final int ch = data[i] & 0xff; // non-ascii chars and the designated specials all get encoded. if (ch < 32 || ch >= 127 || specials.indexOf(ch) != -1) { // Q encoding translates a single char into 3 characters count += 3; } else { // non-encoded character count++; } } return count; } }