4242 * to ensure the integrity of the data should the message pass through a character-translating, and/or line-wrapping
4343 * gateway.
4444 * </p>
45- *
45+ *
46+ * <p>
47+ * Note:
48+ * </p>
49+ * <p>
50+ * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
51+ * does not lend itself well to the byte[]-oriented codec framework. Complete the codec once the streamable codec
52+ * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
53+ * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance the Q codec.
54+ * </p>
55+ *
4656 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
4757 * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
48- *
58+ *
4959 * @author Apache Software Foundation
5060 * @since 1.3
5161 * @version $Id$
@@ -66,14 +76,6 @@ public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, Strin
6676 private static final byte TAB = 9 ;
6777
6878 private static final byte SPACE = 32 ;
69-
70- private static final byte CR = 13 ;
71-
72- private static final byte LF = 10 ;
73-
74- /** Safe line length for quoted printable encoded text. */
75- private static final int SAFE_LENGTH = 73 ;
76-
7779 // Static initializer for printable chars collection
7880 static {
7981 // alpha characters
@@ -109,78 +111,26 @@ public QuotedPrintableCodec(String charset) {
109111 * Encodes byte into its quoted-printable representation.
110112 *
111113 * @param b
112- * byte to encode
114+ * byte to encode
113115 * @param buffer
114- * the buffer to write to
115- * @return The number of bytes written to the <code>buffer</code>
116+ * the buffer to write to
116117 */
117- private static final int encodeQuotedPrintable (int b , ByteArrayOutputStream buffer ) {
118+ private static final void encodeQuotedPrintable (int b , ByteArrayOutputStream buffer ) {
118119 buffer .write (ESCAPE_CHAR );
119120 char hex1 = Character .toUpperCase (Character .forDigit ((b >> 4 ) & 0xF , 16 ));
120121 char hex2 = Character .toUpperCase (Character .forDigit (b & 0xF , 16 ));
121122 buffer .write (hex1 );
122123 buffer .write (hex2 );
123- return 3 ;
124- }
125-
126- /**
127- * Return the byte at position <code>index</code> of the byte array and
128- * make sure it is unsigned.
129- *
130- * @param index
131- * position in the array
132- * @param bytes
133- * the byte array
134- * @return the unsigned octet at position <code>index</code> from the array
135- */
136- private static int getUnsignedOctet (final int index , final byte [] bytes ) {
137- int b = bytes [index ];
138- if (b < 0 ) {
139- b = 256 + b ;
140- }
141- return b ;
142- }
143-
144- /**
145- * Write a byte to the buffer.
146- *
147- * @param b
148- * byte to write
149- * @param encode
150- * indicates whether the octet shall be encoded
151- * @param buffer
152- * the buffer to write to
153- * @return the number of bytes that have been written to the buffer
154- */
155- private static int encodeByte (final int b , final boolean encode ,
156- final ByteArrayOutputStream buffer ) {
157- if (encode ) {
158- return encodeQuotedPrintable (b , buffer );
159- } else {
160- buffer .write (b );
161- return 1 ;
162- }
163- }
164-
165- /**
166- * Checks whether the given byte is whitespace.
167- *
168- * @param b
169- * byte to be checked
170- * @return <code>true</code> if the byte is either a space or tab character
171- */
172- private static boolean isWhitespace (final int b ) {
173- return b == SPACE || b == TAB ;
174124 }
175125
176126 /**
177127 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
178- *
128+ *
179129 * <p>
180- * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
181- * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
130+ * This function implements a subset of the quoted-printable encoding specification (rule #1 and rule #2) as defined in
131+ * RFC 1521 and is suitable for encoding binary data and unformatted text.
182132 * </p>
183- *
133+ *
184134 * @param printable
185135 * bitset of characters deemed quoted-printable
186136 * @param bytes
@@ -195,59 +145,29 @@ public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes)
195145 printable = PRINTABLE_CHARS ;
196146 }
197147 ByteArrayOutputStream buffer = new ByteArrayOutputStream ();
198- int pos = 1 ;
199- // encode up to buffer.length - 3, the last three octets will be treated
200- // separately for simplification of note #3
201- for (int i = 0 ; i < bytes .length - 3 ; i ++) {
202- int b = getUnsignedOctet (i , bytes );
203- if (pos < SAFE_LENGTH ) {
204- // up to this length it is safe to add any byte, encoded or not
205- pos += encodeByte (b , !printable .get (b ), buffer );
148+ for (byte c : bytes ) {
149+ int b = c ;
150+ if (b < 0 ) {
151+ b = 256 + b ;
152+ }
153+ if (printable .get (b )) {
154+ buffer .write (b );
206155 } else {
207- // rule #3: whitespace at the end of a line *must* be encoded
208- encodeByte (b , !printable .get (b ) || isWhitespace (b ), buffer );
209-
210- // rule #5: soft line break
211- buffer .write (ESCAPE_CHAR );
212- buffer .write (CR );
213- buffer .write (LF );
214- pos = 1 ;
156+ encodeQuotedPrintable (b , buffer );
215157 }
216158 }
217-
218- // rule #3: whitespace at the end of a line *must* be encoded
219- // if we would do a soft break line after this octet, encode whitespace
220- int b = getUnsignedOctet (bytes .length - 3 , bytes );
221- boolean encode = !printable .get (b ) || (isWhitespace (b ) && pos > SAFE_LENGTH - 5 );
222- pos += encodeByte (b , encode , buffer );
223-
224- // note #3: '=' *must not* be the ultimate or penultimate character
225- // simplification: if < 6 bytes left, do a soft line break as we may need
226- // exactly 6 bytes space for the last 2 bytes
227- if (pos > SAFE_LENGTH - 2 ) {
228- buffer .write (ESCAPE_CHAR );
229- buffer .write (CR );
230- buffer .write (LF );
231- }
232- for (int i = bytes .length - 2 ; i < bytes .length ; i ++) {
233- b = getUnsignedOctet (i , bytes );
234- // rule #3: trailing whitespace shall be encoded
235- encode = !printable .get (b ) || (i > bytes .length - 2 && isWhitespace (b ));
236- encodeByte (b , encode , buffer );
237- }
238-
239159 return buffer .toByteArray ();
240160 }
241161
242162 /**
243- * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are
244- * converted back to their original representation.
245- *
163+ * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
164+ * back to their original representation.
165+ *
246166 * <p>
247- * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5 ) as
248- * defined in RFC 1521.
167+ * This function implements a subset of the quoted-printable encoding specification (rule #1 and rule #2) as defined in
168+ * RFC 1521.
249169 * </p>
250- *
170+ *
251171 * @param bytes
252172 * array of quoted-printable characters
253173 * @return array of original bytes
@@ -260,21 +180,16 @@ public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderExc
260180 }
261181 ByteArrayOutputStream buffer = new ByteArrayOutputStream ();
262182 for (int i = 0 ; i < bytes .length ; i ++) {
263- final int b = bytes [i ];
183+ int b = bytes [i ];
264184 if (b == ESCAPE_CHAR ) {
265185 try {
266- // if the next octet is a CR we have found a soft line break
267- if (bytes [++i ] == CR ) {
268- continue ;
269- }
270- int u = Utils .digit16 (bytes [i ]);
186+ int u = Utils .digit16 (bytes [++i ]);
271187 int l = Utils .digit16 (bytes [++i ]);
272188 buffer .write ((char ) ((u << 4 ) + l ));
273189 } catch (ArrayIndexOutOfBoundsException e ) {
274190 throw new DecoderException ("Invalid quoted-printable encoding" , e );
275191 }
276- } else if (b != CR && b != LF ) {
277- // every other octet is appended except for CR & LF
192+ } else {
278193 buffer .write (b );
279194 }
280195 }
@@ -285,8 +200,8 @@ public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderExc
285200 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
286201 *
287202 * <p>
288- * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
289- * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
203+ * This function implements a subset of the quoted-printable encoding specification (rule #1 and rule #2) as defined in
204+ * RFC 1521 and is suitable for encoding binary data and unformatted text.
290205 * </p>
291206 *
292207 * @param bytes
@@ -302,8 +217,8 @@ public byte[] encode(byte[] bytes) {
302217 * back to their original representation.
303218 *
304219 * <p>
305- * This function fully implements the quoted-printable encoding specification (rule #1 through rule #2)
306- * as defined in RFC 1521.
220+ * This function implements a subset of the quoted-printable encoding specification (rule #1 and rule #2) as defined in
221+ * RFC 1521.
307222 * </p>
308223 *
309224 * @param bytes
@@ -320,8 +235,8 @@ public byte[] decode(byte[] bytes) throws DecoderException {
320235 * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
321236 *
322237 * <p>
323- * This function fully implements the quoted-printable encoding specification (rule #1 through rule #2)
324- * as defined in RFC 1521 and is suitable for encoding binary data.
238+ * This function implements a subset of the quoted-printable encoding specification (rule #1 and rule #2) as defined in
239+ * RFC 1521 and is suitable for encoding binary data.
325240 * </p>
326241 *
327242 * @param pString
@@ -450,8 +365,8 @@ public String getDefaultCharset() {
450365 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
451366 *
452367 * <p>
453- * This function fully implements the quoted-printable encoding specification (rule #1 through rule #2)
454- * as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
368+ * This function implements a subset of the quoted-printable encoding specification (rule #1 and rule #2) as defined in
369+ * RFC 1521 and is suitable for encoding binary data and unformatted text.
455370 * </p>
456371 *
457372 * @param pString
0 commit comments