Skip to content

Commit 2922088

Browse files
committed
CODEC-121 will be for 2.0.
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/codec/trunk@1302639 13f79535-47bb-0310-9956-ffa450edef68
1 parent 0c7461b commit 2922088

2 files changed

Lines changed: 65 additions & 211 deletions

File tree

src/main/java/org/apache/commons/codec/net/QuotedPrintableCodec.java

Lines changed: 44 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,20 @@
4242
* to ensure the integrity of the data should the message pass through a character-translating, and/or line-wrapping
4343
* gateway.
4444
* </p>
45-
*
45+
*
46+
* <p>
47+
* Note:
48+
* </p>
49+
* <p>
50+
* Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
51+
* does not lend itself well to the byte[]-oriented codec framework. Complete the codec once the streamable codec
52+
* framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
53+
* for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), e.g. the Q codec.
54+
* </p>
55+
*
4656
* @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
4757
* Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
48-
*
58+
*
4959
* @author Apache Software Foundation
5060
* @since 1.3
5161
* @version $Id$
@@ -66,14 +76,6 @@ public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, Strin
6676
private static final byte TAB = 9;
6777

6878
private static final byte SPACE = 32;
69-
70-
private static final byte CR = 13;
71-
72-
private static final byte LF = 10;
73-
74-
/** Safe line length for quoted printable encoded text. */
75-
private static final int SAFE_LENGTH = 73;
76-
7779
// Static initializer for printable chars collection
7880
static {
7981
// alpha characters
@@ -109,78 +111,26 @@ public QuotedPrintableCodec(String charset) {
109111
* Encodes byte into its quoted-printable representation.
110112
*
111113
* @param b
112-
* byte to encode
114+
* byte to encode
113115
* @param buffer
114-
* the buffer to write to
115-
* @return The number of bytes written to the <code>buffer</code>
116+
* the buffer to write to
116117
*/
117-
private static final int encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
118+
private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
118119
buffer.write(ESCAPE_CHAR);
119120
char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
120121
char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
121122
buffer.write(hex1);
122123
buffer.write(hex2);
123-
return 3;
124-
}
125-
126-
/**
127-
* Return the byte at position <code>index</code> of the byte array and
128-
* make sure it is unsigned.
129-
*
130-
* @param index
131-
* position in the array
132-
* @param bytes
133-
* the byte array
134-
* @return the unsigned octet at position <code>index</code> from the array
135-
*/
136-
private static int getUnsignedOctet(final int index, final byte[] bytes) {
137-
int b = bytes[index];
138-
if (b < 0) {
139-
b = 256 + b;
140-
}
141-
return b;
142-
}
143-
144-
/**
145-
* Write a byte to the buffer.
146-
*
147-
* @param b
148-
* byte to write
149-
* @param encode
150-
* indicates whether the octet shall be encoded
151-
* @param buffer
152-
* the buffer to write to
153-
* @return the number of bytes that have been written to the buffer
154-
*/
155-
private static int encodeByte(final int b, final boolean encode,
156-
final ByteArrayOutputStream buffer) {
157-
if (encode) {
158-
return encodeQuotedPrintable(b, buffer);
159-
} else {
160-
buffer.write(b);
161-
return 1;
162-
}
163-
}
164-
165-
/**
166-
* Checks whether the given byte is whitespace.
167-
*
168-
* @param b
169-
* byte to be checked
170-
* @return <code>true</code> if the byte is either a space or tab character
171-
*/
172-
private static boolean isWhitespace(final int b) {
173-
return b == SPACE || b == TAB;
174124
}
175125

176126
/**
177127
* Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
178-
*
128+
*
179129
* <p>
180-
* This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
181-
* as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
130+
* This function implements a subset of the quoted-printable encoding specification (rules #1 and #2) as defined in
131+
* RFC 1521 and is suitable for encoding binary data and unformatted text.
182132
* </p>
183-
*
133+
*
184134
* @param printable
185135
* bitset of characters deemed quoted-printable
186136
* @param bytes
@@ -195,59 +145,29 @@ public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes)
195145
printable = PRINTABLE_CHARS;
196146
}
197147
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
198-
int pos = 1;
199-
// encode up to buffer.length - 3, the last three octets will be treated
200-
// separately for simplification of note #3
201-
for (int i = 0; i < bytes.length - 3; i++) {
202-
int b = getUnsignedOctet(i, bytes);
203-
if (pos < SAFE_LENGTH) {
204-
// up to this length it is safe to add any byte, encoded or not
205-
pos += encodeByte(b, !printable.get(b), buffer);
148+
for (byte c : bytes) {
149+
int b = c;
150+
if (b < 0) {
151+
b = 256 + b;
152+
}
153+
if (printable.get(b)) {
154+
buffer.write(b);
206155
} else {
207-
// rule #3: whitespace at the end of a line *must* be encoded
208-
encodeByte(b, !printable.get(b) || isWhitespace(b), buffer);
209-
210-
// rule #5: soft line break
211-
buffer.write(ESCAPE_CHAR);
212-
buffer.write(CR);
213-
buffer.write(LF);
214-
pos = 1;
156+
encodeQuotedPrintable(b, buffer);
215157
}
216158
}
217-
218-
// rule #3: whitespace at the end of a line *must* be encoded
219-
// if we would do a soft break line after this octet, encode whitespace
220-
int b = getUnsignedOctet(bytes.length - 3, bytes);
221-
boolean encode = !printable.get(b) || (isWhitespace(b) && pos > SAFE_LENGTH - 5);
222-
pos += encodeByte(b, encode, buffer);
223-
224-
// note #3: '=' *must not* be the ultimate or penultimate character
225-
// simplification: if < 6 bytes left, do a soft line break as we may need
226-
// exactly 6 bytes space for the last 2 bytes
227-
if (pos > SAFE_LENGTH - 2) {
228-
buffer.write(ESCAPE_CHAR);
229-
buffer.write(CR);
230-
buffer.write(LF);
231-
}
232-
for (int i = bytes.length - 2; i < bytes.length; i++) {
233-
b = getUnsignedOctet(i, bytes);
234-
// rule #3: trailing whitespace shall be encoded
235-
encode = !printable.get(b) || (i > bytes.length - 2 && isWhitespace(b));
236-
encodeByte(b, encode, buffer);
237-
}
238-
239159
return buffer.toByteArray();
240160
}
241161

242162
/**
243-
* Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are
244-
* converted back to their original representation.
245-
*
163+
* Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
164+
* back to their original representation.
165+
*
246166
* <p>
247-
* This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as
248-
* defined in RFC 1521.
167+
* This function implements a subset of the quoted-printable encoding specification (rules #1 and #2) as defined in
168+
* RFC 1521.
249169
* </p>
250-
*
170+
*
251171
* @param bytes
252172
* array of quoted-printable characters
253173
* @return array of original bytes
@@ -260,21 +180,16 @@ public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderExc
260180
}
261181
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
262182
for (int i = 0; i < bytes.length; i++) {
263-
final int b = bytes[i];
183+
int b = bytes[i];
264184
if (b == ESCAPE_CHAR) {
265185
try {
266-
// if the next octet is a CR we have found a soft line break
267-
if (bytes[++i] == CR) {
268-
continue;
269-
}
270-
int u = Utils.digit16(bytes[i]);
186+
int u = Utils.digit16(bytes[++i]);
271187
int l = Utils.digit16(bytes[++i]);
272188
buffer.write((char) ((u << 4) + l));
273189
} catch (ArrayIndexOutOfBoundsException e) {
274190
throw new DecoderException("Invalid quoted-printable encoding", e);
275191
}
276-
} else if (b != CR && b != LF) {
277-
// every other octet is appended except for CR & LF
192+
} else {
278193
buffer.write(b);
279194
}
280195
}
@@ -285,8 +200,8 @@ public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderExc
285200
* Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
286201
*
287202
* <p>
288-
* This function fully implements the quoted-printable encoding specification (rule #1 through rule #5)
289-
* as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
203+
* This function implements a subset of the quoted-printable encoding specification (rules #1 and #2) as defined in
204+
* RFC 1521 and is suitable for encoding binary data and unformatted text.
290205
* </p>
291206
*
292207
* @param bytes
@@ -302,8 +217,8 @@ public byte[] encode(byte[] bytes) {
302217
* back to their original representation.
303218
*
304219
* <p>
305-
* This function fully implements the quoted-printable encoding specification (rule #1 through rule #2)
306-
* as defined in RFC 1521.
220+
* This function implements a subset of the quoted-printable encoding specification (rules #1 and #2) as defined in
221+
* RFC 1521.
307222
* </p>
308223
*
309224
* @param bytes
@@ -320,8 +235,8 @@ public byte[] decode(byte[] bytes) throws DecoderException {
320235
* Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
321236
*
322237
* <p>
323-
* This function fully implements the quoted-printable encoding specification (rule #1 through rule #2)
324-
* as defined in RFC 1521 and is suitable for encoding binary data.
238+
* This function implements a subset of the quoted-printable encoding specification (rules #1 and #2) as defined in
239+
* RFC 1521 and is suitable for encoding binary data.
325240
* </p>
326241
*
327242
* @param pString
@@ -450,8 +365,8 @@ public String getDefaultCharset() {
450365
* Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
451366
*
452367
* <p>
453-
* This function fully implements the quoted-printable encoding specification (rule #1 through rule #2)
454-
* as defined in RFC 1521 and is suitable for encoding binary data and unformatted text.
368+
* This function implements a subset of the quoted-printable encoding specification (rules #1 and #2) as defined in
369+
* RFC 1521 and is suitable for encoding binary data and unformatted text.
455370
* </p>
456371
*
457372
* @param pString

src/test/java/org/apache/commons/codec/net/QuotedPrintableCodecTest.java

Lines changed: 21 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.commons.codec.CharEncoding;
2525
import org.apache.commons.codec.DecoderException;
2626
import org.apache.commons.codec.EncoderException;
27+
import org.junit.Ignore;
2728
import org.junit.Test;
2829

2930
/**
@@ -263,92 +264,30 @@ public void testDefaultEncoding() throws Exception {
263264
}
264265

265266
@Test
267+
@Ignore
268+
/**
269+
* The QuotedPrintableCodec documentation states that this is not supported.
270+
*
271+
* @throws Exception
272+
* @see <a href="https://issues.apache.org/jira/browse/CODEC-121">CODEC-121</a>
273+
*/
266274
public void testSoftLineBreakDecode() throws Exception {
267-
String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics " +
268-
"is the most beautiful branch of philosophy.";
269-
String expected = "If you believe that truth=beauty, then surely mathematics " +
270-
"is the most beautiful branch of philosophy.";
271-
272-
QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
273-
assertEquals(expected, qpcodec.decode(qpdata));
274-
275-
String encoded = qpcodec.encode(expected);
276-
assertEquals(expected, qpcodec.decode(encoded));
275+
String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy.";
276+
String expected = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.";
277+
assertEquals(expected, new QuotedPrintableCodec().decode(qpdata));
277278
}
278279

279280
@Test
281+
@Ignore
282+
/**
283+
* The QuotedPrintableCodec documentation states that this is not supported.
284+
*
285+
* @throws Exception
286+
* @see <a href="https://issues.apache.org/jira/browse/CODEC-121">CODEC-121</a>
287+
*/
280288
public void testSoftLineBreakEncode() throws Exception {
281-
String qpdata = "If you believe that truth=3Dbeauty, then surely mathematics is the most " +
282-
"b=\r\neautiful branch of philosophy.";
283-
String expected = "If you believe that truth=beauty, then surely mathematics is the most " +
284-
"beautiful branch of philosophy.";
285-
286-
QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
287-
assertEquals(qpdata, qpcodec.encode(expected));
288-
289-
String decoded = qpcodec.decode(qpdata);
290-
assertEquals(qpdata, qpcodec.encode(decoded));
291-
}
292-
293-
@Test
294-
public void testSkipNotEncodedCRLF() throws Exception {
295-
String qpdata = "CRLF in an\n encoded text should be=20=\r\n\rskipped in the\r decoding.";
296-
String expected = "CRLF in an encoded text should be skipped in the decoding.";
297-
298-
QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
299-
assertEquals(expected, qpcodec.decode(qpdata));
300-
301-
String encoded = qpcodec.encode(expected);
302-
assertEquals(expected, qpcodec.decode(encoded));
303-
}
304-
305-
@Test
306-
public void testTrailingSpecial() throws Exception {
307-
final QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
308-
309-
String plain ="This is a example of a quoted-printable text file. This might contain sp=cial chars.";
310-
String expected = "This is a example of a quoted-printable text file. This might contain sp=3D=\r\ncial chars.";
311-
assertEquals(expected, qpcodec.encode(plain));
312-
313-
plain ="This is a example of a quoted-printable text file. This might contain ta\tbs as well.";
314-
expected = "This is a example of a quoted-printable text file. This might contain ta=09=\r\nbs as well.";
315-
assertEquals(expected, qpcodec.encode(plain));
289+
String qpdata = "If you believe that truth=3Dbeauty, then surely=20=\r\nmathematics is the most beautiful branch of philosophy.";
290+
String expected = "If you believe that truth=beauty, then surely mathematics is the most beautiful branch of philosophy.";
291+
assertEquals(qpdata, new QuotedPrintableCodec().encode(expected));
316292
}
317-
318-
@Test
319-
public void testUltimateSoftBreak() throws Exception {
320-
final QuotedPrintableCodec qpcodec = new QuotedPrintableCodec();
321-
322-
String plain ="This is a example of a quoted-printable text file. There is no end to it\t";
323-
String expected = "This is a example of a quoted-printable text file. There is no end to i=\r\nt=09";
324-
325-
assertEquals(expected, qpcodec.encode(plain));
326-
327-
plain ="This is a example of a quoted-printable text file. There is no end to it ";
328-
expected = "This is a example of a quoted-printable text file. There is no end to i=\r\nt=20";
329-
330-
assertEquals(expected, qpcodec.encode(plain));
331-
332-
// whitespace before soft break
333-
plain ="This is a example of a quoted-printable text file. There is no end to ";
334-
expected = "This is a example of a quoted-printable text file. There is no end to=20=\r\n =20";
335-
336-
assertEquals(expected, qpcodec.encode(plain));
337-
338-
// non-printable character before soft break
339-
plain ="This is a example of a quoted-printable text file. There is no end to= ";
340-
expected = "This is a example of a quoted-printable text file. There is no end to=3D=\r\n =20";
341-
342-
assertEquals(expected, qpcodec.encode(plain));
343-
}
344-
345-
@Test
346-
public void testFinalBytes() throws Exception {
347-
// whitespace, but does not need to be encoded
348-
String plain ="This is a example of a quoted=printable text file. There is no tt";
349-
String expected = "This is a example of a quoted=3Dprintable text file. There is no tt";
350-
351-
assertEquals(expected, new QuotedPrintableCodec().encode(plain));
352-
}
353-
354293
}

0 commit comments

Comments
 (0)