Skip to content

Commit 6b67d6f

Browse files
apinske authored and aherbert committed
CODEC-301: Reduce byte[] allocations by reusing buffers
* Reduces byte[] allocations from 280MB to <4MB when reading a 133MB base64 stream. Measured with JFR.
* Keep reusing the initial buffer when decoding BaseN.
* Attempt to fill up the user-provided buffer. Previously we only filled up to a maximum of 8KB minus the encoding overhead (e.g. 6KB for Base64), even if the provided buffer was bigger.
* Reuse the hasData method for checking the pos/readPos markers.
1 parent a66b760 commit 6b67d6f

3 files changed

Lines changed: 46 additions & 8 deletions

File tree

src/main/java/org/apache/commons/codec/binary/BaseNCodec.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
394394
* @return The amount of buffered data available for reading.
395395
*/
396396
int available(final Context context) { // package protected for access from I/O streams
397-
return context.buffer != null ? context.pos - context.readPos : 0;
397+
return hasData(context) ? context.pos - context.readPos : 0;
398398
}
399399

400400
/**
@@ -632,7 +632,7 @@ public long getEncodedLength(final byte[] pArray) {
632632
* @return true if there is data still available for reading.
633633
*/
634634
boolean hasData(final Context context) { // package protected for access from I/O streams
635-
return context.buffer != null;
635+
return context.pos > context.readPos;
636636
}
637637

638638
/**
@@ -711,12 +711,16 @@ public boolean isStrictDecoding() {
711711
* @return The number of bytes successfully extracted into the provided byte[] array.
712712
*/
713713
int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
714-
if (context.buffer != null) {
714+
if (hasData(context)) {
715715
final int len = Math.min(available(context), bAvail);
716716
System.arraycopy(context.buffer, context.readPos, b, bPos, len);
717717
context.readPos += len;
718-
if (context.readPos >= context.pos) {
719-
context.buffer = null; // so hasData() will return false, and this method can return -1
718+
if (!hasData(context)) {
719+
// All data read.
720+
// Reset position markers but do not set buffer to null to allow its reuse.
721+
// hasData(context) will still return false, and this method will return 0 until
722+
// more data is available, or -1 if EOF.
723+
context.pos = context.readPos = 0;
720724
}
721725
return len;
722726
}

src/main/java/org/apache/commons/codec/binary/BaseNCodecInputStream.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,15 @@ public class BaseNCodecInputStream extends FilterInputStream {
3939

4040
private final byte[] singleByte = new byte[1];
4141

42+
private final byte[] buf;
43+
4244
private final Context context = new Context();
4345

4446
protected BaseNCodecInputStream(final InputStream input, final BaseNCodec baseNCodec, final boolean doEncode) {
4547
super(input);
4648
this.doEncode = doEncode;
4749
this.baseNCodec = baseNCodec;
50+
this.buf = new byte[doEncode ? 4096 : 8192];
4851
}
4952

5053
/**
@@ -169,17 +172,24 @@ public int read(final byte array[], final int offset, final int len) throws IOEx
169172
-----
170173
This is a fix for CODEC-101
171174
*/
172-
while (readLen == 0) {
175+
// Attempt to read the request length
176+
while (readLen < len) {
173177
if (!baseNCodec.hasData(context)) {
174-
final byte[] buf = new byte[doEncode ? 4096 : 8192];
178+
// Obtain more data.
179+
// buf is reused across calls to read to avoid repeated allocations
175180
final int c = in.read(buf);
176181
if (doEncode) {
177182
baseNCodec.encode(buf, 0, c, context);
178183
} else {
179184
baseNCodec.decode(buf, 0, c, context);
180185
}
181186
}
182-
readLen = baseNCodec.readResults(array, offset, len, context);
187+
final int read = baseNCodec.readResults(array, offset + readLen, len - readLen, context);
188+
if (read < 0) {
189+
// Return the amount read or EOF
190+
return readLen != 0 ? readLen : -1;
191+
}
192+
readLen += read;
183193
}
184194
return readLen;
185195
}

src/test/java/org/apache/commons/codec/binary/Base64InputStreamTest.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,30 @@ public void testMarkSupported() throws Exception {
408408
}
409409
}
410410

411+
/**
412+
* Tests read using different buffer sizes
413+
*
414+
* @throws Exception
415+
* for some failure scenarios.
416+
*/
417+
@Test
418+
public void testReadMultipleBufferSizes() throws Exception {
419+
final byte[][] randomData = BaseNTestData.randomData(new Base64(0, null, false), 1024 * 64);
420+
byte[] encoded = randomData[1];
421+
byte[] decoded = randomData[0];
422+
final ByteArrayInputStream bin = new ByteArrayInputStream(encoded);
423+
final ByteArrayOutputStream out = new ByteArrayOutputStream();
424+
try (final Base64InputStream in = new Base64InputStream(bin)) {
425+
for (int i : new int[] { 4 * 1024, 4 * 1024, 8 * 1024, 8 * 1024, 16 * 1024, 16 * 1024, 8 * 1024 }) {
426+
final byte[] buf = new byte[i];
427+
int bytesRead = in.read(buf);
428+
assertEquals(i, bytesRead);
429+
out.write(buf, 0, bytesRead);
430+
}
431+
}
432+
assertArrayEquals(decoded, out.toByteArray());
433+
}
434+
411435
/**
412436
* Tests read returning 0
413437
*

0 commit comments

Comments
 (0)