From debe34abe1e9fb522cd94babb6c98a055f2e4b5a Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Thu, 4 Sep 2025 23:13:32 +0200
Subject: [PATCH 01/19] feat: Add incremental `toByteArray` method
This introduces `toByteArray(InputStream input, int size, int bufferSize)`, which reads the stream in chunks of `bufferSize` instead of allocating the full array up front.
By reading incrementally, the method:
* Validates that the stream actually contains `size` bytes before completing the allocation.
* Prevents excessive memory usage if a corrupted or malicious `size` value is provided.
* Offers safer handling for untrusted input compared to the direct-allocation variant.
---
src/changes/changes.xml | 1 +
.../java/org/apache/commons/io/IOUtils.java | 129 ++++++++++++++----
.../apache/commons/io/RandomAccessFiles.java | 4 +
.../org/apache/commons/io/IOUtilsTest.java | 45 ++++++
4 files changed, 152 insertions(+), 27 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index de92deabf64..a10b4653880 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -57,6 +57,7 @@ The type attribute can be add,update,fix,remove.
Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], int, int, long).
Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], long).
Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(int, long).
+ Added toByteArray(InputStream, int, int) for safer incremental reading with size validation.
Bump org.apache.commons:commons-parent from 85 to 87 #774.
[test] Bump commons-codec:commons-codec from 1.18.0 to 1.19.0.
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index cd8d042a922..96c6e682407 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -65,6 +65,7 @@
import org.apache.commons.io.function.IOConsumer;
import org.apache.commons.io.function.IOSupplier;
import org.apache.commons.io.function.IOTriFunction;
+import org.apache.commons.io.input.BoundedInputStream;
import org.apache.commons.io.input.CharSequenceReader;
import org.apache.commons.io.input.QueueInputStream;
import org.apache.commons.io.output.AppendableWriter;
@@ -2659,37 +2660,60 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti
}
/**
- * Gets the contents of an {@link InputStream} as a {@code byte[]}. Use this method instead of
- * {@link #toByteArray(InputStream)} when {@link InputStream} size is known.
+ * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * @param input the {@link InputStream} to read.
- * @param size the size of {@link InputStream} to read, where 0 < {@code size} <= length of input stream.
- * @return byte [] of length {@code size}.
- * @throws IOException if an I/O error occurs or {@link InputStream} length is smaller than parameter {@code size}.
- * @throws IllegalArgumentException if {@code size} is less than zero.
+ *
+ * This variant allocates the target array immediately and attempts to fill it in one pass.
+ * It assumes that {@code size} is correct.
+ * If the stream ends prematurely, an {@link EOFException} is thrown.
+ *
+ *
+ *
+ * Important: This method does not defend against corrupted
+ * or untrusted {@code size} values.
+ * For untrusted input, use {@link #toByteArray(InputStream, int, int)} instead,
+ * which validates that the stream contains at least {@code size} bytes before allocating the target array.
+ *
+ *
+ * @param input the {@link InputStream} to read; must not be {@code null}.
+ * @param size the exact number of bytes to read; must be {@code >= 0}.
+ * @return a new byte array of length {@code size}.
+ * @throws IllegalArgumentException if {@code size} is negative.
+ * @throws EOFException if the stream ends before {@code size} bytes are read.
+ * @throws IOException if an I/O error occurs while reading.
+ * @throws NullPointerException if {@code input} is {@code null}.
* @since 2.1
*/
public static byte[] toByteArray(final InputStream input, final int size) throws IOException {
- if (size == 0) {
- return EMPTY_BYTE_ARRAY;
+ Objects.requireNonNull(input, "input");
+ if (size < 0) {
+ throw new IllegalArgumentException("Size must be equal or greater than zero: " + size);
}
- return toByteArray(Objects.requireNonNull(input, "input")::read, size);
+ return toByteArray(input::read, size);
}
/**
- * Gets contents of an {@link InputStream} as a {@code byte[]}.
- * Use this method instead of {@link #toByteArray(InputStream)}
- * when {@link InputStream} size is known.
- * NOTE: the method checks that the length can safely be cast to an int without truncation
- * before using {@link IOUtils#toByteArray(InputStream, int)} to read into the byte array.
- * (Arrays can have no more than Integer.MAX_VALUE entries anyway.)
+ * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * @param input the {@link InputStream} to read.
- * @param size the size of {@link InputStream} to read, where 0 < {@code size} <= min(Integer.MAX_VALUE, length of input stream).
- * @return byte [] the requested byte array, of length {@code size}.
- * @throws IOException if an I/O error occurs or {@link InputStream} length is less than {@code size}.
- * @throws IllegalArgumentException if size is less than zero or size is greater than Integer.MAX_VALUE.
- * @see IOUtils#toByteArray(InputStream, int)
+ *
+ * This is a convenience overload of {@link #toByteArray(InputStream, int, int)} that accepts a
+ * {@code long} size parameter. The value is checked to ensure it does not exceed
+ * {@link Integer#MAX_VALUE} before being safely converted to {@code int}.
+ *
+ *
+ *
+ * All behavior, validation rules, and exceptions are otherwise identical to
+ * {@link #toByteArray(InputStream, int, int)}.
+ *
+ *
+ * @param input the {@link InputStream} to read; must not be {@code null}.
+ * @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}.
+ * @return a new byte array of length {@code size}.
+ * @throws IllegalArgumentException if {@code size} is negative or greater than {@link Integer#MAX_VALUE}.
+ * @throws EOFException if the stream ends before {@code size} bytes are read.
+ * @throws IOException if an I/O error occurs while reading.
+ * @throws NullPointerException if {@code input} is {@code null}.
+ * @see #toByteArray(InputStream, int, int)
* @since 2.1
*/
public static byte[] toByteArray(final InputStream input, final long size) throws IOException {
@@ -2699,6 +2723,62 @@ public static byte[] toByteArray(final InputStream input, final long size) throw
return toByteArray(input, (int) size);
}
+ /**
+ * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
+ *
+ *
+ * This variant validates that the stream actually contains {@code size} bytes.
+ * It is suitable for untrusted input because it prevents oversized allocations when the provided {@code size}
+ * is corrupted or malicious.
+ *
+ *
+ *
+ * - If {@code size <= bufferSize}, the array is allocated directly and filled in a single pass.
+ * -
+ * If {@code size > bufferSize}, the stream is read incrementally using a buffer of length {@code bufferSize}.
+ * This avoids allocating an excessively large array up front,
+ * but may temporarily double memory usage due to buffering.
+ *
+ *
+ *
+ * @param input the {@link InputStream} to read; must not be {@code null}.
+ * @param size the exact number of bytes to read; must be {@code >= 0}.
+ * The actual bytes read are validated to equal {@code size}.
+ * @param bufferSize the buffer size for incremental reading; must be {@code > 0}.
+ * @return a new byte array of length {@code size}.
+ * @throws IllegalArgumentException if {@code size} is negative or {@code bufferSize <= 0}.
+ * @throws EOFException if the stream ends before {@code size} bytes are read.
+ * @throws IOException if an I/O error occurs while reading.
+ * @throws NullPointerException if {@code input} is {@code null}.
+ * @since 2.21.0
+ */
+ public static byte[] toByteArray(final InputStream input, final int size, final int bufferSize) throws IOException {
+ Objects.requireNonNull(input, "input");
+ if (size < 0) {
+ throw new IllegalArgumentException("Size must be equal or greater than zero: " + size);
+ }
+ if (bufferSize <= 0) {
+ throw new IllegalArgumentException("Chunk size must be greater than zero: " + bufferSize);
+ }
+ if (size <= bufferSize) {
+ return toByteArray(input::read, size);
+ }
+ try (UnsynchronizedByteArrayOutputStream output = UnsynchronizedByteArrayOutputStream.builder()
+ .setBufferSize(bufferSize)
+ .get();
+ InputStream boundedInput = BoundedInputStream.builder()
+ .setMaxCount(size)
+ .setPropagateClose(false)
+ .setInputStream(input)
+ .get()) {
+ output.write(boundedInput);
+ if (output.size() != size) {
+ throw new EOFException("Unexpected read size, current: " + output.size() + ", expected: " + size);
+ }
+ return output.toByteArray();
+ }
+ }
+
/**
* Gets the contents of an input as a {@code byte[]}.
*
@@ -2709,11 +2789,6 @@ public static byte[] toByteArray(final InputStream input, final long size) throw
* @throws IllegalArgumentException if {@code size} is less than zero.
*/
static byte[] toByteArray(final IOTriFunction input, final int size) throws IOException {
-
- if (size < 0) {
- throw new IllegalArgumentException("Size must be equal or greater than zero: " + size);
- }
-
if (size == 0) {
return EMPTY_BYTE_ARRAY;
}
diff --git a/src/main/java/org/apache/commons/io/RandomAccessFiles.java b/src/main/java/org/apache/commons/io/RandomAccessFiles.java
index 46de0dbf077..d9baa8f73b6 100644
--- a/src/main/java/org/apache/commons/io/RandomAccessFiles.java
+++ b/src/main/java/org/apache/commons/io/RandomAccessFiles.java
@@ -76,7 +76,11 @@ private static long length(final RandomAccessFile raf) throws IOException {
* other I/O error occurs.
*/
public static byte[] read(final RandomAccessFile input, final long position, final int length) throws IOException {
+ Objects.requireNonNull(input, "input");
input.seek(position);
+ if (length < 0) {
+ throw new IllegalArgumentException("Size must be equal or greater than zero: " + length);
+ }
return IOUtils.toByteArray(input::read, length);
}
diff --git a/src/test/java/org/apache/commons/io/IOUtilsTest.java b/src/test/java/org/apache/commons/io/IOUtilsTest.java
index 56fd1307eb1..92925bdd785 100644
--- a/src/test/java/org/apache/commons/io/IOUtilsTest.java
+++ b/src/test/java/org/apache/commons/io/IOUtilsTest.java
@@ -90,6 +90,9 @@
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
/**
* This is used to test {@link IOUtils} for correctness. The following checks are performed:
@@ -1659,6 +1662,48 @@ void testToByteArray_InputStream_SizeZero() throws Exception {
}
}
+ @ParameterizedTest
+ @MethodSource
+ void testToByteArray_InputStream_Size_BufferSize_Succeeds(byte[] data, int size, int bufferSize) throws IOException {
+ final ByteArrayInputStream input = new ByteArrayInputStream(data);
+ final byte[] expected = Arrays.copyOf(data, size);
+ final byte[] actual = IOUtils.toByteArray(input, size, bufferSize);
+ assertArrayEquals(expected, actual);
+ }
+
+ private static Stream<Arguments> testToByteArray_InputStream_Size_BufferSize_Succeeds() {
+ final byte[] data = new byte[1024];
+ for (int i = 0; i < 1024; i++) {
+ data[i] = (byte) i;
+ }
+ return Stream.of(
+ // Eager reading
+ Arguments.of(data.clone(), 512, 1024),
+ // Incremental reading
+ Arguments.of(data.clone(), 1024, 512),
+ // No reading
+ Arguments.of(data.clone(), 0, 128));
+ }
+
+ @ParameterizedTest
+ @MethodSource
+ void testToByteArray_InputStream_Size_BufferSize_Throws(
+ int size, int bufferSize, Class<? extends Exception> exceptionClass) throws IOException {
+ try (InputStream input = new NullInputStream(0)) {
+ assertThrows(exceptionClass, () -> IOUtils.toByteArray(input, size, bufferSize));
+ }
+ }
+
+ static Stream<Arguments> testToByteArray_InputStream_Size_BufferSize_Throws() {
+ return Stream.of(
+ // Negative size
+ Arguments.of(-1, 128, IllegalArgumentException.class),
+ // Invalid buffer size
+ Arguments.of(0, 0, IllegalArgumentException.class),
+ // Huge size: should not cause OutOfMemoryError
+ Arguments.of(Integer.MAX_VALUE, 128, EOFException.class));
+ }
+
@Test
void testToByteArray_Reader() throws IOException {
final String charsetName = UTF_8;
From d748e99d390eef117d54bf175dcaa569e2b9106d Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Fri, 5 Sep 2025 15:27:46 +0200
Subject: [PATCH 02/19] fix: move back positivity check to helper method
---
src/main/java/org/apache/commons/io/IOUtils.java | 10 ++++------
.../java/org/apache/commons/io/RandomAccessFiles.java | 4 ----
2 files changed, 4 insertions(+), 10 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index 07bdb711bf9..9cdb8b2ff6e 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2684,13 +2684,8 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti
* @throws NullPointerException if {@code input} is {@code null}.
* @since 2.1
*/
- @SuppressWarnings("resource")
public static byte[] toByteArray(final InputStream input, final int size) throws IOException {
- Objects.requireNonNull(input, "input");
- if (size < 0) {
- throw new IllegalArgumentException("Size must be equal or greater than zero: " + size);
- }
- return toByteArray(input::read, size);
+ return toByteArray(Objects.requireNonNull(input, "input")::read, size);
}
/**
@@ -2790,6 +2785,9 @@ public static byte[] toByteArray(final InputStream input, final int size, final
* @throws IllegalArgumentException if {@code size} is less than zero.
*/
static byte[] toByteArray(final IOTriFunction input, final int size) throws IOException {
+ if (size < 0) {
+ throw new IllegalArgumentException("Size must be equal or greater than zero: " + size);
+ }
if (size == 0) {
return EMPTY_BYTE_ARRAY;
}
diff --git a/src/main/java/org/apache/commons/io/RandomAccessFiles.java b/src/main/java/org/apache/commons/io/RandomAccessFiles.java
index d9baa8f73b6..46de0dbf077 100644
--- a/src/main/java/org/apache/commons/io/RandomAccessFiles.java
+++ b/src/main/java/org/apache/commons/io/RandomAccessFiles.java
@@ -76,11 +76,7 @@ private static long length(final RandomAccessFile raf) throws IOException {
* other I/O error occurs.
*/
public static byte[] read(final RandomAccessFile input, final long position, final int length) throws IOException {
- Objects.requireNonNull(input, "input");
input.seek(position);
- if (length < 0) {
- throw new IllegalArgumentException("Size must be equal or greater than zero: " + length);
- }
return IOUtils.toByteArray(input::read, length);
}
From 33a4fe15a7c05a3b850b6a9872ac6dd3b7c0aaf6 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Fri, 5 Sep 2025 15:37:09 +0200
Subject: [PATCH 03/19] fix: changelog entry
---
src/changes/changes.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index a10b4653880..348081a0782 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -57,7 +57,7 @@ The type attribute can be add,update,fix,remove.
Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], int, int, long).
Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], long).
Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(int, long).
- Added toByteArray(InputStream, int, int) for safer incremental reading with size validation.
+ Add IOUtils.toByteArray(InputStream, int, int) for safer incremental reading with size validation.
Bump org.apache.commons:commons-parent from 85 to 87 #774.
[test] Bump commons-codec:commons-codec from 1.18.0 to 1.19.0.
From 7362d3e34801662b029ea6ef4e005dfb1194c57b Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Fri, 5 Sep 2025 16:18:54 +0200
Subject: [PATCH 04/19] fix: Javadoc details
---
.../java/org/apache/commons/io/IOUtils.java | 65 +++++--------------
1 file changed, 18 insertions(+), 47 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index 9cdb8b2ff6e..25e751f9308 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2638,23 +2638,24 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
}
/**
- * Gets the contents of an {@link InputStream} as a {@code byte[]}.
- *
- * This method buffers the input internally, so there is no need to use a {@link BufferedInputStream}.
- *
+ * Reads all remaining bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * @param inputStream the {@link InputStream} to read.
- * @return the requested byte array.
- * @throws NullPointerException if the InputStream is {@code null}.
- * @throws IOException if an I/O error occurs or reading more than {@link Integer#MAX_VALUE} occurs.
+ * The method accumulates the data in temporary buffers and returns a single array
+ * containing the entire contents once the end of the stream is reached.
+ *
+ * @param input the {@link InputStream} to read; must not be {@code null}.
+ * @return a new byte array.
+ * @throws IllegalArgumentException if the size of the stream is greater than {@code Integer.MAX_VALUE}.
+ * @throws IOException if an I/O error occurs while reading.
+ * @throws NullPointerException if {@code input} is {@code null}.
*/
- public static byte[] toByteArray(final InputStream inputStream) throws IOException {
+ public static byte[] toByteArray(final InputStream input) throws IOException {
// We use a ThresholdingOutputStream to avoid reading AND writing more than Integer.MAX_VALUE.
try (UnsynchronizedByteArrayOutputStream ubaOutput = UnsynchronizedByteArrayOutputStream.builder().get();
ThresholdingOutputStream thresholdOutput = new ThresholdingOutputStream(Integer.MAX_VALUE, os -> {
throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", Integer.MAX_VALUE));
}, os -> ubaOutput)) {
- copy(inputStream, thresholdOutput);
+ copy(input, thresholdOutput);
return ubaOutput.toByteArray();
}
}
@@ -2662,18 +2663,8 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- *
- * This variant allocates the target array immediately and attempts to fill it in one pass.
- * It assumes that {@code size} is correct.
- * If the stream ends prematurely, an {@link EOFException} is thrown.
- *
- *
- *
- * Important: This method does not defend against corrupted
- * or untrusted {@code size} values.
- * For untrusted input, use {@link #toByteArray(InputStream, int, int)} instead,
- * which validates that the stream contains at least {@code size} bytes before allocating the target array.
- *
+ * The method allocates a single array of the requested size and fills it directly
+ * from the stream.
*
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0}.
@@ -2691,21 +2682,13 @@ public static byte[] toByteArray(final InputStream input, final int size) throws
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- *
- * This is a convenience overload of {@link #toByteArray(InputStream, int, int)} that accepts a
- * {@code long} size parameter. The value is checked to ensure it does not exceed
- * {@link Integer#MAX_VALUE} before being safely converted to {@code int}.
- *
- *
- *
- * All behavior, validation rules, and exceptions are otherwise identical to
- * {@link #toByteArray(InputStream, int, int)}.
- *
+ * The method allocates a single array of the requested size and fills it directly
+ * from the stream.
*
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}.
* @return a new byte array of length {@code size}.
- * @throws IllegalArgumentException if {@code size} is negative or greater than {@link Integer#MAX_VALUE}.
+ * @throws IllegalArgumentException if {@code size} is negative or does not fit into an int.
* @throws EOFException if the stream ends before {@code size} bytes are read.
* @throws IOException if an I/O error occurs while reading.
* @throws NullPointerException if {@code input} is {@code null}.
@@ -2722,20 +2705,8 @@ public static byte[] toByteArray(final InputStream input, final long size) throw
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- *
- * This variant validates that the stream actually contains {@code size} bytes.
- * It is suitable for untrusted input because it prevents oversized allocations when the provided {@code size}
- * is corrupted or malicious.
- *
- *
- *
- * - If {@code size <= bufferSize}, the array is allocated directly and filled in a single pass.
- * -
- * If {@code size > bufferSize}, the stream is read incrementally using a buffer of length {@code bufferSize}.
- * This avoids allocating an excessively large array up front,
- * but may temporarily double memory usage due to buffering.
- *
- *
+ * The method accumulates the data in temporary buffers of size at most {@code bufferSize}
+ * and returns a single array containing the entire contents once the end of the stream is reached.
*
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0}.
From fe39b777c447f2eeb185ae2520ca6b547d84e719 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Fri, 5 Sep 2025 16:26:47 +0200
Subject: [PATCH 05/19] fix: remove negative size check
---
src/main/java/org/apache/commons/io/IOUtils.java | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index 25e751f9308..80d3e62f5d1 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2721,13 +2721,11 @@ public static byte[] toByteArray(final InputStream input, final long size) throw
*/
public static byte[] toByteArray(final InputStream input, final int size, final int bufferSize) throws IOException {
Objects.requireNonNull(input, "input");
- if (size < 0) {
- throw new IllegalArgumentException("Size must be equal or greater than zero: " + size);
- }
if (bufferSize <= 0) {
throw new IllegalArgumentException("Chunk size must be greater than zero: " + bufferSize);
}
if (size <= bufferSize) {
+ // throws if size < 0
return toByteArray(input::read, size);
}
try (UnsynchronizedByteArrayOutputStream output = UnsynchronizedByteArrayOutputStream.builder()
From c6c79a3dc4fcfcc84a82189f9ae41e3a051a5d35 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Fri, 5 Sep 2025 16:27:31 +0200
Subject: [PATCH 06/19] fix: exception message
---
src/main/java/org/apache/commons/io/IOUtils.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index 80d3e62f5d1..288d4f1ff8d 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2722,7 +2722,7 @@ public static byte[] toByteArray(final InputStream input, final long size) throw
public static byte[] toByteArray(final InputStream input, final int size, final int bufferSize) throws IOException {
Objects.requireNonNull(input, "input");
if (bufferSize <= 0) {
- throw new IllegalArgumentException("Chunk size must be greater than zero: " + bufferSize);
+ throw new IllegalArgumentException("Buffer size must be greater than zero: " + bufferSize);
}
if (size <= bufferSize) {
// throws if size < 0
From 9439095923e901ef3e4eac9ae4f06c00268a280c Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Sat, 6 Sep 2025 08:05:45 +0200
Subject: [PATCH 07/19] fix: restore parameter name
---
src/main/java/org/apache/commons/io/IOUtils.java | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index 288d4f1ff8d..2f92b7bc5ab 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2643,19 +2643,19 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
* The method accumulates the data in temporary buffers and returns a single array
* containing the entire contents once the end of the stream is reached.
*
- * @param input the {@link InputStream} to read; must not be {@code null}.
+ * @param inputStream the {@link InputStream} to read; must not be {@code null}.
* @return a new byte array.
* @throws IllegalArgumentException if the size of the stream is greater than {@code Integer.MAX_VALUE}.
* @throws IOException if an I/O error occurs while reading.
- * @throws NullPointerException if {@code input} is {@code null}.
+ * @throws NullPointerException if {@code inputStream} is {@code null}.
*/
- public static byte[] toByteArray(final InputStream input) throws IOException {
+ public static byte[] toByteArray(final InputStream inputStream) throws IOException {
// We use a ThresholdingOutputStream to avoid reading AND writing more than Integer.MAX_VALUE.
try (UnsynchronizedByteArrayOutputStream ubaOutput = UnsynchronizedByteArrayOutputStream.builder().get();
ThresholdingOutputStream thresholdOutput = new ThresholdingOutputStream(Integer.MAX_VALUE, os -> {
throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", Integer.MAX_VALUE));
}, os -> ubaOutput)) {
- copy(input, thresholdOutput);
+ copy(inputStream, thresholdOutput);
return ubaOutput.toByteArray();
}
}
From 97d37a94de8c935ea8e4961e354a1cd1d5c49b6a Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Sat, 6 Sep 2025 08:15:14 +0200
Subject: [PATCH 08/19] fix: remove details and add guidance
---
.../java/org/apache/commons/io/IOUtils.java | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index 2f92b7bc5ab..dbb7b0e8d80 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2640,9 +2640,6 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
/**
* Reads all remaining bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * The method accumulates the data in temporary buffers and returns a single array
- * containing the entire contents once the end of the stream is reached.
- *
* @param inputStream the {@link InputStream} to read; must not be {@code null}.
* @return a new byte array.
* @throws IllegalArgumentException if the size of the stream is greater than {@code Integer.MAX_VALUE}.
@@ -2663,9 +2660,6 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * The method allocates a single array of the requested size and fills it directly
- * from the stream.
- *
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0}.
* @return a new byte array of length {@code size}.
@@ -2682,9 +2676,6 @@ public static byte[] toByteArray(final InputStream input, final int size) throws
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * The method allocates a single array of the requested size and fills it directly
- * from the stream.
- *
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}.
* @return a new byte array of length {@code size}.
@@ -2705,8 +2696,12 @@ public static byte[] toByteArray(final InputStream input, final long size) throw
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * The method accumulates the data in temporary buffers of size at most {@code bufferSize}
- * and returns a single array containing the entire contents once the end of the stream is reached.
+ * When reading from an untrusted stream, this variant lowers the risk of
+ * {@link OutOfMemoryError} by allocating data in buffers of up to {@code bufferSize}
+ * bytes rather than in one large array.
+ *
+ * Note, however, that this approach requires additional temporary memory
+ * compared to {@link #toByteArray(InputStream, int)}.
*
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0}.
From cbfa307f526f8c6fac4744fea135de16edbdb1b0 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Sat, 6 Sep 2025 08:19:21 +0200
Subject: [PATCH 09/19] fix: simplify description
---
src/main/java/org/apache/commons/io/IOUtils.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index dbb7b0e8d80..878cea2918b 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2638,7 +2638,7 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
}
/**
- * Reads all remaining bytes from the given {@link InputStream} into a new {@code byte[]}.
+ * Reads all the bytes from an input stream in a byte array.
*
* @param inputStream the {@link InputStream} to read; must not be {@code null}.
* @return a new byte array.
From d7e886edb8d573fab394a37ffcf6c2fb624cc4e7 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Sat, 6 Sep 2025 20:21:37 +0200
Subject: [PATCH 10/19] fix: apply an incremental threshold to all
`toByteArray` overloads
* Extends incremental (chunked) reading to all `toByteArray` variants when the requested size is unknown or exceeds 128 KiB.
* The 128 KiB threshold matches the default buffer size used in CPython.
* Updates Javadoc to emphasize that memory usage grows **proportionally** with the number of bytes actually **read**, making these methods suitable for large streams when sufficient memory is available.
---
.../java/org/apache/commons/io/IOUtils.java | 114 ++++++++++++------
1 file changed, 77 insertions(+), 37 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index 878cea2918b..f06c082bec5 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -73,7 +73,6 @@
import org.apache.commons.io.output.NullOutputStream;
import org.apache.commons.io.output.NullWriter;
import org.apache.commons.io.output.StringBuilderWriter;
-import org.apache.commons.io.output.ThresholdingOutputStream;
import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
/**
@@ -222,6 +221,21 @@ public class IOUtils {
*/
private static final char[] SCRATCH_CHAR_BUFFER_WO = charArray();
+ /**
+ * The maximum size of an array in many Java VMs.
+ */
+ private static final int MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8;
+
+ /*
+ * Default maximum chunk size used when copying large streams into a byte array.
+ *
+ * This value is somewhat arbitrary, currently aligned with the value used by
+ * Python
+ * for copying streams.
+ *
+ */
+ private static final int DEFAULT_CHUNK_SIZE = 128 * 1024;
+
/**
* Returns the given InputStream if it is already a {@link BufferedInputStream}, otherwise creates a
* BufferedInputStream from the given InputStream.
@@ -2640,26 +2654,34 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
/**
* Reads all the bytes from an input stream in a byte array.
*
- * @param inputStream the {@link InputStream} to read; must not be {@code null}.
- * @return a new byte array.
- * @throws IllegalArgumentException if the size of the stream is greater than {@code Integer.MAX_VALUE}.
- * @throws IOException if an I/O error occurs while reading.
- * @throws NullPointerException if {@code inputStream} is {@code null}.
+ * The memory used by this method is proportional to the number
+ * of bytes read, which is only limited by {@link Integer#MAX_VALUE}. This makes it unsuitable for
+ * processing large input streams, unless sufficient heap space is available.
+ *
+ * @param inputStream The {@link InputStream} to read; must not be {@code null}.
+ * @return A new byte array.
+ * @throws IllegalArgumentException If the size of the stream is greater than the maximum array size.
+ * @throws IOException If an I/O error occurs while reading.
+ * @throws NullPointerException If {@code inputStream} is {@code null}.
*/
public static byte[] toByteArray(final InputStream inputStream) throws IOException {
- // We use a ThresholdingOutputStream to avoid reading AND writing more than Integer.MAX_VALUE.
- try (UnsynchronizedByteArrayOutputStream ubaOutput = UnsynchronizedByteArrayOutputStream.builder().get();
- ThresholdingOutputStream thresholdOutput = new ThresholdingOutputStream(Integer.MAX_VALUE, os -> {
- throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", Integer.MAX_VALUE));
- }, os -> ubaOutput)) {
- copy(inputStream, thresholdOutput);
- return ubaOutput.toByteArray();
+ final UnsynchronizedByteArrayOutputStream output =
+ copyToOutputStream(inputStream, MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE);
+ if (output.size() > MAX_ARRAY_LENGTH) {
+ throw new IllegalArgumentException(
+ String.format("Cannot read more than %,d into a byte array", MAX_ARRAY_LENGTH));
}
+ return output.toByteArray();
}
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
+ * The memory used by this method is proportional to the number
+ * of bytes read and limited by the specified {@code size}. This makes it suitable for
+ * processing large input streams, provided that sufficient heap space is
+ * available.
+ *
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0}.
* @return a new byte array of length {@code size}.
@@ -2670,12 +2692,17 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti
* @since 2.1
*/
public static byte[] toByteArray(final InputStream input, final int size) throws IOException {
- return toByteArray(Objects.requireNonNull(input, "input")::read, size);
+ return toByteArray(input, size, DEFAULT_CHUNK_SIZE);
}
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
+ * The memory used by this method is proportional to the number
+ * of bytes read and limited by the specified {@code size}. This makes it suitable for
+ * processing large input streams, provided that sufficient heap space is
+ * available.
+ *
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}.
* @return a new byte array of length {@code size}.
@@ -2696,46 +2723,63 @@ public static byte[] toByteArray(final InputStream input, final long size) throw
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * When reading from an untrusted stream, this variant lowers the risk of
- * {@link OutOfMemoryError} by allocating data in buffers of up to {@code bufferSize}
- * bytes rather than in one large array.
+ * The memory used by this method is proportional to the number
+ * of bytes read and limited by the specified {@code size}. This makes it suitable for
+ * processing large input streams, provided that sufficient heap space is
+ * available.
*
- * Note, however, that this approach requires additional temporary memory
- * compared to {@link #toByteArray(InputStream, int)}.
+ * This method processes the input stream in successive chunks of up to
+ * {@code chunkSize} bytes.
*
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0}.
* The actual bytes read are validated to equal {@code size}.
- * @param bufferSize the buffer size for incremental reading; must be {@code > 0}.
+ * @param chunkSize The chunk size for incremental reading; must be {@code > 0}.
* @return a new byte array of length {@code size}.
- * @throws IllegalArgumentException if {@code size} is negative or {@code bufferSize <= 0}.
+ * @throws IllegalArgumentException if {@code size} is negative or {@code chunkSize <= 0}.
* @throws EOFException if the stream ends before {@code size} bytes are read.
* @throws IOException if an I/O error occurs while reading.
* @throws NullPointerException if {@code input} is {@code null}.
* @since 2.21.0
*/
- public static byte[] toByteArray(final InputStream input, final int size, final int bufferSize) throws IOException {
+ public static byte[] toByteArray(final InputStream input, final int size, final int chunkSize) throws IOException {
Objects.requireNonNull(input, "input");
- if (bufferSize <= 0) {
- throw new IllegalArgumentException("Buffer size must be greater than zero: " + bufferSize);
+ if (chunkSize <= 0) {
+ throw new IllegalArgumentException("Chunk size must be greater than zero: " + chunkSize);
}
- if (size <= bufferSize) {
+ if (size <= chunkSize) {
// throws if size < 0
return toByteArray(input::read, size);
}
+ final UnsynchronizedByteArrayOutputStream output = copyToOutputStream(input, size, chunkSize);
+ if (output.size() != size) {
+ throw new EOFException("Unexpected read size, current: " + output.size() + ", expected: " + size);
+ }
+ return output.toByteArray();
+ }
+
+ /**
+ * Copies up to {@code size} bytes from the given {@link InputStream} into a new {@link UnsynchronizedByteArrayOutputStream}.
+ *
+ *
+ * @param input The {@link InputStream} to read; must not be {@code null}.
+ * @param limit The maximum number of bytes to read; must be {@code >= 0}.
+ * The actual bytes read are validated to equal {@code size}.
+ * @param bufferSize The buffer size of the output stream; must be {@code > 0}.
+ * @return a ByteArrayOutputStream containing the read bytes.
+ */
+ private static UnsynchronizedByteArrayOutputStream copyToOutputStream(
+ final InputStream input, final long limit, final int bufferSize) throws IOException {
try (UnsynchronizedByteArrayOutputStream output = UnsynchronizedByteArrayOutputStream.builder()
.setBufferSize(bufferSize)
.get();
InputStream boundedInput = BoundedInputStream.builder()
- .setMaxCount(size)
+ .setMaxCount(limit)
.setPropagateClose(false)
.setInputStream(input)
.get()) {
output.write(boundedInput);
- if (output.size() != size) {
- throw new EOFException("Unexpected read size, current: " + output.size() + ", expected: " + size);
- }
- return output.toByteArray();
+ return output;
}
}
@@ -2756,13 +2800,9 @@ static byte[] toByteArray(final IOTriFunction
return EMPTY_BYTE_ARRAY;
}
final byte[] data = byteArray(size);
- int offset = 0;
- int read;
- while (offset < size && (read = input.apply(data, offset, size - offset)) != EOF) {
- offset += read;
- }
- if (offset != size) {
- throw new IOException("Unexpected read size, current: " + offset + ", expected: " + size);
+ final int read = read(input, data, 0, size);
+ if (read != size) {
+ throw new IOException("Unexpected read size, current: " + read + ", expected: " + size);
}
return data;
}
From 38a6a2c6b7dcb3cf0646a54ce4a762913f6b7627 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Mon, 8 Sep 2025 11:36:09 +0200
Subject: [PATCH 11/19] fix: Javadoc of constants
---
src/main/java/org/apache/commons/io/IOUtils.java | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index f06c082bec5..b72578251b1 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -223,16 +223,14 @@ public class IOUtils {
/**
* The maximum size of an array in many Java VMs.
+ *
+ * The constant is copied from OpenJDK's {@link jdk.internal.util.ArraysSupport#SOFT_MAX_ARRAY_LENGTH}.
+ *
*/
- private static final int MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8;
+ private static final int SOFT_MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8;
/*
* Default maximum chunk size used when copying large streams into a byte array.
- *
- * This value is somewhat arbitrary, currently aligned with the value used by
- * Python
- * for copying streams.
- *
*/
private static final int DEFAULT_CHUNK_SIZE = 128 * 1024;
@@ -2666,10 +2664,10 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
*/
public static byte[] toByteArray(final InputStream inputStream) throws IOException {
final UnsynchronizedByteArrayOutputStream output =
- copyToOutputStream(inputStream, MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE);
- if (output.size() > MAX_ARRAY_LENGTH) {
+ copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE);
+ if (output.size() > SOFT_MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException(
- String.format("Cannot read more than %,d into a byte array", MAX_ARRAY_LENGTH));
+ String.format("Cannot read more than %,d into a byte array", SOFT_MAX_ARRAY_LENGTH));
}
return output.toByteArray();
}
From 29f365be5d485dce7d03f0d0384dbf36fd40cc9d Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Mon, 8 Sep 2025 11:37:46 +0200
Subject: [PATCH 12/19] fix: Formatting
---
src/main/java/org/apache/commons/io/IOUtils.java | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index b72578251b1..a5a16cd23ab 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2663,11 +2663,9 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
* @throws NullPointerException If {@code inputStream} is {@code null}.
*/
public static byte[] toByteArray(final InputStream inputStream) throws IOException {
- final UnsynchronizedByteArrayOutputStream output =
- copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE);
+ final UnsynchronizedByteArrayOutputStream output = copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE);
if (output.size() > SOFT_MAX_ARRAY_LENGTH) {
- throw new IllegalArgumentException(
- String.format("Cannot read more than %,d into a byte array", SOFT_MAX_ARRAY_LENGTH));
+ throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", SOFT_MAX_ARRAY_LENGTH));
}
return output.toByteArray();
}
@@ -2759,7 +2757,6 @@ public static byte[] toByteArray(final InputStream input, final int size, final
/**
* Copies up to {@code size} bytes from the given {@link InputStream} into a new {@link UnsynchronizedByteArrayOutputStream}.
*
- *
* @param input The {@link InputStream} to read; must not be {@code null}.
* @param limit The maximum number of bytes to read; must be {@code >= 0}.
* The actual bytes read are validated to equal {@code size}.
From 3afae073c62d55f241f3893612e85ce650b79bbe Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Wed, 10 Sep 2025 11:38:12 +0200
Subject: [PATCH 13/19] fix: restore previous `toByteArray(InputStream, int)`
behavior
---
src/main/java/org/apache/commons/io/IOUtils.java | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index a5a16cd23ab..fd90a3ac126 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2673,10 +2673,9 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * The memory used by this method is proportional to the number
- * of bytes read and limited by the specified {@code size}. This makes it suitable for
- * processing large input streams, provided that sufficient heap space is
- * available.
+ * This variant provides no safeguards against allocating very large arrays.
+ * For large streams, prefer {@link #toByteArray(InputStream, int, int)},
+ * which enforces stricter memory usage constraints.
*
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0}.
@@ -2688,16 +2687,15 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti
* @since 2.1
*/
public static byte[] toByteArray(final InputStream input, final int size) throws IOException {
- return toByteArray(input, size, DEFAULT_CHUNK_SIZE);
+ return toByteArray(input::read, size);
}
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * The memory used by this method is proportional to the number
- * of bytes read and limited by the specified {@code size}. This makes it suitable for
- * processing large input streams, provided that sufficient heap space is
- * available.
+ * This variant provides no safeguards against allocating very large arrays.
+ * For large streams, prefer {@link #toByteArray(InputStream, int, int)},
+ * which enforces stricter memory usage constraints.
*
* @param input the {@link InputStream} to read; must not be {@code null}.
* @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}.
From 4aba097d871332ca1ac537abb5f6880c0bf0ffc3 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Wed, 10 Sep 2025 11:39:17 +0200
Subject: [PATCH 14/19] fix: use default buffer size as chunk size
---
src/main/java/org/apache/commons/io/IOUtils.java | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index fd90a3ac126..f580c0613a4 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -229,11 +229,6 @@ public class IOUtils {
*/
private static final int SOFT_MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8;
- /*
- * Default maximum chunk size used when copying large streams into a byte array.
- */
- private static final int DEFAULT_CHUNK_SIZE = 128 * 1024;
-
/**
* Returns the given InputStream if it is already a {@link BufferedInputStream}, otherwise creates a
* BufferedInputStream from the given InputStream.
@@ -2663,7 +2658,7 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
* @throws NullPointerException If {@code inputStream} is {@code null}.
*/
public static byte[] toByteArray(final InputStream inputStream) throws IOException {
- final UnsynchronizedByteArrayOutputStream output = copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE);
+ final UnsynchronizedByteArrayOutputStream output = copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_BUFFER_SIZE);
if (output.size() > SOFT_MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", SOFT_MAX_ARRAY_LENGTH));
}
From 15b249da4f4e6bc8238dfce9ae34f4d21f7f88a4 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Wed, 10 Sep 2025 17:13:04 +0200
Subject: [PATCH 15/19] fix: possible NPE
---
src/main/java/org/apache/commons/io/IOUtils.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index f580c0613a4..bc0e68470a9 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2682,7 +2682,7 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti
* @since 2.1
*/
public static byte[] toByteArray(final InputStream input, final int size) throws IOException {
- return toByteArray(input::read, size);
+ return toByteArray(Objects.requireNonNull(input, "input")::read, size);
}
/**
From 624de9fa1a2d376e944219249c56639e02da441b Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Wed, 10 Sep 2025 17:13:28 +0200
Subject: [PATCH 16/19] fix: remove unrelated change
---
src/main/java/org/apache/commons/io/IOUtils.java | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index bc0e68470a9..713d1dde110 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2788,9 +2788,13 @@ static byte[] toByteArray(final IOTriFunction
return EMPTY_BYTE_ARRAY;
}
final byte[] data = byteArray(size);
- final int read = read(input, data, 0, size);
- if (read != size) {
- throw new IOException("Unexpected read size, current: " + read + ", expected: " + size);
+ int offset = 0;
+ int read;
+ while (offset < size && (read = input.apply(data, offset, size - offset)) != EOF) {
+ offset += read;
+ }
+ if (offset != size) {
+ throw new IOException("Unexpected read size, current: " + offset + ", expected: " + size);
}
return data;
}
From eb6c2bccb61dda3b586e7f5828ae7cf009970a70 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Wed, 10 Sep 2025 17:23:25 +0200
Subject: [PATCH 17/19] fix: `toByteArray(InputStream)` Javadoc
---
src/main/java/org/apache/commons/io/IOUtils.java | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index 713d1dde110..94d7e5a68f6 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2648,8 +2648,9 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
* Reads all the bytes from an input stream in a byte array.
*
* The memory used by this method is proportional to the number
- * of bytes read, which is only limited by {@link Integer#MAX_VALUE}. This makes it unsuitable for
- * processing large input streams, unless sufficient heap space is available.
+ * of bytes read, which is only limited by {@link Integer#MAX_VALUE}. Only streams
+ * that fit into a single byte array (a limit of roughly 2 GiB) can be processed
+ * with this method.
*
* @param inputStream The {@link InputStream} to read; must not be {@code null}.
* @return A new byte array.
@@ -2658,6 +2659,7 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz
* @throws NullPointerException If {@code inputStream} is {@code null}.
*/
public static byte[] toByteArray(final InputStream inputStream) throws IOException {
+ // Using SOFT_MAX_ARRAY_LENGTH guarantees that size() will not overflow
final UnsynchronizedByteArrayOutputStream output = copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_BUFFER_SIZE);
if (output.size() > SOFT_MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", SOFT_MAX_ARRAY_LENGTH));
From e6e2a4d6e1dafb74ef8162116183862fdd9f8d70 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Thu, 11 Sep 2025 09:03:59 +0200
Subject: [PATCH 18/19] fix: Javadoc
---
src/changes/changes.xml | 2 +-
src/main/java/org/apache/commons/io/IOUtils.java | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 348081a0782..bb8ad58cbe6 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -57,7 +57,7 @@ The type attribute can be add,update,fix,remove.
Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], int, int, long).
Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], long).
Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(int, long).
- Add IOUtils.toByteArray(InputStream, int, int) for safer incremental reading with size validation.
+ Add IOUtils.toByteArray(InputStream, int, int) for safer chunked reading with size validation.
Bump org.apache.commons:commons-parent from 85 to 87 #774.
[test] Bump commons-codec:commons-codec from 1.18.0 to 1.19.0.
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index 94d7e5a68f6..e5ff048db21 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -2670,8 +2670,8 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * This variant provides no safeguards against allocating very large arrays.
- * For large streams, prefer {@link #toByteArray(InputStream, int, int)},
+ * <p>This variant always allocates the whole requested array size,
+ * for a dynamic growing variant use {@link #toByteArray(InputStream, int, int)},
* which enforces stricter memory usage constraints.
*
* @param input the {@link InputStream} to read; must not be {@code null}.
@@ -2690,8 +2690,8 @@ public static byte[] toByteArray(final InputStream input, final int size) throws
/**
* Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}.
*
- * This variant provides no safeguards against allocating very large arrays.
- * For large streams, prefer {@link #toByteArray(InputStream, int, int)},
+ * <p>This variant always allocates the whole requested array size,
+ * for a dynamic growing variant use {@link #toByteArray(InputStream, int, int)},
* which enforces stricter memory usage constraints.
*
* @param input the {@link InputStream} to read; must not be {@code null}.
From 91636d30fad8814a8339624288c40bf133de6b96 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Thu, 11 Sep 2025 18:31:15 -0400
Subject: [PATCH 19/19] Fix comment formatting for SOFT_MAX_ARRAY_LENGTH
---
src/main/java/org/apache/commons/io/IOUtils.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java
index e5ff048db21..8a0c16320a2 100644
--- a/src/main/java/org/apache/commons/io/IOUtils.java
+++ b/src/main/java/org/apache/commons/io/IOUtils.java
@@ -224,7 +224,7 @@ public class IOUtils {
/**
* The maximum size of an array in many Java VMs.
*
- * The constant is copied from OpenJDK's {@link jdk.internal.util.ArraysSupport#SOFT_MAX_ARRAY_LENGTH}.
+ * The constant is copied from OpenJDK's {@link jdk.internal.util.ArraysSupport#SOFT_MAX_ARRAY_LENGTH}.
*
*/
private static final int SOFT_MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8;