From debe34abe1e9fb522cd94babb6c98a055f2e4b5a Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Thu, 4 Sep 2025 23:13:32 +0200 Subject: [PATCH 01/19] feat: Add incremental `toByteArray` method This introduces `toByteArray(InputStream input, int size, int bufferSize)`, which reads the stream in chunks of `bufferSize` instead of allocating the full array up front. By reading incrementally, the method: * Validates that the stream actually contains `size` bytes before completing the allocation. * Prevents excessive memory usage if a corrupted or malicious `size` value is provided. * Offers safer handling for untrusted input compared to the direct-allocation variant. --- src/changes/changes.xml | 1 + .../java/org/apache/commons/io/IOUtils.java | 129 ++++++++++++++---- .../apache/commons/io/RandomAccessFiles.java | 4 + .../org/apache/commons/io/IOUtilsTest.java | 45 ++++++ 4 files changed, 152 insertions(+), 27 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index de92deabf64..a10b4653880 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -57,6 +57,7 @@ The type attribute can be add,update,fix,remove. Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], int, int, long). Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], long). Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(int, long). + Added toByteArray(InputStream, int, int) for safer incremental reading with size validation. Bump org.apache.commons:commons-parent from 85 to 87 #774. [test] Bump commons-codec:commons-codec from 1.18.0 to 1.19.0. 
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index cd8d042a922..96c6e682407 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -65,6 +65,7 @@ import org.apache.commons.io.function.IOConsumer; import org.apache.commons.io.function.IOSupplier; import org.apache.commons.io.function.IOTriFunction; +import org.apache.commons.io.input.BoundedInputStream; import org.apache.commons.io.input.CharSequenceReader; import org.apache.commons.io.input.QueueInputStream; import org.apache.commons.io.output.AppendableWriter; @@ -2659,37 +2660,60 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti } /** - * Gets the contents of an {@link InputStream} as a {@code byte[]}. Use this method instead of - * {@link #toByteArray(InputStream)} when {@link InputStream} size is known. + * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - * @param input the {@link InputStream} to read. - * @param size the size of {@link InputStream} to read, where 0 < {@code size} <= length of input stream. - * @return byte [] of length {@code size}. - * @throws IOException if an I/O error occurs or {@link InputStream} length is smaller than parameter {@code size}. - * @throws IllegalArgumentException if {@code size} is less than zero. + *

<p>
+ * This variant allocates the target array immediately and attempts to fill it in one pass. + * It assumes that {@code size} is correct. + * If the stream ends prematurely, an {@link EOFException} is thrown. + *
</p>
+ * + *

<p>
+ * Important: This method does not defend against corrupted + * or untrusted {@code size} values. + * For untrusted input, use {@link #toByteArray(InputStream, int, int)} instead, + * which validates that the stream contains at least {@code size} bytes before allocating the target array. + *
</p>
+ * + * @param input the {@link InputStream} to read; must not be {@code null}. + * @param size the exact number of bytes to read; must be {@code >= 0}. + * @return a new byte array of length {@code size}. + * @throws IllegalArgumentException if {@code size} is negative. + * @throws EOFException if the stream ends before {@code size} bytes are read. + * @throws IOException if an I/O error occurs while reading. + * @throws NullPointerException if {@code input} is {@code null}. * @since 2.1 */ public static byte[] toByteArray(final InputStream input, final int size) throws IOException { - if (size == 0) { - return EMPTY_BYTE_ARRAY; + Objects.requireNonNull(input, "input"); + if (size < 0) { + throw new IllegalArgumentException("Size must be equal or greater than zero: " + size); } - return toByteArray(Objects.requireNonNull(input, "input")::read, size); + return toByteArray(input::read, size); } /** - * Gets contents of an {@link InputStream} as a {@code byte[]}. - * Use this method instead of {@link #toByteArray(InputStream)} - * when {@link InputStream} size is known. - * NOTE: the method checks that the length can safely be cast to an int without truncation - * before using {@link IOUtils#toByteArray(InputStream, int)} to read into the byte array. - * (Arrays can have no more than Integer.MAX_VALUE entries anyway.) + * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - * @param input the {@link InputStream} to read. - * @param size the size of {@link InputStream} to read, where 0 < {@code size} <= min(Integer.MAX_VALUE, length of input stream). - * @return byte [] the requested byte array, of length {@code size}. - * @throws IOException if an I/O error occurs or {@link InputStream} length is less than {@code size}. - * @throws IllegalArgumentException if size is less than zero or size is greater than Integer.MAX_VALUE. - * @see IOUtils#toByteArray(InputStream, int) + *

<p>
+ * This is a convenience overload of {@link #toByteArray(InputStream, int, int)} that accepts a + * {@code long} size parameter. The value is checked to ensure it does not exceed + * {@link Integer#MAX_VALUE} before being safely converted to {@code int}. + *
</p>
+ * + *

<p>
+ * All behavior, validation rules, and exceptions are otherwise identical to + * {@link #toByteArray(InputStream, int, int)}. + *
</p>
+ * + * @param input the {@link InputStream} to read; must not be {@code null}. + * @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}. + * @return a new byte array of length {@code size}. + * @throws IllegalArgumentException if {@code size} is negative or greater than {@link Integer#MAX_VALUE}. + * @throws EOFException if the stream ends before {@code size} bytes are read. + * @throws IOException if an I/O error occurs while reading. + * @throws NullPointerException if {@code input} is {@code null}. + * @see #toByteArray(InputStream, int, int) * @since 2.1 */ public static byte[] toByteArray(final InputStream input, final long size) throws IOException { @@ -2699,6 +2723,62 @@ public static byte[] toByteArray(final InputStream input, final long size) throw return toByteArray(input, (int) size); } + /** + * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. + * + *

<p>
+ * This variant validates that the stream actually contains {@code size} bytes. + * It is suitable for untrusted input because it prevents oversized allocations when the provided {@code size} + * is corrupted or malicious. + *
</p>
+ * + * + * + * @param input the {@link InputStream} to read; must not be {@code null}. + * @param size the exact number of bytes to read; must be {@code >= 0}. + * The actual bytes read are validated to equal {@code size}. + * @param bufferSize the buffer size for incremental reading; must be {@code > 0}. + * @return a new byte array of length {@code size}. + * @throws IllegalArgumentException if {@code size} is negative or {@code bufferSize <= 0}. + * @throws EOFException if the stream ends before {@code size} bytes are read. + * @throws IOException if an I/O error occurs while reading. + * @throws NullPointerException if {@code input} is {@code null}. + * @since 2.21.0 + */ + public static byte[] toByteArray(final InputStream input, final int size, final int bufferSize) throws IOException { + Objects.requireNonNull(input, "input"); + if (size < 0) { + throw new IllegalArgumentException("Size must be equal or greater than zero: " + size); + } + if (bufferSize <= 0) { + throw new IllegalArgumentException("Chunk size must be greater than zero: " + bufferSize); + } + if (size <= bufferSize) { + return toByteArray(input::read, size); + } + try (UnsynchronizedByteArrayOutputStream output = UnsynchronizedByteArrayOutputStream.builder() + .setBufferSize(bufferSize) + .get(); + InputStream boundedInput = BoundedInputStream.builder() + .setMaxCount(size) + .setPropagateClose(false) + .setInputStream(input) + .get()) { + output.write(boundedInput); + if (output.size() != size) { + throw new EOFException("Unexpected read size, current: " + output.size() + ", expected: " + size); + } + return output.toByteArray(); + } + } + /** * Gets the contents of an input as a {@code byte[]}. * @@ -2709,11 +2789,6 @@ public static byte[] toByteArray(final InputStream input, final long size) throw * @throws IllegalArgumentException if {@code size} is less than zero. 
*/ static byte[] toByteArray(final IOTriFunction input, final int size) throws IOException { - - if (size < 0) { - throw new IllegalArgumentException("Size must be equal or greater than zero: " + size); - } - if (size == 0) { return EMPTY_BYTE_ARRAY; } diff --git a/src/main/java/org/apache/commons/io/RandomAccessFiles.java b/src/main/java/org/apache/commons/io/RandomAccessFiles.java index 46de0dbf077..d9baa8f73b6 100644 --- a/src/main/java/org/apache/commons/io/RandomAccessFiles.java +++ b/src/main/java/org/apache/commons/io/RandomAccessFiles.java @@ -76,7 +76,11 @@ private static long length(final RandomAccessFile raf) throws IOException { * other I/O error occurs. */ public static byte[] read(final RandomAccessFile input, final long position, final int length) throws IOException { + Objects.requireNonNull(input, "input"); input.seek(position); + if (length < 0) { + throw new IllegalArgumentException("Size must be equal or greater than zero: " + length); + } return IOUtils.toByteArray(input::read, length); } diff --git a/src/test/java/org/apache/commons/io/IOUtilsTest.java b/src/test/java/org/apache/commons/io/IOUtilsTest.java index 56fd1307eb1..92925bdd785 100644 --- a/src/test/java/org/apache/commons/io/IOUtilsTest.java +++ b/src/test/java/org/apache/commons/io/IOUtilsTest.java @@ -90,6 +90,9 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; /** * This is used to test {@link IOUtils} for correctness. 
The following checks are performed: @@ -1659,6 +1662,48 @@ void testToByteArray_InputStream_SizeZero() throws Exception { } } + @ParameterizedTest + @MethodSource + void testToByteArray_InputStream_Size_BufferSize_Succeeds(byte[] data, int size, int bufferSize) throws IOException { + final ByteArrayInputStream input = new ByteArrayInputStream(data); + final byte[] expected = Arrays.copyOf(data, size); + final byte[] actual = IOUtils.toByteArray(input, size, bufferSize); + assertArrayEquals(expected, actual); + } + + private static Stream testToByteArray_InputStream_Size_BufferSize_Succeeds() { + final byte[] data = new byte[1024]; + for (int i = 0; i < 1024; i++) { + data[i] = (byte) i; + } + return Stream.of( + // Eager reading + Arguments.of(data.clone(), 512, 1024), + // Incremental reading + Arguments.of(data.clone(), 1024, 512), + // No reading + Arguments.of(data.clone(), 0, 128)); + } + + @ParameterizedTest + @MethodSource + void testToByteArray_InputStream_Size_BufferSize_Throws( + int size, int bufferSize, Class exceptionClass) throws IOException { + try (InputStream input = new NullInputStream(0)) { + assertThrows(exceptionClass, () -> IOUtils.toByteArray(input, size, bufferSize)); + } + } + + static Stream testToByteArray_InputStream_Size_BufferSize_Throws() { + return Stream.of( + // Negative size + Arguments.of(-1, 128, IllegalArgumentException.class), + // Invalid buffer size + Arguments.of(0, 0, IllegalArgumentException.class), + // Huge size: should not cause OutOfMemoryError + Arguments.of(Integer.MAX_VALUE, 128, EOFException.class)); + } + @Test void testToByteArray_Reader() throws IOException { final String charsetName = UTF_8; From d748e99d390eef117d54bf175dcaa569e2b9106d Mon Sep 17 00:00:00 2001 From: "Piotr P. 
Karwasz" Date: Fri, 5 Sep 2025 15:27:46 +0200 Subject: [PATCH 02/19] fix: move back positivity check to helper method --- src/main/java/org/apache/commons/io/IOUtils.java | 10 ++++------ .../java/org/apache/commons/io/RandomAccessFiles.java | 4 ---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 07bdb711bf9..9cdb8b2ff6e 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2684,13 +2684,8 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti * @throws NullPointerException if {@code input} is {@code null}. * @since 2.1 */ - @SuppressWarnings("resource") public static byte[] toByteArray(final InputStream input, final int size) throws IOException { - Objects.requireNonNull(input, "input"); - if (size < 0) { - throw new IllegalArgumentException("Size must be equal or greater than zero: " + size); - } - return toByteArray(input::read, size); + return toByteArray(Objects.requireNonNull(input, "input")::read, size); } /** @@ -2790,6 +2785,9 @@ public static byte[] toByteArray(final InputStream input, final int size, final * @throws IllegalArgumentException if {@code size} is less than zero. */ static byte[] toByteArray(final IOTriFunction input, final int size) throws IOException { + if (size < 0) { + throw new IllegalArgumentException("Size must be equal or greater than zero: " + size); + } if (size == 0) { return EMPTY_BYTE_ARRAY; } diff --git a/src/main/java/org/apache/commons/io/RandomAccessFiles.java b/src/main/java/org/apache/commons/io/RandomAccessFiles.java index d9baa8f73b6..46de0dbf077 100644 --- a/src/main/java/org/apache/commons/io/RandomAccessFiles.java +++ b/src/main/java/org/apache/commons/io/RandomAccessFiles.java @@ -76,11 +76,7 @@ private static long length(final RandomAccessFile raf) throws IOException { * other I/O error occurs. 
*/ public static byte[] read(final RandomAccessFile input, final long position, final int length) throws IOException { - Objects.requireNonNull(input, "input"); input.seek(position); - if (length < 0) { - throw new IllegalArgumentException("Size must be equal or greater than zero: " + length); - } return IOUtils.toByteArray(input::read, length); } From 33a4fe15a7c05a3b850b6a9872ac6dd3b7c0aaf6 Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Fri, 5 Sep 2025 15:37:09 +0200 Subject: [PATCH 03/19] fix: changelog entry --- src/changes/changes.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index a10b4653880..348081a0782 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -57,7 +57,7 @@ The type attribute can be add,update,fix,remove. Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], int, int, long). Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], long). Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(int, long). - Added toByteArray(InputStream, int, int) for safer incremental reading with size validation. + Add IOUtils.toByteArray(InputStream, int, int) for safer incremental reading with size validation. Bump org.apache.commons:commons-parent from 85 to 87 #774. [test] Bump commons-codec:commons-codec from 1.18.0 to 1.19.0. From 7362d3e34801662b029ea6ef4e005dfb1194c57b Mon Sep 17 00:00:00 2001 From: "Piotr P. 
Karwasz" Date: Fri, 5 Sep 2025 16:18:54 +0200 Subject: [PATCH 04/19] fix: Javadoc details --- .../java/org/apache/commons/io/IOUtils.java | 65 +++++-------------- 1 file changed, 18 insertions(+), 47 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 9cdb8b2ff6e..25e751f9308 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2638,23 +2638,24 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz } /** - * Gets the contents of an {@link InputStream} as a {@code byte[]}. - *

<p>
- * This method buffers the input internally, so there is no need to use a {@link BufferedInputStream}. - *
</p>
+ * Reads all remaining bytes from the given {@link InputStream} into a new {@code byte[]}. * - * @param inputStream the {@link InputStream} to read. - * @return the requested byte array. - * @throws NullPointerException if the InputStream is {@code null}. - * @throws IOException if an I/O error occurs or reading more than {@link Integer#MAX_VALUE} occurs. + *

<p>The method accumulates the data in temporary buffers and returns a single array + * containing the entire contents once the end of the stream is reached.</p>

+ * + * @param input the {@link InputStream} to read; must not be {@code null}. + * @return a new byte array. + * @throws IllegalArgumentException if the size of the stream is greater than {@code Integer.MAX_VALUE}. + * @throws IOException if an I/O error occurs while reading. + * @throws NullPointerException if {@code input} is {@code null}. */ - public static byte[] toByteArray(final InputStream inputStream) throws IOException { + public static byte[] toByteArray(final InputStream input) throws IOException { // We use a ThresholdingOutputStream to avoid reading AND writing more than Integer.MAX_VALUE. try (UnsynchronizedByteArrayOutputStream ubaOutput = UnsynchronizedByteArrayOutputStream.builder().get(); ThresholdingOutputStream thresholdOutput = new ThresholdingOutputStream(Integer.MAX_VALUE, os -> { throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", Integer.MAX_VALUE)); }, os -> ubaOutput)) { - copy(inputStream, thresholdOutput); + copy(input, thresholdOutput); return ubaOutput.toByteArray(); } } @@ -2662,18 +2663,8 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

<p>
- * This variant allocates the target array immediately and attempts to fill it in one pass. - * It assumes that {@code size} is correct. - * If the stream ends prematurely, an {@link EOFException} is thrown. - *
</p>
- * - *

<p>
- * Important: This method does not defend against corrupted - * or untrusted {@code size} values. - * For untrusted input, use {@link #toByteArray(InputStream, int, int)} instead, - * which validates that the stream contains at least {@code size} bytes before allocating the target array. - *
</p>
+ *

<p>The method allocates a single array of the requested size and fills it directly + * from the stream.</p>

* * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0}. @@ -2691,21 +2682,13 @@ public static byte[] toByteArray(final InputStream input, final int size) throws /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

<p>
- * This is a convenience overload of {@link #toByteArray(InputStream, int, int)} that accepts a - * {@code long} size parameter. The value is checked to ensure it does not exceed - * {@link Integer#MAX_VALUE} before being safely converted to {@code int}. - *
</p>
- * - *

<p>
- * All behavior, validation rules, and exceptions are otherwise identical to - * {@link #toByteArray(InputStream, int, int)}. - *
</p>
+ *

<p>The method allocates a single array of the requested size and fills it directly + * from the stream.</p>

* * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}. * @return a new byte array of length {@code size}. - * @throws IllegalArgumentException if {@code size} is negative or greater than {@link Integer#MAX_VALUE}. + * @throws IllegalArgumentException if {@code size} is negative or does not fit into an int. * @throws EOFException if the stream ends before {@code size} bytes are read. * @throws IOException if an I/O error occurs while reading. * @throws NullPointerException if {@code input} is {@code null}. @@ -2722,20 +2705,8 @@ public static byte[] toByteArray(final InputStream input, final long size) throw /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

<p>
- * This variant validates that the stream actually contains {@code size} bytes. - * It is suitable for untrusted input because it prevents oversized allocations when the provided {@code size} - * is corrupted or malicious. - *
</p>
- * - * <ul>
- * <li>If {@code size <= bufferSize}, the array is allocated directly and filled in a single pass.</li>
- * <li>
- * If {@code size > bufferSize}, the stream is read incrementally using a buffer of length {@code bufferSize}. - * This avoids allocating an excessively large array up front, - * but may temporarily double memory usage due to buffering.
- * </li>
- * </ul>
+ *

<p>The method accumulates the data in temporary buffers of size at most {@code bufferSize} + * and returns a single array containing the entire contents once the end of the stream is reached.</p>

* * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0}. From fe39b777c447f2eeb185ae2520ca6b547d84e719 Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Fri, 5 Sep 2025 16:26:47 +0200 Subject: [PATCH 05/19] fix: remove negative size check --- src/main/java/org/apache/commons/io/IOUtils.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 25e751f9308..80d3e62f5d1 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2721,13 +2721,11 @@ public static byte[] toByteArray(final InputStream input, final long size) throw */ public static byte[] toByteArray(final InputStream input, final int size, final int bufferSize) throws IOException { Objects.requireNonNull(input, "input"); - if (size < 0) { - throw new IllegalArgumentException("Size must be equal or greater than zero: " + size); - } if (bufferSize <= 0) { throw new IllegalArgumentException("Chunk size must be greater than zero: " + bufferSize); } if (size <= bufferSize) { + // throws if size < 0 return toByteArray(input::read, size); } try (UnsynchronizedByteArrayOutputStream output = UnsynchronizedByteArrayOutputStream.builder() From c6c79a3dc4fcfcc84a82189f9ae41e3a051a5d35 Mon Sep 17 00:00:00 2001 From: "Piotr P. 
Karwasz" Date: Fri, 5 Sep 2025 16:27:31 +0200 Subject: [PATCH 06/19] fix: exception message --- src/main/java/org/apache/commons/io/IOUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 80d3e62f5d1..288d4f1ff8d 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2722,7 +2722,7 @@ public static byte[] toByteArray(final InputStream input, final long size) throw public static byte[] toByteArray(final InputStream input, final int size, final int bufferSize) throws IOException { Objects.requireNonNull(input, "input"); if (bufferSize <= 0) { - throw new IllegalArgumentException("Chunk size must be greater than zero: " + bufferSize); + throw new IllegalArgumentException("Buffer size must be greater than zero: " + bufferSize); } if (size <= bufferSize) { // throws if size < 0 From 9439095923e901ef3e4eac9ae4f06c00268a280c Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Sat, 6 Sep 2025 08:05:45 +0200 Subject: [PATCH 07/19] fix: restore parameter name --- src/main/java/org/apache/commons/io/IOUtils.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 288d4f1ff8d..2f92b7bc5ab 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2643,19 +2643,19 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz *

<p>The method accumulates the data in temporary buffers and returns a single array * containing the entire contents once the end of the stream is reached.</p>

* - * @param input the {@link InputStream} to read; must not be {@code null}. + * @param inputStream the {@link InputStream} to read; must not be {@code null}. * @return a new byte array. * @throws IllegalArgumentException if the size of the stream is greater than {@code Integer.MAX_VALUE}. * @throws IOException if an I/O error occurs while reading. - * @throws NullPointerException if {@code input} is {@code null}. + * @throws NullPointerException if {@code inputStream} is {@code null}. */ - public static byte[] toByteArray(final InputStream input) throws IOException { + public static byte[] toByteArray(final InputStream inputStream) throws IOException { // We use a ThresholdingOutputStream to avoid reading AND writing more than Integer.MAX_VALUE. try (UnsynchronizedByteArrayOutputStream ubaOutput = UnsynchronizedByteArrayOutputStream.builder().get(); ThresholdingOutputStream thresholdOutput = new ThresholdingOutputStream(Integer.MAX_VALUE, os -> { throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", Integer.MAX_VALUE)); }, os -> ubaOutput)) { - copy(input, thresholdOutput); + copy(inputStream, thresholdOutput); return ubaOutput.toByteArray(); } } From 97d37a94de8c935ea8e4961e354a1cd1d5c49b6a Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Sat, 6 Sep 2025 08:15:14 +0200 Subject: [PATCH 08/19] fix: remove details and add guidance --- .../java/org/apache/commons/io/IOUtils.java | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 2f92b7bc5ab..dbb7b0e8d80 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2640,9 +2640,6 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz /** * Reads all remaining bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

<p>The method accumulates the data in temporary buffers and returns a single array - * containing the entire contents once the end of the stream is reached.</p>

- * * @param inputStream the {@link InputStream} to read; must not be {@code null}. * @return a new byte array. * @throws IllegalArgumentException if the size of the stream is greater than {@code Integer.MAX_VALUE}. @@ -2663,9 +2660,6 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

<p>The method allocates a single array of the requested size and fills it directly - * from the stream.</p>

- * * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0}. * @return a new byte array of length {@code size}. @@ -2682,9 +2676,6 @@ public static byte[] toByteArray(final InputStream input, final int size) throws /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

<p>The method allocates a single array of the requested size and fills it directly - * from the stream.</p>

- * * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}. * @return a new byte array of length {@code size}. @@ -2705,8 +2696,12 @@ public static byte[] toByteArray(final InputStream input, final long size) throw /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

<p>The method accumulates the data in temporary buffers of size at most {@code bufferSize} - * and returns a single array containing the entire contents once the end of the stream is reached.</p>

+ *

<p>When reading from an untrusted stream, this variant lowers the risk of + * {@link OutOfMemoryError} by allocating data in buffers of up to {@code bufferSize} + * bytes rather than in one large array.</p>

+ * + *

<p>Note, however, that this approach requires additional temporary memory + * compared to {@link #toByteArray(InputStream, int)}.</p>

* * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0}. From cbfa307f526f8c6fac4744fea135de16edbdb1b0 Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Sat, 6 Sep 2025 08:19:21 +0200 Subject: [PATCH 09/19] fix: simplify description --- src/main/java/org/apache/commons/io/IOUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index dbb7b0e8d80..878cea2918b 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2638,7 +2638,7 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz } /** - * Reads all remaining bytes from the given {@link InputStream} into a new {@code byte[]}. + * Reads all the bytes from an input stream in a byte array. * * @param inputStream the {@link InputStream} to read; must not be {@code null}. * @return a new byte array. From d7e886edb8d573fab394a37ffcf6c2fb624cc4e7 Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Sat, 6 Sep 2025 20:21:37 +0200 Subject: [PATCH 10/19] fix: apply an incremental threshold to all `toByteArray` overloads * Extends incremental (chunked) reading to all `toByteArray` variants when the requested size is unknown or exceeds 128 KiB. * The 128 KiB threshold matches the default buffer size used in CPython. * Updates Javadoc to emphasize that memory usage grows **proportionally** with the number of bytes actually **read**, making these methods suitable for large streams when sufficient memory is available. 
--- .../java/org/apache/commons/io/IOUtils.java | 114 ++++++++++++------ 1 file changed, 77 insertions(+), 37 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 878cea2918b..f06c082bec5 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -73,7 +73,6 @@ import org.apache.commons.io.output.NullOutputStream; import org.apache.commons.io.output.NullWriter; import org.apache.commons.io.output.StringBuilderWriter; -import org.apache.commons.io.output.ThresholdingOutputStream; import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; /** @@ -222,6 +221,21 @@ public class IOUtils { */ private static final char[] SCRATCH_CHAR_BUFFER_WO = charArray(); + /** + * The maximum size of an array in many Java VMs. + */ + private static final int MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8; + + /* + * Default maximum chunk size used when copying large streams into a byte array. + *

<p>
+ * This value is somewhat arbitrary, currently aligned with the value used by + * Python + * for copying streams. + *
</p>
+ */ + private static final int DEFAULT_CHUNK_SIZE = 128 * 1024; + /** * Returns the given InputStream if it is already a {@link BufferedInputStream}, otherwise creates a * BufferedInputStream from the given InputStream. @@ -2640,26 +2654,34 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz /** * Reads all the bytes from an input stream in a byte array. * - * @param inputStream the {@link InputStream} to read; must not be {@code null}. - * @return a new byte array. - * @throws IllegalArgumentException if the size of the stream is greater than {@code Integer.MAX_VALUE}. - * @throws IOException if an I/O error occurs while reading. - * @throws NullPointerException if {@code inputStream} is {@code null}. + *

The memory used by this method is proportional to the number + * of bytes read, which is only limited by {@link Integer#MAX_VALUE}. This makes it unsuitable for + * processing large input streams, unless sufficient heap space is available.

+ * + * @param inputStream The {@link InputStream} to read; must not be {@code null}. + * @return A new byte array. + * @throws IllegalArgumentException If the size of the stream is greater than the maximum array size. + * @throws IOException If an I/O error occurs while reading. + * @throws NullPointerException If {@code inputStream} is {@code null}. */ public static byte[] toByteArray(final InputStream inputStream) throws IOException { - // We use a ThresholdingOutputStream to avoid reading AND writing more than Integer.MAX_VALUE. - try (UnsynchronizedByteArrayOutputStream ubaOutput = UnsynchronizedByteArrayOutputStream.builder().get(); - ThresholdingOutputStream thresholdOutput = new ThresholdingOutputStream(Integer.MAX_VALUE, os -> { - throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", Integer.MAX_VALUE)); - }, os -> ubaOutput)) { - copy(inputStream, thresholdOutput); - return ubaOutput.toByteArray(); + final UnsynchronizedByteArrayOutputStream output = + copyToOutputStream(inputStream, MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE); + if (output.size() > MAX_ARRAY_LENGTH) { + throw new IllegalArgumentException( + String.format("Cannot read more than %,d into a byte array", MAX_ARRAY_LENGTH)); } + return output.toByteArray(); } /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * + *

The memory used by this method is proportional to the number + * of bytes read and limited by the specified {@code size}. This makes it suitable for + * processing large input streams, provided that sufficient heap space is + * available.

+ * * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0}. * @return a new byte array of length {@code size}. @@ -2670,12 +2692,17 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti * @since 2.1 */ public static byte[] toByteArray(final InputStream input, final int size) throws IOException { - return toByteArray(Objects.requireNonNull(input, "input")::read, size); + return toByteArray(input, size, DEFAULT_CHUNK_SIZE); } /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * + *

The memory used by this method is proportional to the number + * of bytes read and limited by the specified {@code size}. This makes it suitable for + * processing large input streams, provided that sufficient heap space is + * available.

+ * * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}. * @return a new byte array of length {@code size}. @@ -2696,46 +2723,63 @@ public static byte[] toByteArray(final InputStream input, final long size) throw /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

When reading from an untrusted stream, this variant lowers the risk of - * {@link OutOfMemoryError} by allocating data in buffers of up to {@code bufferSize} - * bytes rather than in one large array.

+ *

The memory used by this method is proportional to the number + * of bytes read and limited by the specified {@code size}. This makes it suitable for + * processing large input streams, provided that sufficient heap space is + * available.

* - *

Note, however, that this approach requires additional temporary memory - * compared to {@link #toByteArray(InputStream, int)}.

+ *

This method processes the input stream in successive chunks of up to + * {@code chunkSize} bytes.

* * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0}. * The actual bytes read are validated to equal {@code size}. - * @param bufferSize the buffer size for incremental reading; must be {@code > 0}. + * @param chunkSize The chunk size for incremental reading; must be {@code > 0}. * @return a new byte array of length {@code size}. - * @throws IllegalArgumentException if {@code size} is negative or {@code bufferSize <= 0}. + * @throws IllegalArgumentException if {@code size} is negative or {@code chunkSize <= 0}. * @throws EOFException if the stream ends before {@code size} bytes are read. * @throws IOException if an I/O error occurs while reading. * @throws NullPointerException if {@code input} is {@code null}. * @since 2.21.0 */ - public static byte[] toByteArray(final InputStream input, final int size, final int bufferSize) throws IOException { + public static byte[] toByteArray(final InputStream input, final int size, final int chunkSize) throws IOException { Objects.requireNonNull(input, "input"); - if (bufferSize <= 0) { - throw new IllegalArgumentException("Buffer size must be greater than zero: " + bufferSize); + if (chunkSize <= 0) { + throw new IllegalArgumentException("Chunk size must be greater than zero: " + chunkSize); } - if (size <= bufferSize) { + if (size <= chunkSize) { // throws if size < 0 return toByteArray(input::read, size); } + final UnsynchronizedByteArrayOutputStream output = copyToOutputStream(input, size, chunkSize); + if (output.size() != size) { + throw new EOFException("Unexpected read size, current: " + output.size() + ", expected: " + size); + } + return output.toByteArray(); + } + + /** + * Copies up to {@code size} bytes from the given {@link InputStream} into a new {@link UnsynchronizedByteArrayOutputStream}. + * + * + * @param input The {@link InputStream} to read; must not be {@code null}. 
+ * @param limit The maximum number of bytes to read; must be {@code >= 0}. + * The actual bytes read are validated to equal {@code size}. + * @param bufferSize The buffer size of the output stream; must be {@code > 0}. + * @return a ByteArrayOutputStream containing the read bytes. + */ + private static UnsynchronizedByteArrayOutputStream copyToOutputStream( + final InputStream input, final long limit, final int bufferSize) throws IOException { try (UnsynchronizedByteArrayOutputStream output = UnsynchronizedByteArrayOutputStream.builder() .setBufferSize(bufferSize) .get(); InputStream boundedInput = BoundedInputStream.builder() - .setMaxCount(size) + .setMaxCount(limit) .setPropagateClose(false) .setInputStream(input) .get()) { output.write(boundedInput); - if (output.size() != size) { - throw new EOFException("Unexpected read size, current: " + output.size() + ", expected: " + size); - } - return output.toByteArray(); + return output; } } @@ -2756,13 +2800,9 @@ static byte[] toByteArray(final IOTriFunction return EMPTY_BYTE_ARRAY; } final byte[] data = byteArray(size); - int offset = 0; - int read; - while (offset < size && (read = input.apply(data, offset, size - offset)) != EOF) { - offset += read; - } - if (offset != size) { - throw new IOException("Unexpected read size, current: " + offset + ", expected: " + size); + final int read = read(input, data, 0, size); + if (read != size) { + throw new IOException("Unexpected read size, current: " + read + ", expected: " + size); } return data; } From 38a6a2c6b7dcb3cf0646a54ce4a762913f6b7627 Mon Sep 17 00:00:00 2001 From: "Piotr P. 
Karwasz" Date: Mon, 8 Sep 2025 11:36:09 +0200 Subject: [PATCH 11/19] fix: Javadoc of constants --- src/main/java/org/apache/commons/io/IOUtils.java | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index f06c082bec5..b72578251b1 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -223,16 +223,14 @@ public class IOUtils { /** * The maximum size of an array in many Java VMs. + *

+ * The constant is copied from OpenJDK's {@link jdk.internal.util.ArraysSupport#SOFT_MAX_ARRAY_LENGTH}. + *

*/ - private static final int MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8; + private static final int SOFT_MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8; /* * Default maximum chunk size used when copying large streams into a byte array. - *

- * This value is somewhat arbitrary, currently aligned with the value used by - * Python - * for copying streams. - *

*/ private static final int DEFAULT_CHUNK_SIZE = 128 * 1024; @@ -2666,10 +2664,10 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz */ public static byte[] toByteArray(final InputStream inputStream) throws IOException { final UnsynchronizedByteArrayOutputStream output = - copyToOutputStream(inputStream, MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE); - if (output.size() > MAX_ARRAY_LENGTH) { + copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE); + if (output.size() > SOFT_MAX_ARRAY_LENGTH) { throw new IllegalArgumentException( - String.format("Cannot read more than %,d into a byte array", MAX_ARRAY_LENGTH)); + String.format("Cannot read more than %,d into a byte array", SOFT_MAX_ARRAY_LENGTH)); } return output.toByteArray(); } From 29f365be5d485dce7d03f0d0384dbf36fd40cc9d Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Mon, 8 Sep 2025 11:37:46 +0200 Subject: [PATCH 12/19] fix: Formatting --- src/main/java/org/apache/commons/io/IOUtils.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index b72578251b1..a5a16cd23ab 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2663,11 +2663,9 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz * @throws NullPointerException If {@code inputStream} is {@code null}. 
*/ public static byte[] toByteArray(final InputStream inputStream) throws IOException { - final UnsynchronizedByteArrayOutputStream output = - copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE); + final UnsynchronizedByteArrayOutputStream output = copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE); if (output.size() > SOFT_MAX_ARRAY_LENGTH) { - throw new IllegalArgumentException( - String.format("Cannot read more than %,d into a byte array", SOFT_MAX_ARRAY_LENGTH)); + throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", SOFT_MAX_ARRAY_LENGTH)); } return output.toByteArray(); } @@ -2759,7 +2757,6 @@ public static byte[] toByteArray(final InputStream input, final int size, final /** * Copies up to {@code size} bytes from the given {@link InputStream} into a new {@link UnsynchronizedByteArrayOutputStream}. * - * * @param input The {@link InputStream} to read; must not be {@code null}. * @param limit The maximum number of bytes to read; must be {@code >= 0}. * The actual bytes read are validated to equal {@code size}. From 3afae073c62d55f241f3893612e85ce650b79bbe Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Wed, 10 Sep 2025 11:38:12 +0200 Subject: [PATCH 13/19] fix: restore previous `toByteArray(InputStream, int)` behavior --- src/main/java/org/apache/commons/io/IOUtils.java | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index a5a16cd23ab..fd90a3ac126 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2673,10 +2673,9 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

The memory used by this method is proportional to the number - * of bytes read and limited by the specified {@code size}. This makes it suitable for - * processing large input streams, provided that sufficient heap space is - * available.

+ *

This variant provides no safeguards against allocating very large arrays. + * For large streams, prefer {@link #toByteArray(InputStream, int, int)}, + * which enforces stricter memory usage constraints.

* * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0}. @@ -2688,16 +2687,15 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti * @since 2.1 */ public static byte[] toByteArray(final InputStream input, final int size) throws IOException { - return toByteArray(input, size, DEFAULT_CHUNK_SIZE); + return toByteArray(input::read, size); } /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

The memory used by this method is proportional to the number - * of bytes read and limited by the specified {@code size}. This makes it suitable for - * processing large input streams, provided that sufficient heap space is - * available.

+ *

This variant provides no safeguards against allocating very large arrays. + * For large streams, prefer {@link #toByteArray(InputStream, int, int)}, + * which enforces stricter memory usage constraints.

* * @param input the {@link InputStream} to read; must not be {@code null}. * @param size the exact number of bytes to read; must be {@code >= 0} and {@code <= Integer.MAX_VALUE}. From 4aba097d871332ca1ac537abb5f6880c0bf0ffc3 Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Wed, 10 Sep 2025 11:39:17 +0200 Subject: [PATCH 14/19] fix: use default buffer size as chunk size --- src/main/java/org/apache/commons/io/IOUtils.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index fd90a3ac126..f580c0613a4 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -229,11 +229,6 @@ public class IOUtils { */ private static final int SOFT_MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8; - /* - * Default maximum chunk size used when copying large streams into a byte array. - */ - private static final int DEFAULT_CHUNK_SIZE = 128 * 1024; - /** * Returns the given InputStream if it is already a {@link BufferedInputStream}, otherwise creates a * BufferedInputStream from the given InputStream. @@ -2663,7 +2658,7 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz * @throws NullPointerException If {@code inputStream} is {@code null}. */ public static byte[] toByteArray(final InputStream inputStream) throws IOException { - final UnsynchronizedByteArrayOutputStream output = copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_CHUNK_SIZE); + final UnsynchronizedByteArrayOutputStream output = copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_BUFFER_SIZE); if (output.size() > SOFT_MAX_ARRAY_LENGTH) { throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", SOFT_MAX_ARRAY_LENGTH)); } From 15b249da4f4e6bc8238dfce9ae34f4d21f7f88a4 Mon Sep 17 00:00:00 2001 From: "Piotr P. 
Karwasz" Date: Wed, 10 Sep 2025 17:13:04 +0200 Subject: [PATCH 15/19] fix: possible NPE --- src/main/java/org/apache/commons/io/IOUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index f580c0613a4..bc0e68470a9 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2682,7 +2682,7 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti * @since 2.1 */ public static byte[] toByteArray(final InputStream input, final int size) throws IOException { - return toByteArray(input::read, size); + return toByteArray(Objects.requireNonNull(input, "input")::read, size); } /** From 624de9fa1a2d376e944219249c56639e02da441b Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Wed, 10 Sep 2025 17:13:28 +0200 Subject: [PATCH 16/19] fix: remove unrelated change --- src/main/java/org/apache/commons/io/IOUtils.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index bc0e68470a9..713d1dde110 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2788,9 +2788,13 @@ static byte[] toByteArray(final IOTriFunction return EMPTY_BYTE_ARRAY; } final byte[] data = byteArray(size); - final int read = read(input, data, 0, size); - if (read != size) { - throw new IOException("Unexpected read size, current: " + read + ", expected: " + size); + int offset = 0; + int read; + while (offset < size && (read = input.apply(data, offset, size - offset)) != EOF) { + offset += read; + } + if (offset != size) { + throw new IOException("Unexpected read size, current: " + offset + ", expected: " + size); } return data; } From eb6c2bccb61dda3b586e7f5828ae7cf009970a70 Mon Sep 17 00:00:00 2001 From: 
"Piotr P. Karwasz" Date: Wed, 10 Sep 2025 17:23:25 +0200 Subject: [PATCH 17/19] fix: `toByteArray(InputStream)` Javadoc --- src/main/java/org/apache/commons/io/IOUtils.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 713d1dde110..94d7e5a68f6 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2648,8 +2648,9 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz * Reads all the bytes from an input stream in a byte array. * *

The memory used by this method is proportional to the number - * of bytes read, which is only limited by {@link Integer#MAX_VALUE}. This makes it unsuitable for - * processing large input streams, unless sufficient heap space is available.

+ * of bytes read, which is only limited by {@link Integer#MAX_VALUE}. Only streams + * which fit into a single byte array with roughly 2 GiB limit can be processed + * with this method.

* * @param inputStream The {@link InputStream} to read; must not be {@code null}. * @return A new byte array. @@ -2658,6 +2659,7 @@ public static BufferedReader toBufferedReader(final Reader reader, final int siz * @throws NullPointerException If {@code inputStream} is {@code null}. */ public static byte[] toByteArray(final InputStream inputStream) throws IOException { + // Using SOFT_MAX_ARRAY_LENGTH guarantees that size() will not overflow final UnsynchronizedByteArrayOutputStream output = copyToOutputStream(inputStream, SOFT_MAX_ARRAY_LENGTH + 1, DEFAULT_BUFFER_SIZE); if (output.size() > SOFT_MAX_ARRAY_LENGTH) { throw new IllegalArgumentException(String.format("Cannot read more than %,d into a byte array", SOFT_MAX_ARRAY_LENGTH)); From e6e2a4d6e1dafb74ef8162116183862fdd9f8d70 Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Thu, 11 Sep 2025 09:03:59 +0200 Subject: [PATCH 18/19] fix: Javadoc --- src/changes/changes.xml | 2 +- src/main/java/org/apache/commons/io/IOUtils.java | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 348081a0782..bb8ad58cbe6 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -57,7 +57,7 @@ The type attribute can be add,update,fix,remove. Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], int, int, long). Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(byte[], long). Add org.apache.commons.io.output.ProxyOutputStream.writeRepeat(int, long). - Add IOUtils.toByteArray(InputStream, int, int) for safer incremental reading with size validation. + Add IOUtils.toByteArray(InputStream, int, int) for safer chunked reading with size validation. Bump org.apache.commons:commons-parent from 85 to 87 #774. [test] Bump commons-codec:commons-codec from 1.18.0 to 1.19.0. 
diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index 94d7e5a68f6..e5ff048db21 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -2670,8 +2670,8 @@ public static byte[] toByteArray(final InputStream inputStream) throws IOExcepti /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

This variant provides no safeguards against allocating very large arrays. - * For large streams, prefer {@link #toByteArray(InputStream, int, int)}, + *

This variant always allocates the whole requested array size, + * for a dynamic growing variant use {@link #toByteArray(InputStream, int, int)}, * which enforces stricter memory usage constraints.

* * @param input the {@link InputStream} to read; must not be {@code null}. @@ -2690,8 +2690,8 @@ public static byte[] toByteArray(final InputStream input, final int size) throws /** * Reads exactly {@code size} bytes from the given {@link InputStream} into a new {@code byte[]}. * - *

This variant provides no safeguards against allocating very large arrays. - * For large streams, prefer {@link #toByteArray(InputStream, int, int)}, + *

This variant always allocates the whole requested array size, + * for a dynamic growing variant use {@link #toByteArray(InputStream, int, int)}, * which enforces stricter memory usage constraints.

* * @param input the {@link InputStream} to read; must not be {@code null}. From 91636d30fad8814a8339624288c40bf133de6b96 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Thu, 11 Sep 2025 18:31:15 -0400 Subject: [PATCH 19/19] Fix comment formatting for SOFT_MAX_ARRAY_LENGTH --- src/main/java/org/apache/commons/io/IOUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/io/IOUtils.java b/src/main/java/org/apache/commons/io/IOUtils.java index e5ff048db21..8a0c16320a2 100644 --- a/src/main/java/org/apache/commons/io/IOUtils.java +++ b/src/main/java/org/apache/commons/io/IOUtils.java @@ -224,7 +224,7 @@ public class IOUtils { /** * The maximum size of an array in many Java VMs. *

- * The constant is copied from OpenJDK's {@link jdk.internal.util.ArraysSupport#SOFT_MAX_ARRAY_LENGTH}. + * The constant is copied from OpenJDK's {@link jdk.internal.util.ArraysSupport#SOFT_MAX_ARRAY_LENGTH}. *

*/ private static final int SOFT_MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8;