Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@

import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -150,21 +151,15 @@ public static boolean matches(final byte[] signature, final int length) {
/** True if stream is at EOF. */
private boolean atEof;

/** Size of the current entry. */
private long entrySize;

/** How far into the current entry the stream has read. */
private long entryOffset;

/** Input streams for reading sparse entries. **/
private List<InputStream> sparseInputStreams;

/** The index of current input stream being read when reading sparse entries. */
private int currentSparseInputStreamIndex;

/** The meta-data about the current entry. */
private TarArchiveEntry currEntry;

/** The current input stream. */
private InputStream currentInputStream;

/** The encoding of the file. */
private final ZipEncoding zipEncoding;

Expand Down Expand Up @@ -322,6 +317,17 @@ public TarArchiveInputStream(final InputStream inputStream, final String encodin
this(builder().setInputStream(inputStream).setCharset(encoding));
}

/**
 * Callback invoked after every read on the current entry's bounded stream.
 * Keeps the global byte counter and the position within the entry up to date,
 * and detects archives that end before the entry's declared size was delivered.
 *
 * @param read the value returned by the underlying read: a byte count, or -1 at EOF.
 * @throws EOFException if EOF was reached before {@code currEntry.getSize()} bytes were read.
 */
private void afterRead(final int read) throws IOException {
    // Keep the stream-wide read statistics in sync.
    count(read);
    if (read == -1) {
        // EOF before the header-declared size: the archive is cut short.
        if (entryOffset < currEntry.getSize()) {
            throw new EOFException(String.format("Truncated TAR archive: entry '%s' expected %,d bytes, actual %,d", currEntry.getName(), currEntry.getSize(),
                    entryOffset));
        }
    } else {
        entryOffset += read;
    }
}

/**
* Gets the available data that can be read from the current entry in the archive. This does not indicate how much data is left in the entire archive, only
* in the current entry. This value is determined from the entry's size header field and the amount of data already read from the current entry.
Expand Down Expand Up @@ -350,8 +356,7 @@ public int available() throws IOException {
* </p>
*/
private void buildSparseInputStreams() throws IOException {
currentSparseInputStreamIndex = -1;
sparseInputStreams = new ArrayList<>();
final List<InputStream> sparseInputStreams = new ArrayList<>();
final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
// Stream doesn't need to be closed at all as it doesn't use any resources
final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR
Expand All @@ -377,15 +382,15 @@ private void buildSparseInputStreams() throws IOException {
// @formatter:off
sparseInputStreams.add(BoundedInputStream.builder()
.setInputStream(in)
.setAfterRead(this::afterRead)
.setMaxCount(sparseHeader.getNumbytes())
.setPropagateClose(false)
.get());
// @formatter:on
}
offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
}
if (!sparseInputStreams.isEmpty()) {
currentSparseInputStreamIndex = 0;
}
currentInputStream = new SequenceInputStream(Collections.enumeration(sparseInputStreams));
}

/**
Expand All @@ -406,10 +411,9 @@ public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
@Override
public void close() throws IOException {
// Close all the input streams in sparseInputStreams
if (sparseInputStreams != null) {
for (final InputStream inputStream : sparseInputStreams) {
inputStream.close();
}
if (currentInputStream != null) {
currentInputStream.close();
currentInputStream = null;
}
in.close();
}
Expand All @@ -425,26 +429,6 @@ private void consumeRemainderOfLastBlock() throws IOException {
}
}

/**
 * Computes how many bytes were really skipped. {@link FileInputStream#skip(long)} reports the requested count even when the file ends sooner, so for that
 * stream type the result is capped by the bytes that were available beforehand.
 *
 * @param available value of {@link InputStream#available()} taken before skipping.
 * @param skipped value returned by {@link InputStream#skip(long)}.
 * @param expected number of bytes the caller asked to skip.
 * @return the number of bytes actually skipped (always equal to {@code expected}, otherwise an exception is thrown).
 * @throws IOException if fewer bytes than expected could be skipped, i.e. the archive is truncated.
 */
private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
    // FileInputStream.skip() over-reports past EOF; clamp to what was available.
    final long actuallySkipped = in instanceof FileInputStream ? Math.min(skipped, available) : skipped;
    if (actuallySkipped != expected) {
        throw new ArchiveException("Truncated TAR archive");
    }
    return actuallySkipped;
}

/**
* Gets the current TAR Archive Entry that this input stream is processing
*
Expand Down Expand Up @@ -509,8 +493,8 @@ public TarArchiveEntry getNextEntry() throws IOException {
boolean lastWasSpecial = false;
do {
// If there is a current entry, skip any unread data and padding
if (currEntry != null) {
IOUtils.skip(this, Long.MAX_VALUE); // Skip to end of current entry
if (currentInputStream != null) {
IOUtils.skip(currentInputStream, Long.MAX_VALUE); // Skip to end of current entry
skipRecordPadding(); // Skip padding to align to the next record
}
// Read the next header record
Expand All @@ -525,23 +509,32 @@ public TarArchiveEntry getNextEntry() throws IOException {
}
// Parse the header into a new entry
currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
// Set up the input stream for the new entry
currentInputStream = BoundedInputStream.builder()
.setInputStream(in)
.setAfterRead(this::afterRead)
.setMaxCount(currEntry.getSize())
.setPropagateClose(false)
.get();
entryOffset = 0;
entrySize = currEntry.getSize();
lastWasSpecial = TarUtils.isSpecialTarRecord(currEntry);
if (lastWasSpecial) {
// Handle PAX, GNU long name, or other special records
TarUtils.handleSpecialTarRecord(this, zipEncoding, getMaxEntryNameLength(), currEntry, paxHeaders, sparseHeaders, globalPaxHeaders,
globalSparseHeaders);
TarUtils.handleSpecialTarRecord(currentInputStream, zipEncoding, getMaxEntryNameLength(), currEntry, paxHeaders, sparseHeaders,
globalPaxHeaders, globalSparseHeaders);
}
} while (lastWasSpecial);
// Apply global and local PAX headers
TarUtils.applyPaxHeadersToEntry(currEntry, paxHeaders, sparseHeaders, globalPaxHeaders, globalSparseHeaders);
// Handle sparse files
if (currEntry.isSparse()) {
if (currEntry.isOldGNUSparse()) {
// Old GNU sparse format uses extra header blocks for metadata.
// These blocks are not included in the entry’s size, so we cannot
// rely on BoundedInputStream here.
readOldGNUSparse();
} else if (currEntry.isPaxGNU1XSparse()) {
currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(in, getRecordSize()));
currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(currentInputStream, getRecordSize()));
}
// sparse headers are all done reading, we need to build
// sparse input streams using these sparse headers
Expand All @@ -551,8 +544,6 @@ public TarArchiveEntry getNextEntry() throws IOException {
if (currEntry.isDirectory() && !currEntry.getName().endsWith("/")) {
currEntry.setName(currEntry.getName() + "/");
}
// Update entry size in case it changed due to PAX headers
entrySize = currEntry.getSize();
return currEntry;
}

Expand Down Expand Up @@ -664,33 +655,13 @@ public int read(final byte[] buf, final int offset, int numToRead) throws IOExce
if (numToRead == 0) {
return 0;
}
int totalRead = 0;
if (isAtEOF() || isDirectory()) {
return -1;
}
if (currEntry == null) {
if (currEntry == null || currentInputStream == null) {
throw new IllegalStateException("No current tar entry");
}
if (entryOffset >= currEntry.getRealSize()) {
return -1;
}
numToRead = Math.min(numToRead, available());
if (currEntry.isSparse()) {
// for sparse entries, we need to read them in another way
totalRead = readSparse(buf, offset, numToRead);
} else {
totalRead = in.read(buf, offset, numToRead);
}
if (totalRead == -1) {
if (numToRead > 0) {
throw new EOFException("Truncated TAR archive");
}
setAtEOF(true);
} else {
count(totalRead);
entryOffset += totalRead;
}
return totalRead;
return currentInputStream.read(buf, offset, numToRead);
}

/**
Expand All @@ -710,9 +681,6 @@ private void readOldGNUSparse() throws IOException {
currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
} while (entry.isExtended());
}
// sparse headers are all done reading, we need to build
// sparse input streams using these sparse headers
buildSparseInputStreams();
}

/**
Expand All @@ -730,52 +698,6 @@ protected byte[] readRecord() throws IOException {
return recordBuffer;
}

/**
 * Reads data for a sparse entry.
 * <p>
 * A sparse tar entry stores only its non-zero data regions; the "holes" (runs of zero bytes) are not stored in the archive. Each sparse header gives the
 * offset where a non-zero region starts and numbytes, its length. {@code sparseInputStreams} holds one stream per region, interleaved with zero-filled
 * streams for the holes, so reading the streams in order reconstructs the full file content.
 * </p>
 *
 * @param buf The buffer into which to place bytes read.
 * @param offset The offset at which to place bytes read.
 * @param numToRead The number of bytes to read.
 * @return The number of bytes read, or -1 at EOF.
 * @throws IOException on error.
 */
private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
    // No per-region streams were built: fall back to the raw archive stream.
    if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
        return in.read(buf, offset, numToRead);
    }
    // All region streams have been consumed: the entry is at EOF.
    if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
        return -1;
    }
    final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
    final int readLen = currentInputStream.read(buf, offset, numToRead);
    // Last region stream: its result (including EOF) is the overall result.
    if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
        return readLen;
    }
    // Current region is exhausted: advance to the next stream and retry.
    if (readLen == -1) {
        currentSparseInputStreamIndex++;
        return readSparse(buf, offset, numToRead);
    }
    // Short read: advance to the next stream and top up the remainder of the request.
    if (readLen < numToRead) {
        currentSparseInputStreamIndex++;
        final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
        if (readLenOfNext == -1) {
            return readLen;
        }
        return readLen + readLenOfNext;
    }
    // readLen == numToRead: the request was fully satisfied by the current region.
    return readLen;
}

/**
* Since we do not support marking just yet, we do nothing.
*/
Expand Down Expand Up @@ -816,21 +738,11 @@ public long skip(final long n) throws IOException {
if (n <= 0 || isDirectory()) {
return 0;
}
final long availableOfInputStream = in.available();
final long available = currEntry.getRealSize() - entryOffset;
final long numToSkip = Math.min(n, available);
long skipped;
if (!currEntry.isSparse()) {
skipped = IOUtils.skip(in, numToSkip);
// for non-sparse entry, we should get the bytes actually skipped bytes along with
// inputStream.available() if inputStream is instance of FileInputStream
skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
} else {
skipped = skipSparse(numToSkip);
if (currEntry == null || currentInputStream == null) {
throw new IllegalStateException("No current tar entry");
}
count(skipped);
entryOffset += skipped;
return skipped;
// Use Apache Commons IO to skip as it handles skipping fully
return org.apache.commons.io.IOUtils.skip(currentInputStream, n);
}

/**
Expand All @@ -839,37 +751,15 @@ public long skip(final long n) throws IOException {
* @throws IOException if a truncated tar archive is detected.
*/
private void skipRecordPadding() throws IOException {
if (!isDirectory() && this.entrySize > 0 && this.entrySize % getRecordSize() != 0) {
final long available = in.available();
final long numRecords = this.entrySize / getRecordSize() + 1;
final long padding = numRecords * getRecordSize() - this.entrySize;
long skipped = IOUtils.skip(in, padding);
skipped = getActuallySkipped(available, skipped, padding);
final long entrySize = currEntry != null ? currEntry.getSize() : 0;
if (!isDirectory() && entrySize > 0 && entrySize % getRecordSize() != 0) {
final long padding = getRecordSize() - (entrySize % getRecordSize());
final long skipped = org.apache.commons.io.IOUtils.skip(in, padding);
count(skipped);
}
}

/**
* Skip n bytes from current input stream, if the current input stream doesn't have enough data to skip, jump to the next input stream and skip the rest
* bytes, keep doing this until total n bytes are skipped or the input streams are all skipped
*
* @param n bytes of data to skip.
* @return actual bytes of data skipped.
* @throws IOException if an I/O error occurs.
*/
private long skipSparse(final long n) throws IOException {
if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
return in.skip(n);
}
long bytesSkipped = 0;
while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
bytesSkipped += currentInputStream.skip(n - bytesSkipped);
if (bytesSkipped < n) {
currentSparseInputStreamIndex++;
if (skipped != padding) {
throw new EOFException(String.format("Truncated TAR archive: failed to skip record padding for entry '%s'", currEntry.getName()));
}
}
return bytesSkipped;
}

/**
Expand Down
Loading