From 2cf3b7c20cd220edda553d5265ea67ab6db0cc5a Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Sun, 28 Sep 2025 11:45:44 +0200
Subject: [PATCH 1/6] Improve sparse file handling performance
Previously, sparse files were processed recursively. On highly fragmented files, this led to deep recursion and significant inefficiency.
This change replaces the recursive approach with an iterative strategy, which scales better for files with many fragments. It also introduces generated tests that simulate sparse files with very high fragmentation to ensure correctness and performance under stress.
---
.../archivers/tar/ComposedTarInputStream.java | 103 +++++
.../archivers/tar/TarArchiveInputStream.java | 191 +++------
.../compress/archivers/tar/TarFile.java | 112 ++---
.../commons/compress/utils/ArchiveUtils.java | 18 +
.../utils/BoundedArchiveInputStream.java | 18 +-
.../archivers/TestArchiveGenerator.java | 390 ++++++++++++++++++
.../archivers/tar/SparseFilesTest.java | 61 +++
.../tar/TarArchiveInputStreamTest.java | 7 +-
8 files changed, 679 insertions(+), 221 deletions(-)
create mode 100644 src/main/java/org/apache/commons/compress/archivers/tar/ComposedTarInputStream.java
create mode 100644 src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/ComposedTarInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/ComposedTarInputStream.java
new file mode 100644
index 00000000000..6a95437d30c
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/ComposedTarInputStream.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.archivers.tar;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import org.apache.commons.compress.utils.ArchiveUtils;
+
+final class ComposedTarInputStream extends InputStream {
+
+ final Iterator<? extends InputStream> streams;
+ private final long size;
+ private InputStream current;
+ private long position;
+
+ ComposedTarInputStream(final Iterable<? extends InputStream> streams, final long size) {
+ this.streams = streams.iterator();
+ this.size = size;
+ this.current = this.streams.hasNext() ? this.streams.next() : null;
+ this.position = 0;
+ }
+
+ @Override
+ public void close() throws IOException {
+ while (current != null) {
+ current.close();
+ current = streams.hasNext() ? streams.next() : null;
+ }
+ }
+
+ @Override
+ public int read() throws IOException {
+ if (position >= size) {
+ return -1;
+ }
+ while (current != null) {
+ final int ret = current.read();
+ if (ret != -1) {
+ position++;
+ return ret;
+ }
+ nextStream();
+ }
+ throw new EOFException(String.format("Truncated TAR archive: expected %d bytes, but got only %d bytes", size, position));
+ }
+
+ @Override
+ public int read(final byte[] b, final int off, final int len) throws IOException {
+ ArchiveUtils.checkFromIndexSize(b, off, len);
+ if (len == 0) {
+ return 0;
+ }
+ if (position >= size) {
+ return -1;
+ }
+
+ final int toRead = (int) Math.min(size - position, len);
+ int remaining = toRead;
+ int dst = off;
+
+ while (current != null && remaining > 0) {
+ final int n = current.read(b, dst, remaining);
+ if (n == -1) {
+ nextStream();
+ continue;
+ }
+ position += n;
+ dst += n;
+ remaining -= n;
+ }
+
+ if (remaining == 0) {
+ return toRead;
+ }
+ throw new EOFException(String.format("Truncated TAR archive: expected %d bytes, but got only %d bytes", size, position));
+ }
+
+ private void nextStream() throws IOException {
+ if (current != null) {
+ current.close();
+ }
+ current = streams.hasNext() ? streams.next() : null;
+ }
+}
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index 80dd6859fb1..a8c02dc8a6b 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -25,6 +25,7 @@
package org.apache.commons.compress.archivers.tar;
import java.io.ByteArrayOutputStream;
+import java.io.EOFException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -149,21 +150,15 @@ public static boolean matches(final byte[] signature, final int length) {
/** True if stream is at EOF. */
private boolean atEof;
- /** Size of the current. */
- private long entrySize;
-
/** How far into the entry the stream is at. */
private long entryOffset;
- /** Input streams for reading sparse entries. **/
- private List<InputStream> sparseInputStreams;
-
- /** The index of current input stream being read when reading sparse entries. */
- private int currentSparseInputStreamIndex;
-
/** The meta-data about the current entry. */
private TarArchiveEntry currEntry;
+ /** The current input stream. */
+ private InputStream currentInputStream;
+
/** The encoding of the file. */
private final ZipEncoding zipEncoding;
@@ -332,8 +327,7 @@ public int available() throws IOException {
*
*/
private void buildSparseInputStreams() throws IOException {
- currentSparseInputStreamIndex = -1;
- sparseInputStreams = new ArrayList<>();
+ final List<InputStream> sparseInputStreams = new ArrayList<>();
final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
// Stream doesn't need to be closed at all as it doesn't use any resources
final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR
@@ -359,15 +353,15 @@ private void buildSparseInputStreams() throws IOException {
// @formatter:off
sparseInputStreams.add(BoundedInputStream.builder()
.setInputStream(in)
+ .setAfterRead(this::afterRead)
.setMaxCount(sparseHeader.getNumbytes())
+ .setPropagateClose(false)
.get());
// @formatter:on
}
offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
}
- if (!sparseInputStreams.isEmpty()) {
- currentSparseInputStreamIndex = 0;
- }
+ currentInputStream = new ComposedTarInputStream(sparseInputStreams, currEntry.getRealSize());
}
/**
@@ -388,10 +382,9 @@ public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
@Override
public void close() throws IOException {
// Close all the input streams in sparseInputStreams
- if (sparseInputStreams != null) {
- for (final InputStream inputStream : sparseInputStreams) {
- inputStream.close();
- }
+ if (currentInputStream != null) {
+ currentInputStream.close();
+ currentInputStream = null;
}
in.close();
}
@@ -411,7 +404,7 @@ private void consumeRemainderOfLastBlock() throws IOException {
* For FileInputStream, the skip always return the number you input, so we need the available bytes to determine how many bytes are actually skipped
*
* @param available available bytes returned by {@link InputStream#available()}.
- * @param skipped skipped bytes returned by {@link InputStream#skip()}.
+ * @param skipped skipped bytes returned by {@link InputStream#skip(long)}.
* @param expected bytes expected to skip.
* @return number of bytes actually skipped.
* @throws IOException if a truncated tar archive is detected.
@@ -491,8 +484,8 @@ public TarArchiveEntry getNextEntry() throws IOException {
boolean lastWasSpecial = false;
do {
// If there is a current entry, skip any unread data and padding
- if (currEntry != null) {
- IOUtils.skip(this, Long.MAX_VALUE); // Skip to end of current entry
+ if (currentInputStream != null) {
+ IOUtils.skip(currentInputStream, Long.MAX_VALUE); // Skip to end of current entry
skipRecordPadding(); // Skip padding to align to the next record
}
// Read the next header record
@@ -507,12 +500,18 @@ public TarArchiveEntry getNextEntry() throws IOException {
}
// Parse the header into a new entry
currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
+ // Set up the input stream for the new entry
+ currentInputStream = BoundedInputStream.builder()
+ .setInputStream(in)
+ .setAfterRead(this::afterRead)
+ .setMaxCount(currEntry.getSize())
+ .setPropagateClose(false)
+ .get();
entryOffset = 0;
- entrySize = currEntry.getSize();
lastWasSpecial = TarUtils.isSpecialTarRecord(currEntry);
if (lastWasSpecial) {
// Handle PAX, GNU long name, or other special records
- TarUtils.handleSpecialTarRecord(this, zipEncoding, currEntry, paxHeaders, sparseHeaders, globalPaxHeaders, globalSparseHeaders);
+ TarUtils.handleSpecialTarRecord(currentInputStream, zipEncoding, currEntry, paxHeaders, sparseHeaders, globalPaxHeaders, globalSparseHeaders);
}
} while (lastWasSpecial);
// Apply global and local PAX headers
@@ -520,9 +519,12 @@ public TarArchiveEntry getNextEntry() throws IOException {
// Handle sparse files
if (currEntry.isSparse()) {
if (currEntry.isOldGNUSparse()) {
+ // Old GNU sparse format uses extra header blocks for metadata.
+ // These blocks are not included in the entry’s size, so we cannot
+ // rely on BoundedInputStream here.
readOldGNUSparse();
} else if (currEntry.isPaxGNU1XSparse()) {
- currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(in, getRecordSize()));
+ currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(currentInputStream, getRecordSize()));
}
// sparse headers are all done reading, we need to build
// sparse input streams using these sparse headers
@@ -532,11 +534,20 @@ public TarArchiveEntry getNextEntry() throws IOException {
if (currEntry.isDirectory() && !currEntry.getName().endsWith("/")) {
currEntry.setName(currEntry.getName() + "/");
}
- // Update entry size in case it changed due to PAX headers
- entrySize = currEntry.getSize();
return currEntry;
}
+ private void afterRead(final int read) throws IOException {
+ // Count the bytes read
+ count(read);
+ // Check for truncated entries
+ if (read == -1 && entryOffset < currEntry.getSize()) {
+ throw new EOFException(String.format("Truncated TAR archive: entry '%s' expected %d bytes, but got %d", currEntry.getName(), currEntry.getSize(),
+ entryOffset));
+ }
+ entryOffset += Math.max(0, read);
+ }
+
/**
* Gets the next entry in this tar archive. This will skip over any remaining data in the current entry, if there is one, and place the input stream at the
* header of the next entry, and read the header and instantiate a new TarEntry from the header bytes and return that entry. If there are no more entries in
@@ -634,40 +645,23 @@ public boolean markSupported() {
* @param offset The offset at which to place bytes read.
* @param numToRead The number of bytes to read.
* @return The number of bytes read, or -1 at EOF.
+ * @throws NullPointerException if {@code buf} is null
+ * @throws IndexOutOfBoundsException if {@code [offset, offset + numToRead)} is not a valid range within {@code buf}
* @throws IOException on error
*/
@Override
public int read(final byte[] buf, final int offset, int numToRead) throws IOException {
+ ArchiveUtils.checkFromIndexSize(buf, offset, numToRead);
if (numToRead == 0) {
return 0;
}
- int totalRead = 0;
if (isAtEOF() || isDirectory()) {
return -1;
}
- if (currEntry == null) {
+ if (currEntry == null || currentInputStream == null) {
throw new IllegalStateException("No current tar entry");
}
- if (entryOffset >= currEntry.getRealSize()) {
- return -1;
- }
- numToRead = Math.min(numToRead, available());
- if (currEntry.isSparse()) {
- // for sparse entries, we need to read them in another way
- totalRead = readSparse(buf, offset, numToRead);
- } else {
- totalRead = in.read(buf, offset, numToRead);
- }
- if (totalRead == -1) {
- if (numToRead > 0) {
- throw new ArchiveException("Truncated TAR archive");
- }
- setAtEOF(true);
- } else {
- count(totalRead);
- entryOffset += totalRead;
- }
- return totalRead;
+ return currentInputStream.read(buf, offset, numToRead);
}
/**
@@ -687,9 +681,6 @@ private void readOldGNUSparse() throws IOException {
currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
} while (entry.isExtended());
}
- // sparse headers are all done reading, we need to build
- // sparse input streams using these sparse headers
- buildSparseInputStreams();
}
/**
@@ -707,52 +698,6 @@ protected byte[] readRecord() throws IOException {
return recordBuffer;
}
- /**
- * For sparse tar entries, there are many "holes"(consisting of all 0) in the file. Only the non-zero data is stored in tar files, and they are stored
- * separately. The structure of non-zero data is introduced by the sparse headers using the offset, where a block of non-zero data starts, and numbytes, the
- * length of the non-zero data block. When reading sparse entries, the actual data is read out with "holes" and non-zero data combined according to
- * the sparse headers.
- *
- * @param buf The buffer into which to place bytes read.
- * @param offset The offset at which to place bytes read.
- * @param numToRead The number of bytes to read.
- * @return The number of bytes read, or -1 at EOF.
- * @throws IOException on error.
- */
- private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
- // if there are no actual input streams, just read from the original input stream
- if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
- return in.read(buf, offset, numToRead);
- }
- if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
- return -1;
- }
- final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
- final int readLen = currentInputStream.read(buf, offset, numToRead);
- // if the current input stream is the last input stream,
- // just return the number of bytes read from current input stream
- if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
- return readLen;
- }
- // if EOF of current input stream is meet, open a new input stream and recursively call read
- if (readLen == -1) {
- currentSparseInputStreamIndex++;
- return readSparse(buf, offset, numToRead);
- }
- // if the rest data of current input stream is not long enough, open a new input stream
- // and recursively call read
- if (readLen < numToRead) {
- currentSparseInputStreamIndex++;
- final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
- if (readLenOfNext == -1) {
- return readLen;
- }
- return readLen + readLenOfNext;
- }
- // if the rest data of current input stream is enough(which means readLen == len), just return readLen
- return readLen;
- }
-
/**
* Since we do not support marking just yet, we do nothing.
*/
@@ -793,21 +738,11 @@ public long skip(final long n) throws IOException {
if (n <= 0 || isDirectory()) {
return 0;
}
- final long availableOfInputStream = in.available();
- final long available = currEntry.getRealSize() - entryOffset;
- final long numToSkip = Math.min(n, available);
- long skipped;
- if (!currEntry.isSparse()) {
- skipped = IOUtils.skip(in, numToSkip);
- // for non-sparse entry, we should get the bytes actually skipped bytes along with
- // inputStream.available() if inputStream is instance of FileInputStream
- skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
- } else {
- skipped = skipSparse(numToSkip);
+ if (currEntry == null || currentInputStream == null) {
+ throw new IllegalStateException("No current tar entry");
}
- count(skipped);
- entryOffset += skipped;
- return skipped;
+ // Use Apache Commons IO to skip as it handles skipping fully
+ return org.apache.commons.io.IOUtils.skip(currentInputStream, n);
}
/**
@@ -816,37 +751,15 @@ public long skip(final long n) throws IOException {
* @throws IOException if a truncated tar archive is detected.
*/
private void skipRecordPadding() throws IOException {
- if (!isDirectory() && this.entrySize > 0 && this.entrySize % getRecordSize() != 0) {
- final long available = in.available();
- final long numRecords = this.entrySize / getRecordSize() + 1;
- final long padding = numRecords * getRecordSize() - this.entrySize;
- long skipped = IOUtils.skip(in, padding);
- skipped = getActuallySkipped(available, skipped, padding);
+ final long entrySize = currEntry != null ? currEntry.getSize() : 0;
+ if (!isDirectory() && entrySize > 0 && entrySize % getRecordSize() != 0) {
+ final long padding = getRecordSize() - (entrySize % getRecordSize());
+ final long skipped = org.apache.commons.io.IOUtils.skip(in, padding);
count(skipped);
- }
- }
-
- /**
- * Skip n bytes from current input stream, if the current input stream doesn't have enough data to skip, jump to the next input stream and skip the rest
- * bytes, keep doing this until total n bytes are skipped or the input streams are all skipped
- *
- * @param n bytes of data to skip.
- * @return actual bytes of data skipped.
- * @throws IOException if an I/O error occurs.
- */
- private long skipSparse(final long n) throws IOException {
- if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
- return in.skip(n);
- }
- long bytesSkipped = 0;
- while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
- final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
- bytesSkipped += currentInputStream.skip(n - bytesSkipped);
- if (bytesSkipped < n) {
- currentSparseInputStreamIndex++;
+ if (skipped != padding) {
+ throw new EOFException(String.format("Truncated TAR archive: failed to skip record padding for entry '%s'", currEntry.getName()));
}
}
- return bytesSkipped;
}
/**
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java
index 9ebb6c1d6e1..d12789dc105 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java
@@ -18,7 +18,10 @@
*/
package org.apache.commons.compress.archivers.tar;
+import static java.util.Objects.requireNonNull;
+
import java.io.Closeable;
+import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -27,6 +30,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
@@ -58,85 +62,30 @@ private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream
private final TarArchiveEntry entry;
- private long entryOffset;
-
- private int currentSparseInputStreamIndex;
-
BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
- super(entry.getDataOffset(), entry.getRealSize());
- if (channel.size() - entry.getSize() < entry.getDataOffset()) {
- throw new ArchiveException("Entry size exceeds archive size");
- }
+ super(entry.getDataOffset(), entry.getSize());
this.entry = entry;
this.channel = channel;
}
@Override
protected int read(final long pos, final ByteBuffer buf) throws IOException {
- if (entryOffset >= entry.getRealSize()) {
- return -1;
- }
- final int totalRead;
- if (entry.isSparse()) {
- totalRead = readSparse(entryOffset, buf, buf.limit());
- } else {
- totalRead = readArchive(pos, buf);
- }
+ requireNonNull(buf, "ByteBuffer");
+ // The caller ensures that [pos, pos + buf.remaining()] is within [start, end]
+ channel.position(pos);
+ final int totalRead = channel.read(buf);
if (totalRead == -1) {
- if (buf.array().length > 0) {
- throw new ArchiveException("Truncated TAR archive");
+ if (buf.remaining() > 0) {
+ throw new EOFException(String.format("Truncated TAR archive: expected at least %d bytes, but got only %d bytes",
+ entry.getDataOffset() + entry.getSize(), channel.position()));
}
+ // Marks the TarFile as having reached EOF.
setAtEOF(true);
} else {
- entryOffset += totalRead;
buf.flip();
}
return totalRead;
}
-
- private int readArchive(final long pos, final ByteBuffer buf) throws IOException {
- channel.position(pos);
- return channel.read(buf);
- }
-
- private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException {
- // if there are no actual input streams, just read from the original archive
- final List<InputStream> entrySparseInputStreams = sparseInputStreams.get(entry.getName());
- if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) {
- return readArchive(entry.getDataOffset() + pos, buf);
- }
- if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) {
- return -1;
- }
- final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex);
- final byte[] bufArray = new byte[numToRead];
- final int readLen = currentInputStream.read(bufArray);
- if (readLen != -1) {
- buf.put(bufArray, 0, readLen);
- }
- // if the current input stream is the last input stream,
- // just return the number of bytes read from current input stream
- if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) {
- return readLen;
- }
- // if EOF of current input stream is meet, open a new input stream and recursively call read
- if (readLen == -1) {
- currentSparseInputStreamIndex++;
- return readSparse(pos, buf, numToRead);
- }
- // if the rest data of current input stream is not long enough, open a new input stream
- // and recursively call read
- if (readLen < numToRead) {
- currentSparseInputStreamIndex++;
- final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen);
- if (readLenOfNext == -1) {
- return readLen;
- }
- return readLen + readLenOfNext;
- }
- // if the rest data of current input stream is enough(which means readLen == len), just return readLen
- return readLen;
- }
}
// @formatter:off
@@ -462,6 +411,12 @@ public List<TarArchiveEntry> getEntries() {
*/
public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
try {
+ // Sparse entries are composed of multiple fragments: wrap them in a ComposedTarInputStream
+ if (entry.isSparse()) {
+ final List<InputStream> streams = sparseInputStreams.get(entry.getName());
+ return new ComposedTarInputStream(streams != null ? streams : Collections.emptyList(), entry.getRealSize());
+ }
+ // Regular entries are bounded: wrap in BoundedTarEntryInputStream to enforce size and detect premature EOF
return new BoundedTarEntryInputStream(entry, archive);
} catch (final RuntimeException e) {
throw new ArchiveException("Corrupted TAR archive. Can't read entry", (Throwable) e);
@@ -484,6 +439,7 @@ private TarArchiveEntry getNextTarEntry() throws IOException {
final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
// Handle special tar records
boolean lastWasSpecial = false;
+ InputStream currentStream;
do {
// If there is a current entry, skip any unread data and padding
if (currEntry != null) {
@@ -504,10 +460,13 @@ private TarArchiveEntry getNextTarEntry() throws IOException {
// Parse the header into a new entry
final long position = archive.position();
currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
+ currentStream = new BoundedTarEntryInputStream(currEntry, archive);
lastWasSpecial = TarUtils.isSpecialTarRecord(currEntry);
if (lastWasSpecial) {
// Handle PAX, GNU long name, or other special records
- TarUtils.handleSpecialTarRecord(getInputStream(currEntry), zipEncoding, currEntry, paxHeaders, sparseHeaders, globalPaxHeaders,
+ // Make sure not to read beyond the entry data
+ final BoundedTarEntryInputStream inputStream = new BoundedTarEntryInputStream(currEntry, archive);
+ TarUtils.handleSpecialTarRecord(inputStream, zipEncoding, currEntry, paxHeaders, sparseHeaders, globalPaxHeaders,
globalSparseHeaders);
}
} while (lastWasSpecial);
@@ -515,11 +474,21 @@ private TarArchiveEntry getNextTarEntry() throws IOException {
TarUtils.applyPaxHeadersToEntry(currEntry, paxHeaders, sparseHeaders, globalPaxHeaders, globalSparseHeaders);
// Handle sparse files
if (currEntry.isSparse()) {
+ // These sparse formats have the sparse headers in the entry
if (currEntry.isOldGNUSparse()) {
+ // Old GNU sparse format uses extra header blocks for metadata.
+ // These blocks are not included in the entry’s size, so we cannot
+ // rely on BoundedTarEntryInputStream here.
readOldGNUSparse();
+ // Reposition to the start of the entry data to correctly compute the sparse streams
+ currEntry.setDataOffset(archive.position());
} else if (currEntry.isPaxGNU1XSparse()) {
- currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(getInputStream(currEntry), recordSize));
- currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
+ final long position = archive.position();
+ currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(currentStream, recordSize));
+ // Adjust the current entry to point to the start of the sparse file data
+ final long sparseHeadersSize = archive.position() - position;
+ currEntry.setSize(currEntry.getSize() - sparseHeadersSize);
+ currEntry.setDataOffset(currEntry.getDataOffset() + sparseHeadersSize);
}
// sparse headers are all done reading, we need to build
// sparse input streams using these sparse headers
@@ -620,12 +589,8 @@ private void readOldGNUSparse() throws IOException {
}
entry = new TarArchiveSparseEntry(headerBuf.array());
currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
- currEntry.setDataOffset(currEntry.getDataOffset() + recordSize);
} while (entry.isExtended());
}
- // sparse headers are all done reading, we need to build
- // sparse input streams using these sparse headers
- buildSparseInputStreams();
}
/**
@@ -671,8 +636,7 @@ protected final void setAtEOF(final boolean eof) {
*/
private void skipRecordPadding() throws IOException {
if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) {
- final long numRecords = currEntry.getSize() / recordSize + 1;
- final long padding = numRecords * recordSize - currEntry.getSize();
+ final long padding = recordSize - (currEntry.getSize() % recordSize);
repositionForwardBy(padding);
throwExceptionIfPositionIsNotInArchive();
}
@@ -685,7 +649,7 @@ private void skipRecordPadding() throws IOException {
*/
private void throwExceptionIfPositionIsNotInArchive() throws IOException {
if (archive.size() < archive.position()) {
- throw new ArchiveException("Truncated TAR archive");
+ throw new EOFException("Truncated TAR archive: archive should be at least " + archive.position() + " bytes but was " + archive.size() + " bytes");
}
}
diff --git a/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java b/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java
index 1b3c827b7ce..51ba82d477d 100644
--- a/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java
+++ b/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java
@@ -21,6 +21,7 @@
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
+import java.util.Objects;
import org.apache.commons.compress.archivers.ArchiveEntry;
@@ -261,6 +262,23 @@ public static String toString(final ArchiveEntry entry) {
return sb.toString();
}
+ /**
+ * Checks that the range {@code [off, off + len)} is within the bounds of the given array.
+ *
+ * @param b the array
+ * @param off the starting offset of the range
+ * @param len the length of the range
+ * @throws IndexOutOfBoundsException if {@code off} is negative, or {@code len} is negative, or {@code off + len} is greater than {@code b.length}
+ * @since 1.29.0
+ */
+ public static void checkFromIndexSize(final byte[] b, final int off, final int len) {
+ // TODO: replace with IOUtils.checkFromIndexSize, after upgrading to Commons IO 2.21.0
+ Objects.requireNonNull(b, "byte array");
+ if ((off | len) < 0 || b.length - len < off) {
+ throw new IndexOutOfBoundsException(String.format("Range [%s, %= end) {
return -1;
}
@@ -89,12 +94,15 @@ public synchronized int read(final byte[] b, final int off, final int len) throw
}
/**
- * Reads content of the stream into a {@link ByteBuffer}.
+ * Reads bytes from this stream into the given {@link ByteBuffer}, starting at the specified position.
+ *
+ * The caller is responsible for ensuring that the requested range
+ * {@code [pos, pos + buf.remaining())} lies within the valid bounds of the stream.
*
- * @param pos position to start the read.
- * @param buf buffer to add the read content.
- * @return number of read bytes.
- * @throws IOException if I/O fails.
+ * @param pos the position within the stream at which to begin reading
+ * @param buf the buffer into which bytes are read; bytes are written starting at the buffer’s current position
+ * @return the number of bytes read into the buffer
+ * @throws IOException if an I/O error occurs while reading
*/
protected abstract int read(long pos, ByteBuffer buf) throws IOException;
}
diff --git a/src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java b/src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java
new file mode 100644
index 00000000000..4bbe44510ee
--- /dev/null
+++ b/src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java
@@ -0,0 +1,390 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.archivers;
+
+import static java.nio.charset.StandardCharsets.US_ASCII;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Pair;
+
+public final class TestArchiveGenerator {
+
+ private static final int TIMESTAMP = 0;
+ private static final int OWNER_ID = 0;
+ private static final String OWNER_NAME = "owner";
+ private static final int GROUP_ID = 0;
+ private static final String GROUP_NAME = "group";
+ private static final int FILE_MODE = 0100644;
+ // TAR
+ private static final String OLD_GNU_MAGIC = "ustar ";
+ private static final String PAX_MAGIC = "ustar\u000000";
+
+ public static void main(final String[] args) throws IOException {
+ if (args.length != 1) {
+ System.err.println("Expected one argument: output directory");
+ System.exit(1);
+ }
+ final Path path = Paths.get(args[0]);
+ if (!Files.isDirectory(path)) {
+ System.err.println("Not a directory: " + path);
+ System.exit(1);
+ }
+ // Sparse file examples
+ final Path sparsePath = path.resolve("sparse");
+ Files.createDirectories(sparsePath);
+ createSparseFileTestCases(sparsePath);
+ }
+
+ public static void createSparseFileTestCases(final Path path) throws IOException {
+ if (!Files.isDirectory(path)) {
+ throw new IllegalArgumentException("Not a directory: " + path);
+ }
+ oldGnuSparse(path);
+ gnuSparse00(path);
+ gnuSparse01(path);
+ gnuSparse1X(path);
+ }
+
+ private static void oldGnuSparse(final Path path) throws IOException {
+ final Path file = path.resolve("old-gnu-sparse.tar");
+ try (OutputStream out = Files.newOutputStream(file)) {
+ final byte[] data = createData(8 * 1024);
+ final List<Pair<Integer, Integer>> sparseEntries = createFragmentedSparseEntries(data.length);
+ writeOldGnuSparseFile(sparseEntries, data, data.length, out);
+ writeUstarTrailer(out);
+ }
+ }
+
+ private static void gnuSparse00(final Path path) throws IOException {
+ final Path file = path.resolve("gnu-sparse-00.tar");
+ try (OutputStream out = Files.newOutputStream(file)) {
+ final byte[] data = createData(8 * 1024);
+ final List<Pair<Integer, Integer>> sparseEntries = createFragmentedSparseEntries(data.length);
+ final byte[] paxData = createGnuSparse00PaxData(sparseEntries, data.length);
+ writeGnuSparse0File(data, paxData, out);
+ writeUstarTrailer(out);
+ }
+ }
+
+ private static void gnuSparse01(final Path path) throws IOException {
+ final Path file = path.resolve("gnu-sparse-01.tar");
+ try (OutputStream out = Files.newOutputStream(file)) {
+ final byte[] data = createData(8 * 1024);
+ final List<Pair<Integer, Integer>> sparseEntries = createFragmentedSparseEntries(data.length);
+ final byte[] paxData = createGnuSparse01PaxData(sparseEntries, data.length);
+ writeGnuSparse0File(data, paxData, out);
+ writeUstarTrailer(out);
+ }
+ }
+
+ private static void gnuSparse1X(final Path path) throws IOException {
+ final Path file = path.resolve("gnu-sparse-1.tar");
+ try (OutputStream out = Files.newOutputStream(file)) {
+ final byte[] data = createData(8 * 1024);
+ final List<Pair<Integer, Integer>> sparseEntries = createFragmentedSparseEntries(data.length);
+ writeGnuSparse1File(sparseEntries, data, out);
+ writeUstarTrailer(out);
+ }
+ }
+
+ // Very fragmented sparse file
+ private static List<Pair<Integer, Integer>> createFragmentedSparseEntries(final int realSize) {
+ final List<Pair<Integer, Integer>> sparseEntries = new ArrayList<>();
+ for (int offset = 0; offset < realSize; offset++) {
+ sparseEntries.add(Pair.of(offset, 1));
+ }
+ return sparseEntries;
+ }
+
+ private static byte[] createData(final int size) {
+ final byte[] data = new byte[size];
+ for (int i = 0; i < size; i++) {
+ data[i] = (byte) (i % 256);
+ }
+ return data;
+ }
+
+ private static void writeOldGnuSparseFile(
+ final Collection<Pair<Integer, Integer>> sparseEntries,
+ final byte[] data,
+ final int realSize,
+ final OutputStream out)
+ throws IOException {
+ int offset = writeTarUstarHeader("sparse-file.txt", data.length, OLD_GNU_MAGIC, 'S', out);
+ while (offset < 386) {
+ out.write(0);
+ offset++;
+ }
+ // Sparse entries (24 bytes each)
+ offset += writeOldGnuSparseEntries(sparseEntries, 4, out);
+ // Real size (12 bytes)
+ offset += writeOctalString(realSize, 12, out);
+ offset = padTo512Bytes(offset, out);
+ // Write extended headers
+ while (!sparseEntries.isEmpty()) {
+ offset += writeOldGnuSparseExtendedHeader(sparseEntries, out);
+ }
+ // Write file data
+ out.write(data);
+ offset += data.length;
+ padTo512Bytes(offset, out);
+ }
+
+ private static void writeGnuSparse0File(final byte[] data, final byte[] paxData, final OutputStream out)
+ throws IOException {
+ // PAX entry
+ int offset = writeTarUstarHeader("./GNUSparseFile.1/" + "sparse-file.txt", paxData.length, PAX_MAGIC, 'x', out);
+ offset = padTo512Bytes(offset, out);
+ // PAX data
+ out.write(paxData);
+ offset += paxData.length;
+ offset = padTo512Bytes(offset, out);
+ // File entry
+ offset += writeTarUstarHeader("sparse-file.txt", data.length, PAX_MAGIC, '0', out);
+ offset = padTo512Bytes(offset, out);
+ // File data
+ out.write(data);
+ offset += data.length;
+ padTo512Bytes(offset, out);
+ }
+
+ private static void writeGnuSparse1File(
+ final Collection<Pair<Integer, Integer>> sparseEntries, final byte[] data, final OutputStream out)
+ throws IOException {
+ // PAX entry
+ final byte[] paxData = createGnuSparse1PaxData(sparseEntries, data.length);
+ int offset = writeTarUstarHeader("./GNUSparseFile.1/sparse-file.txt", paxData.length, PAX_MAGIC, 'x', out);
+ offset = padTo512Bytes(offset, out);
+ // PAX data
+ out.write(paxData);
+ offset += paxData.length;
+ offset = padTo512Bytes(offset, out);
+ // File entry
+ final byte[] sparseEntriesData = createGnuSparse1EntriesData(sparseEntries);
+ offset += writeTarUstarHeader("sparse-file.txt", sparseEntriesData.length + data.length, PAX_MAGIC, '0', out);
+ offset = padTo512Bytes(offset, out);
+ // File data
+ out.write(sparseEntriesData);
+ offset += sparseEntriesData.length;
+ out.write(data);
+ offset += data.length;
+ padTo512Bytes(offset, out);
+ }
+
+ private static int writeTarUstarHeader(
+ final String fileName,
+ final long fileSize,
+ final String magicAndVersion,
+ final char typeFlag,
+ final OutputStream out)
+ throws IOException {
+ int count = 0;
+ // File name (100 bytes)
+ count += writeString(fileName, 100, out);
+ // File mode (8 bytes)
+ count += writeOctalString(FILE_MODE, 8, out);
+ // Owner ID (8 bytes)
+ count += writeOctalString(OWNER_ID, 8, out);
+ // Group ID (8 bytes)
+ count += writeOctalString(GROUP_ID, 8, out);
+ // File size (12 bytes)
+ count += writeOctalString(fileSize, 12, out);
+ // Modification timestamp (12 bytes)
+ count += writeOctalString(TIMESTAMP, 12, out);
+ // Checksum (8 bytes), filled with spaces for now
+ count += writeString(StringUtils.repeat(' ', 7), 8, out);
+ // Link indicator (1 byte)
+ out.write(typeFlag);
+ count++;
+ // Name of linked file (100 bytes)
+ count += writeString("", 100, out);
+ // Magic (6 bytes) + Version (2 bytes)
+ count += writeString(magicAndVersion, 8, out);
+ // Owner user name (32 bytes)
+ count += writeString(OWNER_NAME, 32, out);
+ // Owner group name (32 bytes)
+ count += writeString(GROUP_NAME, 32, out);
+ // Device major number (8 bytes)
+ count += writeString("", 8, out);
+ // Device minor number (8 bytes)
+ count += writeString("", 8, out);
+ return count;
+ }
+
+ private static int writeOldGnuSparseExtendedHeader(
+ final Iterable<Pair<Integer, Integer>> sparseEntries, final OutputStream out) throws IOException {
+ int offset = 0;
+ offset += writeOldGnuSparseEntries(sparseEntries, 21, out);
+ offset = padTo512Bytes(offset, out);
+ return offset;
+ }
+
+ private static void writeUstarTrailer(final OutputStream out) throws IOException {
+ int offset = 0;
+ // 1024 bytes of zero
+ while (offset < 1024) {
+ out.write(0);
+ offset++;
+ }
+ }
+
+ private static int writeOldGnuSparseEntries(
+ final Iterable<Pair<Integer, Integer>> sparseEntries, final int limit, final OutputStream out)
+ throws IOException {
+ int offset = 0;
+ int count = 0;
+ final Iterator<Pair<Integer, Integer>> it = sparseEntries.iterator();
+ while (it.hasNext()) {
+ if (count >= limit) {
+ out.write(1); // more entries follow
+ return ++offset;
+ }
+ final Pair<Integer, Integer> entry = it.next();
+ it.remove();
+ count++;
+ offset += writeOldGnuSparseEntry(entry.getLeft(), entry.getRight(), out);
+ }
+ while (count < limit) {
+ // pad with empty entries
+ offset += writeOldGnuSparseEntry(0, 0, out);
+ count++;
+ }
+ out.write(0); // no more entries
+ return ++offset;
+ }
+
+ private static int writeOldGnuSparseEntry(final int offset, final int length, final OutputStream out)
+ throws IOException {
+ int count = 0;
+ count += writeOctalString(offset, 12, out);
+ count += writeOctalString(length, 12, out);
+ return count;
+ }
+
+ private static byte[] createGnuSparse00PaxData(
+ final Collection<? extends Pair<Integer, Integer>> sparseEntries, final int realSize) {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
+ writePaxKeyValue("GNU.sparse.size", realSize, writer);
+ writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer);
+ for (final Pair<Integer, Integer> entry : sparseEntries) {
+ writePaxKeyValue("GNU.sparse.offset", entry.getLeft(), writer);
+ writePaxKeyValue("GNU.sparse.numbytes", entry.getRight(), writer);
+ }
+ }
+ return baos.toByteArray();
+ }
+
+ private static byte[] createGnuSparse01PaxData(
+ final Collection<? extends Pair<Integer, Integer>> sparseEntries, final int realSize) {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
+ writePaxKeyValue("GNU.sparse.size", realSize, writer);
+ writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer);
+ final String map = sparseEntries.stream()
+ .map(e -> e.getLeft() + "," + e.getRight())
+ .collect(Collectors.joining(","));
+ writePaxKeyValue("GNU.sparse.map", map, writer);
+ }
+ return baos.toByteArray();
+ }
+
+ private static byte[] createGnuSparse1PaxData(
+ final Collection<Pair<Integer, Integer>> sparseEntries, final int realSize) {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
+ writePaxKeyValue("GNU.sparse.realsize", realSize, writer);
+ writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer);
+ writePaxKeyValue("GNU.sparse.major", 1, writer);
+ writePaxKeyValue("GNU.sparse.minor", 0, writer);
+ }
+ return baos.toByteArray();
+ }
+
+ private static void writePaxKeyValue(final String key, final String value, final PrintWriter out) {
+ final String entry = ' ' + key + "=" + value + "\n";
+ // First guess: number of digits of the entry length, plus the entry itself (which already starts with the space)
+ final int length = String.valueOf(entry.length()).length() + entry.length();
+ // Recompute using the digit count of the guess, in case adding the length prefix changed the number of digits
+ out.print(String.valueOf(length).length() + entry.length());
+ out.print(entry);
+ }
+
+ private static void writePaxKeyValue(final String key, final int value, final PrintWriter out) {
+ writePaxKeyValue(key, Integer.toString(value), out);
+ }
+
+ private static byte[] createGnuSparse1EntriesData(final Collection<? extends Pair<Integer, Integer>> sparseEntries)
+ throws IOException {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
+ writer.printf("%d\n", sparseEntries.size());
+ for (final Pair<Integer, Integer> entry : sparseEntries) {
+ writer.printf("%d\n", entry.getLeft());
+ writer.printf("%d\n", entry.getRight());
+ }
+ }
+ padTo512Bytes(baos.size(), baos);
+ return baos.toByteArray();
+ }
+
+ private static int writeOctalString(final long value, final int length, final OutputStream out) throws IOException {
+ int count = 0;
+ final String s = Long.toOctalString(value);
+ count += writeString(s, length - 1, out);
+ out.write('\0');
+ return ++count;
+ }
+
+ private static int writeString(final String s, final int length, final OutputStream out) throws IOException {
+ final byte[] bytes = s.getBytes(US_ASCII);
+ out.write(bytes);
+ for (int i = bytes.length; i < length; i++) {
+ out.write('\0');
+ }
+ return length;
+ }
+
+ private static int padTo512Bytes(final int offset, final OutputStream out) throws IOException {
+ int count = offset;
+ while (count % 512 != 0) {
+ out.write(0);
+ count++;
+ }
+ return count;
+ }
+
+ private TestArchiveGenerator() {
+ // hide constructor
+ }
+}
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
index 4d09fddcaa3..cada4fb8afd 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
@@ -23,6 +23,7 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assumptions.assumeFalse;
@@ -35,14 +36,27 @@
import java.util.List;
import org.apache.commons.compress.AbstractTest;
+import org.apache.commons.compress.archivers.TestArchiveGenerator;
import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.DisabledOnOs;
import org.junit.jupiter.api.condition.EnabledOnOs;
import org.junit.jupiter.api.condition.OS;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
class SparseFilesTest extends AbstractTest {
+ @TempDir
+ private static Path tempDir;
+
+ @BeforeAll
+ static void setupAll() throws IOException {
+ TestArchiveGenerator.createSparseFileTestCases(tempDir);
+ }
+
private void assertPaxGNUEntry(final TarArchiveEntry entry, final String suffix) {
assertEquals("sparsefile-" + suffix, entry.getName());
assertEquals(TarConstants.LF_NORMAL, entry.getLinkFlag());
@@ -245,6 +259,53 @@ void testExtractSparseTarsOnWindows() throws IOException {
}
}
+ @ParameterizedTest
+ @ValueSource(strings = {"old-gnu-sparse.tar" , "gnu-sparse-00.tar", "gnu-sparse-01.tar", "gnu-sparse-1.tar"})
+ void testMaximallyFragmentedTarFile(final String fileName) throws IOException {
+ final int expectedSize = 8192;
+ try (TarFile input = TarFile.builder().setPath(tempDir.resolve(fileName)).get()) {
+ final List<TarArchiveEntry> entries = input.getEntries();
+ assertEquals(1, entries.size());
+ final TarArchiveEntry entry = entries.get(0);
+ assertNotNull(entry);
+ assertEquals("sparse-file.txt", entry.getName());
+
+ try (InputStream inputStream = input.getInputStream(entry)) {
+ // read the expected amount of data
+ final byte[] content = new byte[expectedSize];
+ assertEquals(expectedSize, inputStream.read(content));
+ // verify that the stream is at EOF
+ assertEquals(IOUtils.EOF, inputStream.read());
+ // check content
+ for (int i = 0; i < content.length; i++) {
+ assertEquals((byte) (i % 256), content[i], "at index " + i);
+ }
+ }
+ }
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"old-gnu-sparse.tar", "gnu-sparse-00.tar", "gnu-sparse-01.tar", "gnu-sparse-1.tar"})
+ void testMaximallyFragmentedTarStream(final String fileName) throws IOException {
+ final int expectedSize = 8192;
+ try (TarArchiveInputStream input = TarArchiveInputStream.builder().setPath(tempDir.resolve(fileName)).get()) {
+ final TarArchiveEntry entry = input.getNextEntry();
+ assertNotNull(entry);
+ assertEquals("sparse-file.txt", entry.getName());
+ // read the expected amount of data
+ final byte[] content = new byte[expectedSize];
+ assertEquals(expectedSize, input.read(content));
+ // verify that the stream is at EOF
+ assertEquals(IOUtils.EOF, input.read());
+ // check content
+ for (int i = 0; i < content.length; i++) {
+ assertEquals((byte) (i % 256), content[i], "at index " + i);
+ }
+ // check that there are no more entries
+ assertNull(input.getNextEntry());
+ }
+ }
+
@Test
void testOldGNU() throws Throwable {
try (TarArchiveInputStream tin = TarArchiveInputStream.builder()
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
index c823b0757a6..12162896af1 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java
@@ -36,6 +36,7 @@
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -405,7 +406,7 @@ void testParseTarWithNonNumberPaxHeaders() throws IOException {
void testParseTarWithSpecialPaxHeaders() throws IOException {
try (TarArchiveInputStream archive = getTestStream("COMPRESS-530-fail.tar")) {
assertThrows(ArchiveException.class, () -> archive.getNextEntry());
- assertThrows(ArchiveException.class, () -> IOUtils.toByteArray(archive));
+ assertThrows(EOFException.class, () -> IOUtils.toByteArray(archive));
}
}
@@ -501,7 +502,7 @@ void testShouldReadGNULongNameEntryWithWrongName() throws Exception {
void testShouldThrowAnExceptionOnTruncatedEntries() throws Exception {
final Path dir = createTempDirectory("COMPRESS-279");
try (TarArchiveInputStream is = getTestStream("COMPRESS-279-fail.tar")) {
- assertThrows(ArchiveException.class, () -> {
+ assertThrows(EOFException.class, () -> {
TarArchiveEntry entry = is.getNextTarEntry();
int count = 0;
while (entry != null) {
@@ -518,7 +519,7 @@ void testShouldThrowAnExceptionOnTruncatedStream() throws Exception {
final Path dir = createTempDirectory("COMPRESS-279");
try (TarArchiveInputStream is = getTestStream("COMPRESS-279-fail.tar")) {
final AtomicInteger count = new AtomicInteger();
- assertThrows(ArchiveException.class, () -> is.forEach(entry -> Files.copy(is, dir.resolve(String.valueOf(count.getAndIncrement())))));
+ assertThrows(EOFException.class, () -> is.forEach(entry -> Files.copy(is, dir.resolve(String.valueOf(count.getAndIncrement())))));
}
}
From 961d0fc12f1aec779e8bb7d0e5902b0abe444fae Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Mon, 29 Sep 2025 17:13:56 +0200
Subject: [PATCH 2/6] fix: remove unused method
---
.../archivers/tar/TarArchiveInputStream.java | 21 -------------------
1 file changed, 21 deletions(-)
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index a8c02dc8a6b..4d40574662f 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -26,7 +26,6 @@
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
@@ -400,26 +399,6 @@ private void consumeRemainderOfLastBlock() throws IOException {
}
}
- /**
- * For FileInputStream, the skip always return the number you input, so we need the available bytes to determine how many bytes are actually skipped
- *
- * @param available available bytes returned by {@link InputStream#available()}.
- * @param skipped skipped bytes returned by {@link InputStream#skip(long)}.
- * @param expected bytes expected to skip.
- * @return number of bytes actually skipped.
- * @throws IOException if a truncated tar archive is detected.
- */
- private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
- long actuallySkipped = skipped;
- if (in instanceof FileInputStream) {
- actuallySkipped = Math.min(skipped, available);
- }
- if (actuallySkipped != expected) {
- throw new ArchiveException("Truncated TAR archive");
- }
- return actuallySkipped;
- }
-
/**
* Gets the current TAR Archive Entry that this input stream is processing
*
From 38b1ecb9953c8af09ef3ab18b08d014418b3bfb0 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Sun, 12 Oct 2025 21:39:23 +0200
Subject: [PATCH 3/6] fix: simplify input streams
---
.../archivers/tar/ComposedTarInputStream.java | 103 ------------------
.../archivers/tar/TarArchiveInputStream.java | 4 +-
.../compress/archivers/tar/TarFile.java | 35 +++---
.../utils/BoundedArchiveInputStream.java | 12 +-
.../archivers/tar/SparseFilesTest.java | 4 +-
5 files changed, 29 insertions(+), 129 deletions(-)
delete mode 100644 src/main/java/org/apache/commons/compress/archivers/tar/ComposedTarInputStream.java
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/ComposedTarInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/ComposedTarInputStream.java
deleted file mode 100644
index 848fc3571ba..00000000000
--- a/src/main/java/org/apache/commons/compress/archivers/tar/ComposedTarInputStream.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.commons.compress.archivers.tar;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Iterator;
-
-import org.apache.commons.io.IOUtils;
-
-final class ComposedTarInputStream extends InputStream {
-
- final Iterator<? extends InputStream> streams;
- private final long size;
- private InputStream current;
- private long position;
-
- ComposedTarInputStream(final Iterable<? extends InputStream> streams, final long size) {
- this.streams = streams.iterator();
- this.size = size;
- this.current = this.streams.hasNext() ? this.streams.next() : null;
- this.position = 0;
- }
-
- @Override
- public void close() throws IOException {
- while (current != null) {
- current.close();
- current = streams.hasNext() ? streams.next() : null;
- }
- }
-
- @Override
- public int read() throws IOException {
- if (position >= size) {
- return -1;
- }
- while (current != null) {
- final int ret = current.read();
- if (ret != -1) {
- position++;
- return ret;
- }
- nextStream();
- }
- throw new EOFException(String.format("Truncated TAR archive: expected %d bytes, but got only %d bytes", size, position));
- }
-
- @Override
- public int read(final byte[] b, final int off, final int len) throws IOException {
- IOUtils.checkFromIndexSize(b, off, len);
- if (len == 0) {
- return 0;
- }
- if (position >= size) {
- return -1;
- }
-
- final int toRead = (int) Math.min(size - position, len);
- int remaining = toRead;
- int dst = off;
-
- while (current != null && remaining > 0) {
- final int n = current.read(b, dst, remaining);
- if (n == -1) {
- nextStream();
- continue;
- }
- position += n;
- dst += n;
- remaining -= n;
- }
-
- if (remaining == 0) {
- return toRead;
- }
- throw new EOFException(String.format("Truncated TAR archive: expected %d bytes, but got only %d bytes", size, position));
- }
-
- private void nextStream() throws IOException {
- if (current != null) {
- current.close();
- }
- current = streams.hasNext() ? streams.next() : null;
- }
-}
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index ff8068140d5..c137ad8fdce 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -28,8 +28,10 @@
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
+import java.io.SequenceInputStream;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -360,7 +362,7 @@ private void buildSparseInputStreams() throws IOException {
}
offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
}
- currentInputStream = new ComposedTarInputStream(sparseInputStreams, currEntry.getRealSize());
+ currentInputStream = new SequenceInputStream(Collections.enumeration(sparseInputStreams));
}
/**
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java
index 84f7360a230..21455bed027 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java
@@ -25,6 +25,7 @@
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.io.SequenceInputStream;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Path;
@@ -43,7 +44,6 @@
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.BoundedArchiveInputStream;
-import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
import org.apache.commons.io.function.IOIterable;
import org.apache.commons.io.function.IOIterator;
import org.apache.commons.io.input.BoundedInputStream;
@@ -55,15 +55,24 @@
*/
public class TarFile implements Closeable, IOIterable<TarArchiveEntry> {
+ /**
+ * InputStream that reads a specific entry from the archive.
+ *
+ * <p>It ensures that:</p>
+ * <ul>
+ * <li>No more than the specified number of bytes are read from the underlying channel.</li>
+ * <li>If the end of the entry is reached before the expected number of bytes, an {@link EOFException} is thrown.</li>
+ * </ul>
+ */
private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream {
private final SeekableByteChannel channel;
- private final TarArchiveEntry entry;
+ private final long end;
- BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException {
- super(entry.getDataOffset(), entry.getSize());
- this.entry = entry;
+ BoundedTarEntryInputStream(final long start, final long remaining, final SeekableByteChannel channel) {
+ super(start, remaining);
+ this.end = start + remaining;
this.channel = channel;
}
@@ -76,7 +85,7 @@ protected int read(final long pos, final ByteBuffer buf) throws IOException {
if (totalRead == -1) {
if (buf.remaining() > 0) {
throw new EOFException(String.format("Truncated TAR archive: expected at least %d bytes, but got only %d bytes",
- entry.getDataOffset() + entry.getSize(), channel.position()));
+ end, channel.position()));
}
// Marks the TarFile as having reached EOF.
setAtEOF(true);
@@ -364,7 +373,7 @@ private void buildSparseInputStreams() throws IOException {
// possible integer overflow
throw new ArchiveException("Unreadable TAR archive, sparse block offset or length too big");
}
- streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive));
+ streams.add(new BoundedTarEntryInputStream(start, sparseHeader.getNumbytes(), archive));
}
offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
}
@@ -405,13 +414,13 @@ public List<TarArchiveEntry> getEntries() {
*/
public InputStream getInputStream(final TarArchiveEntry entry) throws IOException {
try {
- // Sparse entries are composed of multiple fragments: wrap them in a ComposedTarInputStream
+ // Sparse entries are composed of multiple fragments: wrap them in a SequenceInputStream
if (entry.isSparse()) {
final List<InputStream> streams = sparseInputStreams.get(entry.getName());
- return new ComposedTarInputStream(streams != null ? streams : Collections.emptyList(), entry.getRealSize());
+ return new SequenceInputStream(streams != null ? Collections.enumeration(streams) : Collections.emptyEnumeration());
}
// Regular entries are bounded: wrap in BoundedTarEntryInputStream to enforce size and detect premature EOF
- return new BoundedTarEntryInputStream(entry, archive);
+ return new BoundedTarEntryInputStream(entry.getDataOffset(), entry.getSize(), archive);
} catch (final RuntimeException e) {
throw new ArchiveException("Corrupted TAR archive. Can't read entry", (Throwable) e);
}
@@ -454,14 +463,12 @@ private TarArchiveEntry getNextTarEntry() throws IOException {
// Parse the header into a new entry
final long position = archive.position();
currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position);
- currentStream = new BoundedTarEntryInputStream(currEntry, archive);
+ currentStream = new BoundedTarEntryInputStream(currEntry.getDataOffset(), currEntry.getSize(), archive);
lastWasSpecial = TarUtils.isSpecialTarRecord(currEntry);
if (lastWasSpecial) {
// Handle PAX, GNU long name, or other special records
// Make sure not to read beyond the entry data
- final BoundedTarEntryInputStream inputStream = new BoundedTarEntryInputStream(currEntry, archive);
- TarUtils.handleSpecialTarRecord(inputStream, zipEncoding, currEntry, paxHeaders, sparseHeaders, globalPaxHeaders,
- globalSparseHeaders);
+ TarUtils.handleSpecialTarRecord(currentStream, zipEncoding, currEntry, paxHeaders, sparseHeaders, globalPaxHeaders, globalSparseHeaders);
}
} while (lastWasSpecial);
// Apply global and local PAX headers
diff --git a/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java b/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java
index 87e6d61bb4d..d334af0f377 100644
--- a/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java
+++ b/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java
@@ -78,15 +78,9 @@ public synchronized int read(final byte[] b, final int off, final int len) throw
if (loc >= end) {
return -1;
}
- final long maxLen = Math.min(len, end - loc);
- if (maxLen <= 0) {
- return 0;
- }
- if (off < 0 || off > b.length || maxLen > b.length - off) {
- throw new IndexOutOfBoundsException("offset or len are out of bounds");
- }
-
- final ByteBuffer buf = ByteBuffer.wrap(b, off, (int) maxLen);
+ // Both len and end - loc are guaranteed to be > 0 here and at least len is <= Integer.MAX_VALUE.
+ final int maxLen = (int) Math.min(len, end - loc);
+ final ByteBuffer buf = ByteBuffer.wrap(b, off, maxLen);
final int ret = read(loc, buf);
if (ret > 0) {
loc += ret;
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
index c8bb61205dd..ce39e07a89f 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
@@ -273,7 +273,7 @@ void testMaximallyFragmentedTarFile(final String fileName) throws IOException {
try (InputStream inputStream = input.getInputStream(entry)) {
// read the expected amount of data
final byte[] content = new byte[expectedSize];
- assertEquals(expectedSize, inputStream.read(content));
+ assertEquals(expectedSize, IOUtils.read(inputStream, content));
// verify that the stream is at EOF
assertEquals(IOUtils.EOF, inputStream.read());
// check content
@@ -294,7 +294,7 @@ void testMaximallyFragmentedTarStream(final String fileName) throws IOException
assertEquals("sparse-file.txt", entry.getName());
// read the expected amount of data
final byte[] content = new byte[expectedSize];
- assertEquals(expectedSize, input.read(content));
+ assertEquals(expectedSize, IOUtils.read(input, content));
// verify that the stream is at EOF
assertEquals(IOUtils.EOF, input.read());
// check content
From c164798e7ea96181a2f87b99a96766b8400fa921 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Sun, 12 Oct 2025 21:48:02 +0200
Subject: [PATCH 4/6] fix: error message
---
.../commons/compress/archivers/tar/TarArchiveInputStream.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index c137ad8fdce..b0fd1b4dc0d 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -523,7 +523,7 @@ private void afterRead(final int read) throws IOException {
count(read);
// Check for truncated entries
if (read == -1 && entryOffset < currEntry.getSize()) {
- throw new EOFException(String.format("Truncated TAR archive: entry '%s' expected %d bytes, but got %d", currEntry.getName(), currEntry.getSize(),
+ throw new EOFException(String.format("Truncated TAR archive: entry '%s' expected %,d bytes, actual %,d", currEntry.getName(), currEntry.getSize(),
entryOffset));
}
entryOffset += Math.max(0, read);
From 96d024e3b9d83dd91078ae4d6f4b23b5e1c8b39d Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Thu, 16 Oct 2025 21:25:08 +0200
Subject: [PATCH 5/6] Fix failing tests
---
.../archivers/MaxNameEntryLengthTest.java | 32 +++++++++++++------
.../compress/archivers/tar/TarFileTest.java | 5 +--
2 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/src/test/java/org/apache/commons/compress/archivers/MaxNameEntryLengthTest.java b/src/test/java/org/apache/commons/compress/archivers/MaxNameEntryLengthTest.java
index 16ef55bc726..10fef8f1488 100644
--- a/src/test/java/org/apache/commons/compress/archivers/MaxNameEntryLengthTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/MaxNameEntryLengthTest.java
@@ -46,6 +46,7 @@
import org.apache.commons.io.function.IOStream;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.function.Executable;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
@@ -111,12 +112,15 @@ static Stream testTruncatedStreams() throws IOException {
static Stream testTruncatedTarFiles() throws IOException {
return Stream.of(
- Arguments.of(TarFile.builder()
- .setMaxEntryNameLength(Integer.MAX_VALUE)
- .setURI(getURI("synthetic/long-name/pax-fail.tar"))),
- Arguments.of(TarFile.builder()
- .setMaxEntryNameLength(Integer.MAX_VALUE)
- .setURI(getURI("synthetic/long-name/gnu-fail.tar"))));
+ Arguments.of(
+ TarFile.builder().setMaxEntryNameLength(Integer.MAX_VALUE).setURI(getURI("synthetic/long-name/pax-fail.tar")),
+ Integer.MAX_VALUE
+ ),
+ Arguments.of(
+ TarFile.builder().setMaxEntryNameLength(Integer.MAX_VALUE).setURI(getURI("synthetic/long-name/gnu-fail.tar")),
+ SOFT_MAX_ARRAY_LENGTH
+ )
+ );
}
static Stream testValidStreams() throws IOException {
@@ -175,10 +179,18 @@ void testTruncatedStreams(final ArchiveInputStream> archiveInputStream, final
@ParameterizedTest
@MethodSource
- void testTruncatedTarFiles(final TarFile.Builder tarFileBuilder) {
- // Since the real size of the archive is known, the truncation is detected
- // much earlier and before trying to read file names.
- assertThrows(EOFException.class, () -> tarFileBuilder.get().getEntries());
+ void testTruncatedTarFiles(final TarFile.Builder tarFileBuilder, final long expectedLength) {
+ // If the file name length exceeds available memory, reading the TAR file fails fast with MemoryLimitException.
+ // Otherwise, it fails with EOFException when the truncated archive ends unexpectedly.
+ final Executable action = () -> tarFileBuilder.get().entries();
+ if (Runtime.getRuntime().totalMemory() < expectedLength) {
+ final MemoryLimitException exception = assertThrows(MemoryLimitException.class, action);
+ final String message = exception.getMessage();
+ assertNotNull(message);
+ assertTrue(message.contains(String.format("%,d", expectedLength)), "Message mentions expected length (" + expectedLength + "): " + message);
+ } else {
+ assertThrows(EOFException.class, action);
+ }
}
@Test
diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java
index add3f9464fc..ee1a677f75a 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java
@@ -26,7 +26,6 @@
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
-import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -226,7 +225,9 @@ void testParseTarWithNonNumberPaxHeaders() {
@Test
void testParseTarWithSpecialPaxHeaders() {
- assertThrows(EOFException.class, () -> TarFile.builder().setURI(getURI("COMPRESS-530-fail.tar")).get());
+ final ArchiveException ex = assertThrows(ArchiveException.class, () -> TarFile.builder().setURI(getURI("COMPRESS-530-fail.tar")).get());
+ // Parsing fails since the data starts with null bytes
+ assertTrue(ex.getMessage().contains("non-number"));
}
@Test
From 51607c93af76a4c9e04ae9b84fffcb0902d61c02 Mon Sep 17 00:00:00 2001
From: "Piotr P. Karwasz"
Date: Thu, 16 Oct 2025 21:31:00 +0200
Subject: [PATCH 6/6] Sort members
---
.../archivers/tar/TarArchiveInputStream.java | 22 +-
.../archivers/TestArchiveGenerator.java | 354 +++++++++---------
2 files changed, 188 insertions(+), 188 deletions(-)
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index 51990f4196f..57cd596ca7a 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -317,6 +317,17 @@ public TarArchiveInputStream(final InputStream inputStream, final String encodin
this(builder().setInputStream(inputStream).setCharset(encoding));
}
+ private void afterRead(final int read) throws IOException {
+ // Count the bytes read
+ count(read);
+ // Check for truncated entries
+ if (read == -1 && entryOffset < currEntry.getSize()) {
+ throw new EOFException(String.format("Truncated TAR archive: entry '%s' expected %,d bytes, actual %,d", currEntry.getName(), currEntry.getSize(),
+ entryOffset));
+ }
+ entryOffset += Math.max(0, read);
+ }
+
/**
* Gets the available data that can be read from the current entry in the archive. This does not indicate how much data is left in the entire archive, only
* in the current entry. This value is determined from the entry's size header field and the amount of data already read from the current entry.
@@ -536,17 +547,6 @@ public TarArchiveEntry getNextEntry() throws IOException {
return currEntry;
}
- private void afterRead(final int read) throws IOException {
- // Count the bytes read
- count(read);
- // Check for truncated entries
- if (read == -1 && entryOffset < currEntry.getSize()) {
- throw new EOFException(String.format("Truncated TAR archive: entry '%s' expected %,d bytes, actual %,d", currEntry.getName(), currEntry.getSize(),
- entryOffset));
- }
- entryOffset += Math.max(0, read);
- }
-
/**
* Gets the next entry in this tar archive. This will skip over any remaining data in the current entry, if there is one, and place the input stream at the
* header of the next entry, and read the header and instantiate a new TarEntry from the header bytes and return that entry. If there are no more entries in
diff --git a/src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java b/src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java
index 4bbe44510ee..49c06da5172 100644
--- a/src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java
+++ b/src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java
@@ -39,30 +39,85 @@
public final class TestArchiveGenerator {
- private static final int TIMESTAMP = 0;
- private static final int OWNER_ID = 0;
- private static final String OWNER_NAME = "owner";
+ private static final int FILE_MODE = 0100644;
private static final int GROUP_ID = 0;
private static final String GROUP_NAME = "group";
- private static final int FILE_MODE = 0100644;
// TAR
private static final String OLD_GNU_MAGIC = "ustar ";
+ private static final int OWNER_ID = 0;
+ private static final String OWNER_NAME = "owner";
private static final String PAX_MAGIC = "ustar\u000000";
+ private static final int TIMESTAMP = 0;
- public static void main(final String[] args) throws IOException {
- if (args.length != 1) {
- System.err.println("Expected one argument: output directory");
- System.exit(1);
+ private static byte[] createData(final int size) {
+ final byte[] data = new byte[size];
+ for (int i = 0; i < size; i++) {
+ data[i] = (byte) (i % 256);
}
- final Path path = Paths.get(args[0]);
- if (!Files.isDirectory(path)) {
- System.err.println("Not a directory: " + path);
- System.exit(1);
+ return data;
+ }
+
+ // Very fragmented sparse file
+ private static List> createFragmentedSparseEntries(final int realSize) {
+ final List> sparseEntries = new ArrayList<>();
+ for (int offset = 0; offset < realSize; offset++) {
+ sparseEntries.add(Pair.of(offset, 1));
}
- // Sparse file examples
- final Path sparsePath = path.resolve("sparse");
- Files.createDirectories(sparsePath);
- createSparseFileTestCases(sparsePath);
+ return sparseEntries;
+ }
+
+ private static byte[] createGnuSparse00PaxData(
+ final Collection extends Pair> sparseEntries, final int realSize) {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
+ writePaxKeyValue("GNU.sparse.size", realSize, writer);
+ writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer);
+ for (final Pair entry : sparseEntries) {
+ writePaxKeyValue("GNU.sparse.offset", entry.getLeft(), writer);
+ writePaxKeyValue("GNU.sparse.numbytes", entry.getRight(), writer);
+ }
+ }
+ return baos.toByteArray();
+ }
+
+ private static byte[] createGnuSparse01PaxData(
+ final Collection extends Pair> sparseEntries, final int realSize) {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
+ writePaxKeyValue("GNU.sparse.size", realSize, writer);
+ writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer);
+ final String map = sparseEntries.stream()
+ .map(e -> e.getLeft() + "," + e.getRight())
+ .collect(Collectors.joining(","));
+ writePaxKeyValue("GNU.sparse.map", map, writer);
+ }
+ return baos.toByteArray();
+ }
+
+ private static byte[] createGnuSparse1EntriesData(final Collection extends Pair> sparseEntries)
+ throws IOException {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
+ writer.printf("%d\n", sparseEntries.size());
+ for (final Pair entry : sparseEntries) {
+ writer.printf("%d\n", entry.getLeft());
+ writer.printf("%d\n", entry.getRight());
+ }
+ }
+ padTo512Bytes(baos.size(), baos);
+ return baos.toByteArray();
+ }
+
+ private static byte[] createGnuSparse1PaxData(
+ final Collection> sparseEntries, final int realSize) {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
+ writePaxKeyValue("GNU.sparse.realsize", realSize, writer);
+ writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer);
+ writePaxKeyValue("GNU.sparse.major", 1, writer);
+ writePaxKeyValue("GNU.sparse.minor", 0, writer);
+ }
+ return baos.toByteArray();
}
public static void createSparseFileTestCases(final Path path) throws IOException {
@@ -75,16 +130,6 @@ public static void createSparseFileTestCases(final Path path) throws IOException
gnuSparse1X(path);
}
- private static void oldGnuSparse(final Path path) throws IOException {
- final Path file = path.resolve("old-gnu-sparse.tar");
- try (OutputStream out = Files.newOutputStream(file)) {
- final byte[] data = createData(8 * 1024);
- final List> sparseEntries = createFragmentedSparseEntries(data.length);
- writeOldGnuSparseFile(sparseEntries, data, data.length, out);
- writeUstarTrailer(out);
- }
- }
-
private static void gnuSparse00(final Path path) throws IOException {
final Path file = path.resolve("gnu-sparse-00.tar");
try (OutputStream out = Files.newOutputStream(file)) {
@@ -117,47 +162,39 @@ private static void gnuSparse1X(final Path path) throws IOException {
}
}
- // Very fragmented sparse file
- private static List> createFragmentedSparseEntries(final int realSize) {
- final List> sparseEntries = new ArrayList<>();
- for (int offset = 0; offset < realSize; offset++) {
- sparseEntries.add(Pair.of(offset, 1));
+ public static void main(final String[] args) throws IOException {
+ if (args.length != 1) {
+ System.err.println("Expected one argument: output directory");
+ System.exit(1);
}
- return sparseEntries;
+ final Path path = Paths.get(args[0]);
+ if (!Files.isDirectory(path)) {
+ System.err.println("Not a directory: " + path);
+ System.exit(1);
+ }
+ // Sparse file examples
+ final Path sparsePath = path.resolve("sparse");
+ Files.createDirectories(sparsePath);
+ createSparseFileTestCases(sparsePath);
}
- private static byte[] createData(final int size) {
- final byte[] data = new byte[size];
- for (int i = 0; i < size; i++) {
- data[i] = (byte) (i % 256);
+ private static void oldGnuSparse(final Path path) throws IOException {
+ final Path file = path.resolve("old-gnu-sparse.tar");
+ try (OutputStream out = Files.newOutputStream(file)) {
+ final byte[] data = createData(8 * 1024);
+ final List> sparseEntries = createFragmentedSparseEntries(data.length);
+ writeOldGnuSparseFile(sparseEntries, data, data.length, out);
+ writeUstarTrailer(out);
}
- return data;
}
- private static void writeOldGnuSparseFile(
- final Collection> sparseEntries,
- final byte[] data,
- final int realSize,
- final OutputStream out)
- throws IOException {
- int offset = writeTarUstarHeader("sparse-file.txt", data.length, OLD_GNU_MAGIC, 'S', out);
- while (offset < 386) {
+ private static int padTo512Bytes(final int offset, final OutputStream out) throws IOException {
+ int count = offset;
+ while (count % 512 != 0) {
out.write(0);
- offset++;
- }
- // Sparse entries (24 bytes each)
- offset += writeOldGnuSparseEntries(sparseEntries, 4, out);
- // Real size (12 bytes)
- offset += writeOctalString(realSize, 12, out);
- offset = padTo512Bytes(offset, out);
- // Write extended headers
- while (!sparseEntries.isEmpty()) {
- offset += writeOldGnuSparseExtendedHeader(sparseEntries, out);
+ count++;
}
- // Write file data
- out.write(data);
- offset += data.length;
- padTo512Bytes(offset, out);
+ return count;
}
private static void writeGnuSparse0File(final byte[] data, final byte[] paxData, final OutputStream out)
@@ -201,61 +238,12 @@ private static void writeGnuSparse1File(
padTo512Bytes(offset, out);
}
- private static int writeTarUstarHeader(
- final String fileName,
- final long fileSize,
- final String magicAndVersion,
- final char typeFlag,
- final OutputStream out)
- throws IOException {
+ private static int writeOctalString(final long value, final int length, final OutputStream out) throws IOException {
int count = 0;
- // File name (100 bytes)
- count += writeString(fileName, 100, out);
- // File mode (8 bytes)
- count += writeOctalString(FILE_MODE, 8, out);
- // Owner ID (8 bytes)
- count += writeOctalString(OWNER_ID, 8, out);
- // Group ID (8 bytes)
- count += writeOctalString(GROUP_ID, 8, out);
- // File size (12 bytes)
- count += writeOctalString(fileSize, 12, out);
- // Modification timestamp (12 bytes)
- count += writeOctalString(TIMESTAMP, 12, out);
- // Checksum (8 bytes), filled with spaces for now
- count += writeString(StringUtils.repeat(' ', 7), 8, out);
- // Link indicator (1 byte)
- out.write(typeFlag);
- count++;
- // Name of linked file (100 bytes)
- count += writeString("", 100, out);
- // Magic (6 bytes) + Version (2 bytes)
- count += writeString(magicAndVersion, 8, out);
- // Owner user name (32 bytes)
- count += writeString(OWNER_NAME, 32, out);
- // Owner group name (32 bytes)
- count += writeString(GROUP_NAME, 32, out);
- // Device major number (8 bytes)
- count += writeString("", 8, out);
- // Device minor number (8 bytes)
- count += writeString("", 8, out);
- return count;
- }
-
- private static int writeOldGnuSparseExtendedHeader(
- final Iterable> sparseEntries, final OutputStream out) throws IOException {
- int offset = 0;
- offset += writeOldGnuSparseEntries(sparseEntries, 21, out);
- offset = padTo512Bytes(offset, out);
- return offset;
- }
-
- private static void writeUstarTrailer(final OutputStream out) throws IOException {
- int offset = 0;
- // 1024 bytes of zero
- while (offset < 1024) {
- out.write(0);
- offset++;
- }
+ final String s = Long.toOctalString(value);
+ count += writeString(s, length - 1, out);
+ out.write('\0');
+ return ++count;
}
private static int writeOldGnuSparseEntries(
@@ -291,44 +279,42 @@ private static int writeOldGnuSparseEntry(final int offset, final int length, fi
return count;
}
- private static byte[] createGnuSparse00PaxData(
- final Collection extends Pair> sparseEntries, final int realSize) {
- final ByteArrayOutputStream baos = new ByteArrayOutputStream();
- try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
- writePaxKeyValue("GNU.sparse.size", realSize, writer);
- writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer);
- for (final Pair entry : sparseEntries) {
- writePaxKeyValue("GNU.sparse.offset", entry.getLeft(), writer);
- writePaxKeyValue("GNU.sparse.numbytes", entry.getRight(), writer);
- }
- }
- return baos.toByteArray();
+ private static int writeOldGnuSparseExtendedHeader(
+ final Iterable> sparseEntries, final OutputStream out) throws IOException {
+ int offset = 0;
+ offset += writeOldGnuSparseEntries(sparseEntries, 21, out);
+ offset = padTo512Bytes(offset, out);
+ return offset;
}
- private static byte[] createGnuSparse01PaxData(
- final Collection extends Pair> sparseEntries, final int realSize) {
- final ByteArrayOutputStream baos = new ByteArrayOutputStream();
- try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
- writePaxKeyValue("GNU.sparse.size", realSize, writer);
- writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer);
- final String map = sparseEntries.stream()
- .map(e -> e.getLeft() + "," + e.getRight())
- .collect(Collectors.joining(","));
- writePaxKeyValue("GNU.sparse.map", map, writer);
+ private static void writeOldGnuSparseFile(
+ final Collection> sparseEntries,
+ final byte[] data,
+ final int realSize,
+ final OutputStream out)
+ throws IOException {
+ int offset = writeTarUstarHeader("sparse-file.txt", data.length, OLD_GNU_MAGIC, 'S', out);
+ while (offset < 386) {
+ out.write(0);
+ offset++;
}
- return baos.toByteArray();
+ // Sparse entries (24 bytes each)
+ offset += writeOldGnuSparseEntries(sparseEntries, 4, out);
+ // Real size (12 bytes)
+ offset += writeOctalString(realSize, 12, out);
+ offset = padTo512Bytes(offset, out);
+ // Write extended headers
+ while (!sparseEntries.isEmpty()) {
+ offset += writeOldGnuSparseExtendedHeader(sparseEntries, out);
+ }
+ // Write file data
+ out.write(data);
+ offset += data.length;
+ padTo512Bytes(offset, out);
}
- private static byte[] createGnuSparse1PaxData(
- final Collection> sparseEntries, final int realSize) {
- final ByteArrayOutputStream baos = new ByteArrayOutputStream();
- try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
- writePaxKeyValue("GNU.sparse.realsize", realSize, writer);
- writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer);
- writePaxKeyValue("GNU.sparse.major", 1, writer);
- writePaxKeyValue("GNU.sparse.minor", 0, writer);
- }
- return baos.toByteArray();
+ private static void writePaxKeyValue(final String key, final int value, final PrintWriter out) {
+ writePaxKeyValue(key, Integer.toString(value), out);
}
private static void writePaxKeyValue(final String key, final String value, final PrintWriter out) {
@@ -340,32 +326,6 @@ private static void writePaxKeyValue(final String key, final String value, final
out.print(entry);
}
- private static void writePaxKeyValue(final String key, final int value, final PrintWriter out) {
- writePaxKeyValue(key, Integer.toString(value), out);
- }
-
- private static byte[] createGnuSparse1EntriesData(final Collection extends Pair> sparseEntries)
- throws IOException {
- final ByteArrayOutputStream baos = new ByteArrayOutputStream();
- try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) {
- writer.printf("%d\n", sparseEntries.size());
- for (final Pair entry : sparseEntries) {
- writer.printf("%d\n", entry.getLeft());
- writer.printf("%d\n", entry.getRight());
- }
- }
- padTo512Bytes(baos.size(), baos);
- return baos.toByteArray();
- }
-
- private static int writeOctalString(final long value, final int length, final OutputStream out) throws IOException {
- int count = 0;
- final String s = Long.toOctalString(value);
- count += writeString(s, length - 1, out);
- out.write('\0');
- return ++count;
- }
-
private static int writeString(final String s, final int length, final OutputStream out) throws IOException {
final byte[] bytes = s.getBytes(US_ASCII);
out.write(bytes);
@@ -375,13 +335,53 @@ private static int writeString(final String s, final int length, final OutputStr
return length;
}
- private static int padTo512Bytes(final int offset, final OutputStream out) throws IOException {
- int count = offset;
- while (count % 512 != 0) {
+ private static int writeTarUstarHeader(
+ final String fileName,
+ final long fileSize,
+ final String magicAndVersion,
+ final char typeFlag,
+ final OutputStream out)
+ throws IOException {
+ int count = 0;
+ // File name (100 bytes)
+ count += writeString(fileName, 100, out);
+ // File mode (8 bytes)
+ count += writeOctalString(FILE_MODE, 8, out);
+ // Owner ID (8 bytes)
+ count += writeOctalString(OWNER_ID, 8, out);
+ // Group ID (8 bytes)
+ count += writeOctalString(GROUP_ID, 8, out);
+ // File size (12 bytes)
+ count += writeOctalString(fileSize, 12, out);
+ // Modification timestamp (12 bytes)
+ count += writeOctalString(TIMESTAMP, 12, out);
+ // Checksum (8 bytes), filled with spaces for now
+ count += writeString(StringUtils.repeat(' ', 7), 8, out);
+ // Link indicator (1 byte)
+ out.write(typeFlag);
+ count++;
+ // Name of linked file (100 bytes)
+ count += writeString("", 100, out);
+ // Magic (6 bytes) + Version (2 bytes)
+ count += writeString(magicAndVersion, 8, out);
+ // Owner user name (32 bytes)
+ count += writeString(OWNER_NAME, 32, out);
+ // Owner group name (32 bytes)
+ count += writeString(GROUP_NAME, 32, out);
+ // Device major number (8 bytes)
+ count += writeString("", 8, out);
+ // Device minor number (8 bytes)
+ count += writeString("", 8, out);
+ return count;
+ }
+
+ private static void writeUstarTrailer(final OutputStream out) throws IOException {
+ int offset = 0;
+ // 1024 bytes of zero
+ while (offset < 1024) {
out.write(0);
- count++;
+ offset++;
}
- return count;
}
private TestArchiveGenerator() {