diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java index 2bd6ade2b5e..57cd596ca7a 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java @@ -26,11 +26,12 @@ import java.io.ByteArrayOutputStream; import java.io.EOFException; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.SequenceInputStream; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -150,21 +151,15 @@ public static boolean matches(final byte[] signature, final int length) { /** True if stream is at EOF. */ private boolean atEof; - /** Size of the current. */ - private long entrySize; - /** How far into the entry the stream is at. */ private long entryOffset; - /** Input streams for reading sparse entries. **/ - private List sparseInputStreams; - - /** The index of current input stream being read when reading sparse entries. */ - private int currentSparseInputStreamIndex; - /** The meta-data about the current entry. */ private TarArchiveEntry currEntry; + /** The current input stream. */ + private InputStream currentInputStream; + /** The encoding of the file. 
*/ private final ZipEncoding zipEncoding; @@ -322,6 +317,17 @@ public TarArchiveInputStream(final InputStream inputStream, final String encodin this(builder().setInputStream(inputStream).setCharset(encoding)); } + private void afterRead(final int read) throws IOException { + // Count the bytes read + count(read); + // Check for truncated entries + if (read == -1 && entryOffset < currEntry.getSize()) { + throw new EOFException(String.format("Truncated TAR archive: entry '%s' expected %,d bytes, actual %,d", currEntry.getName(), currEntry.getSize(), + entryOffset)); + } + entryOffset += Math.max(0, read); + } + /** * Gets the available data that can be read from the current entry in the archive. This does not indicate how much data is left in the entire archive, only * in the current entry. This value is determined from the entry's size header field and the amount of data already read from the current entry. @@ -350,8 +356,7 @@ public int available() throws IOException { *

*/ private void buildSparseInputStreams() throws IOException { - currentSparseInputStreamIndex = -1; - sparseInputStreams = new ArrayList<>(); + final List sparseInputStreams = new ArrayList<>(); final List sparseHeaders = currEntry.getOrderedSparseHeaders(); // Stream doesn't need to be closed at all as it doesn't use any resources final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR @@ -377,15 +382,15 @@ private void buildSparseInputStreams() throws IOException { // @formatter:off sparseInputStreams.add(BoundedInputStream.builder() .setInputStream(in) + .setAfterRead(this::afterRead) .setMaxCount(sparseHeader.getNumbytes()) + .setPropagateClose(false) .get()); // @formatter:on } offset = sparseHeader.getOffset() + sparseHeader.getNumbytes(); } - if (!sparseInputStreams.isEmpty()) { - currentSparseInputStreamIndex = 0; - } + currentInputStream = new SequenceInputStream(Collections.enumeration(sparseInputStreams)); } /** @@ -406,10 +411,9 @@ public boolean canReadEntryData(final ArchiveEntry archiveEntry) { @Override public void close() throws IOException { // Close all the input streams in sparseInputStreams - if (sparseInputStreams != null) { - for (final InputStream inputStream : sparseInputStreams) { - inputStream.close(); - } + if (currentInputStream != null) { + currentInputStream.close(); + currentInputStream = null; } in.close(); } @@ -425,26 +429,6 @@ private void consumeRemainderOfLastBlock() throws IOException { } } - /** - * For FileInputStream, the skip always return the number you input, so we need the available bytes to determine how many bytes are actually skipped - * - * @param available available bytes returned by {@link InputStream#available()}. - * @param skipped skipped bytes returned by {@link InputStream#skip()}. - * @param expected bytes expected to skip. - * @return number of bytes actually skipped. - * @throws IOException if a truncated tar archive is detected. 
- */ - private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException { - long actuallySkipped = skipped; - if (in instanceof FileInputStream) { - actuallySkipped = Math.min(skipped, available); - } - if (actuallySkipped != expected) { - throw new ArchiveException("Truncated TAR archive"); - } - return actuallySkipped; - } - /** * Gets the current TAR Archive Entry that this input stream is processing * @@ -509,8 +493,8 @@ public TarArchiveEntry getNextEntry() throws IOException { boolean lastWasSpecial = false; do { // If there is a current entry, skip any unread data and padding - if (currEntry != null) { - IOUtils.skip(this, Long.MAX_VALUE); // Skip to end of current entry + if (currentInputStream != null) { + IOUtils.skip(currentInputStream, Long.MAX_VALUE); // Skip to end of current entry skipRecordPadding(); // Skip padding to align to the next record } // Read the next header record @@ -525,13 +509,19 @@ public TarArchiveEntry getNextEntry() throws IOException { } // Parse the header into a new entry currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient); + // Set up the input stream for the new entry + currentInputStream = BoundedInputStream.builder() + .setInputStream(in) + .setAfterRead(this::afterRead) + .setMaxCount(currEntry.getSize()) + .setPropagateClose(false) + .get(); entryOffset = 0; - entrySize = currEntry.getSize(); lastWasSpecial = TarUtils.isSpecialTarRecord(currEntry); if (lastWasSpecial) { // Handle PAX, GNU long name, or other special records - TarUtils.handleSpecialTarRecord(this, zipEncoding, getMaxEntryNameLength(), currEntry, paxHeaders, sparseHeaders, globalPaxHeaders, - globalSparseHeaders); + TarUtils.handleSpecialTarRecord(currentInputStream, zipEncoding, getMaxEntryNameLength(), currEntry, paxHeaders, sparseHeaders, + globalPaxHeaders, globalSparseHeaders); } } while (lastWasSpecial); // Apply global and local PAX headers @@ -539,9 +529,12 @@ public 
TarArchiveEntry getNextEntry() throws IOException { // Handle sparse files if (currEntry.isSparse()) { if (currEntry.isOldGNUSparse()) { + // Old GNU sparse format uses extra header blocks for metadata. + // These blocks are not included in the entry’s size, so we cannot + // rely on BoundedInputStream here. readOldGNUSparse(); } else if (currEntry.isPaxGNU1XSparse()) { - currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(in, getRecordSize())); + currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(currentInputStream, getRecordSize())); } // sparse headers are all done reading, we need to build // sparse input streams using these sparse headers @@ -551,8 +544,6 @@ public TarArchiveEntry getNextEntry() throws IOException { if (currEntry.isDirectory() && !currEntry.getName().endsWith("/")) { currEntry.setName(currEntry.getName() + "/"); } - // Update entry size in case it changed due to PAX headers - entrySize = currEntry.getSize(); return currEntry; } @@ -664,33 +655,13 @@ public int read(final byte[] buf, final int offset, int numToRead) throws IOExce if (numToRead == 0) { return 0; } - int totalRead = 0; if (isAtEOF() || isDirectory()) { return -1; } - if (currEntry == null) { + if (currEntry == null || currentInputStream == null) { throw new IllegalStateException("No current tar entry"); } - if (entryOffset >= currEntry.getRealSize()) { - return -1; - } - numToRead = Math.min(numToRead, available()); - if (currEntry.isSparse()) { - // for sparse entries, we need to read them in another way - totalRead = readSparse(buf, offset, numToRead); - } else { - totalRead = in.read(buf, offset, numToRead); - } - if (totalRead == -1) { - if (numToRead > 0) { - throw new EOFException("Truncated TAR archive"); - } - setAtEOF(true); - } else { - count(totalRead); - entryOffset += totalRead; - } - return totalRead; + return currentInputStream.read(buf, offset, numToRead); } /** @@ -710,9 +681,6 @@ private void readOldGNUSparse() throws IOException { 
currEntry.getSparseHeaders().addAll(entry.getSparseHeaders()); } while (entry.isExtended()); } - // sparse headers are all done reading, we need to build - // sparse input streams using these sparse headers - buildSparseInputStreams(); } /** @@ -730,52 +698,6 @@ protected byte[] readRecord() throws IOException { return recordBuffer; } - /** - * For sparse tar entries, there are many "holes"(consisting of all 0) in the file. Only the non-zero data is stored in tar files, and they are stored - * separately. The structure of non-zero data is introduced by the sparse headers using the offset, where a block of non-zero data starts, and numbytes, the - * length of the non-zero data block. When reading sparse entries, the actual data is read out with "holes" and non-zero data combined according to - * the sparse headers. - * - * @param buf The buffer into which to place bytes read. - * @param offset The offset at which to place bytes read. - * @param numToRead The number of bytes to read. - * @return The number of bytes read, or -1 at EOF. - * @throws IOException on error. 
- */ - private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException { - // if there are no actual input streams, just read from the original input stream - if (sparseInputStreams == null || sparseInputStreams.isEmpty()) { - return in.read(buf, offset, numToRead); - } - if (currentSparseInputStreamIndex >= sparseInputStreams.size()) { - return -1; - } - final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex); - final int readLen = currentInputStream.read(buf, offset, numToRead); - // if the current input stream is the last input stream, - // just return the number of bytes read from current input stream - if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) { - return readLen; - } - // if EOF of current input stream is meet, open a new input stream and recursively call read - if (readLen == -1) { - currentSparseInputStreamIndex++; - return readSparse(buf, offset, numToRead); - } - // if the rest data of current input stream is not long enough, open a new input stream - // and recursively call read - if (readLen < numToRead) { - currentSparseInputStreamIndex++; - final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen); - if (readLenOfNext == -1) { - return readLen; - } - return readLen + readLenOfNext; - } - // if the rest data of current input stream is enough(which means readLen == len), just return readLen - return readLen; - } - /** * Since we do not support marking just yet, we do nothing. 
*/ @@ -816,21 +738,11 @@ public long skip(final long n) throws IOException { if (n <= 0 || isDirectory()) { return 0; } - final long availableOfInputStream = in.available(); - final long available = currEntry.getRealSize() - entryOffset; - final long numToSkip = Math.min(n, available); - long skipped; - if (!currEntry.isSparse()) { - skipped = IOUtils.skip(in, numToSkip); - // for non-sparse entry, we should get the bytes actually skipped bytes along with - // inputStream.available() if inputStream is instance of FileInputStream - skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip); - } else { - skipped = skipSparse(numToSkip); + if (currEntry == null || currentInputStream == null) { + throw new IllegalStateException("No current tar entry"); } - count(skipped); - entryOffset += skipped; - return skipped; + // Use Apache Commons IO to skip as it handles skipping fully + return org.apache.commons.io.IOUtils.skip(currentInputStream, n); } /** @@ -839,37 +751,15 @@ public long skip(final long n) throws IOException { * @throws IOException if a truncated tar archive is detected. */ private void skipRecordPadding() throws IOException { - if (!isDirectory() && this.entrySize > 0 && this.entrySize % getRecordSize() != 0) { - final long available = in.available(); - final long numRecords = this.entrySize / getRecordSize() + 1; - final long padding = numRecords * getRecordSize() - this.entrySize; - long skipped = IOUtils.skip(in, padding); - skipped = getActuallySkipped(available, skipped, padding); + final long entrySize = currEntry != null ? 
currEntry.getSize() : 0; + if (!isDirectory() && entrySize > 0 && entrySize % getRecordSize() != 0) { + final long padding = getRecordSize() - (entrySize % getRecordSize()); + final long skipped = org.apache.commons.io.IOUtils.skip(in, padding); count(skipped); - } - } - - /** - * Skip n bytes from current input stream, if the current input stream doesn't have enough data to skip, jump to the next input stream and skip the rest - * bytes, keep doing this until total n bytes are skipped or the input streams are all skipped - * - * @param n bytes of data to skip. - * @return actual bytes of data skipped. - * @throws IOException if an I/O error occurs. - */ - private long skipSparse(final long n) throws IOException { - if (sparseInputStreams == null || sparseInputStreams.isEmpty()) { - return in.skip(n); - } - long bytesSkipped = 0; - while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) { - final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex); - bytesSkipped += currentInputStream.skip(n - bytesSkipped); - if (bytesSkipped < n) { - currentSparseInputStreamIndex++; + if (skipped != padding) { + throw new EOFException(String.format("Truncated TAR archive: failed to skip record padding for entry '%s'", currEntry.getName())); } } - return bytesSkipped; } /** diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java index e699b1d4f45..d9cd604a610 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java @@ -22,15 +22,18 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.io.SequenceInputStream; import java.nio.ByteBuffer; import java.nio.channels.SeekableByteChannel; import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.ArrayList; +import 
java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Objects; import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.compress.archivers.ArchiveFile; @@ -39,7 +42,6 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.utils.ArchiveUtils; import org.apache.commons.compress.utils.BoundedArchiveInputStream; -import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; import org.apache.commons.io.function.IOStream; import org.apache.commons.io.input.BoundedInputStream; @@ -50,91 +52,45 @@ */ public class TarFile implements ArchiveFile { + /** + * InputStream that reads a specific entry from the archive. + * + *

<p>It ensures that:</p>
+ * <ul>
+ * <li>No more than the specified number of bytes are read from the underlying channel.</li>
+ * <li>If the end of the entry is reached before the expected number of bytes, an {@link EOFException} is thrown.</li>
+ * </ul>
+ */ private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream { private final SeekableByteChannel channel; - private final TarArchiveEntry entry; - - private long entryOffset; + private final long end; - private int currentSparseInputStreamIndex; - - BoundedTarEntryInputStream(final TarArchiveEntry entry, final SeekableByteChannel channel) throws IOException { - super(entry.getDataOffset(), entry.getRealSize()); - if (channel.size() - entry.getSize() < entry.getDataOffset()) { - throw new EOFException("Truncated TAR archive: entry size exceeds archive size"); - } - this.entry = entry; + BoundedTarEntryInputStream(final long start, final long remaining, final SeekableByteChannel channel) { + super(start, remaining); + this.end = start + remaining; this.channel = channel; } @Override protected int read(final long pos, final ByteBuffer buf) throws IOException { - if (entryOffset >= entry.getRealSize()) { - return -1; - } - final int totalRead; - if (entry.isSparse()) { - totalRead = readSparse(entryOffset, buf, buf.limit()); - } else { - totalRead = readArchive(pos, buf); - } + Objects.requireNonNull(buf, "ByteBuffer"); + // The caller ensures that [pos, pos + buf.remaining()] is within [start, end] + channel.position(pos); + final int totalRead = channel.read(buf); if (totalRead == -1) { - if (buf.array().length > 0) { - throw new EOFException("Truncated TAR archive"); + if (buf.remaining() > 0) { + throw new EOFException(String.format("Truncated TAR archive: expected at least %d bytes, but got only %d bytes", + end, channel.position())); } + // Marks the TarFile as having reached EOF. 
setAtEOF(true); } else { - entryOffset += totalRead; buf.flip(); } return totalRead; } - - private int readArchive(final long pos, final ByteBuffer buf) throws IOException { - channel.position(pos); - return channel.read(buf); - } - - private int readSparse(final long pos, final ByteBuffer buf, final int numToRead) throws IOException { - // if there are no actual input streams, just read from the original archive - final List entrySparseInputStreams = sparseInputStreams.get(entry.getName()); - if (entrySparseInputStreams == null || entrySparseInputStreams.isEmpty()) { - return readArchive(entry.getDataOffset() + pos, buf); - } - if (currentSparseInputStreamIndex >= entrySparseInputStreams.size()) { - return -1; - } - final InputStream currentInputStream = entrySparseInputStreams.get(currentSparseInputStreamIndex); - final byte[] bufArray = new byte[numToRead]; - final int readLen = currentInputStream.read(bufArray); - if (readLen != -1) { - buf.put(bufArray, 0, readLen); - } - // if the current input stream is the last input stream, - // just return the number of bytes read from current input stream - if (currentSparseInputStreamIndex == entrySparseInputStreams.size() - 1) { - return readLen; - } - // if EOF of current input stream is meet, open a new input stream and recursively call read - if (readLen == -1) { - currentSparseInputStreamIndex++; - return readSparse(pos, buf, numToRead); - } - // if the rest data of current input stream is not long enough, open a new input stream - // and recursively call read - if (readLen < numToRead) { - currentSparseInputStreamIndex++; - final int readLenOfNext = readSparse(pos + readLen, buf, numToRead - readLen); - if (readLenOfNext == -1) { - return readLen; - } - return readLen + readLenOfNext; - } - // if the rest data of current input stream is enough(which means readLen == len), just return readLen - return readLen; - } } // @formatter:off @@ -423,7 +379,7 @@ private void buildSparseInputStreams() throws IOException { // 
possible integer overflow throw new ArchiveException("Unreadable TAR archive, sparse block offset or length too big"); } - streams.add(new BoundedSeekableByteChannelInputStream(start, sparseHeader.getNumbytes(), archive)); + streams.add(new BoundedTarEntryInputStream(start, sparseHeader.getNumbytes(), archive)); } offset = sparseHeader.getOffset() + sparseHeader.getNumbytes(); } @@ -467,7 +423,13 @@ public List getEntries() { @Override public InputStream getInputStream(final TarArchiveEntry entry) throws IOException { try { - return new BoundedTarEntryInputStream(entry, archive); + // Sparse entries are composed of multiple fragments: wrap them in a SequenceInputStream + if (entry.isSparse()) { + final List streams = sparseInputStreams.get(entry.getName()); + return new SequenceInputStream(streams != null ? Collections.enumeration(streams) : Collections.emptyEnumeration()); + } + // Regular entries are bounded: wrap in BoundedTarEntryInputStream to enforce size and detect premature EOF + return new BoundedTarEntryInputStream(entry.getDataOffset(), entry.getSize(), archive); } catch (final RuntimeException e) { throw new ArchiveException("Corrupted TAR archive. 
Can't read entry", (Throwable) e); } @@ -489,6 +451,7 @@ private TarArchiveEntry getNextTarEntry() throws IOException { final List sparseHeaders = new ArrayList<>(); // Handle special tar records boolean lastWasSpecial = false; + InputStream currentStream; do { // If there is a current entry, skip any unread data and padding if (currEntry != null) { @@ -509,22 +472,33 @@ private TarArchiveEntry getNextTarEntry() throws IOException { // Parse the header into a new entry final long position = archive.position(); currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf.array(), zipEncoding, lenient, position); + currentStream = new BoundedTarEntryInputStream(currEntry.getDataOffset(), currEntry.getSize(), archive); lastWasSpecial = TarUtils.isSpecialTarRecord(currEntry); if (lastWasSpecial) { // Handle PAX, GNU long name, or other special records - TarUtils.handleSpecialTarRecord(getInputStream(currEntry), zipEncoding, maxEntryNameLength, currEntry, paxHeaders, sparseHeaders, - globalPaxHeaders, globalSparseHeaders); + TarUtils.handleSpecialTarRecord(currentStream, zipEncoding, maxEntryNameLength, currEntry, paxHeaders, sparseHeaders, globalPaxHeaders, + globalSparseHeaders); } } while (lastWasSpecial); // Apply global and local PAX headers TarUtils.applyPaxHeadersToEntry(currEntry, paxHeaders, sparseHeaders, globalPaxHeaders, globalSparseHeaders); // Handle sparse files if (currEntry.isSparse()) { + // These sparse formats have the sparse headers in the entry if (currEntry.isOldGNUSparse()) { + // Old GNU sparse format uses extra header blocks for metadata. + // These blocks are not included in the entry’s size, so we cannot + // rely on BoundedTarEntryInputStream here. 
readOldGNUSparse(); + // Reposition to the start of the entry data to correctly compute the sparse streams + currEntry.setDataOffset(archive.position()); } else if (currEntry.isPaxGNU1XSparse()) { - currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(getInputStream(currEntry), recordSize)); - currEntry.setDataOffset(currEntry.getDataOffset() + recordSize); + final long position = archive.position(); + currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(currentStream, recordSize)); + // Adjust the current entry to point to the start of the sparse file data + final long sparseHeadersSize = archive.position() - position; + currEntry.setSize(currEntry.getSize() - sparseHeadersSize); + currEntry.setDataOffset(currEntry.getDataOffset() + sparseHeadersSize); } // sparse headers are all done reading, we need to build // sparse input streams using these sparse headers @@ -593,12 +567,8 @@ private void readOldGNUSparse() throws IOException { } entry = new TarArchiveSparseEntry(headerBuf.array()); currEntry.getSparseHeaders().addAll(entry.getSparseHeaders()); - currEntry.setDataOffset(currEntry.getDataOffset() + recordSize); } while (entry.isExtended()); } - // sparse headers are all done reading, we need to build - // sparse input streams using these sparse headers - buildSparseInputStreams(); } /** @@ -644,8 +614,7 @@ protected final void setAtEOF(final boolean eof) { */ private void skipRecordPadding() throws IOException { if (!isDirectory() && currEntry.getSize() > 0 && currEntry.getSize() % recordSize != 0) { - final long numRecords = currEntry.getSize() / recordSize + 1; - final long padding = numRecords * recordSize - currEntry.getSize(); + final long padding = recordSize - (currEntry.getSize() % recordSize); repositionForwardBy(padding); throwExceptionIfPositionIsNotInArchive(); } @@ -668,7 +637,7 @@ public IOStream stream() { */ private void throwExceptionIfPositionIsNotInArchive() throws IOException { if (archive.size() < archive.position()) { - 
throw new ArchiveException("Truncated TAR archive"); + throw new EOFException("Truncated TAR archive: archive should be at least " + archive.position() + " bytes but was " + archive.size() + " bytes"); } } diff --git a/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java b/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java index c66b24a1cdf..d334af0f377 100644 --- a/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java @@ -78,15 +78,9 @@ public synchronized int read(final byte[] b, final int off, final int len) throw if (loc >= end) { return -1; } - final long maxLen = Math.min(len, end - loc); - if (maxLen <= 0) { - return 0; - } - if (off < 0 || off > b.length || maxLen > b.length - off) { - throw new IndexOutOfBoundsException("offset or len are out of bounds"); - } - - final ByteBuffer buf = ByteBuffer.wrap(b, off, (int) maxLen); + // Both len and end - loc are guaranteed to be > 0 here and at least len is <= Integer.MAX_VALUE. + final int maxLen = (int) Math.min(len, end - loc); + final ByteBuffer buf = ByteBuffer.wrap(b, off, maxLen); final int ret = read(loc, buf); if (ret > 0) { loc += ret; @@ -95,12 +89,15 @@ public synchronized int read(final byte[] b, final int off, final int len) throw } /** - * Reads content of the stream into a {@link ByteBuffer}. + * Reads bytes from this stream into the given {@link ByteBuffer}, starting at the specified position. + * + *

<p>The caller is responsible for ensuring that the requested range + * {@code [pos, pos + buf.remaining())} lies within the valid bounds of the stream.</p>

* - * @param pos position to start the read. - * @param buf buffer to add the read content. - * @return number of read bytes. - * @throws IOException if I/O fails. + * @param pos the position within the stream at which to begin reading + * @param buf the buffer into which bytes are read; bytes are written starting at the buffer’s current position + * @return the number of bytes read into the buffer + * @throws IOException if an I/O error occurs while reading */ protected abstract int read(long pos, ByteBuffer buf) throws IOException; } diff --git a/src/test/java/org/apache/commons/compress/archivers/MaxNameEntryLengthTest.java b/src/test/java/org/apache/commons/compress/archivers/MaxNameEntryLengthTest.java index 16ef55bc726..10fef8f1488 100644 --- a/src/test/java/org/apache/commons/compress/archivers/MaxNameEntryLengthTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/MaxNameEntryLengthTest.java @@ -46,6 +46,7 @@ import org.apache.commons.io.function.IOStream; import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.function.Executable; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -111,12 +112,15 @@ static Stream testTruncatedStreams() throws IOException { static Stream testTruncatedTarFiles() throws IOException { return Stream.of( - Arguments.of(TarFile.builder() - .setMaxEntryNameLength(Integer.MAX_VALUE) - .setURI(getURI("synthetic/long-name/pax-fail.tar"))), - Arguments.of(TarFile.builder() - .setMaxEntryNameLength(Integer.MAX_VALUE) - .setURI(getURI("synthetic/long-name/gnu-fail.tar")))); + Arguments.of( + TarFile.builder().setMaxEntryNameLength(Integer.MAX_VALUE).setURI(getURI("synthetic/long-name/pax-fail.tar")), + Integer.MAX_VALUE + ), + Arguments.of( + TarFile.builder().setMaxEntryNameLength(Integer.MAX_VALUE).setURI(getURI("synthetic/long-name/gnu-fail.tar")), + 
SOFT_MAX_ARRAY_LENGTH + ) + ); } static Stream testValidStreams() throws IOException { @@ -175,10 +179,18 @@ void testTruncatedStreams(final ArchiveInputStream archiveInputStream, final @ParameterizedTest @MethodSource - void testTruncatedTarFiles(final TarFile.Builder tarFileBuilder) { - // Since the real size of the archive is known, the truncation is detected - // much earlier and before trying to read file names. - assertThrows(EOFException.class, () -> tarFileBuilder.get().getEntries()); + void testTruncatedTarFiles(final TarFile.Builder tarFileBuilder, final long expectedLength) { + // If the file name length exceeds available memory, the stream fails fast with MemoryLimitException. + // Otherwise, it fails with EOFException when the stream ends unexpectedly. + final Executable action = () -> tarFileBuilder.get().entries(); + if (Runtime.getRuntime().totalMemory() < expectedLength) { + final MemoryLimitException exception = assertThrows(MemoryLimitException.class, action); + final String message = exception.getMessage(); + assertNotNull(message); + assertTrue(message.contains(String.format("%,d", expectedLength)), "Message mentions expected length (" + expectedLength + "): " + message); + } else { + assertThrows(EOFException.class, action); + } } @Test diff --git a/src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java b/src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java new file mode 100644 index 00000000000..49c06da5172 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/TestArchiveGenerator.java @@ -0,0 +1,390 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import static java.nio.charset.StandardCharsets.US_ASCII; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; + +public final class TestArchiveGenerator { + + private static final int FILE_MODE = 0100644; + private static final int GROUP_ID = 0; + private static final String GROUP_NAME = "group"; + // TAR + private static final String OLD_GNU_MAGIC = "ustar "; + private static final int OWNER_ID = 0; + private static final String OWNER_NAME = "owner"; + private static final String PAX_MAGIC = "ustar\u000000"; + private static final int TIMESTAMP = 0; + + private static byte[] createData(final int size) { + final byte[] data = new byte[size]; + for (int i = 0; i < size; i++) { + data[i] = (byte) (i % 256); + } + return data; + } + + // Very fragmented sparse file + private static List> createFragmentedSparseEntries(final int realSize) { + final List> sparseEntries = new ArrayList<>(); + for (int 
offset = 0; offset < realSize; offset++) { + sparseEntries.add(Pair.of(offset, 1)); + } + return sparseEntries; + } + + private static byte[] createGnuSparse00PaxData( + final Collection> sparseEntries, final int realSize) { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) { + writePaxKeyValue("GNU.sparse.size", realSize, writer); + writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer); + for (final Pair entry : sparseEntries) { + writePaxKeyValue("GNU.sparse.offset", entry.getLeft(), writer); + writePaxKeyValue("GNU.sparse.numbytes", entry.getRight(), writer); + } + } + return baos.toByteArray(); + } + + private static byte[] createGnuSparse01PaxData( + final Collection> sparseEntries, final int realSize) { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) { + writePaxKeyValue("GNU.sparse.size", realSize, writer); + writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer); + final String map = sparseEntries.stream() + .map(e -> e.getLeft() + "," + e.getRight()) + .collect(Collectors.joining(",")); + writePaxKeyValue("GNU.sparse.map", map, writer); + } + return baos.toByteArray(); + } + + private static byte[] createGnuSparse1EntriesData(final Collection> sparseEntries) + throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) { + writer.printf("%d\n", sparseEntries.size()); + for (final Pair entry : sparseEntries) { + writer.printf("%d\n", entry.getLeft()); + writer.printf("%d\n", entry.getRight()); + } + } + padTo512Bytes(baos.size(), baos); + return baos.toByteArray(); + } + + private static byte[] createGnuSparse1PaxData( + final Collection> sparseEntries, final int realSize) { + final ByteArrayOutputStream baos = new 
ByteArrayOutputStream(); + try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(baos, US_ASCII))) { + writePaxKeyValue("GNU.sparse.realsize", realSize, writer); + writePaxKeyValue("GNU.sparse.numblocks", sparseEntries.size(), writer); + writePaxKeyValue("GNU.sparse.major", 1, writer); + writePaxKeyValue("GNU.sparse.minor", 0, writer); + } + return baos.toByteArray(); + } + + /** Generates {@code old-gnu-sparse.tar}, {@code gnu-sparse-00.tar}, {@code gnu-sparse-01.tar} and {@code gnu-sparse-1.tar} inside {@code path}. Throws IllegalArgumentException when {@code path} is not an existing directory; an IOException from any of the writers propagates to the caller. */ public static void createSparseFileTestCases(final Path path) throws IOException { + if (!Files.isDirectory(path)) { + throw new IllegalArgumentException("Not a directory: " + path); + } + oldGnuSparse(path); + gnuSparse00(path); + gnuSparse01(path); + gnuSparse1X(path); + } + + /** Writes {@code gnu-sparse-00.tar} into {@code path}: the result of {@code createData(8 * 1024)} as file data, PAX data from createGnuSparse00PaxData, then the trailer. */ private static void gnuSparse00(final Path path) throws IOException { + final Path file = path.resolve("gnu-sparse-00.tar"); + try (OutputStream out = Files.newOutputStream(file)) { + final byte[] data = createData(8 * 1024); + final List> sparseEntries = createFragmentedSparseEntries(data.length); + final byte[] paxData = createGnuSparse00PaxData(sparseEntries, data.length); + writeGnuSparse0File(data, paxData, out); + writeUstarTrailer(out); + } + } + + /** Writes {@code gnu-sparse-01.tar} into {@code path}: same layout as the 00 archive, but with PAX data from createGnuSparse01PaxData. */ private static void gnuSparse01(final Path path) throws IOException { + final Path file = path.resolve("gnu-sparse-01.tar"); + try (OutputStream out = Files.newOutputStream(file)) { + final byte[] data = createData(8 * 1024); + final List> sparseEntries = createFragmentedSparseEntries(data.length); + final byte[] paxData = createGnuSparse01PaxData(sparseEntries, data.length); + writeGnuSparse0File(data, paxData, out); + writeUstarTrailer(out); + } + } + + /** Writes {@code gnu-sparse-1.tar} into {@code path} through writeGnuSparse1File, then the trailer. */ private static void gnuSparse1X(final Path path) throws IOException { + final Path file = path.resolve("gnu-sparse-1.tar"); + try (OutputStream out = Files.newOutputStream(file)) { + final byte[] data = createData(8 * 1024); + final List> sparseEntries = createFragmentedSparseEntries(data.length); + writeGnuSparse1File(sparseEntries, data, out); + writeUstarTrailer(out); + } + } + + /** Command-line entry point. Expects exactly one argument naming an existing directory (otherwise a message is printed to stderr and the JVM exits with status 1), creates a {@code sparse} subdirectory inside it and generates the sparse test archives there. */ public static void 
main(final String[] args) throws IOException { + if (args.length != 1) { + System.err.println("Expected one argument: output directory"); + System.exit(1); + } + final Path path = Paths.get(args[0]); + if (!Files.isDirectory(path)) { + System.err.println("Not a directory: " + path); + System.exit(1); + } + // Sparse file examples + final Path sparsePath = path.resolve("sparse"); + Files.createDirectories(sparsePath); + createSparseFileTestCases(sparsePath); + } + + /** Writes {@code old-gnu-sparse.tar} into {@code path} through writeOldGnuSparseFile, passing {@code data.length} as the real size, then the trailer. */ private static void oldGnuSparse(final Path path) throws IOException { + final Path file = path.resolve("old-gnu-sparse.tar"); + try (OutputStream out = Files.newOutputStream(file)) { + final byte[] data = createData(8 * 1024); + final List> sparseEntries = createFragmentedSparseEntries(data.length); + writeOldGnuSparseFile(sparseEntries, data, data.length, out); + writeUstarTrailer(out); + } + } + + /** Writes zero bytes to {@code out} until the running count, starting at {@code offset}, is a multiple of 512, and returns that padded count. Nothing is written when {@code offset} is already a multiple of 512. */ private static int padTo512Bytes(final int offset, final OutputStream out) throws IOException { + int count = offset; + while (count % 512 != 0) { + out.write(0); + count++; + } + return count; + } + + /** Writes two header/payload pairs, each part padded to a multiple of 512 bytes: a type 'x' header named {@code ./GNUSparseFile.1/sparse-file.txt} followed by {@code paxData}, then a type '0' header named {@code sparse-file.txt} followed by {@code data}. Both headers use PAX_MAGIC. */ private static void writeGnuSparse0File(final byte[] data, final byte[] paxData, final OutputStream out) + throws IOException { + // PAX entry + int offset = writeTarUstarHeader("./GNUSparseFile.1/" + "sparse-file.txt", paxData.length, PAX_MAGIC, 'x', out); + offset = padTo512Bytes(offset, out); + // PAX data + out.write(paxData); + offset += paxData.length; + offset = padTo512Bytes(offset, out); + // File entry + offset += writeTarUstarHeader("sparse-file.txt", data.length, PAX_MAGIC, '0', out); + offset = padTo512Bytes(offset, out); + // File data + out.write(data); + offset += data.length; + padTo512Bytes(offset, out); + } + + /** Writes a type 'x' header plus the PAX data from createGnuSparse1PaxData, then a type '0' header whose size field is the length of the sparse map from createGnuSparse1EntriesData plus {@code data.length}, followed by that sparse map and {@code data}; each part is padded to a multiple of 512 bytes. */ private static void writeGnuSparse1File( + final Collection> sparseEntries, final byte[] data, final OutputStream out) + throws IOException { + // PAX entry + final byte[] paxData = createGnuSparse1PaxData(sparseEntries, data.length); + int offset = 
writeTarUstarHeader("./GNUSparseFile.1/sparse-file.txt", paxData.length, PAX_MAGIC, 'x', out); + offset = padTo512Bytes(offset, out); + // PAX data + out.write(paxData); + offset += paxData.length; + offset = padTo512Bytes(offset, out); + // File entry + final byte[] sparseEntriesData = createGnuSparse1EntriesData(sparseEntries); + offset += writeTarUstarHeader("sparse-file.txt", sparseEntriesData.length + data.length, PAX_MAGIC, '0', out); + offset = padTo512Bytes(offset, out); + // File data + out.write(sparseEntriesData); + offset += sparseEntriesData.length; + out.write(data); + offset += data.length; + padTo512Bytes(offset, out); + } + + /** Writes {@code value} as an octal string through writeString with a field width of {@code length - 1}, followed by one NUL byte. Returns the writeString result plus one. */ private static int writeOctalString(final long value, final int length, final OutputStream out) throws IOException { + int count = 0; + final String s = Long.toOctalString(value); + count += writeString(s, length - 1, out); + out.write('\0'); + return ++count; + } + + /** Writes up to {@code limit} entries taken from {@code sparseEntries} and removes each written entry through the iterator, so the caller's collection shrinks. When entries remain after {@code limit} have been written, a single byte 1 is appended and the method returns; otherwise the block is filled with (0, 0) entries up to {@code limit} and a single byte 0 is appended. Returns the number of bytes accounted for, including the flag byte. */ private static int writeOldGnuSparseEntries( + final Iterable> sparseEntries, final int limit, final OutputStream out) + throws IOException { + int offset = 0; + int count = 0; + final Iterator> it = sparseEntries.iterator(); + while (it.hasNext()) { + if (count >= limit) { + out.write(1); // more entries follow + return ++offset; + } + final Pair entry = it.next(); + it.remove(); + count++; + offset += writeOldGnuSparseEntry(entry.getLeft(), entry.getRight(), out); + } + while (count < limit) { + // pad with empty entries + offset += writeOldGnuSparseEntry(0, 0, out); + count++; + } + out.write(0); // no more entries + return ++offset; + } + + /** Writes one sparse entry as two 12-byte octal fields, {@code offset} then {@code length}, and returns the sum of both writeOctalString results. */ private static int writeOldGnuSparseEntry(final int offset, final int length, final OutputStream out) + throws IOException { + int count = 0; + count += writeOctalString(offset, 12, out); + count += writeOctalString(length, 12, out); + return count; + } + + /** Writes one extended header block: up to 21 entries via writeOldGnuSparseEntries, then zero padding to a multiple of 512 bytes. Returns the padded byte count. */ private static int writeOldGnuSparseExtendedHeader( + final Iterable> sparseEntries, final OutputStream out) throws IOException { + int offset = 0; + offset += 
writeOldGnuSparseEntries(sparseEntries, 21, out); + offset = padTo512Bytes(offset, out); + return offset; + } + + /** Writes one entry with OLD_GNU_MAGIC and type flag 'S': the common header, zero bytes up to offset 386, up to 4 sparse entries plus the continuation flag, {@code realSize} as a 12-byte octal field, padding to 512, then extended header blocks until {@code sparseEntries} is empty, and finally {@code data} padded to a multiple of 512. The collection passed as {@code sparseEntries} is emptied as a side effect. */ private static void writeOldGnuSparseFile( + final Collection> sparseEntries, + final byte[] data, + final int realSize, + final OutputStream out) + throws IOException { + int offset = writeTarUstarHeader("sparse-file.txt", data.length, OLD_GNU_MAGIC, 'S', out); + while (offset < 386) { + out.write(0); + offset++; + } + // Sparse entries (24 bytes each) + offset += writeOldGnuSparseEntries(sparseEntries, 4, out); + // Real size (12 bytes) + offset += writeOctalString(realSize, 12, out); + offset = padTo512Bytes(offset, out); + // Write extended headers + while (!sparseEntries.isEmpty()) { + offset += writeOldGnuSparseExtendedHeader(sparseEntries, out); + } + // Write file data + out.write(data); + offset += data.length; + padTo512Bytes(offset, out); + } + + /** Convenience overload: converts {@code value} with Integer.toString and delegates to the String variant. */ private static void writePaxKeyValue(final String key, final int value, final PrintWriter out) { + writePaxKeyValue(key, Integer.toString(value), out); + } + + /** Prints one record consisting of a decimal length, a space, {@code key}, an equals sign, {@code value} and a newline. The printed length is the digit count of a first estimate plus the length of the remainder of the record (leading space through newline), which covers the case where adding the length digits increases the digit count. */ private static void writePaxKeyValue(final String key, final String value, final PrintWriter out) { + final String entry = ' ' + key + "=" + value + "\n"; + // Guess length: length of length + space + entry + final int length = String.valueOf(entry.length()).length() + entry.length(); + // Recompute if number of digits changes + out.print(String.valueOf(length).length() + entry.length()); + out.print(entry); + } + + /** Writes the US_ASCII bytes of {@code s}, then NUL bytes until {@code length} bytes have been written, and returns {@code length}. NOTE(review): a string longer than {@code length} is written in full without truncation while {@code length} is still returned. */ private static int writeString(final String s, final int length, final OutputStream out) throws IOException { + final byte[] bytes = s.getBytes(US_ASCII); + out.write(bytes); + for (int i = bytes.length; i < length; i++) { + out.write('\0'); + } + return length; + } + + /** Writes the header fields in order: name, mode, owner id, group id, size, timestamp, checksum placeholder, type flag, empty link name, magic and version, owner name, group name and two empty device fields. The checksum field is written as seven spaces plus a NUL and is not computed here. Returns the summed field widths (345 with the widths used below); padding to 512 is left to the caller. */ private static int writeTarUstarHeader( + final String fileName, + final long fileSize, + final String magicAndVersion, + final char typeFlag, + final OutputStream out) + throws IOException { + int count = 0; + // File name (100 bytes) + count += 
writeString(fileName, 100, out); + // File mode (8 bytes) + count += writeOctalString(FILE_MODE, 8, out); + // Owner ID (8 bytes) + count += writeOctalString(OWNER_ID, 8, out); + // Group ID (8 bytes) + count += writeOctalString(GROUP_ID, 8, out); + // File size (12 bytes) + count += writeOctalString(fileSize, 12, out); + // Modification timestamp (12 bytes) + count += writeOctalString(TIMESTAMP, 12, out); + // Checksum (8 bytes), filled with spaces for now + count += writeString(StringUtils.repeat(' ', 7), 8, out); + // Link indicator (1 byte) + out.write(typeFlag); + count++; + // Name of linked file (100 bytes) + count += writeString("", 100, out); + // Magic (6 bytes) + Version (2 bytes) + count += writeString(magicAndVersion, 8, out); + // Owner user name (32 bytes) + count += writeString(OWNER_NAME, 32, out); + // Owner group name (32 bytes) + count += writeString(GROUP_NAME, 32, out); + // Device major number (8 bytes) + count += writeString("", 8, out); + // Device minor number (8 bytes) + count += writeString("", 8, out); + return count; + } + + /** Writes 1024 zero bytes to {@code out}; the generator methods call this once after the last entry of each archive. */ private static void writeUstarTrailer(final OutputStream out) throws IOException { + int offset = 0; + // 1024 bytes of zero + while (offset < 1024) { + out.write(0); + offset++; + } + } + + private TestArchiveGenerator() { + // hide constructor + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java index 2e03a978b29..ce39e07a89f 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java @@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static 
org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import static org.junit.jupiter.api.Assumptions.assumeFalse; @@ -35,14 +36,27 @@ import java.util.List; import org.apache.commons.compress.AbstractTest; +import org.apache.commons.compress.archivers.TestArchiveGenerator; import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.DisabledOnOs; import org.junit.jupiter.api.condition.EnabledOnOs; import org.junit.jupiter.api.condition.OS; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; class SparseFilesTest extends AbstractTest { + /** Temporary directory shared by all tests of this class; setupAll fills it with the generated sparse archives. */ @TempDir + private static Path tempDir; + + /** Generates the sparse test archives into {@code tempDir} once, before any test of this class runs. */ @BeforeAll + static void setupAll() throws IOException { + TestArchiveGenerator.createSparseFileTestCases(tempDir); + } + private void assertPaxGNUEntry(final TarArchiveEntry entry, final String suffix) { assertEquals("sparsefile-" + suffix, entry.getName()); assertEquals(TarConstants.LF_NORMAL, entry.getLinkFlag()); @@ -245,6 +259,53 @@ void testExtractSparseTarsOnWindows() throws IOException { } } + /** Opens each generated archive with TarFile and checks that it holds exactly one entry named {@code sparse-file.txt} whose stream yields 8192 bytes and then EOF, with the byte at index i equal to {@code (byte) (i % 256)}. */ @ParameterizedTest + @ValueSource(strings = {"old-gnu-sparse.tar" , "gnu-sparse-00.tar", "gnu-sparse-01.tar", "gnu-sparse-1.tar"}) + void testMaximallyFragmentedTarFile(final String fileName) throws IOException { + final int expectedSize = 8192; + try (TarFile input = TarFile.builder().setPath(tempDir.resolve(fileName)).get()) { + final List entries = input.getEntries(); + assertEquals(1, entries.size()); + final TarArchiveEntry entry = entries.get(0); + assertNotNull(entry); + assertEquals("sparse-file.txt", entry.getName()); + + try (InputStream inputStream = input.getInputStream(entry)) { + // read the expected amount of data + final byte[] content = new byte[expectedSize]; + assertEquals(expectedSize, IOUtils.read(inputStream, content)); + // verify that the 
stream is at EOF + assertEquals(IOUtils.EOF, inputStream.read()); + // check content + for (int i = 0; i < content.length; i++) { + assertEquals((byte) (i % 256), content[i], "at index " + i); + } + } + } + } + + /** Reads each generated archive with TarArchiveInputStream and checks that the first entry is named {@code sparse-file.txt}, yields 8192 bytes and then EOF, has the byte at index i equal to {@code (byte) (i % 256)}, and is the only entry in the archive. */ @ParameterizedTest + @ValueSource(strings = {"old-gnu-sparse.tar", "gnu-sparse-00.tar", "gnu-sparse-01.tar", "gnu-sparse-1.tar"}) + void testMaximallyFragmentedTarStream(final String fileName) throws IOException { + final int expectedSize = 8192; + try (TarArchiveInputStream input = TarArchiveInputStream.builder().setPath(tempDir.resolve(fileName)).get()) { + final TarArchiveEntry entry = input.getNextEntry(); + assertNotNull(entry); + assertEquals("sparse-file.txt", entry.getName()); + // read the expected amount of data + final byte[] content = new byte[expectedSize]; + assertEquals(expectedSize, IOUtils.read(input, content)); + // verify that the stream is at EOF + assertEquals(IOUtils.EOF, input.read()); + // check content + for (int i = 0; i < content.length; i++) { + assertEquals((byte) (i % 256), content[i], "at index " + i); + } + // check that there are no more entries + assertNull(input.getNextEntry()); + } + } + @Test void testOldGNU() throws Throwable { try (TarArchiveInputStream tin = TarArchiveInputStream.builder() diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java index add3f9464fc..ee1a677f75a 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java @@ -26,7 +26,6 @@ import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; -import java.io.EOFException; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -226,7 +225,9 @@ void testParseTarWithNonNumberPaxHeaders() { @Test void testParseTarWithSpecialPaxHeaders() { - assertThrows(EOFException.class, () -> 
TarFile.builder().setURI(getURI("COMPRESS-530-fail.tar")).get()); + /* Opening COMPRESS-530-fail.tar is now expected to fail with an ArchiveException whose message contains "non-number"; the removed assertion expected an EOFException. */ final ArchiveException ex = assertThrows(ArchiveException.class, () -> TarFile.builder().setURI(getURI("COMPRESS-530-fail.tar")).get()); + // Parsing fails since the data starts with null bytes + assertTrue(ex.getMessage().contains("non-number")); } @Test