Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@

import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -150,21 +151,15 @@ public static boolean matches(final byte[] signature, final int length) {
/** True if stream is at EOF. */
private boolean atEof;

/** Size of the current entry. */
private long entrySize;

/** How far into the current entry the stream has read. */
private long entryOffset;

/** Input streams for reading sparse entries. **/
private List<InputStream> sparseInputStreams;

/** The index of current input stream being read when reading sparse entries. */
private int currentSparseInputStreamIndex;

/** The meta-data about the current entry. */
private TarArchiveEntry currEntry;

/** The current input stream. */
private InputStream currentInputStream;

/** The encoding of the file. */
private final ZipEncoding zipEncoding;

Expand Down Expand Up @@ -322,6 +317,17 @@ public TarArchiveInputStream(final InputStream inputStream, final String encodin
this(builder().setInputStream(inputStream).setCharset(encoding));
}

/**
 * Callback invoked after every read on the current entry's bounded stream.
 * Keeps the global byte counter and the position within the entry up to date,
 * and detects archives that end before the entry's declared size was delivered.
 *
 * @param read the value returned by the underlying read: a byte count, or -1 at EOF.
 * @throws EOFException if EOF was reached before {@code currEntry.getSize()} bytes were read.
 */
private void afterRead(final int read) throws IOException {
    // Keep the stream-wide read statistics in sync.
    count(read);
    if (read == -1) {
        // EOF before the header-declared size: the archive is cut short.
        if (entryOffset < currEntry.getSize()) {
            throw new EOFException(String.format("Truncated TAR archive: entry '%s' expected %,d bytes, actual %,d", currEntry.getName(), currEntry.getSize(),
                    entryOffset));
        }
    } else {
        entryOffset += read;
    }
}

/**
* Gets the available data that can be read from the current entry in the archive. This does not indicate how much data is left in the entire archive, only
* in the current entry. This value is determined from the entry's size header field and the amount of data already read from the current entry.
Expand Down Expand Up @@ -350,8 +356,7 @@ public int available() throws IOException {
* </p>
*/
private void buildSparseInputStreams() throws IOException {
currentSparseInputStreamIndex = -1;
sparseInputStreams = new ArrayList<>();
final List<InputStream> sparseInputStreams = new ArrayList<>();
final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders();
// Stream doesn't need to be closed at all as it doesn't use any resources
final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); // NOSONAR
Expand All @@ -377,15 +382,15 @@ private void buildSparseInputStreams() throws IOException {
// @formatter:off
sparseInputStreams.add(BoundedInputStream.builder()
.setInputStream(in)
.setAfterRead(this::afterRead)
.setMaxCount(sparseHeader.getNumbytes())
.setPropagateClose(false)
.get());
// @formatter:on
}
offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
}
if (!sparseInputStreams.isEmpty()) {
currentSparseInputStreamIndex = 0;
}
currentInputStream = new SequenceInputStream(Collections.enumeration(sparseInputStreams));
}

/**
Expand All @@ -406,10 +411,9 @@ public boolean canReadEntryData(final ArchiveEntry archiveEntry) {
@Override
public void close() throws IOException {
// Close all the input streams in sparseInputStreams
if (sparseInputStreams != null) {
for (final InputStream inputStream : sparseInputStreams) {
inputStream.close();
}
if (currentInputStream != null) {
currentInputStream.close();
currentInputStream = null;
}
in.close();
}
Expand All @@ -425,26 +429,6 @@ private void consumeRemainderOfLastBlock() throws IOException {
}
}

/**
 * Computes how many bytes were really skipped. {@link FileInputStream#skip(long)} reports the requested count even when the file ends sooner, so for that
 * stream type the result is capped by the bytes that were available beforehand.
 *
 * @param available value of {@link InputStream#available()} taken before skipping.
 * @param skipped value returned by {@link InputStream#skip(long)}.
 * @param expected number of bytes the caller asked to skip.
 * @return the number of bytes actually skipped (always equal to {@code expected}, otherwise an exception is thrown).
 * @throws IOException if fewer bytes than expected could be skipped, i.e. the archive is truncated.
 */
private long getActuallySkipped(final long available, final long skipped, final long expected) throws IOException {
    // FileInputStream.skip() over-reports past EOF; clamp to what was available.
    final long actuallySkipped = in instanceof FileInputStream ? Math.min(skipped, available) : skipped;
    if (actuallySkipped != expected) {
        throw new ArchiveException("Truncated TAR archive");
    }
    return actuallySkipped;
}

/**
* Gets the current TAR Archive Entry that this input stream is processing
*
Expand Down Expand Up @@ -509,8 +493,8 @@ public TarArchiveEntry getNextEntry() throws IOException {
boolean lastWasSpecial = false;
do {
// If there is a current entry, skip any unread data and padding
if (currEntry != null) {
IOUtils.skip(this, Long.MAX_VALUE); // Skip to end of current entry
if (currentInputStream != null) {
IOUtils.skip(currentInputStream, Long.MAX_VALUE); // Skip to end of current entry
skipRecordPadding(); // Skip padding to align to the next record
}
// Read the next header record
Expand All @@ -525,23 +509,32 @@ public TarArchiveEntry getNextEntry() throws IOException {
}
// Parse the header into a new entry
currEntry = new TarArchiveEntry(globalPaxHeaders, headerBuf, zipEncoding, lenient);
// Set up the input stream for the new entry
currentInputStream = BoundedInputStream.builder()
.setInputStream(in)
.setAfterRead(this::afterRead)
.setMaxCount(currEntry.getSize())
.setPropagateClose(false)
.get();
entryOffset = 0;
entrySize = currEntry.getSize();
lastWasSpecial = TarUtils.isSpecialTarRecord(currEntry);
if (lastWasSpecial) {
// Handle PAX, GNU long name, or other special records
TarUtils.handleSpecialTarRecord(this, zipEncoding, getMaxEntryNameLength(), currEntry, paxHeaders, sparseHeaders, globalPaxHeaders,
globalSparseHeaders);
TarUtils.handleSpecialTarRecord(currentInputStream, zipEncoding, getMaxEntryNameLength(), currEntry, paxHeaders, sparseHeaders,
globalPaxHeaders, globalSparseHeaders);
}
} while (lastWasSpecial);
// Apply global and local PAX headers
TarUtils.applyPaxHeadersToEntry(currEntry, paxHeaders, sparseHeaders, globalPaxHeaders, globalSparseHeaders);
// Handle sparse files
if (currEntry.isSparse()) {
if (currEntry.isOldGNUSparse()) {
// Old GNU sparse format uses extra header blocks for metadata.
// These blocks are not included in the entry’s size, so we cannot
// rely on BoundedInputStream here.
readOldGNUSparse();
} else if (currEntry.isPaxGNU1XSparse()) {
currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(in, getRecordSize()));
currEntry.setSparseHeaders(TarUtils.parsePAX1XSparseHeaders(currentInputStream, getRecordSize()));
}
// sparse headers are all done reading, we need to build
// sparse input streams using these sparse headers
Expand All @@ -551,8 +544,6 @@ public TarArchiveEntry getNextEntry() throws IOException {
if (currEntry.isDirectory() && !currEntry.getName().endsWith("/")) {
currEntry.setName(currEntry.getName() + "/");
}
// Update entry size in case it changed due to PAX headers
entrySize = currEntry.getSize();
return currEntry;
}

Expand Down Expand Up @@ -664,33 +655,13 @@ public int read(final byte[] buf, final int offset, int numToRead) throws IOExce
if (numToRead == 0) {
return 0;
}
int totalRead = 0;
if (isAtEOF() || isDirectory()) {
return -1;
}
if (currEntry == null) {
if (currEntry == null || currentInputStream == null) {
throw new IllegalStateException("No current tar entry");
}
if (entryOffset >= currEntry.getRealSize()) {
return -1;
}
numToRead = Math.min(numToRead, available());
if (currEntry.isSparse()) {
// for sparse entries, we need to read them in another way
totalRead = readSparse(buf, offset, numToRead);
} else {
totalRead = in.read(buf, offset, numToRead);
}
if (totalRead == -1) {
if (numToRead > 0) {
throw new EOFException("Truncated TAR archive");
}
setAtEOF(true);
} else {
count(totalRead);
entryOffset += totalRead;
}
return totalRead;
return currentInputStream.read(buf, offset, numToRead);
}

/**
Expand All @@ -710,9 +681,6 @@ private void readOldGNUSparse() throws IOException {
currEntry.getSparseHeaders().addAll(entry.getSparseHeaders());
} while (entry.isExtended());
}
// sparse headers are all done reading, we need to build
// sparse input streams using these sparse headers
buildSparseInputStreams();
}

/**
Expand All @@ -730,52 +698,6 @@ protected byte[] readRecord() throws IOException {
return recordBuffer;
}

/**
 * Reads data for a sparse entry.
 * <p>
 * A sparse tar entry stores only its non-zero data regions; the "holes" (runs of zero bytes) are not stored in the archive. Each sparse header gives the
 * offset where a non-zero region starts and numbytes, its length. {@code sparseInputStreams} holds one stream per region, interleaved with zero-filled
 * streams for the holes, so reading the streams in order reconstructs the full file content.
 * </p>
 *
 * @param buf The buffer into which to place bytes read.
 * @param offset The offset at which to place bytes read.
 * @param numToRead The number of bytes to read.
 * @return The number of bytes read, or -1 at EOF.
 * @throws IOException on error.
 */
private int readSparse(final byte[] buf, final int offset, final int numToRead) throws IOException {
    // No per-region streams were built: fall back to the raw archive stream.
    if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
        return in.read(buf, offset, numToRead);
    }
    // All region streams have been consumed: the entry is at EOF.
    if (currentSparseInputStreamIndex >= sparseInputStreams.size()) {
        return -1;
    }
    final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
    final int readLen = currentInputStream.read(buf, offset, numToRead);
    // Last region stream: its result (including EOF) is the overall result.
    if (currentSparseInputStreamIndex == sparseInputStreams.size() - 1) {
        return readLen;
    }
    // Current region is exhausted: advance to the next stream and retry.
    if (readLen == -1) {
        currentSparseInputStreamIndex++;
        return readSparse(buf, offset, numToRead);
    }
    // Short read: advance to the next stream and top up the remainder of the request.
    if (readLen < numToRead) {
        currentSparseInputStreamIndex++;
        final int readLenOfNext = readSparse(buf, offset + readLen, numToRead - readLen);
        if (readLenOfNext == -1) {
            return readLen;
        }
        return readLen + readLenOfNext;
    }
    // readLen == numToRead: the request was fully satisfied by the current region.
    return readLen;
}

/**
* Since we do not support marking just yet, we do nothing.
*/
Expand Down Expand Up @@ -816,21 +738,11 @@ public long skip(final long n) throws IOException {
if (n <= 0 || isDirectory()) {
return 0;
}
final long availableOfInputStream = in.available();
final long available = currEntry.getRealSize() - entryOffset;
final long numToSkip = Math.min(n, available);
long skipped;
if (!currEntry.isSparse()) {
skipped = IOUtils.skip(in, numToSkip);
// for non-sparse entry, we should get the bytes actually skipped bytes along with
// inputStream.available() if inputStream is instance of FileInputStream
skipped = getActuallySkipped(availableOfInputStream, skipped, numToSkip);
} else {
skipped = skipSparse(numToSkip);
if (currEntry == null || currentInputStream == null) {
throw new IllegalStateException("No current tar entry");
}
count(skipped);
entryOffset += skipped;
return skipped;
// Use Apache Commons IO to skip as it handles skipping fully
return org.apache.commons.io.IOUtils.skip(currentInputStream, n);
}

/**
Expand All @@ -839,37 +751,15 @@ public long skip(final long n) throws IOException {
* @throws IOException if a truncated tar archive is detected.
*/
private void skipRecordPadding() throws IOException {
if (!isDirectory() && this.entrySize > 0 && this.entrySize % getRecordSize() != 0) {
final long available = in.available();
final long numRecords = this.entrySize / getRecordSize() + 1;
final long padding = numRecords * getRecordSize() - this.entrySize;
long skipped = IOUtils.skip(in, padding);
skipped = getActuallySkipped(available, skipped, padding);
final long entrySize = currEntry != null ? currEntry.getSize() : 0;
if (!isDirectory() && entrySize > 0 && entrySize % getRecordSize() != 0) {
final long padding = getRecordSize() - (entrySize % getRecordSize());
final long skipped = org.apache.commons.io.IOUtils.skip(in, padding);
count(skipped);
}
}

/**
* Skip n bytes from current input stream, if the current input stream doesn't have enough data to skip, jump to the next input stream and skip the rest
* bytes, keep doing this until total n bytes are skipped or the input streams are all skipped
*
* @param n bytes of data to skip.
* @return actual bytes of data skipped.
* @throws IOException if an I/O error occurs.
*/
private long skipSparse(final long n) throws IOException {
if (sparseInputStreams == null || sparseInputStreams.isEmpty()) {
return in.skip(n);
}
long bytesSkipped = 0;
while (bytesSkipped < n && currentSparseInputStreamIndex < sparseInputStreams.size()) {
final InputStream currentInputStream = sparseInputStreams.get(currentSparseInputStreamIndex);
bytesSkipped += currentInputStream.skip(n - bytesSkipped);
if (bytesSkipped < n) {
currentSparseInputStreamIndex++;
if (skipped != padding) {
throw new EOFException(String.format("Truncated TAR archive: failed to skip record padding for entry '%s'", currEntry.getName()));
}
}
return bytesSkipped;
}

/**
Expand Down
Loading