Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ The <action> type attribute can be add,update,fix,remove.
<action type="fix" dev="ggregory" due-to="Gary Gregory">Don't lose precision while reading folders from a SevenZFile.</action>
<action type="fix" dev="ggregory" due-to="Roel van Dijk, Gary Gregory">Improve some exception messages in TarUtils and TarArchiveEntry.</action>
<action type="fix" dev="pkarwasz" due-to="Piotr P. Karwasz">SevenZFile now enforces the same folder and coder limits as the CPP implementation.</action>
<!-- FIX bzip2 -->
<!-- FIX bzip2 -->
<action type="fix" dev="ggregory" due-to="Tyler Nighswander, Gary Gregory">BZip2CompressorInputStream now throws CompressorException (a subclass of IOException) for invalid or corrupted data, providing more specific error reporting.</action>
<action type="fix" dev="pkarwasz" due-to="Tyler Nighswander, Piotr P. Karwasz">BZip2 input streams treat Huffman codes longer than 20 bits as corrupted data, matching the behavior of the reference implementation.</action>
<!-- FIX dump -->
Expand All @@ -82,7 +82,8 @@ The <action> type attribute can be add,update,fix,remove.
<action type="fix" dev="pkarwasz" due-to="Tyler Nighswander">Simplify handling of special AR records in ArArchiveInputStream.</action>
<!-- FIX arj -->
<action type="fix" dev="pkarwasz" due-to="Piotr P. Karwasz">Correct byte accounting and truncation errors in ARJ input stream.</action>
<!-- FIX unpack200 -->
<action type="fix" dev="pkarwasz" due-to="Piotr P. Karwasz">Add strict header validation in ARJ input stream and `selfExtracting` option.</action>
<!-- FIX unpack200 -->
<action type="fix" dev="ggregory" due-to="Gary Gregory, Stanislav Fort">org.apache.commons.compress.harmony.unpack200 now throws Pack200Exception, IllegalArgumentException, and IllegalStateException instead of other runtime exceptions and Error.</action>
<!-- FIX pack200 -->
<action type="fix" dev="ggregory" due-to="Gary Gregory, Igor Morgenstern">org.apache.commons.compress.harmony.pack200 now throws Pack200Exception, IllegalArgumentException, IllegalStateException, instead of other runtime exceptions and Error.</action>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ public class ArjArchiveInputStream extends ArchiveInputStream<ArjArchiveEntry> {
*/
public static final class Builder extends AbstractArchiveBuilder<ArjArchiveInputStream, Builder> {

/**
 * Whether the main header is located by scanning forward through the stream
 * rather than being read at the current position; {@code false} unless set.
 */
private boolean selfExtracting;

/**
 * Creates a builder whose charset is preset to {@code ENCODING_NAME}.
 */
private Builder() {
    setCharset(ENCODING_NAME);
}
Expand All @@ -71,11 +73,43 @@ private Builder() {
public ArjArchiveInputStream get() throws IOException {
    // Hand this builder to the stream constructor, which reads its configuration from it.
    final ArjArchiveInputStream stream = new ArjArchiveInputStream(this);
    return stream;
}

/**
 * Enables compatibility with self-extracting (SFX) ARJ files.
 *
 * <p>When {@code true}, the stream is scanned forward to locate the first
 * valid ARJ main header. All bytes before that point are ignored, which
 * allows reading ARJ data embedded in an executable stub.</p>
 *
 * <p>When {@code false}, the main header is expected at the current position
 * of the stream.</p>
 *
 * <p><strong>Caveat:</strong> this lenient pre-scan can mask corruption that
 * would otherwise be reported at the start of a normal {@code .arj} file.
 * Enable only when you expect an SFX input.</p>
 *
 * <p>Default: {@code false}.</p>
 *
 * @param selfExtracting {@code true} if the input stream is for a self-extracting archive
 * @return {@code this} instance
 * @since 1.29.0
 */
public Builder setSelfExtracting(final boolean selfExtracting) {
    this.selfExtracting = selfExtracting;
    return asThis();
}
}

/** Name of the charset that a new builder is preset with. */
private static final String ENCODING_NAME = "CP437";
/** First byte of the ARJ header signature. */
private static final int ARJ_MAGIC_1 = 0x60;
/** Second byte of the ARJ header signature. */
private static final int ARJ_MAGIC_2 = 0xEA;
/**
 * Maximum size of the basic header, in bytes.
 *
 * <p>The value is taken from the reference implementation.</p>
 */
private static final int MAX_BASIC_HEADER_SIZE = 2600;
/**
 * Minimum size of the first header (the fixed-size part of the basic header), in bytes.
 */
private static final int MIN_FIRST_HEADER_SIZE = 30;

/**
* Creates a new builder.
Expand All @@ -98,21 +132,10 @@ public static boolean matches(final byte[] signature, final int length) {
return length >= 2 && (0xff & signature[0]) == ARJ_MAGIC_1 && (0xff & signature[1]) == ARJ_MAGIC_2;
}

/**
 * Reads the optional trailing fields of a local file header's first header.
 *
 * <p>Which fields are present is decided by the declared first-header size:
 * from 33 bytes on the extended file position is read, and from 45 bytes on
 * the access time, creation time and original size are read as well.</p>
 *
 * @param firstHeaderSize the declared size of the first header.
 * @param firstHeader     the stream positioned right after the fixed fields.
 * @param localFileHeader the header object receiving the values.
 * @throws IOException if reading from {@code firstHeader} fails.
 */
private static void readExtraData(final int firstHeaderSize, final InputStream firstHeader, final LocalFileHeader localFileHeader) throws IOException {
    if (firstHeaderSize < 33) {
        // Nothing beyond the fixed-size fields
        return;
    }
    localFileHeader.extendedFilePosition = EndianUtils.readSwappedInteger(firstHeader);
    if (firstHeaderSize < 45) {
        // Only the extended file position is present
        return;
    }
    localFileHeader.dateTimeAccessed = EndianUtils.readSwappedInteger(firstHeader);
    localFileHeader.dateTimeCreated = EndianUtils.readSwappedInteger(firstHeader);
    localFileHeader.originalSizeEvenForVolumes = EndianUtils.readSwappedInteger(firstHeader);
}

/**
 * Reads a single byte from the given stream as an unsigned value.
 *
 * @param in the stream to read from.
 * @return the byte read, in the range {@code 0..255}.
 * @throws EOFException if the stream is already at its end.
 * @throws IOException  if an I/O error occurs.
 */
private static int readUnsignedByte(InputStream in) throws IOException {
    final int value = in.read();
    if (value == -1) {
        // A single throw: a second statement after it would be unreachable and fail to compile
        throw new EOFException("Truncated ARJ archive: expected more data");
    }
    return value & 0xff;
}
Expand All @@ -123,7 +146,7 @@ private static int readUnsignedByte(InputStream in) throws IOException {

private ArjArchiveInputStream(final Builder builder) throws IOException {
super(builder);
mainHeader = readMainHeader();
mainHeader = readMainHeader(builder.selfExtracting);
if ((mainHeader.arjFlags & MainHeader.Flags.GARBLED) != 0) {
throw new ArchiveException("Encrypted ARJ files are unsupported");
}
Expand Down Expand Up @@ -164,6 +187,55 @@ public boolean canReadEntryData(final ArchiveEntry ae) {
return ae instanceof ArjArchiveEntry && ((ArjArchiveEntry) ae).getMethod() == LocalFileHeader.Methods.STORED;
}

/**
 * Compares the CRC32 of the given bytes with the checksum stored next in the input stream.
 *
 * <p>The stored checksum is consumed from the stream via {@code readSwappedUnsignedInteger()}.</p>
 *
 * @param data The bytes whose checksum is computed.
 * @return {@code true} if the computed and stored checksums are equal, {@code false} otherwise.
 * @throws EOFException If the stream ends before the stored checksum has been read.
 * @throws IOException If an I/O error occurs.
 */
@SuppressWarnings("Since15")
private boolean checkCRC32(final byte[] data) throws IOException {
    final CRC32 checksum = new CRC32();
    checksum.update(data);
    final long computed = checksum.getValue();
    final long stored = readSwappedUnsignedInteger();
    return stored == computed;
}

/**
 * Scans forward for the next valid ARJ main header.
 *
 * <p>A candidate starts at the two signature bytes and is accepted only if its
 * declared size is in range and its CRC32 checksum matches. Otherwise scanning
 * resumes after the bytes consumed so far.</p>
 *
 * @return The header bytes.
 * @throws ArchiveException If the end of the stream is reached before a valid header is found.
 * @throws IOException If an I/O error occurs.
 */
private byte[] findMainHeader() throws IOException {
    try {
        while (true) {
            int first;
            int second = readUnsignedByte();
            do {
                first = second;
                second = readUnsignedByte();
                // Keep sliding until BOTH signature bytes match; stopping on a single
                // matching byte would consume bytes that may hold the real header.
            } while (first != ARJ_MAGIC_1 || second != ARJ_MAGIC_2);
            final int basicHeaderSize = readSwappedUnsignedShort();
            // At least two bytes are required for the null-terminated name and comment
            if (MIN_FIRST_HEADER_SIZE + 2 <= basicHeaderSize && basicHeaderSize <= MAX_BASIC_HEADER_SIZE) {
                final byte[] basicHeaderBytes = org.apache.commons.io.IOUtils.toByteArray(in, basicHeaderSize);
                count(basicHeaderSize);
                if (checkCRC32(basicHeaderBytes)) {
                    return basicHeaderBytes;
                }
            }
            // Size out of range or CRC32 failed, continue scanning
        }
    } catch (final EOFException e) {
        // Keep the EOF as the cause so the point of truncation stays diagnosable
        throw new ArchiveException("Corrupted ARJ archive: unable to find valid main header", e);
    }
}

/**
* Gets the archive's comment.
*
Expand Down Expand Up @@ -263,33 +335,26 @@ private String readEntryName(final InputStream dataIn) throws IOException {
* @throws IOException If an I/O error occurs.
*/
private byte[] readHeader() throws IOException {
byte[] basicHeaderBytes;
// TODO: Explain why we are scanning for a valid ARJ header
// and don't throw, when an invalid/corrupted header is found,
// which might indicate a corrupted archive.
while (true) {
int first;
int second = readUnsignedByte();
do {
first = second;
second = readUnsignedByte();
} while (first != ARJ_MAGIC_1 && second != ARJ_MAGIC_2);
final int basicHeaderSize = readSwappedUnsignedShort();
if (basicHeaderSize == 0) {
// end of archive
return null;
} else if (basicHeaderSize <= 2600) {
basicHeaderBytes = org.apache.commons.io.IOUtils.toByteArray(in, basicHeaderSize);
count(basicHeaderSize);
final long basicHeaderCrc32 = EndianUtils.readSwappedUnsignedInteger(in);
count(4);
final CRC32 crc32 = new CRC32();
crc32.update(basicHeaderBytes);
if (basicHeaderCrc32 == crc32.getValue()) {
return basicHeaderBytes;
}
}
final int first = readUnsignedByte();
final int second = readUnsignedByte();
if (first != ARJ_MAGIC_1 || second != ARJ_MAGIC_2) {
throw new ArchiveException("Corrupted ARJ archive: invalid ARJ header signature 0x%02X 0x%02X", first, second);
}
final int basicHeaderSize = readSwappedUnsignedShort();
if (basicHeaderSize == 0) {
// End of archive
return null;
}
// At least two bytes are required for the null-terminated name and comment
if (basicHeaderSize < MIN_FIRST_HEADER_SIZE + 2 || basicHeaderSize > MAX_BASIC_HEADER_SIZE) {
throw new ArchiveException("Corrupted ARJ archive: invalid ARJ header size %,d", basicHeaderSize);
}
final byte[] basicHeaderBytes = org.apache.commons.io.IOUtils.toByteArray(in, basicHeaderSize);
count(basicHeaderSize);
if (!checkCRC32(basicHeaderBytes)) {
throw new ArchiveException("Corrupted ARJ archive: invalid ARJ header CRC32 checksum");
}
return basicHeaderBytes;
}

private LocalFileHeader readLocalFileHeader() throws IOException {
Expand Down Expand Up @@ -318,8 +383,18 @@ private LocalFileHeader readLocalFileHeader() throws IOException {
localFileHeader.fileAccessMode = EndianUtils.readSwappedShort(firstHeader);
localFileHeader.firstChapter = readUnsignedByte(firstHeader);
localFileHeader.lastChapter = readUnsignedByte(firstHeader);

readExtraData(firstHeaderSize, firstHeader, localFileHeader);
// Total read (including size byte): 10 + 4 * 4 + 2 * 2 = 30 bytes

if (firstHeaderSize >= MIN_FIRST_HEADER_SIZE + 4) {
localFileHeader.extendedFilePosition = EndianUtils.readSwappedInteger(firstHeader);
// Total read (including size byte): 30 + 4 = 34 bytes
if (firstHeaderSize >= MIN_FIRST_HEADER_SIZE + 4 + 12) {
localFileHeader.dateTimeAccessed = EndianUtils.readSwappedInteger(firstHeader);
localFileHeader.dateTimeCreated = EndianUtils.readSwappedInteger(firstHeader);
localFileHeader.originalSizeEvenForVolumes = EndianUtils.readSwappedInteger(firstHeader);
// Total read (including size byte): 34 + 12 = 46 bytes
}
}
}

localFileHeader.name = readEntryName(basicHeader);
Expand All @@ -331,12 +406,8 @@ private LocalFileHeader readLocalFileHeader() throws IOException {
while ((extendedHeaderSize = readSwappedUnsignedShort()) > 0) {
final byte[] extendedHeaderBytes = org.apache.commons.io.IOUtils.toByteArray(in, extendedHeaderSize);
count(extendedHeaderSize);
final long extendedHeaderCrc32 = EndianUtils.readSwappedUnsignedInteger(in);
count(4);
final CRC32 crc32 = new CRC32();
crc32.update(extendedHeaderBytes);
if (extendedHeaderCrc32 != crc32.getValue()) {
throw new ArchiveException("Extended header CRC32 verification failure");
if (!checkCRC32(extendedHeaderBytes)) {
throw new ArchiveException("Corrupted ARJ archive: extended header CRC32 verification failure");
}
extendedHeaders.add(extendedHeaderBytes);
}
Expand All @@ -345,8 +416,8 @@ private LocalFileHeader readLocalFileHeader() throws IOException {
return localFileHeader;
}

private MainHeader readMainHeader() throws IOException {
final byte[] basicHeaderBytes = readHeader();
private MainHeader readMainHeader(final boolean selfExtracting) throws IOException {
final byte[] basicHeaderBytes = selfExtracting ? findMainHeader() : readHeader();
final MainHeader header = new MainHeader();
try (InputStream basicHeader = new ByteArrayInputStream(basicHeaderBytes)) {

Expand All @@ -368,12 +439,14 @@ private MainHeader readMainHeader() throws IOException {
header.securityEnvelopeLength = EndianUtils.readSwappedShort(firstHeader);
header.encryptionVersion = readUnsignedByte(firstHeader);
header.lastChapter = readUnsignedByte(firstHeader);
// Total read (including size byte): 10 + 4 * 4 + 2 * 2 = 30 bytes

if (firstHeaderSize >= 33) {
if (firstHeaderSize >= MIN_FIRST_HEADER_SIZE + 4) {
header.arjProtectionFactor = readUnsignedByte(firstHeader);
header.arjFlags2 = readUnsignedByte(firstHeader);
readUnsignedByte(firstHeader);
readUnsignedByte(firstHeader);
// Total read (including size byte): 30 + 4 = 34 bytes
}
}

Expand All @@ -385,12 +458,8 @@ private MainHeader readMainHeader() throws IOException {
if (extendedHeaderSize > 0) {
header.extendedHeaderBytes = org.apache.commons.io.IOUtils.toByteArray(in, extendedHeaderSize);
count(extendedHeaderSize);
final long extendedHeaderCrc32 = EndianUtils.readSwappedUnsignedInteger(in);
count(4);
final CRC32 crc32 = new CRC32();
crc32.update(header.extendedHeaderBytes);
if (extendedHeaderCrc32 != crc32.getValue()) {
throw new ArchiveException("Extended header CRC32 verification failure");
if (!checkCRC32(header.extendedHeaderBytes)) {
throw new ArchiveException("Corrupted ARJ archive: extended header CRC32 verification failure");
}
}

Expand All @@ -407,6 +476,12 @@ private ByteArrayOutputStream readString(final InputStream dataIn) throws IOExce
}
}

/**
 * Reads an unsigned 32-bit value from the underlying stream with
 * {@code EndianUtils.readSwappedUnsignedInteger} and counts the four bytes read.
 *
 * @return the value read.
 * @throws IOException if reading from the underlying stream fails.
 */
private long readSwappedUnsignedInteger() throws IOException {
    final long result = EndianUtils.readSwappedUnsignedInteger(in);
    // Counted only after the read succeeded
    count(4);
    return result;
}

private int readSwappedUnsignedShort() throws IOException {
final int value = EndianUtils.readSwappedUnsignedShort(in);
count(2);
Expand Down
Loading