diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 85c50ea0042..4e24dfd8b86 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -63,7 +63,7 @@ The type attribute can be add,update,fix,remove. Align DUMP archive block size with Linux `dump` utility. DumpArchiveInputStream.getNextEntry() throws an ArchiveException instead of ArrayIndexOutOfBoundsException. Fix DumpArchiveInputStream to correctly handle file names up to 255 bytes #711. - + ZipArchiveInputStream.read(byte[], int, int) now throws an IOException instead of a NullPointerException. ZipFile.createBoundedInputStream(long, long) now throws an ArchiveException instead of IllegalArgumentException. ZipFile.getContentBeforeFirstLocalFileHeader() now throws an ArchiveException instead of IllegalArgumentException. @@ -71,7 +71,7 @@ The type attribute can be add,update,fix,remove. ZipArchiveInputStream.read() now throws an IOException instead of java.lang.ArrayIndexOutOfBoundsException. ZipArchiveInputStream now throws an MemoryLimitException instead of ArchiveException, or OutOfMemoryError when running with low memory settings set on the command line. ZstdCompressorInputStream closes the InputStream held by ZipArchiveInputStream garbage collection. - + >Uniform handling of special tar records in TarFile and TarArchiveInputStream. TarArchiveOutputStream now throws a IllegalArgumentException instead of an OutOfMemoryError. TarUtils.verifyCheckSum() throws an Exception when checksum could not be parsed. @@ -84,7 +84,7 @@ The type attribute can be add,update,fix,remove. org.apache.commons.compress.harmony.pack200 now throws Pack200Exception, IllegalArgumentException, IllegalStateException, instead of other runtime exceptions and Error. Extract duplicate code in org.apache.commons.compress.harmony.pack200.IntList. - + CpioArchiveEntry now throws ArchiveException instead of Arithmetic exception. CpioArchiveInputStream.getNextEntry() now throws a MemoryLimitException instead of OutOfMemoryError when it can't process input greater than available memory. CpioArchiveInputStream.readOldAsciiEntry(boolean) now throws ArchiveException instead of Arithmetic exception. @@ -126,6 +126,7 @@ The type attribute can be add,update,fix,remove. TarFile now implements IOIterable<TarArchiveEntry>. Add a builder for the TarFile class and deprecate some constructors. SevenZFile, TarFile, and ZipFile now always close underlying resources when builder or constructor fails. + Introduce an ArchiveFile abstraction to unify the APIs of SevenZFile, TarFile, and ZipFile. Bump org.apache.commons:commons-parent from 85 to 88 #707. Bump org.apache.commons:commons-lang3 from 3.18.0 to 3.19.0. diff --git a/src/main/java/org/apache/commons/compress/archivers/ArchiveFile.java b/src/main/java/org/apache/commons/compress/archivers/ArchiveFile.java new file mode 100644 index 00000000000..9958c16e6a6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ArchiveFile.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import java.util.stream.Collectors; +import java.util.zip.ZipFile; + +import org.apache.commons.io.function.IOIterable; +import org.apache.commons.io.function.IOIterator; +import org.apache.commons.io.function.IOStream; + +/** + * A file-based representation of an archive containing multiple {@link ArchiveEntry entries}. + * + *

This interface provides a higher-level abstraction over archive files, similar to + * {@link ZipFile}, but generalized for a variety of archive formats.

+ * + *

Implementations are {@link Closeable} and should be closed once they are no longer + * needed in order to release any underlying system resources.

+ * + * @param the type of {@link ArchiveEntry} produced by this archive + * @since 1.29.0 + */ +public interface ArchiveFile extends Closeable, IOIterable { + + /** + * Returns all entries contained in the archive as a list. + * + *

The order of entries is format-dependent but guaranteed to be consistent + * across multiple invocations on the same archive.

+ * + * @return An immutable list of all entries in this archive. + */ + default List entries() { + try (IOStream stream = stream()) { + return stream.collect(Collectors.toList()); + } + } + + /** + * Returns a sequential stream of archive entries. + * + *

The order of entries is format-dependent but stable for a given archive.

+ *

The returned stream must be closed after use to free + * associated resources.

+ * + * @return A stream of entries in this archive. + */ + IOStream stream(); + + /** + * Opens an input stream for the specified entry's contents. + * + *

The caller is responsible for closing the returned stream after use.

+ * + * @param entry The archive entry to read. + * @return An input stream providing the contents of the given entry. + * @throws IOException If an I/O error occurs while opening the entry stream. + */ + InputStream getInputStream(T entry) throws IOException; + + @Override + @SuppressWarnings("unchecked") + default IOIterator iterator() { + return IOIterator.adapt((Iterable) entries()); + } + + @Override + default Iterable unwrap() { + return asIterable(); + } +} + diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java index c282f134dbc..116d4ffa61e 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java @@ -23,7 +23,6 @@ import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.Closeable; import java.io.DataInputStream; import java.io.EOFException; import java.io.File; @@ -48,8 +47,10 @@ import org.apache.commons.compress.MemoryLimitException; import org.apache.commons.compress.archivers.AbstractArchiveBuilder; import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveFile; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.compress.utils.InputStreamStatistics; +import org.apache.commons.io.function.IOStream; import org.apache.commons.io.input.BoundedInputStream; import org.apache.commons.io.input.ChecksumInputStream; import org.apache.commons.lang3.ArrayUtils; @@ -77,7 +78,7 @@ * @NotThreadSafe * @since 1.6 */ -public class SevenZFile implements Closeable { +public class SevenZFile implements ArchiveFile { private static final class ArchiveStatistics { private int numberOfPackedStreams; @@ -1029,11 +1030,22 @@ public String getDefaultName() { * * @return a copy of meta-data of all archive entries. * @since 1.11 + * @deprecated Since 1.29.0, use {@link #entries()} or {@link #stream()} instead. */ + @Deprecated public Iterable getEntries() { return new ArrayList<>(Arrays.asList(archive.files)); } + /** + * {@inheritDoc} + * @since 1.29.0 + */ + @Override + public IOStream stream() { + return IOStream.of(archive.files); + } + /** * Gets an InputStream for reading the contents of the given entry. *

@@ -1045,6 +1057,7 @@ public Iterable getEntries() { * @throws IOException if unable to create an input stream from the entry * @since 1.20 */ + @Override public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { int entryIndex = -1; for (int i = 0; i < archive.files.length; i++) { diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java index f6ee3ddf556..ddd76fc1b08 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java @@ -18,7 +18,6 @@ */ package org.apache.commons.compress.archivers.tar; -import java.io.Closeable; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -28,20 +27,19 @@ import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.HashMap; -import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveFile; import org.apache.commons.compress.archivers.zip.ZipEncoding; import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.utils.ArchiveUtils; import org.apache.commons.compress.utils.BoundedArchiveInputStream; import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; -import org.apache.commons.io.function.IOIterable; -import org.apache.commons.io.function.IOIterator; +import org.apache.commons.io.function.IOStream; import org.apache.commons.io.input.BoundedInputStream; /** @@ -49,7 +47,7 @@ * * @since 1.21 */ -public class TarFile implements Closeable, IOIterable { +public class TarFile implements ArchiveFile { private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream { @@ -226,7 +224,13 @@ private TarFile(final Builder builder) throws IOException { this.recordBuffer = ByteBuffer.allocate(this.recordSize); this.blockSize = builder.getBlockSize(); this.lenient = builder.isLenient(); - forEach(entries::add); + // Populate `entries` explicitly here instead of using `forEach`/`stream`, + // because both rely on `entries` internally. + // Using them would cause a self-referential loop and leave `entries` empty. + TarArchiveEntry entry; + while ((entry = getNextTarEntry()) != null) { + entries.add(entry); + } } catch (IOException ex) { try { this.archive.close(); @@ -442,11 +446,22 @@ private void consumeRemainderOfLastBlock() throws IOException { * Gets all TAR Archive Entries from the TarFile. * * @return All entries from the tar file. + * @deprecated Since 1.29.0, use {@link #entries()} or {@link #stream()} instead. */ + @Deprecated public List getEntries() { return new ArrayList<>(entries); } + /** + * {@inheritDoc} + * @since 1.29.0 + */ + @Override + public IOStream stream() { + return IOStream.of(entries); + } + /** * Gets the input stream for the provided Tar Archive Entry. * @@ -454,6 +469,7 @@ public List getEntries() { * @return Input stream of the provided entry. * @throws IOException Corrupted TAR archive. Can't read entry. */ + @Override public InputStream getInputStream(final TarArchiveEntry entry) throws IOException { try { return new BoundedTarEntryInputStream(entry, archive); @@ -567,38 +583,6 @@ private boolean isEOFRecord(final ByteBuffer headerBuf) { return headerBuf == null || ArchiveUtils.isArrayZero(headerBuf.array(), recordSize); } - @Override - public IOIterator iterator() { - return new IOIterator() { - - private TarArchiveEntry next; - - @Override - public boolean hasNext() throws IOException { - if (next == null) { - next = getNextTarEntry(); - } - return next != null; - } - - @Override - public TarArchiveEntry next() throws IOException { - if (next == null) { - next = getNextTarEntry(); - } - final TarArchiveEntry tmp = next; - next = null; - return tmp; - } - - @Override - public Iterator unwrap() { - return null; - } - - }; - } - /** * Adds the sparse chunks from the current entry to the sparse chunks, including any additional sparse entries following the current entry. * @@ -704,12 +688,4 @@ private void tryToConsumeSecondEOFRecord() throws IOException { } } } - - @Override - public Iterable unwrap() { - // Commons IO 2.21.0: - // return asIterable(); - return null; - } - } diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java index 40792f977a5..0dbca0da517 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java @@ -20,7 +20,6 @@ import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; -import java.io.Closeable; import java.io.EOFException; import java.io.File; import java.io.IOException; @@ -52,6 +51,7 @@ import org.apache.commons.compress.archivers.AbstractArchiveBuilder; import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveFile; import org.apache.commons.compress.archivers.EntryStreamOffsets; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; @@ -88,7 +88,7 @@ *

  • close is allowed to throw IOException.
  • * */ -public class ZipFile implements Closeable { +public class ZipFile implements ArchiveFile { /** * Lock-free implementation of BoundedInputStream. The implementation uses positioned reads on the underlying archive file channel and therefore performs @@ -1135,7 +1135,9 @@ public String getEncoding() { *

    * * @return all entries as {@link ZipArchiveEntry} instances + * @deprecated Since 1.29.0, use {@link #entries()} or {@link #stream()} instead. */ + @Deprecated public Enumeration getEntries() { return Collections.enumeration(entries); } @@ -1208,6 +1210,7 @@ public long getFirstLocalFileHeaderOffset() { * @return a stream to read the entry from. The returned stream implements {@link InputStreamStatistics}. * @throws IOException if unable to create an input stream from the zipEntry. */ + @Override public InputStream getInputStream(final ZipArchiveEntry entry) throws IOException { if (!(entry instanceof Entry)) { return null; @@ -1738,6 +1741,7 @@ private boolean startsWithLocalFileHeader() throws IOException { * @throws IllegalStateException if the ZIP file has been closed. * @since 1.28.0 */ + @Override public IOStream stream() { return IOStream.adapt(entries.stream()); } diff --git a/src/test/java/org/apache/commons/compress/archivers/AbstractArchiveFileTest.java b/src/test/java/org/apache/commons/compress/archivers/AbstractArchiveFileTest.java new file mode 100644 index 00000000000..36cc19f95e5 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/AbstractArchiveFileTest.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.InputStream; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.Arrays; +import java.util.Date; +import java.util.List; + +import org.apache.commons.compress.AbstractTest; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.function.IOIterator; +import org.apache.commons.io.function.IOStream; +import org.junit.jupiter.api.Test; + +/** + * Abstract base class for tests of {@link ArchiveFile} implementations. + * + * @param The type of {@link ArchiveEntry} produced. + */ +public abstract class AbstractArchiveFileTest extends AbstractTest { + + private static ArchiveEntry newEntryUtc(String name, long size, LocalDateTime lastModified) { + return newEntry(name, size, lastModified.toInstant(ZoneOffset.UTC)); + } + + private static ArchiveEntry newEntry(String name, long size, Instant lastModified) { + return new ArchiveEntry() { + + @Override + public String getName() { + return name; + } + + @Override + public long getSize() { + return size; + } + + @Override + public boolean isDirectory() { + return false; + } + + @Override + public Date getLastModifiedDate() { + return Date.from(lastModified); + } + }; + } + + /** + * Returns an {@link ArchiveFile} to be tested. + * + * @return The archive file to be tested. + */ + protected abstract ArchiveFile getArchiveFile() throws Exception; + + /** + * Returns the expected entries in the test archive. + * + * @return The expected entries. + */ + private List getExpectedEntries() { + return Arrays.asList( + newEntryUtc("test1.xml", 610, LocalDateTime.of(2007, 11, 14, 10, 19, 2)), + newEntryUtc("test2.xml", 82, LocalDateTime.of(2007, 11, 14, 10, 19, 2))); + } + + /** + * Tests that the entries returned by {@link ArchiveFile#entries()} match the expected entries. + */ + @Test + void testEntries() throws Exception { + try (ArchiveFile archiveFile = getArchiveFile()) { + final List entries = archiveFile.entries(); + final List expectedEntries = getExpectedEntries(); + assertEquals(expectedEntries.size(), entries.size(), "Number of entries"); + for (int i = 0; i < expectedEntries.size(); i++) { + final ArchiveEntry expected = expectedEntries.get(i); + final ArchiveEntry actual = entries.get(i); + assertEquals(expected.getName(), actual.getName(), "Entry name at index " + i); + assertEquals(expected.getSize(), actual.getSize(), "Size of entry " + expected.getName()); + assertEquals( + expected.getLastModifiedDate(), + actual.getLastModifiedDate(), + "Last modified date of entry " + expected.getName()); + } + } + } + + /** + * Tests that the iterator returned by {@link ArchiveFile#iterator()} matches the expected entries. + */ + @Test + void testIterator() throws Exception { + try (ArchiveFile archiveFile = getArchiveFile()) { + final IOIterator iterator = archiveFile.iterator(); + final List entries = getExpectedEntries(); + int count = 0; + while (iterator.hasNext()) { + final ArchiveEntry expected = entries.get(count); + final ArchiveEntry actual = iterator.next(); + assertEquals(expected.getName(), actual.getName(), "Entry name at index " + count); + assertEquals(expected.getSize(), actual.getSize(), "Size of entry " + expected.getName()); + assertEquals( + expected.getLastModifiedDate(), + actual.getLastModifiedDate(), + "Last modified date of entry " + expected.getName()); + count++; + } + } + } + + /** + * Tests that the input streams returned by {@link ArchiveFile#getInputStream(ArchiveEntry)} match the expected + * entries. + */ + @Test + void testGetInputStream() throws Exception { + try (ArchiveFile archiveFile = getArchiveFile()) { + final List expectedEntries = getExpectedEntries(); + for (final ArchiveEntry expected : expectedEntries) { + final T actual = getMatchingEntry(archiveFile, expected.getName()); + assertNotNull(actual, "Entry " + expected.getName() + " not found"); + try (InputStream inputStream = archiveFile.getInputStream(actual)) { + assertNotNull(inputStream, "Input stream for entry " + expected.getName()); + final byte[] content = IOUtils.toByteArray(inputStream); + assertEquals(expected.getSize(), content.length, "Size of entry " + expected.getName()); + } + } + } + } + + private T getMatchingEntry(ArchiveFile archiveFile, String name) throws Exception { + try (IOStream stream = archiveFile.stream()) { + return stream.filter(e -> e.getName().equals(name)).findFirst().orElse(null); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java index 6df5b102f40..c3aee273f29 100644 --- a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java @@ -51,9 +51,9 @@ import javax.crypto.Cipher; -import org.apache.commons.compress.AbstractTest; import org.apache.commons.compress.MemoryLimitException; import org.apache.commons.compress.PasswordRequiredException; +import org.apache.commons.compress.archivers.AbstractArchiveFileTest; import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel; import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; @@ -62,7 +62,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -class SevenZFileTest extends AbstractTest { +class SevenZFileTest extends AbstractArchiveFileTest { private static final String TEST2_CONTENT = "\r\n\r\n\r\n\t\r\n\n"; private static boolean isStrongCryptoAvailable() throws NoSuchAlgorithmException { @@ -1007,4 +1007,9 @@ void testSignatureCheck() { final byte[] data3 = { '7', 'z', (byte) 0xBC, (byte) 0xAF, 0x27, 0x1D }; assertFalse(SevenZFile.matches(data3, data3.length)); } + + @Override + protected SevenZFile getArchiveFile() throws Exception { + return SevenZFile.builder().setPath(getPath("bla.7z")).get(); + } } diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java index 87f6766ba6e..c430df05094 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java @@ -41,15 +41,14 @@ import java.util.List; import java.util.zip.GZIPInputStream; -import org.apache.commons.compress.AbstractTest; +import org.apache.commons.compress.archivers.AbstractArchiveFileTest; import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.TimeZones; import org.junit.jupiter.api.Test; -import shaded.org.apache.commons.lang3.StringUtils; - -class TarFileTest extends AbstractTest { +class TarFileTest extends AbstractArchiveFileTest { private void datePriorToEpoch(final String archive) throws Exception { try (TarFile tarFile = TarFile.builder().setURI(getURI(archive)).get()) { @@ -380,4 +379,9 @@ void testWorkaroundForBrokenTimeHeader() throws IOException { assertTrue(entry.isCheckSumOK()); } } + + @Override + protected TarFile getArchiveFile() throws Exception { + return TarFile.builder().setPath(getPath("bla.tar")).get(); + } } diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java index c417ec823f5..841e17d8212 100644 --- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java @@ -56,7 +56,7 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipException; -import org.apache.commons.compress.AbstractTest; +import org.apache.commons.compress.archivers.AbstractArchiveFileTest; import org.apache.commons.compress.archivers.ArchiveEntry; import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; @@ -73,7 +73,7 @@ import io.airlift.compress.zstd.ZstdInputStream; -class ZipFileTest extends AbstractTest { +class ZipFileTest extends AbstractArchiveFileTest { /** * This Class simulates the case where the Zip File uses the aircompressors {@link ZstdInputStream} @@ -1085,4 +1085,8 @@ void testZstdInputStreamErrorCloseWhenGc() throws Exception { } } + @Override + protected ZipFile getArchiveFile() throws Exception { + return ZipFile.builder().setPath(getPath("bla.zip")).get(); + } }