001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 */
018 package org.apache.commons.compress.archivers.zip;
019
020 import java.io.File;
021 import java.io.IOException;
022 import java.io.InputStream;
023 import java.io.RandomAccessFile;
024 import java.util.Arrays;
025 import java.util.Collections;
026 import java.util.Comparator;
027 import java.util.Enumeration;
028 import java.util.HashMap;
029 import java.util.LinkedHashMap;
030 import java.util.Map;
031 import java.util.zip.Inflater;
032 import java.util.zip.InflaterInputStream;
033 import java.util.zip.ZipException;
034
035 /**
036 * Replacement for <code>java.util.zip.ZipFile</code>.
037 *
038 * <p>This class adds support for file name encodings other than UTF-8
039 * (which is required to work on ZIP files created by native zip tools)
040 * and is able to skip a preamble like the one found in
041 * self-extracting archives. Furthermore it returns instances of
042 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
043 * instead of <code>java.util.zip.ZipEntry</code>.</p>
044 *
045 * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
046 * have to reimplement all methods anyway. Like
047 * <code>java.util.zip.ZipFile</code>, it uses RandomAccessFile under the
048 * covers and supports compressed and uncompressed entries.</p>
049 *
050 * <p>The method signatures mimic the ones of
051 * <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
052 *
053 * <ul>
054 * <li>There is no getName method.</li>
055 * <li>entries has been renamed to getEntries.</li>
056 * <li>getEntries and getEntry return
057 * <code>org.apache.commons.compress.archivers.zip.ZipArchiveEntry</code>
058 * instances.</li>
059 * <li>close is allowed to throw IOException.</li>
060 * </ul>
061 *
062 */
063 public class ZipFile {
064 private static final int HASH_SIZE = 509;
065 private static final int SHORT = 2;
066 private static final int WORD = 4;
067 static final int NIBLET_MASK = 0x0f;
068 static final int BYTE_SHIFT = 8;
069 private static final int POS_0 = 0;
070 private static final int POS_1 = 1;
071 private static final int POS_2 = 2;
072 private static final int POS_3 = 3;
073
074 /**
075 * Maps ZipArchiveEntrys to Longs, recording the offsets of the local
076 * file headers.
077 */
078 private final Map entries = new LinkedHashMap(HASH_SIZE);
079
080 /**
081 * Maps String to ZipArchiveEntrys, name -> actual entry.
082 */
083 private final Map nameMap = new HashMap(HASH_SIZE);
084
085 private static final class OffsetEntry {
086 private long headerOffset = -1;
087 private long dataOffset = -1;
088 }
089
090 /**
091 * The encoding to use for filenames and the file comment.
092 *
093 * <p>For a list of possible values see <a
094 * href="http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html">http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html</a>.
095 * Defaults to UTF-8.</p>
096 */
097 private final String encoding;
098
099 /**
100 * The zip encoding to use for filenames and the file comment.
101 */
102 private final ZipEncoding zipEncoding;
103
104 /**
105 * File name of actual source.
106 */
107 private final String archiveName;
108
109 /**
110 * The actual data source.
111 */
112 private final RandomAccessFile archive;
113
114 /**
115 * Whether to look for and use Unicode extra fields.
116 */
117 private final boolean useUnicodeExtraFields;
118
119 /**
120 * Whether the file is closed.
121 */
122 private boolean closed;
123
/**
 * Opens the given file for reading, decoding file names as "UTF8"
 * and scanning for unicode extra fields.
 *
 * @param f the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(File f) throws IOException {
    this(f, ZipEncodingHelper.UTF8, true);
}

/**
 * Opens the named file for reading, decoding file names as "UTF8"
 * and scanning for unicode extra fields.
 *
 * @param name name of the archive.
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(String name) throws IOException {
    this(new File(name), ZipEncodingHelper.UTF8, true);
}

/**
 * Opens the named file for reading, decoding file names with the
 * given encoding and scanning for unicode extra fields.
 *
 * @param name name of the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(String name, String encoding) throws IOException {
    this(new File(name), encoding, true);
}

/**
 * Opens the given file for reading, decoding file names with the
 * given encoding and scanning for unicode extra fields.
 *
 * @param f the archive.
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 *
 * @throws IOException if an error occurs while reading the file.
 */
public ZipFile(File f, String encoding) throws IOException {
    this(f, encoding, true);
}
173
174 /**
175 * Opens the given file for reading, assuming the specified
176 * encoding for file names.
177 *
178 * @param f the archive.
179 * @param encoding the encoding to use for file names, use null
180 * for the platform's default encoding
181 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
182 * Extra Fields (if present) to set the file names.
183 *
184 * @throws IOException if an error occurs while reading the file.
185 */
186 public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
187 throws IOException {
188 this.archiveName = f.getAbsolutePath();
189 this.encoding = encoding;
190 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
191 this.useUnicodeExtraFields = useUnicodeExtraFields;
192 archive = new RandomAccessFile(f, "r");
193 boolean success = false;
194 try {
195 Map entriesWithoutUTF8Flag = populateFromCentralDirectory();
196 resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
197 success = true;
198 } finally {
199 if (!success) {
200 try {
201 closed = true;
202 archive.close();
203 } catch (IOException e2) { // NOPMD
204 // swallow, throw the original exception instead
205 }
206 }
207 }
208 }
209
210 /**
211 * The encoding to use for filenames and the file comment.
212 *
213 * @return null if using the platform's default character encoding.
214 */
215 public String getEncoding() {
216 return encoding;
217 }
218
219 /**
220 * Closes the archive.
221 * @throws IOException if an error occurs closing the archive.
222 */
223 public void close() throws IOException {
224 // this flag is only written here and read in finalize() which
225 // can never be run in parallel.
226 // no synchronization needed.
227 closed = true;
228
229 archive.close();
230 }
231
232 /**
233 * close a zipfile quietly; throw no io fault, do nothing
234 * on a null parameter
235 * @param zipfile file to close, can be null
236 */
237 public static void closeQuietly(ZipFile zipfile) {
238 if (zipfile != null) {
239 try {
240 zipfile.close();
241 } catch (IOException e) { // NOPMD
242 //ignore, that's why the method is called "quietly"
243 }
244 }
245 }
246
247 /**
248 * Returns all entries.
249 *
250 * <p>Entries will be returned in the same order they appear
251 * within the archive's central directory.</p>
252 *
253 * @return all entries as {@link ZipArchiveEntry} instances
254 */
255 public Enumeration getEntries() {
256 return Collections.enumeration(entries.keySet());
257 }
258
259 /**
260 * Returns all entries in physical order.
261 *
262 * <p>Entries will be returned in the same order their contents
263 * appear within the archive.</p>
264 *
265 * @return all entries as {@link ZipArchiveEntry} instances
266 *
267 * @since Commons Compress 1.1
268 */
269 public Enumeration getEntriesInPhysicalOrder() {
270 Object[] allEntries = entries.keySet().toArray();
271 Arrays.sort(allEntries, OFFSET_COMPARATOR);
272 return Collections.enumeration(Arrays.asList(allEntries));
273 }
274
275 /**
276 * Returns a named entry - or <code>null</code> if no entry by
277 * that name exists.
278 * @param name name of the entry.
279 * @return the ZipArchiveEntry corresponding to the given name - or
280 * <code>null</code> if not present.
281 */
282 public ZipArchiveEntry getEntry(String name) {
283 return (ZipArchiveEntry) nameMap.get(name);
284 }
285
286 /**
287 * Whether this class is able to read the given entry.
288 *
289 * <p>May return false if it is set up to use encryption or a
290 * compression method that hasn't been implemented yet.</p>
291 * @since Apache Commons Compress 1.1
292 */
293 public boolean canReadEntryData(ZipArchiveEntry ze) {
294 return ZipUtil.canHandleEntryData(ze);
295 }
296
297 /**
298 * Returns an InputStream for reading the contents of the given entry.
299 *
300 * @param ze the entry to get the stream for.
301 * @return a stream to read the entry from.
302 * @throws IOException if unable to create an input stream from the zipenty
303 * @throws ZipException if the zipentry uses an unsupported feature
304 */
305 public InputStream getInputStream(ZipArchiveEntry ze)
306 throws IOException, ZipException {
307 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
308 if (offsetEntry == null) {
309 return null;
310 }
311 ZipUtil.checkRequestedFeatures(ze);
312 long start = offsetEntry.dataOffset;
313 BoundedInputStream bis =
314 new BoundedInputStream(start, ze.getCompressedSize());
315 switch (ze.getMethod()) {
316 case ZipArchiveEntry.STORED:
317 return bis;
318 case ZipArchiveEntry.DEFLATED:
319 bis.addDummy();
320 final Inflater inflater = new Inflater(true);
321 return new InflaterInputStream(bis, inflater) {
322 public void close() throws IOException {
323 super.close();
324 inflater.end();
325 }
326 };
327 default:
328 throw new ZipException("Found unsupported compression method "
329 + ze.getMethod());
330 }
331 }
332
333 /**
334 * Ensures that the close method of this zipfile is called when
335 * there are no more references to it.
336 * @see #close()
337 */
338 protected void finalize() throws Throwable {
339 try {
340 if (!closed) {
341 System.err.println("Cleaning up unclosed ZipFile for archive "
342 + archiveName);
343 close();
344 }
345 } finally {
346 super.finalize();
347 }
348 }
349
350 private static final int CFH_LEN =
351 /* version made by */ SHORT
352 /* version needed to extract */ + SHORT
353 /* general purpose bit flag */ + SHORT
354 /* compression method */ + SHORT
355 /* last mod file time */ + SHORT
356 /* last mod file date */ + SHORT
357 /* crc-32 */ + WORD
358 /* compressed size */ + WORD
359 /* uncompressed size */ + WORD
360 /* filename length */ + SHORT
361 /* extra field length */ + SHORT
362 /* file comment length */ + SHORT
363 /* disk number start */ + SHORT
364 /* internal file attributes */ + SHORT
365 /* external file attributes */ + WORD
366 /* relative offset of local header */ + WORD;
367
368 /**
369 * Reads the central directory of the given archive and populates
370 * the internal tables with ZipArchiveEntry instances.
371 *
372 * <p>The ZipArchiveEntrys will know all data that can be obtained from
373 * the central directory alone, but not the data that requires the
374 * local file header or additional data to be read.</p>
375 *
376 * @return a Map<ZipArchiveEntry, NameAndComment>> of
377 * zipentries that didn't have the language encoding flag set when
378 * read.
379 */
380 private Map populateFromCentralDirectory()
381 throws IOException {
382 HashMap noUTF8Flag = new HashMap();
383
384 positionAtCentralDirectory();
385
386 byte[] cfh = new byte[CFH_LEN];
387
388 byte[] signatureBytes = new byte[WORD];
389 archive.readFully(signatureBytes);
390 long sig = ZipLong.getValue(signatureBytes);
391 final long cfhSig = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
392 if (sig != cfhSig && startsWithLocalFileHeader()) {
393 throw new IOException("central directory is empty, can't expand"
394 + " corrupt archive.");
395 }
396 while (sig == cfhSig) {
397 archive.readFully(cfh);
398 int off = 0;
399 ZipArchiveEntry ze = new ZipArchiveEntry();
400
401 int versionMadeBy = ZipShort.getValue(cfh, off);
402 off += SHORT;
403 ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
404
405 off += SHORT; // skip version info
406
407 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfh, off);
408 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
409 final ZipEncoding entryEncoding =
410 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
411 ze.setGeneralPurposeBit(gpFlag);
412
413 off += SHORT;
414
415 ze.setMethod(ZipShort.getValue(cfh, off));
416 off += SHORT;
417
418 // FIXME this is actually not very cpu cycles friendly as we are converting from
419 // dos to java while the underlying Sun implementation will convert
420 // from java to dos time for internal storage...
421 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off));
422 ze.setTime(time);
423 off += WORD;
424
425 ze.setCrc(ZipLong.getValue(cfh, off));
426 off += WORD;
427
428 ze.setCompressedSize(ZipLong.getValue(cfh, off));
429 off += WORD;
430
431 ze.setSize(ZipLong.getValue(cfh, off));
432 off += WORD;
433
434 int fileNameLen = ZipShort.getValue(cfh, off);
435 off += SHORT;
436
437 int extraLen = ZipShort.getValue(cfh, off);
438 off += SHORT;
439
440 int commentLen = ZipShort.getValue(cfh, off);
441 off += SHORT;
442
443 off += SHORT; // disk number
444
445 ze.setInternalAttributes(ZipShort.getValue(cfh, off));
446 off += SHORT;
447
448 ze.setExternalAttributes(ZipLong.getValue(cfh, off));
449 off += WORD;
450
451 byte[] fileName = new byte[fileNameLen];
452 archive.readFully(fileName);
453 ze.setName(entryEncoding.decode(fileName), fileName);
454
455 // LFH offset,
456 OffsetEntry offset = new OffsetEntry();
457 offset.headerOffset = ZipLong.getValue(cfh, off);
458 // data offset will be filled later
459 entries.put(ze, offset);
460
461 nameMap.put(ze.getName(), ze);
462
463 byte[] cdExtraData = new byte[extraLen];
464 archive.readFully(cdExtraData);
465 ze.setCentralDirectoryExtra(cdExtraData);
466
467 byte[] comment = new byte[commentLen];
468 archive.readFully(comment);
469 ze.setComment(entryEncoding.decode(comment));
470
471 archive.readFully(signatureBytes);
472 sig = ZipLong.getValue(signatureBytes);
473
474 if (!hasUTF8Flag && useUnicodeExtraFields) {
475 noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
476 }
477 }
478 return noUTF8Flag;
479 }
480
481 private static final int MIN_EOCD_SIZE =
482 /* end of central dir signature */ WORD
483 /* number of this disk */ + SHORT
484 /* number of the disk with the */
485 /* start of the central directory */ + SHORT
486 /* total number of entries in */
487 /* the central dir on this disk */ + SHORT
488 /* total number of entries in */
489 /* the central dir */ + SHORT
490 /* size of the central directory */ + WORD
491 /* offset of start of central */
492 /* directory with respect to */
493 /* the starting disk number */ + WORD
494 /* zipfile comment length */ + SHORT;
495
496 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
497 /* maximum length of zipfile comment */ + 0xFFFF;
498
499 private static final int CFD_LOCATOR_OFFSET =
500 /* end of central dir signature */ WORD
501 /* number of this disk */ + SHORT
502 /* number of the disk with the */
503 /* start of the central directory */ + SHORT
504 /* total number of entries in */
505 /* the central dir on this disk */ + SHORT
506 /* total number of entries in */
507 /* the central dir */ + SHORT
508 /* size of the central directory */ + WORD;
509
510 /**
511 * Searches for the "End of central dir record", parses
512 * it and positions the stream at the first central directory
513 * record.
514 */
515 private void positionAtCentralDirectory()
516 throws IOException {
517 boolean found = false;
518 long off = archive.length() - MIN_EOCD_SIZE;
519 long stopSearching = Math.max(0L, archive.length() - MAX_EOCD_SIZE);
520 if (off >= 0) {
521 archive.seek(off);
522 byte[] sig = ZipArchiveOutputStream.EOCD_SIG;
523 int curr = archive.read();
524 while (off >= stopSearching && curr != -1) {
525 if (curr == sig[POS_0]) {
526 curr = archive.read();
527 if (curr == sig[POS_1]) {
528 curr = archive.read();
529 if (curr == sig[POS_2]) {
530 curr = archive.read();
531 if (curr == sig[POS_3]) {
532 found = true;
533 break;
534 }
535 }
536 }
537 }
538 archive.seek(--off);
539 curr = archive.read();
540 }
541 }
542 if (!found) {
543 throw new ZipException("archive is not a ZIP archive");
544 }
545 archive.seek(off + CFD_LOCATOR_OFFSET);
546 byte[] cfdOffset = new byte[WORD];
547 archive.readFully(cfdOffset);
548 archive.seek(ZipLong.getValue(cfdOffset));
549 }
550
551 /**
552 * Number of bytes in local file header up to the "length of
553 * filename" entry.
554 */
555 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH =
556 /* local file header signature */ WORD
557 /* version needed to extract */ + SHORT
558 /* general purpose bit flag */ + SHORT
559 /* compression method */ + SHORT
560 /* last mod file time */ + SHORT
561 /* last mod file date */ + SHORT
562 /* crc-32 */ + WORD
563 /* compressed size */ + WORD
564 /* uncompressed size */ + WORD;
565
566 /**
567 * Walks through all recorded entries and adds the data available
568 * from the local file header.
569 *
570 * <p>Also records the offsets for the data to read from the
571 * entries.</p>
572 */
573 private void resolveLocalFileHeaderData(Map entriesWithoutUTF8Flag)
574 throws IOException {
575 Enumeration e = getEntries();
576 while (e.hasMoreElements()) {
577 ZipArchiveEntry ze = (ZipArchiveEntry) e.nextElement();
578 OffsetEntry offsetEntry = (OffsetEntry) entries.get(ze);
579 long offset = offsetEntry.headerOffset;
580 archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
581 byte[] b = new byte[SHORT];
582 archive.readFully(b);
583 int fileNameLen = ZipShort.getValue(b);
584 archive.readFully(b);
585 int extraFieldLen = ZipShort.getValue(b);
586 int lenToSkip = fileNameLen;
587 while (lenToSkip > 0) {
588 int skipped = archive.skipBytes(lenToSkip);
589 if (skipped <= 0) {
590 throw new RuntimeException("failed to skip file name in"
591 + " local file header");
592 }
593 lenToSkip -= skipped;
594 }
595 byte[] localExtraData = new byte[extraFieldLen];
596 archive.readFully(localExtraData);
597 ze.setExtra(localExtraData);
598 /*dataOffsets.put(ze,
599 new Long(offset + LFH_OFFSET_FOR_FILENAME_LENGTH
600 + SHORT + SHORT + fileNameLen + extraFieldLen));
601 */
602 offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
603 + SHORT + SHORT + fileNameLen + extraFieldLen;
604
605 if (entriesWithoutUTF8Flag.containsKey(ze)) {
606 String orig = ze.getName();
607 NameAndComment nc = (NameAndComment) entriesWithoutUTF8Flag.get(ze);
608 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name,
609 nc.comment);
610 if (!orig.equals(ze.getName())) {
611 nameMap.remove(orig);
612 nameMap.put(ze.getName(), ze);
613 }
614 }
615 }
616 }
617
618 /**
619 * Checks whether the archive starts with a LFH. If it doesn't,
620 * it may be an empty archive.
621 */
622 private boolean startsWithLocalFileHeader() throws IOException {
623 archive.seek(0);
624 final byte[] start = new byte[WORD];
625 archive.readFully(start);
626 for (int i = 0; i < start.length; i++) {
627 if (start[i] != ZipArchiveOutputStream.LFH_SIG[i]) {
628 return false;
629 }
630 }
631 return true;
632 }
633
634 /**
635 * InputStream that delegates requests to the underlying
636 * RandomAccessFile, making sure that only bytes from a certain
637 * range can be read.
638 */
639 private class BoundedInputStream extends InputStream {
640 private long remaining;
641 private long loc;
642 private boolean addDummyByte = false;
643
644 BoundedInputStream(long start, long remaining) {
645 this.remaining = remaining;
646 loc = start;
647 }
648
649 public int read() throws IOException {
650 if (remaining-- <= 0) {
651 if (addDummyByte) {
652 addDummyByte = false;
653 return 0;
654 }
655 return -1;
656 }
657 synchronized (archive) {
658 archive.seek(loc++);
659 return archive.read();
660 }
661 }
662
663 public int read(byte[] b, int off, int len) throws IOException {
664 if (remaining <= 0) {
665 if (addDummyByte) {
666 addDummyByte = false;
667 b[off] = 0;
668 return 1;
669 }
670 return -1;
671 }
672
673 if (len <= 0) {
674 return 0;
675 }
676
677 if (len > remaining) {
678 len = (int) remaining;
679 }
680 int ret = -1;
681 synchronized (archive) {
682 archive.seek(loc);
683 ret = archive.read(b, off, len);
684 }
685 if (ret > 0) {
686 loc += ret;
687 remaining -= ret;
688 }
689 return ret;
690 }
691
692 /**
693 * Inflater needs an extra dummy byte for nowrap - see
694 * Inflater's javadocs.
695 */
696 void addDummy() {
697 addDummyByte = true;
698 }
699 }
700
701 private static final class NameAndComment {
702 private final byte[] name;
703 private final byte[] comment;
704 private NameAndComment(byte[] name, byte[] comment) {
705 this.name = name;
706 this.comment = comment;
707 }
708 }
709
710 /**
711 * Compares two ZipArchiveEntries based on their offset within the archive.
712 *
713 * <p>Won't return any meaningful results if one of the entries
714 * isn't part of the archive at all.</p>
715 *
716 * @since Commons Compress 1.1
717 */
718 private final Comparator OFFSET_COMPARATOR =
719 new Comparator() {
720 public int compare(Object o1, Object o2) {
721 if (o1 == o2)
722 return 0;
723
724 ZipArchiveEntry e1 = (ZipArchiveEntry) o1;
725 ZipArchiveEntry e2 = (ZipArchiveEntry) o2;
726
727 OffsetEntry off1 = (OffsetEntry) entries.get(e1);
728 OffsetEntry off2 = (OffsetEntry) entries.get(e2);
729 if (off1 == null) {
730 return 1;
731 }
732 if (off2 == null) {
733 return -1;
734 }
735 long val = (off1.headerOffset - off2.headerOffset);
736 return val == 0 ? 0 : val < 0 ? -1 : +1;
737 }
738 };
739 }