001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019 package org.apache.commons.compress.archivers.zip;
020
021 import java.io.ByteArrayInputStream;
022 import java.io.ByteArrayOutputStream;
023 import java.io.EOFException;
024 import java.io.IOException;
025 import java.io.InputStream;
026 import java.io.PushbackInputStream;
027 import java.util.zip.CRC32;
028 import java.util.zip.DataFormatException;
029 import java.util.zip.Inflater;
030 import java.util.zip.ZipException;
031
032 import org.apache.commons.compress.archivers.ArchiveEntry;
033 import org.apache.commons.compress.archivers.ArchiveInputStream;
034
035 /**
036 * Implements an input stream that can read Zip archives.
037 * <p>
038 * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information
039 * is not available from the header.
040 * <p>
041 * The {@link ZipFile} class is preferred when reading from files.
042 *
043 * @see ZipFile
044 * @NotThreadSafe
045 */
046 public class ZipArchiveInputStream extends ArchiveInputStream {
047
048 private static final int SHORT = 2;
049 private static final int WORD = 4;
050
051 /**
052 * The zip encoding to use for filenames and the file comment.
053 */
054 private final ZipEncoding zipEncoding;
055
056 /**
057 * Whether to look for and use Unicode extra fields.
058 */
059 private final boolean useUnicodeExtraFields;
060
061 private final InputStream in;
062
063 private final Inflater inf = new Inflater(true);
064 private final CRC32 crc = new CRC32();
065
066 private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE];
067
068 private ZipArchiveEntry current = null;
069 private boolean closed = false;
070 private boolean hitCentralDirectory = false;
071 private int offsetInBuffer = 0;
072 private long readBytesOfEntry = 0, bytesReadFromStream = 0;
073 private int lengthOfLastRead = 0;
074 private boolean hasDataDescriptor = false;
075 private ByteArrayInputStream lastStoredEntry = null;
076
077 private boolean allowStoredEntriesWithDataDescriptor = false;
078
079 private static final int LFH_LEN = 30;
080 /*
081 local file header signature 4 bytes (0x04034b50)
082 version needed to extract 2 bytes
083 general purpose bit flag 2 bytes
084 compression method 2 bytes
085 last mod file time 2 bytes
086 last mod file date 2 bytes
087 crc-32 4 bytes
088 compressed size 4 bytes
089 uncompressed size 4 bytes
090 file name length 2 bytes
091 extra field length 2 bytes
092 */
093
/**
 * Creates a stream that uses the {@link ZipEncodingHelper#UTF8}
 * encoding for file names and looks for Unicode extra fields.
 *
 * @param inputStream the stream to read the archive from
 */
public ZipArchiveInputStream(InputStream inputStream) {
    this(inputStream, ZipEncodingHelper.UTF8, true);
}
097
/**
 * Creates a stream that does not read STORED entries which use a
 * data descriptor.
 *
 * @param inputStream the stream to read the archive from
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 */
public ZipArchiveInputStream(InputStream inputStream,
                             String encoding,
                             boolean useUnicodeExtraFields) {
    this(inputStream, encoding, useUnicodeExtraFields, false);
}
109
/**
 * Creates a stream with full control over name decoding and the
 * handling of STORED entries that use a data descriptor.
 *
 * @param inputStream the stream to read the archive from; it is
 * wrapped in a {@link PushbackInputStream} as large as the internal
 * read buffer
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 * @param allowStoredEntriesWithDataDescriptor whether the stream
 * will try to read STORED entries that use a data descriptor
 * @since Apache Commons Compress 1.1
 */
public ZipArchiveInputStream(InputStream inputStream,
                             String encoding,
                             boolean useUnicodeExtraFields,
                             boolean allowStoredEntriesWithDataDescriptor) {
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.useUnicodeExtraFields = useUnicodeExtraFields;
    this.in = new PushbackInputStream(inputStream, buf.length);
    this.allowStoredEntriesWithDataDescriptor =
        allowStoredEntriesWithDataDescriptor;
}
129
130 public ZipArchiveEntry getNextZipEntry() throws IOException {
131 if (closed || hitCentralDirectory) {
132 return null;
133 }
134 if (current != null) {
135 closeEntry();
136 }
137 byte[] lfh = new byte[LFH_LEN];
138 try {
139 readFully(lfh);
140 } catch (EOFException e) {
141 return null;
142 }
143 ZipLong sig = new ZipLong(lfh);
144 if (sig.equals(ZipLong.CFH_SIG)) {
145 hitCentralDirectory = true;
146 return null;
147 }
148 if (!sig.equals(ZipLong.LFH_SIG)) {
149 return null;
150 }
151
152 int off = WORD;
153 current = new ZipArchiveEntry();
154
155 int versionMadeBy = ZipShort.getValue(lfh, off);
156 off += SHORT;
157 current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT)
158 & ZipFile.NIBLET_MASK);
159
160 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfh, off);
161 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
162 final ZipEncoding entryEncoding =
163 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
164 hasDataDescriptor = gpFlag.usesDataDescriptor();
165 current.setGeneralPurposeBit(gpFlag);
166
167 off += SHORT;
168
169 current.setMethod(ZipShort.getValue(lfh, off));
170 off += SHORT;
171
172 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off));
173 current.setTime(time);
174 off += WORD;
175
176 if (!hasDataDescriptor) {
177 current.setCrc(ZipLong.getValue(lfh, off));
178 off += WORD;
179
180 current.setCompressedSize(ZipLong.getValue(lfh, off));
181 off += WORD;
182
183 current.setSize(ZipLong.getValue(lfh, off));
184 off += WORD;
185 } else {
186 off += 3 * WORD;
187 }
188
189 int fileNameLen = ZipShort.getValue(lfh, off);
190
191 off += SHORT;
192
193 int extraLen = ZipShort.getValue(lfh, off);
194 off += SHORT;
195
196 byte[] fileName = new byte[fileNameLen];
197 readFully(fileName);
198 current.setName(entryEncoding.decode(fileName), fileName);
199
200 byte[] extraData = new byte[extraLen];
201 readFully(extraData);
202 current.setExtra(extraData);
203
204 if (!hasUTF8Flag && useUnicodeExtraFields) {
205 ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null);
206 }
207 return current;
208 }
209
210 /** {@inheritDoc} */
211 public ArchiveEntry getNextEntry() throws IOException {
212 return getNextZipEntry();
213 }
214
215 /**
216 * Whether this class is able to read the given entry.
217 *
218 * <p>May return false if it is set up to use encryption or a
219 * compression method that hasn't been implemented yet.</p>
220 * @since Apache Commons Compress 1.1
221 */
222 public boolean canReadEntryData(ArchiveEntry ae) {
223 if (ae instanceof ZipArchiveEntry) {
224 ZipArchiveEntry ze = (ZipArchiveEntry) ae;
225 return ZipUtil.canHandleEntryData(ze)
226 && supportsDataDescriptorFor(ze);
227
228 }
229 return false;
230 }
231
232 public int read(byte[] buffer, int start, int length) throws IOException {
233 if (closed) {
234 throw new IOException("The stream is closed");
235 }
236 if (inf.finished() || current == null) {
237 return -1;
238 }
239
240 // avoid int overflow, check null buffer
241 if (start <= buffer.length && length >= 0 && start >= 0
242 && buffer.length - start >= length) {
243 ZipUtil.checkRequestedFeatures(current);
244 if (!supportsDataDescriptorFor(current)) {
245 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException
246 .Feature
247 .DATA_DESCRIPTOR,
248 current);
249 }
250
251 if (current.getMethod() == ZipArchiveOutputStream.STORED) {
252 if (hasDataDescriptor) {
253 if (lastStoredEntry == null) {
254 readStoredEntry();
255 }
256 return lastStoredEntry.read(buffer, start, length);
257 }
258
259 long csize = current.getSize();
260 if (readBytesOfEntry >= csize) {
261 return -1;
262 }
263 if (offsetInBuffer >= lengthOfLastRead) {
264 offsetInBuffer = 0;
265 if ((lengthOfLastRead = in.read(buf)) == -1) {
266 return -1;
267 }
268 count(lengthOfLastRead);
269 bytesReadFromStream += lengthOfLastRead;
270 }
271 int toRead = length > lengthOfLastRead
272 ? lengthOfLastRead - offsetInBuffer
273 : length;
274 if ((csize - readBytesOfEntry) < toRead) {
275 // if it is smaller than toRead then it fits into an int
276 toRead = (int) (csize - readBytesOfEntry);
277 }
278 System.arraycopy(buf, offsetInBuffer, buffer, start, toRead);
279 offsetInBuffer += toRead;
280 readBytesOfEntry += toRead;
281 crc.update(buffer, start, toRead);
282 return toRead;
283 }
284
285 if (inf.needsInput()) {
286 fill();
287 if (lengthOfLastRead > 0) {
288 bytesReadFromStream += lengthOfLastRead;
289 }
290 }
291 int read = 0;
292 try {
293 read = inf.inflate(buffer, start, length);
294 } catch (DataFormatException e) {
295 throw new ZipException(e.getMessage());
296 }
297 if (read == 0) {
298 if (inf.finished()) {
299 return -1;
300 } else if (lengthOfLastRead == -1) {
301 throw new IOException("Truncated ZIP file");
302 }
303 }
304 crc.update(buffer, start, read);
305 return read;
306 }
307 throw new ArrayIndexOutOfBoundsException();
308 }
309
310 public void close() throws IOException {
311 if (!closed) {
312 closed = true;
313 in.close();
314 }
315 }
316
317 /**
318 * Skips over and discards value bytes of data from this input
319 * stream.
320 *
321 * <p>This implementation may end up skipping over some smaller
322 * number of bytes, possibly 0, if an only if it reaches the end
323 * of the underlying stream.</p>
324 *
325 * <p>The actual number of bytes skipped is returned.</p>
326 *
327 * @param value the number of bytes to be skipped.
328 * @return the actual number of bytes skipped.
329 * @throws IOException - if an I/O error occurs.
330 * @throws IllegalArgumentException - if value is negative.
331 */
332 public long skip(long value) throws IOException {
333 if (value >= 0) {
334 long skipped = 0;
335 byte[] b = new byte[1024];
336 while (skipped < value) {
337 long rem = value - skipped;
338 int x = read(b, 0, (int) (b.length > rem ? rem : b.length));
339 if (x == -1) {
340 return skipped;
341 }
342 skipped += x;
343 }
344 return skipped;
345 }
346 throw new IllegalArgumentException();
347 }
348
349 /**
350 * Checks if the signature matches what is expected for a zip file.
351 * Does not currently handle self-extracting zips which may have arbitrary
352 * leading content.
353 *
354 * @param signature
355 * the bytes to check
356 * @param length
357 * the number of bytes to check
358 * @return true, if this stream is a zip archive stream, false otherwise
359 */
360 public static boolean matches(byte[] signature, int length) {
361 if (length < ZipArchiveOutputStream.LFH_SIG.length) {
362 return false;
363 }
364
365 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
366 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip
367 }
368
369 private static boolean checksig(byte[] signature, byte[] expected){
370 for (int i = 0; i < expected.length; i++) {
371 if (signature[i] != expected[i]) {
372 return false;
373 }
374 }
375 return true;
376 }
377
378 /**
379 * Closes the current ZIP archive entry and positions the underlying
380 * stream to the beginning of the next entry. All per-entry variables
381 * and data structures are cleared.
382 * <p>
383 * If the compressed size of this entry is included in the entry header,
384 * then any outstanding bytes are simply skipped from the underlying
385 * stream without uncompressing them. This allows an entry to be safely
386 * closed even if the compression method is unsupported.
387 * <p>
388 * In case we don't know the compressed size of this entry or have
389 * already buffered too much data from the underlying stream to support
390 * uncompression, then the uncompression process is completed and the
391 * end position of the stream is adjusted based on the result of that
392 * process.
393 *
394 * @throws IOException if an error occurs
395 */
396 private void closeEntry() throws IOException {
397 if (closed) {
398 throw new IOException("The stream is closed");
399 }
400 if (current == null) {
401 return;
402 }
403
404 // Ensure all entry bytes are read
405 if (bytesReadFromStream <= current.getCompressedSize()
406 && !hasDataDescriptor) {
407 long remaining = current.getCompressedSize() - bytesReadFromStream;
408 while (remaining > 0) {
409 long n = in.read(buf, 0, (int) Math.min(buf.length, remaining));
410 if (n < 0) {
411 throw new EOFException(
412 "Truncated ZIP entry: " + current.getName());
413 } else {
414 count(n);
415 remaining -= n;
416 }
417 }
418 } else {
419 skip(Long.MAX_VALUE);
420
421 long inB;
422 if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) {
423 inB = ZipUtil.adjustToLong(inf.getTotalIn());
424 } else {
425 inB = readBytesOfEntry;
426 }
427
428 // this is at most a single read() operation and can't
429 // exceed the range of int
430 int diff = (int) (bytesReadFromStream - inB);
431
432 // Pushback any required bytes
433 if (diff > 0) {
434 ((PushbackInputStream) in).unread(
435 buf, lengthOfLastRead - diff, diff);
436 pushedBackBytes(diff);
437 }
438 }
439
440 if (lastStoredEntry == null && hasDataDescriptor) {
441 readDataDescriptor();
442 }
443
444 inf.reset();
445 readBytesOfEntry = bytesReadFromStream = 0L;
446 offsetInBuffer = lengthOfLastRead = 0;
447 crc.reset();
448 current = null;
449 lastStoredEntry = null;
450 }
451
452 private void fill() throws IOException {
453 if (closed) {
454 throw new IOException("The stream is closed");
455 }
456 if ((lengthOfLastRead = in.read(buf)) > 0) {
457 count(lengthOfLastRead);
458 inf.setInput(buf, 0, lengthOfLastRead);
459 }
460 }
461
462 private void readFully(byte[] b) throws IOException {
463 int count = 0, x = 0;
464 while (count != b.length) {
465 count += x = in.read(b, count, b.length - count);
466 if (x == -1) {
467 throw new EOFException();
468 }
469 count(x);
470 }
471 }
472
473 private void readDataDescriptor() throws IOException {
474 byte[] b = new byte[WORD];
475 readFully(b);
476 ZipLong val = new ZipLong(b);
477 if (ZipLong.DD_SIG.equals(val)) {
478 // data descriptor with signature, skip sig
479 readFully(b);
480 val = new ZipLong(b);
481 }
482 current.setCrc(val.getValue());
483 readFully(b);
484 current.setCompressedSize(new ZipLong(b).getValue());
485 readFully(b);
486 current.setSize(new ZipLong(b).getValue());
487 }
488
489 /**
490 * Whether this entry requires a data descriptor this library can work with.
491 *
492 * @return true if allowStoredEntriesWithDataDescriptor is true,
493 * the entry doesn't require any data descriptor or the method is
494 * DEFLATED.
495 */
496 private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
497 return allowStoredEntriesWithDataDescriptor ||
498 !entry.getGeneralPurposeBit().usesDataDescriptor()
499 || entry.getMethod() == ZipArchiveEntry.DEFLATED;
500 }
501
502 /**
503 * Caches a stored entry that uses the data descriptor.
504 *
505 * <ul>
506 * <li>Reads a stored entry until the signature of a local file
507 * header, central directory header or data descriptor has been
508 * found.</li>
509 * <li>Stores all entry data in lastStoredEntry.</p>
510 * <li>Rewinds the stream to position at the data
511 * descriptor.</li>
512 * <li>reads the data descriptor</li>
513 * </ul>
514 *
515 * <p>After calling this method the entry should know its size,
516 * the entry's data is cached and the stream is positioned at the
517 * next local file or central directory header.</p>
518 */
519 private void readStoredEntry() throws IOException {
520 ByteArrayOutputStream bos = new ByteArrayOutputStream();
521 byte[] LFH = ZipLong.LFH_SIG.getBytes();
522 byte[] CFH = ZipLong.CFH_SIG.getBytes();
523 byte[] DD = ZipLong.DD_SIG.getBytes();
524 int off = 0;
525 boolean done = false;
526
527 while (!done) {
528 int r = in.read(buf, off, ZipArchiveOutputStream.BUFFER_SIZE - off);
529 if (r <= 0) {
530 // read the whole archive without ever finding a
531 // central directory
532 throw new IOException("Truncated ZIP file");
533 }
534 if (r + off < 4) {
535 // buf is too small to check for a signature, loop
536 off += r;
537 continue;
538 }
539
540 int readTooMuch = 0;
541 for (int i = 0; !done && i < r - 4; i++) {
542 if (buf[i] == LFH[0] && buf[i + 1] == LFH[1]) {
543 if ((buf[i + 2] == LFH[2] && buf[i + 3] == LFH[3])
544 || (buf[i] == CFH[2] && buf[i + 3] == CFH[3])) {
545 // found a LFH or CFH:
546 readTooMuch = off + r - i - 12 /* dd without signature */;
547 done = true;
548 }
549 else if (buf[i + 2] == DD[2] && buf[i + 3] == DD[3]) {
550 // found DD:
551 readTooMuch = off + r - i;
552 done = true;
553 }
554 if (done) {
555 // * push back bytes read in excess as well as the data
556 // descriptor
557 // * copy the remaining bytes to cache
558 // * read data descriptor
559 ((PushbackInputStream) in).unread(buf, off + r - readTooMuch, readTooMuch);
560 bos.write(buf, 0, i);
561 readDataDescriptor();
562 }
563 }
564 }
565 if (!done) {
566 // worst case we've read a data descriptor without a
567 // signature (12 bytes) plus the first three bytes of
568 // a LFH or CFH signature
569 // save the last 15 bytes in the buffer, cache
570 // anything in front of that, read on
571 if (off + r > 15) {
572 bos.write(buf, 0, off + r - 15);
573 System.arraycopy(buf, off + r - 15, buf, 0, 15);
574 off = 15;
575 } else {
576 off += r;
577 }
578 }
579 }
580
581 byte[] b = bos.toByteArray();
582 lastStoredEntry = new ByteArrayInputStream(b);
583 }
584 }