001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019 package org.apache.commons.compress.archivers.zip;
020
021 import java.io.ByteArrayInputStream;
022 import java.io.ByteArrayOutputStream;
023 import java.io.EOFException;
024 import java.io.IOException;
025 import java.io.InputStream;
026 import java.io.PushbackInputStream;
027 import java.util.zip.CRC32;
028 import java.util.zip.DataFormatException;
029 import java.util.zip.Inflater;
030 import java.util.zip.ZipException;
031
032 import org.apache.commons.compress.archivers.ArchiveEntry;
033 import org.apache.commons.compress.archivers.ArchiveInputStream;
034
035 /**
036 * Implements an input stream that can read Zip archives.
037 * <p>
038 * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information
039 * is not available from the header.
040 * <p>
041 * The {@link ZipFile} class is preferred when reading from files.
042 *
043 * @see ZipFile
044 * @NotThreadSafe
045 */
046 public class ZipArchiveInputStream extends ArchiveInputStream {
047
048 private static final int SHORT = 2;
049 private static final int WORD = 4;
050
051 /**
052 * The zip encoding to use for filenames and the file comment.
053 */
054 private final ZipEncoding zipEncoding;
055
056 /**
057 * Whether to look for and use Unicode extra fields.
058 */
059 private final boolean useUnicodeExtraFields;
060
061 private final InputStream in;
062
063 private final Inflater inf = new Inflater(true);
064 private final CRC32 crc = new CRC32();
065
066 private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE];
067
068 private ZipArchiveEntry current = null;
069 private boolean closed = false;
070 private boolean hitCentralDirectory = false;
071 private int readBytesOfEntry = 0, offsetInBuffer = 0;
072 private int bytesReadFromStream = 0;
073 private int lengthOfLastRead = 0;
074 private boolean hasDataDescriptor = false;
075 private ByteArrayInputStream lastStoredEntry = null;
076
077 private boolean allowStoredEntriesWithDataDescriptor = false;
078
079 private static final int LFH_LEN = 30;
080 /*
081 local file header signature 4 bytes (0x04034b50)
082 version needed to extract 2 bytes
083 general purpose bit flag 2 bytes
084 compression method 2 bytes
085 last mod file time 2 bytes
086 last mod file date 2 bytes
087 crc-32 4 bytes
088 compressed size 4 bytes
089 uncompressed size 4 bytes
090 file name length 2 bytes
091 extra field length 2 bytes
092 */
093
/**
 * Creates a stream that uses {@code ZipEncodingHelper.UTF8} as the
 * encoding for file names, uses Unicode extra fields and does not
 * read STORED entries that use a data descriptor.
 *
 * @param inputStream the stream to read the archive from
 */
public ZipArchiveInputStream(InputStream inputStream) {
    this(inputStream, ZipEncodingHelper.UTF8, true, false);
}

/**
 * Creates a stream that does not read STORED entries that use a
 * data descriptor.
 *
 * @param inputStream the stream to read the archive from
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 */
public ZipArchiveInputStream(InputStream inputStream,
                             String encoding,
                             boolean useUnicodeExtraFields) {
    this(inputStream, encoding, useUnicodeExtraFields, false);
}

/**
 * Creates a stream reading from the given stream, which is wrapped
 * in a {@link PushbackInputStream} as large as the internal read
 * buffer so that bytes read in excess can be returned to it.
 *
 * @param inputStream the stream to read the archive from
 * @param encoding the encoding to use for file names, use null
 * for the platform's default encoding
 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
 * Extra Fields (if present) to set the file names.
 * @param allowStoredEntriesWithDataDescriptor whether the stream
 * will try to read STORED entries that use a data descriptor
 * @since Apache Commons Compress 1.1
 */
public ZipArchiveInputStream(InputStream inputStream,
                             String encoding,
                             boolean useUnicodeExtraFields,
                             boolean allowStoredEntriesWithDataDescriptor) {
    this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    this.useUnicodeExtraFields = useUnicodeExtraFields;
    this.allowStoredEntriesWithDataDescriptor = allowStoredEntriesWithDataDescriptor;
    this.in = new PushbackInputStream(inputStream, this.buf.length);
}
129
130 public ZipArchiveEntry getNextZipEntry() throws IOException {
131 if (closed || hitCentralDirectory) {
132 return null;
133 }
134 if (current != null) {
135 closeEntry();
136 }
137 byte[] lfh = new byte[LFH_LEN];
138 try {
139 readFully(lfh);
140 } catch (EOFException e) {
141 return null;
142 }
143 ZipLong sig = new ZipLong(lfh);
144 if (sig.equals(ZipLong.CFH_SIG)) {
145 hitCentralDirectory = true;
146 return null;
147 }
148 if (!sig.equals(ZipLong.LFH_SIG)) {
149 return null;
150 }
151
152 int off = WORD;
153 current = new ZipArchiveEntry();
154
155 int versionMadeBy = ZipShort.getValue(lfh, off);
156 off += SHORT;
157 current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT)
158 & ZipFile.NIBLET_MASK);
159
160 final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(lfh, off);
161 final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
162 final ZipEncoding entryEncoding =
163 hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
164 hasDataDescriptor = gpFlag.usesDataDescriptor();
165 current.setGeneralPurposeBit(gpFlag);
166
167 off += SHORT;
168
169 current.setMethod(ZipShort.getValue(lfh, off));
170 off += SHORT;
171
172 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off));
173 current.setTime(time);
174 off += WORD;
175
176 if (!hasDataDescriptor) {
177 current.setCrc(ZipLong.getValue(lfh, off));
178 off += WORD;
179
180 current.setCompressedSize(ZipLong.getValue(lfh, off));
181 off += WORD;
182
183 current.setSize(ZipLong.getValue(lfh, off));
184 off += WORD;
185 } else {
186 off += 3 * WORD;
187 }
188
189 int fileNameLen = ZipShort.getValue(lfh, off);
190
191 off += SHORT;
192
193 int extraLen = ZipShort.getValue(lfh, off);
194 off += SHORT;
195
196 byte[] fileName = new byte[fileNameLen];
197 readFully(fileName);
198 current.setName(entryEncoding.decode(fileName));
199
200 byte[] extraData = new byte[extraLen];
201 readFully(extraData);
202 current.setExtra(extraData);
203
204 if (!hasUTF8Flag && useUnicodeExtraFields) {
205 ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null);
206 }
207 return current;
208 }
209
210 /** {@inheritDoc} */
211 public ArchiveEntry getNextEntry() throws IOException {
212 return getNextZipEntry();
213 }
214
215 /**
216 * Whether this class is able to read the given entry.
217 *
218 * <p>May return false if it is set up to use encryption or a
219 * compression method that hasn't been implemented yet.</p>
220 * @since Apache Commons Compress 1.1
221 */
222 public boolean canReadEntryData(ArchiveEntry ae) {
223 if (ae instanceof ZipArchiveEntry) {
224 ZipArchiveEntry ze = (ZipArchiveEntry) ae;
225 return ZipUtil.canHandleEntryData(ze)
226 && supportsDataDescriptorFor(ze);
227
228 }
229 return false;
230 }
231
232 public int read(byte[] buffer, int start, int length) throws IOException {
233 if (closed) {
234 throw new IOException("The stream is closed");
235 }
236 if (inf.finished() || current == null) {
237 return -1;
238 }
239
240 // avoid int overflow, check null buffer
241 if (start <= buffer.length && length >= 0 && start >= 0
242 && buffer.length - start >= length) {
243 ZipUtil.checkRequestedFeatures(current);
244 if (!supportsDataDescriptorFor(current)) {
245 throw new UnsupportedZipFeatureException(UnsupportedZipFeatureException
246 .Feature
247 .DATA_DESCRIPTOR,
248 current);
249 }
250
251 if (current.getMethod() == ZipArchiveOutputStream.STORED) {
252 if (hasDataDescriptor) {
253 if (lastStoredEntry == null) {
254 readStoredEntry();
255 }
256 return lastStoredEntry.read(buffer, start, length);
257 }
258
259 int csize = (int) current.getSize();
260 if (readBytesOfEntry >= csize) {
261 return -1;
262 }
263 if (offsetInBuffer >= lengthOfLastRead) {
264 offsetInBuffer = 0;
265 if ((lengthOfLastRead = in.read(buf)) == -1) {
266 return -1;
267 }
268 count(lengthOfLastRead);
269 bytesReadFromStream += lengthOfLastRead;
270 }
271 int toRead = length > lengthOfLastRead
272 ? lengthOfLastRead - offsetInBuffer
273 : length;
274 if ((csize - readBytesOfEntry) < toRead) {
275 toRead = csize - readBytesOfEntry;
276 }
277 System.arraycopy(buf, offsetInBuffer, buffer, start, toRead);
278 offsetInBuffer += toRead;
279 readBytesOfEntry += toRead;
280 crc.update(buffer, start, toRead);
281 return toRead;
282 }
283
284 if (inf.needsInput()) {
285 fill();
286 if (lengthOfLastRead > 0) {
287 bytesReadFromStream += lengthOfLastRead;
288 }
289 }
290 int read = 0;
291 try {
292 read = inf.inflate(buffer, start, length);
293 } catch (DataFormatException e) {
294 throw new ZipException(e.getMessage());
295 }
296 if (read == 0) {
297 if (inf.finished()) {
298 return -1;
299 } else if (lengthOfLastRead == -1) {
300 throw new IOException("Truncated ZIP file");
301 }
302 }
303 crc.update(buffer, start, read);
304 return read;
305 }
306 throw new ArrayIndexOutOfBoundsException();
307 }
308
309 public void close() throws IOException {
310 if (!closed) {
311 closed = true;
312 in.close();
313 }
314 }
315
316 public long skip(long value) throws IOException {
317 if (value >= 0) {
318 long skipped = 0;
319 byte[] b = new byte[1024];
320 while (skipped != value) {
321 long rem = value - skipped;
322 int x = read(b, 0, (int) (b.length > rem ? rem : b.length));
323 if (x == -1) {
324 return skipped;
325 }
326 skipped += x;
327 }
328 return skipped;
329 }
330 throw new IllegalArgumentException();
331 }
332
333 /**
334 * Checks if the signature matches what is expected for a zip file.
335 * Does not currently handle self-extracting zips which may have arbitrary
336 * leading content.
337 *
338 * @param signature
339 * the bytes to check
340 * @param length
341 * the number of bytes to check
342 * @return true, if this stream is a zip archive stream, false otherwise
343 */
344 public static boolean matches(byte[] signature, int length) {
345 if (length < ZipArchiveOutputStream.LFH_SIG.length) {
346 return false;
347 }
348
349 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
350 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip
351 }
352
353 private static boolean checksig(byte[] signature, byte[] expected){
354 for (int i = 0; i < expected.length; i++) {
355 if (signature[i] != expected[i]) {
356 return false;
357 }
358 }
359 return true;
360 }
361
362 /**
363 * Closes the current ZIP archive entry and positions the underlying
364 * stream to the beginning of the next entry. All per-entry variables
365 * and data structures are cleared.
366 * <p>
367 * If the compressed size of this entry is included in the entry header,
368 * then any outstanding bytes are simply skipped from the underlying
369 * stream without uncompressing them. This allows an entry to be safely
370 * closed even if the compression method is unsupported.
371 * <p>
372 * In case we don't know the compressed size of this entry or have
373 * already buffered too much data from the underlying stream to support
374 * uncompression, then the uncompression process is completed and the
375 * end position of the stream is adjusted based on the result of that
376 * process.
377 *
378 * @throws IOException if an error occurs
379 */
380 private void closeEntry() throws IOException {
381 if (closed) {
382 throw new IOException("The stream is closed");
383 }
384 if (current == null) {
385 return;
386 }
387
388 // Ensure all entry bytes are read
389 if (bytesReadFromStream <= current.getCompressedSize()
390 && !hasDataDescriptor) {
391 long remaining = current.getCompressedSize() - bytesReadFromStream;
392 while (remaining > 0) {
393 long n = in.read(buf, 0, (int) Math.min(buf.length, remaining));
394 if (n < 0) {
395 throw new EOFException(
396 "Truncated ZIP entry: " + current.getName());
397 } else {
398 count(n);
399 remaining -= n;
400 }
401 }
402 } else {
403 skip(Long.MAX_VALUE);
404
405 int inB;
406 if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) {
407 inB = inf.getTotalIn();
408 } else {
409 inB = readBytesOfEntry;
410 }
411 int diff = 0;
412
413 // Pushback any required bytes
414 if ((diff = bytesReadFromStream - inB) != 0) {
415 ((PushbackInputStream) in).unread(
416 buf, lengthOfLastRead - diff, diff);
417 pushedBackBytes(diff);
418 }
419 }
420
421 if (lastStoredEntry == null && hasDataDescriptor) {
422 readDataDescriptor();
423 }
424
425 inf.reset();
426 readBytesOfEntry = offsetInBuffer = bytesReadFromStream =
427 lengthOfLastRead = 0;
428 crc.reset();
429 current = null;
430 lastStoredEntry = null;
431 }
432
433 private void fill() throws IOException {
434 if (closed) {
435 throw new IOException("The stream is closed");
436 }
437 if ((lengthOfLastRead = in.read(buf)) > 0) {
438 count(lengthOfLastRead);
439 inf.setInput(buf, 0, lengthOfLastRead);
440 }
441 }
442
443 private void readFully(byte[] b) throws IOException {
444 int count = 0, x = 0;
445 while (count != b.length) {
446 count += x = in.read(b, count, b.length - count);
447 if (x == -1) {
448 throw new EOFException();
449 }
450 count(x);
451 }
452 }
453
454 private void readDataDescriptor() throws IOException {
455 byte[] b = new byte[WORD];
456 readFully(b);
457 ZipLong val = new ZipLong(b);
458 if (ZipLong.DD_SIG.equals(val)) {
459 // data descriptor with signature, skip sig
460 readFully(b);
461 val = new ZipLong(b);
462 }
463 current.setCrc(val.getValue());
464 readFully(b);
465 current.setCompressedSize(new ZipLong(b).getValue());
466 readFully(b);
467 current.setSize(new ZipLong(b).getValue());
468 }
469
470 /**
471 * Whether this entry requires a data descriptor this library can work with.
472 *
473 * @return true if allowStoredEntriesWithDataDescriptor is true,
474 * the entry doesn't require any data descriptor or the method is
475 * DEFLATED.
476 */
477 private boolean supportsDataDescriptorFor(ZipArchiveEntry entry) {
478 return allowStoredEntriesWithDataDescriptor ||
479 !entry.getGeneralPurposeBit().usesDataDescriptor()
480 || entry.getMethod() == ZipArchiveEntry.DEFLATED;
481 }
482
483 /**
484 * Caches a stored entry that uses the data descriptor.
485 *
486 * <ul>
487 * <li>Reads a stored entry until the signature of a local file
488 * header, central directory header or data descriptor has been
489 * found.</li>
490 * <li>Stores all entry data in lastStoredEntry.</p>
491 * <li>Rewinds the stream to position at the data
492 * descriptor.</li>
493 * <li>reads the data descriptor</li>
494 * </ul>
495 *
496 * <p>After calling this method the entry should know its size,
497 * the entry's data is cached and the stream is positioned at the
498 * next local file or central directory header.</p>
499 */
500 private void readStoredEntry() throws IOException {
501 ByteArrayOutputStream bos = new ByteArrayOutputStream();
502 byte[] LFH = ZipLong.LFH_SIG.getBytes();
503 byte[] CFH = ZipLong.CFH_SIG.getBytes();
504 byte[] DD = ZipLong.DD_SIG.getBytes();
505 int off = 0;
506 boolean done = false;
507
508 while (!done) {
509 int r = in.read(buf, off, ZipArchiveOutputStream.BUFFER_SIZE - off);
510 if (r <= 0) {
511 // read the whole archive without ever finding a
512 // central directory
513 throw new IOException("Truncated ZIP file");
514 }
515 if (r + off < 4) {
516 // buf is too small to check for a signature, loop
517 off += r;
518 continue;
519 }
520
521 int readTooMuch = 0;
522 for (int i = 0; !done && i < r - 4; i++) {
523 if (buf[i] == LFH[0] && buf[i + 1] == LFH[1]) {
524 if ((buf[i + 2] == LFH[2] && buf[i + 3] == LFH[3])
525 || (buf[i] == CFH[2] && buf[i + 3] == CFH[3])) {
526 // found a LFH or CFH:
527 readTooMuch = off + r - i - 12 /* dd without signature */;
528 done = true;
529 }
530 else if (buf[i + 2] == DD[2] && buf[i + 3] == DD[3]) {
531 // found DD:
532 readTooMuch = off + r - i;
533 done = true;
534 }
535 if (done) {
536 // * push back bytes read in excess as well as the data
537 // descriptor
538 // * copy the remaining bytes to cache
539 // * read data descriptor
540 ((PushbackInputStream) in).unread(buf, off + r - readTooMuch, readTooMuch);
541 bos.write(buf, 0, i);
542 readDataDescriptor();
543 }
544 }
545 }
546 if (!done) {
547 // worst case we've read a data descriptor without a
548 // signature (12 bytes) plus the first three bytes of
549 // a LFH or CFH signature
550 // save the last 15 bytes in the buffer, cache
551 // anything in front of that, read on
552 if (off + r > 15) {
553 bos.write(buf, 0, off + r - 15);
554 System.arraycopy(buf, off + r - 15, buf, 0, 15);
555 off = 15;
556 } else {
557 off += r;
558 }
559 }
560 }
561
562 byte[] b = bos.toByteArray();
563 lastStoredEntry = new ByteArrayInputStream(b);
564 }
565 }