001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 */
018
019 /*
020 * This package is based on the work done by Timothy Gerard Endres
021 * (time@ice.com) to whom the Ant project is very grateful for his great code.
022 */
023
024 package org.apache.commons.compress.archivers.tar;
025
026 import java.io.IOException;
027 import java.io.InputStream;
028 import java.io.InputStreamReader;
029 import java.io.Reader;
030 import java.util.HashMap;
031 import java.util.Map;
032 import java.util.Map.Entry;
033
034 import org.apache.commons.compress.archivers.ArchiveEntry;
035 import org.apache.commons.compress.archivers.ArchiveInputStream;
036 import org.apache.commons.compress.utils.ArchiveUtils;
037
/**
 * The TarArchiveInputStream reads a UNIX tar archive as an InputStream.
 * Methods are provided to position the stream at each successive entry in
 * the archive, and then to read each entry as a normal input stream
 * using read().
 * @NotThreadSafe
 */
045 public class TarArchiveInputStream extends ArchiveInputStream {
046 private static final int SMALL_BUFFER_SIZE = 256;
047 private static final int BUFFER_SIZE = 8 * 1024;
048
049 private boolean hasHitEOF;
050 private long entrySize;
051 private long entryOffset;
052 private byte[] readBuf;
053 protected final TarBuffer buffer;
054 private TarArchiveEntry currEntry;
055
056 /**
057 * Constructor for TarInputStream.
058 * @param is the input stream to use
059 */
060 public TarArchiveInputStream(InputStream is) {
061 this(is, TarBuffer.DEFAULT_BLKSIZE, TarBuffer.DEFAULT_RCDSIZE);
062 }
063
064 /**
065 * Constructor for TarInputStream.
066 * @param is the input stream to use
067 * @param blockSize the block size to use
068 */
069 public TarArchiveInputStream(InputStream is, int blockSize) {
070 this(is, blockSize, TarBuffer.DEFAULT_RCDSIZE);
071 }
072
073 /**
074 * Constructor for TarInputStream.
075 * @param is the input stream to use
076 * @param blockSize the block size to use
077 * @param recordSize the record size to use
078 */
079 public TarArchiveInputStream(InputStream is, int blockSize, int recordSize) {
080 this.buffer = new TarBuffer(is, blockSize, recordSize);
081 this.readBuf = null;
082 this.hasHitEOF = false;
083 }
084
085 /**
086 * Closes this stream. Calls the TarBuffer's close() method.
087 * @throws IOException on error
088 */
089 @Override
090 public void close() throws IOException {
091 buffer.close();
092 }
093
094 /**
095 * Get the record size being used by this stream's TarBuffer.
096 *
097 * @return The TarBuffer record size.
098 */
099 public int getRecordSize() {
100 return buffer.getRecordSize();
101 }
102
103 /**
104 * Get the available data that can be read from the current
105 * entry in the archive. This does not indicate how much data
106 * is left in the entire archive, only in the current entry.
107 * This value is determined from the entry's size header field
108 * and the amount of data already read from the current entry.
109 * Integer.MAX_VALUE is returen in case more than Integer.MAX_VALUE
110 * bytes are left in the current entry in the archive.
111 *
112 * @return The number of available bytes for the current entry.
113 * @throws IOException for signature
114 */
115 @Override
116 public int available() throws IOException {
117 if (entrySize - entryOffset > Integer.MAX_VALUE) {
118 return Integer.MAX_VALUE;
119 }
120 return (int) (entrySize - entryOffset);
121 }
122
123 /**
124 * Skip bytes in the input buffer. This skips bytes in the
125 * current entry's data, not the entire archive, and will
126 * stop at the end of the current entry's data if the number
127 * to skip extends beyond that point.
128 *
129 * @param numToSkip The number of bytes to skip.
130 * @return the number actually skipped
131 * @throws IOException on error
132 */
133 @Override
134 public long skip(long numToSkip) throws IOException {
135 // REVIEW
136 // This is horribly inefficient, but it ensures that we
137 // properly skip over bytes via the TarBuffer...
138 //
139 byte[] skipBuf = new byte[BUFFER_SIZE];
140 long skip = numToSkip;
141 while (skip > 0) {
142 int realSkip = (int) (skip > skipBuf.length ? skipBuf.length : skip);
143 int numRead = read(skipBuf, 0, realSkip);
144 if (numRead == -1) {
145 break;
146 }
147 skip -= numRead;
148 }
149 return (numToSkip - skip);
150 }
151
152 /**
153 * Since we do not support marking just yet, we do nothing.
154 */
155 @Override
156 public synchronized void reset() {
157 }
158
159 /**
160 * Get the next entry in this tar archive. This will skip
161 * over any remaining data in the current entry, if there
162 * is one, and place the input stream at the header of the
163 * next entry, and read the header and instantiate a new
164 * TarEntry from the header bytes and return that entry.
165 * If there are no more entries in the archive, null will
166 * be returned to indicate that the end of the archive has
167 * been reached.
168 *
169 * @return The next TarEntry in the archive, or null.
170 * @throws IOException on error
171 */
172 public TarArchiveEntry getNextTarEntry() throws IOException {
173 if (hasHitEOF) {
174 return null;
175 }
176
177 if (currEntry != null) {
178 long numToSkip = entrySize - entryOffset;
179
180 while (numToSkip > 0) {
181 long skipped = skip(numToSkip);
182 if (skipped <= 0) {
183 throw new RuntimeException("failed to skip current tar entry");
184 }
185 numToSkip -= skipped;
186 }
187
188 readBuf = null;
189 }
190
191 byte[] headerBuf = getRecord();
192
193 if (hasHitEOF) {
194 currEntry = null;
195 return null;
196 }
197
198 currEntry = new TarArchiveEntry(headerBuf);
199 entryOffset = 0;
200 entrySize = currEntry.getSize();
201
202 if (currEntry.isGNULongNameEntry()) {
203 // read in the name
204 StringBuffer longName = new StringBuffer();
205 byte[] buf = new byte[SMALL_BUFFER_SIZE];
206 int length = 0;
207 while ((length = read(buf)) >= 0) {
208 longName.append(new String(buf, 0, length));
209 }
210 getNextEntry();
211 if (currEntry == null) {
212 // Bugzilla: 40334
213 // Malformed tar file - long entry name not followed by entry
214 return null;
215 }
216 // remove trailing null terminator
217 if (longName.length() > 0
218 && longName.charAt(longName.length() - 1) == 0) {
219 longName.deleteCharAt(longName.length() - 1);
220 }
221 currEntry.setName(longName.toString());
222 }
223
224 if (currEntry.isPaxHeader()){ // Process Pax headers
225 paxHeaders();
226 }
227
228 if (currEntry.isGNUSparse()){ // Process sparse files
229 readGNUSparse();
230 }
231
232 return currEntry;
233 }
234
235 /**
236 * Get the next record in this tar archive. This will skip
237 * over any remaining data in the current entry, if there
238 * is one, and place the input stream at the header of the
239 * next entry.
240 * If there are no more entries in the archive, null will
241 * be returned to indicate that the end of the archive has
242 * been reached.
243 *
244 * @return The next header in the archive, or null.
245 * @throws IOException on error
246 */
247 private byte[] getRecord() throws IOException {
248 if (hasHitEOF) {
249 return null;
250 }
251
252 byte[] headerBuf = buffer.readRecord();
253
254 if (headerBuf == null) {
255 hasHitEOF = true;
256 } else if (buffer.isEOFRecord(headerBuf)) {
257 hasHitEOF = true;
258 }
259
260 return hasHitEOF ? null : headerBuf;
261 }
262
263 private void paxHeaders() throws IOException{
264 Reader br = new InputStreamReader(this, "UTF-8") {
265 @Override
266 public void close() {
267 // make sure GC doesn't close "this" before we are done
268 }
269 };
270 Map<String, String> headers = new HashMap<String, String>();
271 // Format is "length keyword=value\n";
272 try {
273 while(true){ // get length
274 int ch;
275 int len = 0;
276 int read = 0;
277 while((ch = br.read()) != -1){
278 read++;
279 if (ch == ' '){ // End of length string
280 // Get keyword
281 StringBuffer sb = new StringBuffer();
282 while((ch = br.read()) != -1){
283 read++;
284 if (ch == '='){ // end of keyword
285 String keyword = sb.toString();
286 // Get rest of entry
287 char[] cbuf = new char[len-read];
288 int got = br.read(cbuf);
289 if (got != len - read){
290 throw new IOException("Failed to read "
291 + "Paxheader. Expected "
292 + (len - read)
293 + " chars, read "
294 + got);
295 }
296 // Drop trailing NL
297 String value = new String(cbuf, 0,
298 len - read - 1);
299 headers.put(keyword, value);
300 break;
301 }
302 sb.append((char) ch);
303 }
304 break; // Processed single header
305 }
306 len *= 10;
307 len += ch - '0';
308 }
309 if (ch == -1){ // EOF
310 break;
311 }
312 }
313 } finally {
314 // NO-OP but makes FindBugs happy
315 br.close();
316 }
317
318 getNextEntry(); // Get the actual file entry
319 /*
320 * The following headers are defined for Pax.
321 * atime, ctime, mtime, charset: cannot use these without changing TarArchiveEntry fields
322 * comment
323 * gid, gname
324 * linkpath
325 * size
326 * uid,uname
327 */
328 for (Entry<String, String> ent : headers.entrySet()){
329 String key = ent.getKey();
330 String val = ent.getValue();
331 if ("path".equals(key)){
332 currEntry.setName(val);
333 } else if ("linkpath".equals(key)){
334 currEntry.setLinkName(val);
335 } else if ("gid".equals(key)){
336 currEntry.setGroupId(Integer.parseInt(val));
337 } else if ("gname".equals(key)){
338 currEntry.setGroupName(val);
339 } else if ("uid".equals(key)){
340 currEntry.setUserId(Integer.parseInt(val));
341 } else if ("uname".equals(key)){
342 currEntry.setUserName(val);
343 } else if ("size".equals(key)){
344 currEntry.setSize(Long.parseLong(val));
345 }
346 }
347 }
348
349 /**
350 * Adds the sparse chunks from the current entry to the sparse chunks,
351 * including any additional sparse entries following the current entry.
352 *
353 * @throws IOException on error
354 *
355 * @todo Sparse files get not yet really processed.
356 */
357 private void readGNUSparse() throws IOException {
358 /* we do not really process sparse files yet
359 sparses = new ArrayList();
360 sparses.addAll(currEntry.getSparses());
361 */
362 if (currEntry.isExtended()) {
363 TarArchiveSparseEntry entry;
364 do {
365 byte[] headerBuf = getRecord();
366 if (hasHitEOF) {
367 currEntry = null;
368 break;
369 }
370 entry = new TarArchiveSparseEntry(headerBuf);
371 /* we do not really process sparse files yet
372 sparses.addAll(entry.getSparses());
373 */
374 } while (entry.isExtended());
375 }
376 }
377
378 @Override
379 public ArchiveEntry getNextEntry() throws IOException {
380 return getNextTarEntry();
381 }
382
383 /**
384 * Reads bytes from the current tar archive entry.
385 *
386 * This method is aware of the boundaries of the current
387 * entry in the archive and will deal with them as if they
388 * were this stream's start and EOF.
389 *
390 * @param buf The buffer into which to place bytes read.
391 * @param offset The offset at which to place bytes read.
392 * @param numToRead The number of bytes to read.
393 * @return The number of bytes read, or -1 at EOF.
394 * @throws IOException on error
395 */
396 @Override
397 public int read(byte[] buf, int offset, int numToRead) throws IOException {
398 int totalRead = 0;
399
400 if (entryOffset >= entrySize) {
401 return -1;
402 }
403
404 if ((numToRead + entryOffset) > entrySize) {
405 numToRead = (int) (entrySize - entryOffset);
406 }
407
408 if (readBuf != null) {
409 int sz = (numToRead > readBuf.length) ? readBuf.length
410 : numToRead;
411
412 System.arraycopy(readBuf, 0, buf, offset, sz);
413
414 if (sz >= readBuf.length) {
415 readBuf = null;
416 } else {
417 int newLen = readBuf.length - sz;
418 byte[] newBuf = new byte[newLen];
419
420 System.arraycopy(readBuf, sz, newBuf, 0, newLen);
421
422 readBuf = newBuf;
423 }
424
425 totalRead += sz;
426 numToRead -= sz;
427 offset += sz;
428 }
429
430 while (numToRead > 0) {
431 byte[] rec = buffer.readRecord();
432
433 if (rec == null) {
434 // Unexpected EOF!
435 throw new IOException("unexpected EOF with " + numToRead
436 + " bytes unread. Occured at byte: " + getBytesRead());
437 }
438 count(rec.length);
439 int sz = numToRead;
440 int recLen = rec.length;
441
442 if (recLen > sz) {
443 System.arraycopy(rec, 0, buf, offset, sz);
444
445 readBuf = new byte[recLen - sz];
446
447 System.arraycopy(rec, sz, readBuf, 0, recLen - sz);
448 } else {
449 sz = recLen;
450
451 System.arraycopy(rec, 0, buf, offset, recLen);
452 }
453
454 totalRead += sz;
455 numToRead -= sz;
456 offset += sz;
457 }
458
459 entryOffset += totalRead;
460
461 return totalRead;
462 }
463
464 /**
465 * Whether this class is able to read the given entry.
466 *
467 * <p>May return false if the current entry is a sparse file.</p>
468 */
469 @Override
470 public boolean canReadEntryData(ArchiveEntry ae) {
471 if (ae instanceof TarArchiveEntry) {
472 TarArchiveEntry te = (TarArchiveEntry) ae;
473 return !te.isGNUSparse();
474 }
475 return false;
476 }
477
478 protected final TarArchiveEntry getCurrentEntry() {
479 return currEntry;
480 }
481
482 protected final void setCurrentEntry(TarArchiveEntry e) {
483 currEntry = e;
484 }
485
486 protected final boolean isAtEOF() {
487 return hasHitEOF;
488 }
489
490 protected final void setAtEOF(boolean b) {
491 hasHitEOF = b;
492 }
493
494 /**
495 * Checks if the signature matches what is expected for a tar file.
496 *
497 * @param signature
498 * the bytes to check
499 * @param length
500 * the number of bytes to check
501 * @return true, if this stream is a tar archive stream, false otherwise
502 */
503 public static boolean matches(byte[] signature, int length) {
504 if (length < TarConstants.VERSION_OFFSET+TarConstants.VERSIONLEN) {
505 return false;
506 }
507
508 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_POSIX,
509 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
510 &&
511 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_POSIX,
512 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
513 ){
514 return true;
515 }
516 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_GNU,
517 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
518 &&
519 (
520 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_SPACE,
521 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
522 ||
523 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_GNU_ZERO,
524 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
525 )
526 ){
527 return true;
528 }
529 // COMPRESS-107 - recognise Ant tar files
530 if (ArchiveUtils.matchAsciiBuffer(TarConstants.MAGIC_ANT,
531 signature, TarConstants.MAGIC_OFFSET, TarConstants.MAGICLEN)
532 &&
533 ArchiveUtils.matchAsciiBuffer(TarConstants.VERSION_ANT,
534 signature, TarConstants.VERSION_OFFSET, TarConstants.VERSIONLEN)
535 ){
536 return true;
537 }
538 return false;
539 }
540
541 }