001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019 package org.apache.commons.compress.archivers.ar;
020
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.regex.Pattern;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
028
029 /**
030 * Implements the "ar" archive format as an input stream.
031 *
032 * @NotThreadSafe
033 *
034 */
035 public class ArArchiveInputStream extends ArchiveInputStream {
036
// The underlying stream the archive is read from.
private final InputStream input;
// Number of bytes consumed so far through read(byte[], int, int).
private long offset = 0;
// Set by close() so that the underlying stream is closed only once.
private boolean closed;

/*
 * If getNextEntry has been called, the entry metadata is stored in
 * currentEntry.
 */
private ArArchiveEntry currentEntry = null;

// Storage area for extra long names (GNU ar)
private byte[] namebuffer = null;

/*
 * The offset where the current entry started. -1 if no entry has been
 * read yet.
 */
private long entryOffset = -1;
055
/**
 * Creates an ar reader on top of the given stream.
 *
 * @param pInput
 *            the stream the ar archive is read from
 */
public ArArchiveInputStream(final InputStream pInput) {
    this.closed = false;
    this.input = pInput;
}
066
067 /**
068 * Returns the next AR entry in this stream.
069 *
070 * @return the next AR entry.
071 * @throws IOException
072 * if the entry could not be read
073 */
074 public ArArchiveEntry getNextArEntry() throws IOException {
075 if (currentEntry != null) {
076 final long entryEnd = entryOffset + currentEntry.getLength();
077 while (offset < entryEnd) {
078 int x = read();
079 if (x == -1) {
080 // hit EOF before previous entry was complete
081 // TODO: throw an exception instead?
082 return null;
083 }
084 }
085 currentEntry = null;
086 }
087
088 if (offset == 0) {
089 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
090 final byte[] realized = new byte[expected.length];
091 final int read = read(realized);
092 if (read != expected.length) {
093 throw new IOException("failed to read header. Occured at byte: " + getBytesRead());
094 }
095 for (int i = 0; i < expected.length; i++) {
096 if (expected[i] != realized[i]) {
097 throw new IOException("invalid header " + ArchiveUtils.toAsciiString(realized));
098 }
099 }
100 }
101
102 if (offset % 2 != 0 && read() < 0) {
103 // hit eof
104 return null;
105 }
106
107 if (input.available() == 0) {
108 return null;
109 }
110
111 final byte[] name = new byte[16];
112 final byte[] lastmodified = new byte[12];
113 final byte[] userid = new byte[6];
114 final byte[] groupid = new byte[6];
115 final byte[] filemode = new byte[8];
116 final byte[] length = new byte[10];
117
118 read(name);
119 read(lastmodified);
120 read(userid);
121 read(groupid);
122 read(filemode);
123 read(length);
124
125 {
126 final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
127 final byte[] realized = new byte[expected.length];
128 final int read = read(realized);
129 if (read != expected.length) {
130 throw new IOException("failed to read entry trailer. Occured at byte: " + getBytesRead());
131 }
132 for (int i = 0; i < expected.length; i++) {
133 if (expected[i] != realized[i]) {
134 throw new IOException("invalid entry trailer. not read the content? Occured at byte: " + getBytesRead());
135 }
136 }
137 }
138
139 entryOffset = offset;
140
141 // GNU ar uses a '/' to mark the end of the filename; this allows for the use of spaces without the use of an extended filename.
142
143 // entry name is stored as ASCII string
144 String temp = ArchiveUtils.toAsciiString(name).trim();
145 long len = asLong(length);
146
147 if (isGNUStringTable(temp)) { // GNU extended filenames entry
148 currentEntry = readGNUStringTable(length);
149 return getNextArEntry();
150 } else if (temp.endsWith("/")) { // GNU terminator
151 temp = temp.substring(0, temp.length() - 1);
152 } else if (isGNULongName(temp)) {
153 int offset = Integer.parseInt(temp.substring(1));// get the offset
154 temp = getExtendedName(offset); // convert to the long name
155 } else if (isBSDLongName(temp)) {
156 temp = getBSDLongName(temp);
157 // entry length contained the length of the file name in
158 // addition to the real length of the entry.
159 // assume file name was ASCII, there is no "standard" otherwise
160 int nameLen = temp.length();
161 len -= nameLen;
162 entryOffset += nameLen;
163 }
164
165 currentEntry = new ArArchiveEntry(temp, len, asInt(userid, true),
166 asInt(groupid, true), asInt(filemode, 8),
167 asLong(lastmodified));
168 return currentEntry;
169 }
170
171 /**
172 * Get an extended name from the GNU extended name buffer.
173 *
174 * @param offset pointer to entry within the buffer
175 * @return the extended file name; without trailing "/" if present.
176 * @throws IOException if name not found or buffer not set up
177 */
178 private String getExtendedName(int offset) throws IOException{
179 if (namebuffer == null) {
180 throw new IOException("Cannot process GNU long filename as no // record was found");
181 }
182 for(int i=offset; i < namebuffer.length; i++){
183 if (namebuffer[i]=='\012'){
184 if (namebuffer[i-1]=='/') {
185 i--; // drop trailing /
186 }
187 return ArchiveUtils.toAsciiString(namebuffer, offset, i-offset);
188 }
189 }
190 throw new IOException("Failed to read entry: "+offset);
191 }
192 private long asLong(byte[] input) {
193 return Long.parseLong(new String(input).trim());
194 }
195
196 private int asInt(byte[] input) {
197 return asInt(input, 10, false);
198 }
199
200 private int asInt(byte[] input, boolean treatBlankAsZero) {
201 return asInt(input, 10, treatBlankAsZero);
202 }
203
204 private int asInt(byte[] input, int base) {
205 return asInt(input, base, false);
206 }
207
208 private int asInt(byte[] input, int base, boolean treatBlankAsZero) {
209 String string = new String(input).trim();
210 if (string.length() == 0 && treatBlankAsZero) {
211 return 0;
212 }
213 return Integer.parseInt(string, base);
214 }
215
216 /*
217 * (non-Javadoc)
218 *
219 * @see
220 * org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
221 */
222 @Override
223 public ArchiveEntry getNextEntry() throws IOException {
224 return getNextArEntry();
225 }
226
227 /*
228 * (non-Javadoc)
229 *
230 * @see java.io.InputStream#close()
231 */
232 @Override
233 public void close() throws IOException {
234 if (!closed) {
235 closed = true;
236 input.close();
237 }
238 currentEntry = null;
239 }
240
241 /*
242 * (non-Javadoc)
243 *
244 * @see java.io.InputStream#read(byte[], int, int)
245 */
246 @Override
247 public int read(byte[] b, final int off, final int len) throws IOException {
248 int toRead = len;
249 if (currentEntry != null) {
250 final long entryEnd = entryOffset + currentEntry.getLength();
251 if (len > 0 && entryEnd > offset) {
252 toRead = (int) Math.min(len, entryEnd - offset);
253 } else {
254 return -1;
255 }
256 }
257 final int ret = this.input.read(b, off, toRead);
258 count(ret);
259 offset += (ret > 0 ? ret : 0);
260 return ret;
261 }
262
263 /**
264 * Checks if the signature matches ASCII "!<arch>" followed by a single LF
265 * control character
266 *
267 * @param signature
268 * the bytes to check
269 * @param length
270 * the number of bytes to check
271 * @return true, if this stream is an Ar archive stream, false otherwise
272 */
273 public static boolean matches(byte[] signature, int length) {
274 // 3c21 7261 6863 0a3e
275
276 if (length < 8) {
277 return false;
278 }
279 if (signature[0] != 0x21) {
280 return false;
281 }
282 if (signature[1] != 0x3c) {
283 return false;
284 }
285 if (signature[2] != 0x61) {
286 return false;
287 }
288 if (signature[3] != 0x72) {
289 return false;
290 }
291 if (signature[4] != 0x63) {
292 return false;
293 }
294 if (signature[5] != 0x68) {
295 return false;
296 }
297 if (signature[6] != 0x3e) {
298 return false;
299 }
300 if (signature[7] != 0x0a) {
301 return false;
302 }
303
304 return true;
305 }
306
307 static final String BSD_LONGNAME_PREFIX = "#1/";
308 private static final int BSD_LONGNAME_PREFIX_LEN =
309 BSD_LONGNAME_PREFIX.length();
310 private static final String BSD_LONGNAME_PATTERN =
311 "^" + BSD_LONGNAME_PREFIX + "\\d+";
312
313 /**
314 * Does the name look like it is a long name (or a name containing
315 * spaces) as encoded by BSD ar?
316 *
317 * <p>From the FreeBSD ar(5) man page:</p>
318 * <pre>
319 * BSD In the BSD variant, names that are shorter than 16
320 * characters and without embedded spaces are stored
321 * directly in this field. If a name has an embedded
322 * space, or if it is longer than 16 characters, then
323 * the string "#1/" followed by the decimal represen-
324 * tation of the length of the file name is placed in
325 * this field. The actual file name is stored immedi-
326 * ately after the archive header. The content of the
327 * archive member follows the file name. The ar_size
328 * field of the header (see below) will then hold the
329 * sum of the size of the file name and the size of
330 * the member.
331 * </pre>
332 *
333 * @since Apache Commons Compress 1.3
334 */
335 private static boolean isBSDLongName(String name) {
336 return name != null && name.matches(BSD_LONGNAME_PATTERN);
337 }
338
339 /**
340 * Reads the real name from the current stream assuming the very
341 * first bytes to be read are the real file name.
342 *
343 * @see #isBSDLongName
344 *
345 * @since Apache Commons Compress 1.3
346 */
347 private String getBSDLongName(String bsdLongName) throws IOException {
348 int nameLen =
349 Integer.parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN));
350 byte[] name = new byte[nameLen];
351 int read = 0, readNow = 0;
352 while ((readNow = input.read(name, read, nameLen - read)) >= 0) {
353 read += readNow;
354 count(readNow);
355 if (read == nameLen) {
356 break;
357 }
358 }
359 if (read != nameLen) {
360 throw new EOFException();
361 }
362 return ArchiveUtils.toAsciiString(name);
363 }
364
365 private static final String GNU_STRING_TABLE_NAME = "//";
366
367 /**
368 * Is this the name of the "Archive String Table" as used by
369 * SVR4/GNU to store long file names?
370 *
371 * <p>GNU ar stores multiple extended filenames in the data section
372 * of a file with the name "//", this record is referred to by
373 * future headers.</p>
374 *
375 * <p>A header references an extended filename by storing a "/"
376 * followed by a decimal offset to the start of the filename in
377 * the extended filename data section.</p>
378 *
379 * <p>The format of the "//" file itself is simply a list of the
380 * long filenames, each separated by one or more LF
381 * characters. Note that the decimal offsets are number of
382 * characters, not line or string number within the "//" file.</p>
383 */
384 private static boolean isGNUStringTable(String name) {
385 return GNU_STRING_TABLE_NAME.equals(name);
386 }
387
388 /**
389 * Reads the GNU archive String Table.
390 *
391 * @see #isGNUStringTable
392 */
393 private ArArchiveEntry readGNUStringTable(byte[] length) throws IOException {
394 int bufflen = asInt(length); // Assume length will fit in an int
395 namebuffer = new byte[bufflen];
396 int read = read(namebuffer, 0, bufflen);
397 if (read != bufflen){
398 throw new IOException("Failed to read complete // record: expected="
399 + bufflen + " read=" + read);
400 }
401 return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
402 }
403
404 private static final String GNU_LONGNAME_PATTERN = "^/\\d+";
405
406 /**
407 * Does the name look like it is a long name (or a name containing
408 * spaces) as encoded by SVR4/GNU ar?
409 *
410 * @see #isGNUStringTable
411 */
412 private boolean isGNULongName(String name) {
413 return name != null && name.matches(GNU_LONGNAME_PATTERN);
414 }
415 }