/*
 * Review commentary for this patch. It sits ahead of the first "diff --git"
 * header and is not part of any hunk; the patch data below is unchanged.
 *
 * NOTE(review): in this copy each hunk sits on one physical line although the
 * hunk headers declare multi-line ranges (for example -98,12 +98,17). The line
 * breaks were presumably lost in extraction and need to be restored before the
 * patch is applied.
 *
 * What the patch changes:
 *
 * GZIPFExtraRecord.read
 *   Gains an int maxRead parameter. After reading the name and the value
 *   length, it compares valLen with
 *   maxRead - BYTES_IN_SHORT - GZIP_FEXTRA_NAME_BYTES. When valLen is larger,
 *   it reads that many leftover bytes and throws
 *   GZIPFormatException.GZIPExtraFieldShortException(maxRead).
 *   NOTE(review): the tmpVal assigned in that branch is discarded by the throw;
 *   the read appears to exist only to consume the leftover bytes.
 *   NOTE(review): if maxRead is smaller than
 *   BYTES_IN_SHORT + GZIP_FEXTRA_NAME_BYTES, the name and length have already
 *   been read before the check, and the count handed to ByteOp.readNBytes is
 *   negative. How readNBytes treats a negative count is not visible here;
 *   confirm, or guard before reading.
 *
 * GZIPFExtraRecords.readRecords
 *   Passes bytesRemaining as maxRead. A record is added to tmpList only when
 *   read returns normally. On GZIPExtraFieldShortException it subtracts
 *   ex.bytesRead, which is the maxRead value that was passed in, so
 *   bytesRemaining becomes 0 and the while loop ends. The existing
 *   "Invalid FExtra length/records" check on bytesRemaining < 0 still runs
 *   after both paths.
 *
 * GZIPFormatException.GZIPExtraFieldShortException
 *   New static nested subclass with the fixed message "Extra Field short." and
 *   a package-visible int field bytesRead set from the constructor argument.
 *
 * GZIPMemberSeriesTest.testWgetProblem
 *   Opens the class-path resource IAH-urls-wget.warc.gz and calls
 *   new GZIPDecoder().parseHeader(is).
 *   NOTE(review): the test makes no assertions and never closes the stream.
 *   Class.getResourceAsStream returns null when the resource is missing, and
 *   that case is not checked.
 *   NOTE(review): FileNotFoundException is a subclass of IOException and
 *   IndexOutOfBoundsException is unchecked, so "throws IOException" alone
 *   would be equivalent.
 *
 * IAH-urls-wget.warc.gz
 *   New binary test resource. Only the "Binary files ... differ" marker is
 *   present, so its content cannot be recreated from this text.
 */
diff --git a/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java b/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java index a4ed6260..0a9a82e0 100644 --- a/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java +++ b/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java @@ -98,12 +98,17 @@ public void writeTo(OutputStream os) throws IOException { os.write(value); } } - public int read(InputStream is) throws IOException { + public int read(InputStream is, int maxRead) throws IOException { byte tmpName[] = null; byte tmpVal[] = null; int valLen = 0; tmpName = ByteOp.readNBytes(is, GZIP_FEXTRA_NAME_BYTES); valLen = ByteOp.readShort(is); + if (valLen > (maxRead - BYTES_IN_SHORT - GZIP_FEXTRA_NAME_BYTES)) { + /* read in what's left, but throw an exception */ + tmpVal = ByteOp.readNBytes(is, maxRead - BYTES_IN_SHORT - GZIP_FEXTRA_NAME_BYTES); + throw new GZIPFormatException.GZIPExtraFieldShortException(maxRead); + } if(valLen > 0) { tmpVal = ByteOp.readNBytes(is, valLen); } diff --git a/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java b/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java index 7dc0de44..e5920552 100755 --- a/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java +++ b/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java @@ -53,12 +53,17 @@ public void readRecords(InputStream is) ArrayList tmpList = new ArrayList(); while(bytesRemaining > 0) { GZIPFExtraRecord tmpRecord = new GZIPFExtraRecord(); - int bytesRead = tmpRecord.read(is); - bytesRemaining -= bytesRead; + try { + int bytesRead = tmpRecord.read(is, bytesRemaining); + bytesRemaining -= bytesRead; + tmpList.add(tmpRecord); + } catch (GZIPFormatException.GZIPExtraFieldShortException ex) { + /* not enough bytes for the extra field; move on */ + bytesRemaining -= ex.bytesRead; + } if(bytesRemaining < 0) { throw new GZIPFormatException("Invalid FExtra length/records"); } - tmpList.add(tmpRecord); } this.addAll(tmpList); } diff --git 
a/src/main/java/org/archive/format/gzip/GZIPFormatException.java b/src/main/java/org/archive/format/gzip/GZIPFormatException.java index ca627a88..3916dafa 100644 --- a/src/main/java/org/archive/format/gzip/GZIPFormatException.java +++ b/src/main/java/org/archive/format/gzip/GZIPFormatException.java @@ -21,4 +21,11 @@ public GZIPFormatException(Exception e) { public GZIPFormatException(String message, IOException e) { super(message,e); } + public static class GZIPExtraFieldShortException extends GZIPFormatException { + int bytesRead; + public GZIPExtraFieldShortException(int bytesRead) { + super("Extra Field short."); + this.bytesRead = bytesRead; + } + } } diff --git a/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java b/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java index 95c7e96f..2eec46ec 100644 --- a/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java +++ b/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java @@ -374,6 +374,9 @@ public void testAutoSkip() throws IOException { assertNull(m); assertTrue(s.gotEOF()); } - + public void testWgetProblem() throws IndexOutOfBoundsException, FileNotFoundException, IOException { + InputStream is = getClass().getResourceAsStream("IAH-urls-wget.warc.gz"); + new GZIPDecoder().parseHeader(is); + } } diff --git a/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz b/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz new file mode 100644 index 00000000..fa248f8d Binary files /dev/null and b/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz differ