Skip to content

Commit 6f5c607

Browse files
committed
Bugfix: The lax parser for the HTTP status line rejected any candidate line containing ":", but ":" is legal in a status line according to RFC 2616, section 6
Test: Extended the unit test to also check the number of records from which a status code was extracted
1 parent 37795b5 commit 6f5c607

2 files changed

Lines changed: 11 additions & 5 deletions

File tree

src/main/java/org/archive/io/arc/ARCRecord.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -596,11 +596,12 @@ private InputStream readHttpHeader() throws IOException {
596596

597597
// If it's actually the status line, break, otherwise continue skipping any
598598
// previous header values
599-
if (!statusLine.contains(":") && StatusLine.startsWithHTTP(statusLine)) {
599+
// Old code contained {@code !statusLine.contains(":")}, which conflicts with RFC2616-sec6
600+
if (StatusLine.startsWithHTTP(statusLine)) {
600601
break;
601602
}
602603

603-
if (statusLine.replace("\r", "").isEmpty()) { // No more headerlines
604+
if (statusLine.replace("\r", "").isEmpty()) { // No more header lines
604605
break;
605606
}
606607

src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,13 @@ private void offsetResourceTest( File testfile, long offset, String uri ) throws
5757
}
5858

5959
public void testBaseSampleARC() throws IOException {
60-
testARCReaderIteration(testfile1, 9);
60+
testARCReaderIteration(testfile1, 9, 7);
6161
}
6262
/*
6363
This failed with the old http-header parsing code in {@code ARCRecord#readHttpHeader}.
6464
*/
6565
public void testNewlinedSampleARC() throws IOException {
66-
testARCReaderIteration(testfile_nl, 4);
66+
testARCReaderIteration(testfile_nl, 4, 3); // Status has 2*200 & 1*404
6767
}
6868

6969
// Independent of the ARCReader code
@@ -88,10 +88,14 @@ public void testBaseSampleARCContentLength() throws IOException {
8888
// }
8989

9090
// Uncomment println for manual inspection of first content line
91-
private void testARCReaderIteration(File arc, int expectedRecords) throws IOException {
91+
private void testARCReaderIteration(File arc, int expectedRecords, int hasStatus) throws IOException {
9292
ARCReader reader = ARCReaderFactory.get(arc);
9393
int recordCount = 0;
94+
int okCount = 0;
9495
for (ArchiveRecord record : reader) {
96+
if (((ARCRecord)record).getStatusCode() != -1) {
97+
okCount++;
98+
}
9599
SubInputStream sub = new SubInputStream(record);
96100
sub.skip(record.getHeader().getContentBegin());
97101
//System.out.println(record.getPosition() + "> " + sub.readLine());
@@ -100,6 +104,7 @@ private void testARCReaderIteration(File arc, int expectedRecords) throws IOExce
100104
}
101105
reader.close();
102106
assertEquals("There should be the right number of records in " + arc, expectedRecords, recordCount);
107+
assertEquals("There should be the right number of status 200 records in " + arc, hasStatus, okCount);
103108
}
104109

105110
private static File getResource(String resource) {

0 commit comments

Comments
 (0)