Skip to content

Commit 3bcf756

Browse files
committed
Merge pull request iipc#14 from ukwa/uncompressed-arcs-issue-13
Uncompressed arcs - issue 13
2 parents 399c58e + b38aae4 commit 3bcf756

3 files changed

Lines changed: 1067 additions & 3 deletions

File tree

src/main/java/org/archive/io/arc/ARCReaderFactory.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,11 +147,11 @@ protected ArchiveReader getArchiveReader(final String arc,
147147
possiblyWrapped.mark(100);
148148
boolean compressed = testCompressedARCStream(possiblyWrapped);
149149
possiblyWrapped.reset();
150-
150+
151151
if (compressed) {
152152
return new CompressedARCReader(arc, possiblyWrapped, atFirstRecord);
153153
} else {
154-
return new UncompressedARCReader(arc, possiblyWrapped);
154+
return new UncompressedARCReader(arc, possiblyWrapped, atFirstRecord);
155155
}
156156
}
157157

@@ -330,10 +330,11 @@ public UncompressedARCReader(final File f, final long offset)
330330
* @param f Uncompressed arc to read.
331331
* @param is InputStream.
332332
*/
333-
public UncompressedARCReader(final String f, final InputStream is) {
333+
public UncompressedARCReader(final String f, final InputStream is, boolean atFirstRecord) {
334334
// Arc file has been tested for existence by time it has come
335335
// to here.
336336
setIn(new CountingInputStream(is));
337+
setAlignedOnFirstRecord(atFirstRecord);
337338
initialize(f);
338339
}
339340
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package org.archive.io.arc;
2+
3+
import java.io.File;
4+
import java.io.FileInputStream;
5+
import java.io.FileNotFoundException;
6+
import java.io.InputStream;
7+
import java.io.RandomAccessFile;
8+
9+
import org.archive.io.ArchiveReader;
10+
import org.archive.io.ArchiveRecord;
11+
12+
import junit.framework.TestCase;
13+
14+
/**
15+
*
16+
* Based on https://github.com/iipc/openwayback/pull/104/files
17+
*
18+
* @author csr@statsbiblioteket.dk (Colin Rosenthal)
19+
*
20+
*/
21+
public class ARCReaderFactoryTest extends TestCase {
22+
23+
private File testfile1 = new File("src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc");
24+
25+
/**
26+
* Test reading uncompressed arcfile for issue
27+
* https://github.com/iipc/openwayback/issues/101
28+
* @throws Exception
29+
*/
30+
public void testGetResource() throws Exception {
31+
this.offsetResourceTest(testfile1, 1515, "http://www.archive.org/robots.txt" );
32+
this.offsetResourceTest(testfile1, 36420, "http://www.archive.org/services/collection-rss.php" );
33+
}
34+
35+
private void offsetResourceTest( File testfile, long offset, String uri ) throws Exception {
36+
RandomAccessFile raf = new RandomAccessFile(testfile, "r");
37+
raf.seek(offset);
38+
InputStream is = new FileInputStream(raf.getFD());
39+
String fPath = testfile.getAbsolutePath();
40+
ArchiveReader reader = ARCReaderFactory.get(fPath, is, false);
41+
// This one works:
42+
//ArchiveReader reader = ARCReaderFactory.get(testfile, offset);
43+
ArchiveRecord record = reader.get();
44+
45+
final String url = record.getHeader().getUrl();
46+
assertEquals("URL of record is not as expected.", uri, url);
47+
48+
final long position = record.getPosition();
49+
final long recordLength = record.getHeader().getLength();
50+
assertTrue("Position " + position + " is after end of record " + recordLength, position <= recordLength);
51+
52+
// Clean up:
53+
if( raf != null )
54+
raf.close();
55+
}
56+
57+
}

0 commit comments

Comments
 (0)