Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions src/main/java/org/archive/io/warc/WARCReaderFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,20 @@ public static ArchiveReader get(final String s, final InputStream is,
atFirstRecord);
}

/**
 * Creates the WARC reader for the given stream, choosing between the
 * compressed and uncompressed implementation based on the name's extension.
 *
 * <p>Note that the ARC companion does this differently, with quite a lot of
 * duplication.
 *
 * @param f name of the archive; a name ending in {@code .gz} selects the
 *        compressed reader, anything else the uncompressed reader
 * @param is stream to read the archive content from
 * @param atFirstRecord passed through to the compressed reader only; the
 *        uncompressed reader is constructed without it
 * @return a reader over {@code is}
 * @throws IOException declared for failures while setting up the reader
 * @see org.archive.io.arc.ARCReaderFactory#getArchiveReader(String, InputStream, boolean)
 */
protected ArchiveReader getArchiveReader(final String f,
        final InputStream is, final boolean atFirstRecord)
        throws IOException {
    // Check if it's compressed, based on file extension. No unconditional
    // return may precede this check, or the uncompressed branch becomes
    // unreachable.
    if (f.endsWith(".gz")) {
        return new CompressedWARCReader(f, is, atFirstRecord);
    }
    return new UncompressedWARCReader(f, is);
}

public static WARCReader get(final URL arcUrl, final long offset)
Expand Down
34 changes: 34 additions & 0 deletions src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.archive.io.warc;

import java.io.FileInputStream;
import java.io.IOException;

import org.archive.format.warc.WARCConstants;
import org.archive.format.warc.WARCConstants.WARCRecordType;
import org.archive.io.ArchiveReader;
import org.archive.io.ArchiveRecord;

import junit.framework.TestCase;

/**
 * Checks that {@code WARCReaderFactory.get(String, InputStream, boolean)}
 * can open each of the bundled test archives and read their first record.
 */
public class WARCReaderFactoryTest extends TestCase {

    // Test files: going by their extensions, one gzip-compressed WARC and
    // one plain WARC, so both reader variants get exercised.
    String[] files = new String[] {
        "src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz",
        "src/test/resources/org/archive/format/warc/IAH-urls-wget.warc"
    };

    /**
     * Opens every test file through the factory and verifies that the first
     * record is reported as a {@code warcinfo} record.
     *
     * @throws IOException if a test file cannot be opened or read
     */
    public void testGetStringInputstreamBoolean() throws IOException {
        // Check the test files can be opened:
        for (String file : files) {
            FileInputStream is = new FileInputStream(file);
            try {
                ArchiveReader ar = WARCReaderFactory.get(file, is, true);
                ArchiveRecord r = ar.get();
                String type = (String) r.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
                // Check the first record comes out as a 'warcinfo' record.
                assertEquals("First record type of " + file,
                        WARCRecordType.warcinfo.name(), type);
            } finally {
                // Release the file handle even when an assertion fails.
                is.close();
            }
        }
    }

}
Loading