From 4623d26af6131d9f161fb1d32e19ec8b4705eddc Mon Sep 17 00:00:00 2001
From: Andrew Jackson
Date: Fri, 7 Mar 2014 10:13:18 +0000
Subject: [PATCH 1/5] [maven-release-plugin] prepare release
webarchive-commons-1.1.1
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index cfd201b0..85c30fc9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,7 +10,7 @@
org.netpreserve.commons
webarchive-commons
- 1.1.1-SNAPSHOT
+ 1.1.1
jar
webarchive-commons
From 9a77285299d54b8e0c73a9006fab729d8c920dd3 Mon Sep 17 00:00:00 2001
From: Andrew Jackson
Date: Fri, 7 Mar 2014 10:14:22 +0000
Subject: [PATCH 2/5] [maven-release-plugin] rollback the release of
webarchive-commons-1.1.1
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 85c30fc9..cfd201b0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,7 +10,7 @@
org.netpreserve.commons
webarchive-commons
- 1.1.1
+ 1.1.1-SNAPSHOT
jar
webarchive-commons
From e1b40a47147c89e66dfd62d9330e211e62de1ec2 Mon Sep 17 00:00:00 2001
From: Andrew Jackson
Date: Fri, 7 Mar 2014 10:20:37 +0000
Subject: [PATCH 3/5] [maven-release-plugin] prepare release
webarchive-commons-1.1.1
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index cfd201b0..85c30fc9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,7 +10,7 @@
org.netpreserve.commons
webarchive-commons
- 1.1.1-SNAPSHOT
+ 1.1.1
jar
webarchive-commons
From 08b944291d5faed4001714a8ced78ca354933207 Mon Sep 17 00:00:00 2001
From: Andrew Jackson
Date: Fri, 7 Mar 2014 21:53:35 +0000
Subject: [PATCH 4/5] Added tests and initial fix for opening uncompressed
WARCs
This initial unit test just tests that the first record from
compressed and uncompressed WARCs can be accessed.
---
.../archive/io/warc/WARCReaderFactory.java | 10 +-
.../io/warc/WARCReaderFactoryTest.java | 34 +
.../archive/format/warc/IAH-urls-wget.warc | 3156 +++++++++++++++++
3 files changed, 3197 insertions(+), 3 deletions(-)
create mode 100644 src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java
create mode 100644 src/test/resources/org/archive/format/warc/IAH-urls-wget.warc
diff --git a/src/main/java/org/archive/io/warc/WARCReaderFactory.java b/src/main/java/org/archive/io/warc/WARCReaderFactory.java
index 9c6c7e77..a02adf03 100644
--- a/src/main/java/org/archive/io/warc/WARCReaderFactory.java
+++ b/src/main/java/org/archive/io/warc/WARCReaderFactory.java
@@ -103,9 +103,13 @@ public static ArchiveReader get(final String s, final InputStream is,
protected ArchiveReader getArchiveReader(final String f,
final InputStream is, final boolean atFirstRecord)
throws IOException {
- // For now, assume stream is compressed. Later add test of input
- // stream or handle exception thrown when figure not compressed stream.
- return new CompressedWARCReader(f, is, atFirstRecord);
+ // Choose the reader from the name: a name ending in ".gz" gets the compressed reader, any other name the uncompressed one.
+ // TODO Relies on the file extension only; should sniff the stream content instead. NOTE(review): atFirstRecord is not passed to UncompressedWARCReader, and a null f would throw at endsWith - confirm against callers.
+ if( f.endsWith(".gz") ) {
+ return new CompressedWARCReader(f, is, atFirstRecord);
+ } else {
+ return new UncompressedWARCReader(f, is);
+ }
}
public static WARCReader get(final URL arcUrl, final long offset)
diff --git a/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java b/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java
new file mode 100644
index 00000000..25028797
--- /dev/null
+++ b/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java
@@ -0,0 +1,34 @@
+package org.archive.io.warc;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.archive.format.warc.WARCConstants;
+import org.archive.format.warc.WARCConstants.WARCRecordType;
+import org.archive.io.ArchiveReader;
+import org.archive.io.ArchiveRecord;
+
+import junit.framework.TestCase;
+/** Checks that WARCReaderFactory.get(String, InputStream, boolean) opens both a ".warc.gz" and a plain ".warc" test file. */
+public class WARCReaderFactoryTest extends TestCase {
+
+ // Test fixtures: one ".warc.gz" and one plain ".warc" file. Paths are relative - presumably to the project root; confirm the build runs tests from there.
+ String[] files = new String[] {
+ "src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz",
+ "src/test/resources/org/archive/format/warc/IAH-urls-wget.warc"
+ };
+
+ public void testGetStringInputstreamBoolean() throws IOException {
+ // Open each test file through the factory and read one record. NOTE(review): neither the FileInputStream nor the ArchiveReader is closed in this loop.
+ for( String file : files ) {
+ FileInputStream is = new FileInputStream(file);
+ ArchiveReader ar = WARCReaderFactory.get(file, is, true);
+ ArchiveRecord r = ar.get();
+ String type = (String) r.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE);
+ // The record returned by get() must carry the 'warcinfo' type; only this one record per file is checked.
+ assertEquals(WARCRecordType.warcinfo.name(), type);
+ }
+ }
+
+
+}
diff --git a/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc b/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc
new file mode 100644
index 00000000..1125fe98
--- /dev/null
+++ b/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc
@@ -0,0 +1,3156 @@
+WARC/1.0
+WARC-Type: warcinfo
+Content-Type: application/warc-fields
+WARC-Date: 2013-10-21T21:53:06Z
+WARC-Record-ID:
+WARC-Filename: IAH-urls-wget.warc.gz
+WARC-Block-Digest: sha1:I7UCIFZZDYO4O55ZOG6X5PRMVWMPZWMJ
+Content-Length: 235
+
+software: Wget/1.14 (darwin11.4.0)
+format: WARC File Format 1.0
+conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf
+robots: classic
+wget-arguments: "-i" "urls.txt" "-O" "-" "--warc-file=IAH-urls-wget"
+
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Target-URI: http://www.archive.org/robots.txt
+Content-Type: application/http;msgtype=request
+WARC-Date: 2013-10-21T21:53:06Z
+WARC-Record-ID:
+WARC-IP-Address: 207.241.224.2
+WARC-Warcinfo-ID:
+WARC-Block-Digest: sha1:CPCUG5OU46Y5YHPTFCZLZV465AFPFJYY
+Content-Length: 126
+
+GET /robots.txt HTTP/1.1
+User-Agent: Wget/1.14 (darwin11.4.0)
+Accept: */*
+Host: www.archive.org
+Connection: Keep-Alive
+
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID:
+WARC-Warcinfo-ID:
+WARC-Concurrent-To:
+WARC-Target-URI: http://www.archive.org/robots.txt
+WARC-Date: 2013-10-21T21:53:06Z
+WARC-IP-Address: 207.241.224.2
+WARC-Block-Digest: sha1:3L4DY55OVKT2IEHZEKOSIXRCQKJ7MNIE
+WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4
+Content-Type: application/http;msgtype=response
+Content-Length: 435
+
+HTTP/1.1 302 Moved Temporarily
+Server: nginx/1.1.19
+Date: Mon, 21 Oct 2013 21:53:06 GMT
+Content-Type: text/html
+Content-Length: 161
+Connection: keep-alive
+Location: http://archive.org/robots.txt
+Expires: Tue, 22 Oct 2013 03:53:06 GMT
+Cache-Control: max-age=21600
+
+
+302 Found
+
+302 Found
+nginx/1.1.19
+
+
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Target-URI: http://archive.org/robots.txt
+Content-Type: application/http;msgtype=request
+WARC-Date: 2013-10-21T21:53:07Z
+WARC-Record-ID:
+WARC-IP-Address: 207.241.224.2
+WARC-Warcinfo-ID:
+WARC-Block-Digest: sha1:RQBBTMHS45XDYLYGRCT7YQ7P3UORCEQU
+Content-Length: 122
+
+GET /robots.txt HTTP/1.1
+User-Agent: Wget/1.14 (darwin11.4.0)
+Accept: */*
+Host: archive.org
+Connection: Keep-Alive
+
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID:
+WARC-Warcinfo-ID:
+WARC-Concurrent-To:
+WARC-Target-URI: http://archive.org/robots.txt
+WARC-Date: 2013-10-21T21:53:07Z
+WARC-IP-Address: 207.241.224.2
+WARC-Block-Digest: sha1:ORAXOWRNZAEDKBOJUW2PYNLDX2LRDCBK
+WARC-Payload-Digest: sha1:ARS5OJBVROJW62M7JMB3BCHEUUEBVMJK
+Content-Type: application/http;msgtype=response
+Content-Length: 1014
+
+HTTP/1.1 200 OK
+Server: nginx/1.1.19
+Date: Mon, 21 Oct 2013 21:53:07 GMT
+Content-Type: text/plain
+Content-Length: 727
+Last-Modified: Mon, 21 Oct 2013 18:55:18 GMT
+Connection: keep-alive
+Expires: Tue, 22 Oct 2013 03:53:07 GMT
+Cache-Control: max-age=21600
+Accept-Ranges: bytes
+
+
+Sitemap: http://archive.org/sitemap/sitemap.xml
+
+##############################################
+#
+# Welcome to the Archive!
+#
+##############################################
+# Please crawl our files.
+# We appreciate if you can crawl responsibly.
+# Stay open!
+##############################################
+
+
+# slow down the ask jeeves crawler which was hitting our SE a little too fast
+# via collection pages. --Feb2008 tracey--
+User-agent: Teoma
+Disallow: /control/
+Disallow: /report/
+
+
+User-agent: *
+Disallow: /control/
+Disallow: /report/
+Disallow: /details/goldenbull2007john/
+Disallow: /stream/goldenbull2007john/
+Disallow: /download/goldenbull2007john/
+Disallow: /14/items/goldenbull2007john/goldenbull2007john_djvu.txt
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Target-URI: http://www.archive.org/
+Content-Type: application/http;msgtype=request
+WARC-Date: 2013-10-21T21:53:07Z
+WARC-Record-ID:
+WARC-IP-Address: 207.241.224.2
+WARC-Warcinfo-ID:
+WARC-Block-Digest: sha1:GCYSQOYQGB7JDB57XMUYWFQERAKMNEQQ
+Content-Length: 116
+
+GET / HTTP/1.1
+User-Agent: Wget/1.14 (darwin11.4.0)
+Accept: */*
+Host: www.archive.org
+Connection: Keep-Alive
+
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID:
+WARC-Warcinfo-ID:
+WARC-Concurrent-To:
+WARC-Target-URI: http://www.archive.org/
+WARC-Date: 2013-10-21T21:53:07Z
+WARC-IP-Address: 207.241.224.2
+WARC-Block-Digest: sha1:WDSM4DEMHGZEOPEG2HMQAIUBQJ6WRRN5
+WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4
+Content-Type: application/http;msgtype=response
+Content-Length: 434
+
+HTTP/1.1 302 Moved Temporarily
+Server: nginx/1.1.19
+Date: Mon, 21 Oct 2013 21:53:07 GMT
+Content-Type: text/html
+Content-Length: 161
+Connection: keep-alive
+Location: http://archive.org/index.php
+Expires: Tue, 22 Oct 2013 03:53:07 GMT
+Cache-Control: max-age=21600
+
+
+302 Found
+
+302 Found
+nginx/1.1.19
+
+
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Target-URI: http://archive.org/index.php
+Content-Type: application/http;msgtype=request
+WARC-Date: 2013-10-21T21:53:07Z
+WARC-Record-ID:
+WARC-IP-Address: 207.241.224.2
+WARC-Warcinfo-ID:
+WARC-Block-Digest: sha1:CPMG7AGNNEDLYK5UOOZLLRHPI4JLEC3U
+Content-Length: 121
+
+GET /index.php HTTP/1.1
+User-Agent: Wget/1.14 (darwin11.4.0)
+Accept: */*
+Host: archive.org
+Connection: Keep-Alive
+
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID:
+WARC-Warcinfo-ID:
+WARC-Concurrent-To:
+WARC-Target-URI: http://archive.org/index.php
+WARC-Date: 2013-10-21T21:53:07Z
+WARC-IP-Address: 207.241.224.2
+WARC-Block-Digest: sha1:RYQILVXCYAVUO7TRRO7CQ7VYKSD4COHM
+WARC-Payload-Digest: sha1:63IMMQZVCWADA6ZOVJVHKYHHNFSUS26H
+Content-Type: application/http;msgtype=response
+Content-Length: 258
+
+HTTP/1.1 301 Moved Permanently
+Server: nginx/1.1.19
+Date: Mon, 21 Oct 2013 21:53:08 GMT
+Content-Type: text/html; charset=UTF-8
+Transfer-Encoding: chunked
+Connection: keep-alive
+X-Powered-By: PHP/5.3.10-1ubuntu3.2
+Location: https://archive.org
+
+0
+
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Target-URI: https://archive.org/
+Content-Type: application/http;msgtype=request
+WARC-Date: 2013-10-21T21:53:09Z
+WARC-Record-ID:
+WARC-IP-Address: 207.241.224.2
+WARC-Warcinfo-ID:
+WARC-Block-Digest: sha1:G6KJJNG7G7HVRFGJJZ7ELDMO2ZZEX4WR
+Content-Length: 112
+
+GET / HTTP/1.1
+User-Agent: Wget/1.14 (darwin11.4.0)
+Accept: */*
+Host: archive.org
+Connection: Keep-Alive
+
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID:
+WARC-Warcinfo-ID:
+WARC-Concurrent-To:
+WARC-Target-URI: https://archive.org/
+WARC-Date: 2013-10-21T21:53:09Z
+WARC-IP-Address: 207.241.224.2
+WARC-Block-Digest: sha1:VRAITOLIHCUNC5A7LDUBFHDSYQCUO7JM
+WARC-Payload-Digest: sha1:WDT537KNDSUIRPB7R56KBDX3K77IR7W3
+Content-Type: application/http;msgtype=response
+Content-Length: 30849
+
+HTTP/1.1 200 OK
+Server: nginx/1.1.19
+Date: Mon, 21 Oct 2013 21:53:09 GMT
+Content-Type: text/html; charset=UTF-8
+Transfer-Encoding: chunked
+Connection: keep-alive
+X-Powered-By: PHP/5.3.10-1ubuntu3.2
+Set-Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87; path=/; domain=.archive.org
+
+7756
+
+
+
+
+ Internet Archive: Digital Library of Free Books, Movies, Music & Wayback Machine
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Announcements (more )
+
+
+
+
+
+
+
+ 361 billion pages
+
+ Web
+
+
+
+
+
+
+
+
+
+
+
+ Welcome to the Archive
+
+
+ The Internet Archive, a 501(c)(3) non-profit, is building a digital library of Internet sites and other cultural artifacts in digital form. Like a paper library, we provide free access to researchers, historians, scholars, the print disabled, and the general public.
+
+
+
+
+
+
+
+
+
+
+ Video
+
+
+
+ 1,411,240 movies
+
+
+
+
+
+
+
+ Curator's Choice
+
+ (more )
+
+
+
+
+
+
+
+
filmcollectief-00-060a
+
+ Unknown movie, found in a cannister which should contain something elso. So if someone can help me...
+
+
+
+
Recent Review
+
+
+
+
+
+
+
+
+ Live Music
+
+
+
+ 121,538 concerts
+
+
+
+
+
+
+
+ Curator's Choice
+
+ (more )
+
+
+
+
+
+
+
+
+
+
Recent Review
+
+
+
+
+
+
+
+
+ Audio
+
+
+
+ 1,744,979 recordings
+
+
+
+
+
+
+
+ Curator's Choice
+
+ (more )
+
+
+
+
+
+
+
+
+
+
Recent Review
+
+
+
+
+
+
+
+
+ Texts
+
+
+
+ 5,325,972 texts
+
+
+
+
+
+
+
+ Curator's Choice
+
+ (more )
+
+
+
+
+
+
+
+
+
+
Recent Review
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Most recent posts (write a post by going to a forum) more ...
+
+
+
+
+
+
+
+
+
Institutional Support
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+0
+
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Target-URI: http://www.archive.org/index.php
+Content-Type: application/http;msgtype=request
+WARC-Date: 2013-10-21T21:53:10Z
+WARC-Record-ID:
+WARC-IP-Address: 207.241.224.2
+WARC-Warcinfo-ID:
+WARC-Block-Digest: sha1:DRAV5TKA4765LYFANCFHVNKEWGLRKUMM
+Content-Length: 171
+
+GET /index.php HTTP/1.1
+User-Agent: Wget/1.14 (darwin11.4.0)
+Accept: */*
+Host: www.archive.org
+Connection: Keep-Alive
+Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87
+
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID:
+WARC-Warcinfo-ID:
+WARC-Concurrent-To:
+WARC-Target-URI: http://www.archive.org/index.php
+WARC-Date: 2013-10-21T21:53:10Z
+WARC-IP-Address: 207.241.224.2
+WARC-Block-Digest: sha1:YXATLZCFORQS33ZVB3M3SMJY3S2Z6QUD
+WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4
+Content-Type: application/http;msgtype=response
+Content-Length: 434
+
+HTTP/1.1 302 Moved Temporarily
+Server: nginx/1.1.19
+Date: Mon, 21 Oct 2013 21:53:11 GMT
+Content-Type: text/html
+Content-Length: 161
+Connection: keep-alive
+Location: http://archive.org/index.php
+Expires: Tue, 22 Oct 2013 03:53:11 GMT
+Cache-Control: max-age=21600
+
+
+302 Found
+
+302 Found
+nginx/1.1.19
+
+
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Target-URI: http://archive.org/index.php
+Content-Type: application/http;msgtype=request
+WARC-Date: 2013-10-21T21:53:11Z
+WARC-Record-ID:
+WARC-IP-Address: 207.241.224.2
+WARC-Warcinfo-ID:
+WARC-Block-Digest: sha1:D53DT5RU7NGDFBHOJOKLF56UG32P7AYF
+Content-Length: 167
+
+GET /index.php HTTP/1.1
+User-Agent: Wget/1.14 (darwin11.4.0)
+Accept: */*
+Host: archive.org
+Connection: Keep-Alive
+Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87
+
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID:
+WARC-Warcinfo-ID:
+WARC-Concurrent-To:
+WARC-Target-URI: http://archive.org/index.php
+WARC-Date: 2013-10-21T21:53:11Z
+WARC-IP-Address: 207.241.224.2
+WARC-Block-Digest: sha1:RS3Z4Z3NZ6BS6ANPCRKWA43E5O5YPVG6
+WARC-Payload-Digest: sha1:63IMMQZVCWADA6ZOVJVHKYHHNFSUS26H
+Content-Type: application/http;msgtype=response
+Content-Length: 258
+
+HTTP/1.1 301 Moved Permanently
+Server: nginx/1.1.19
+Date: Mon, 21 Oct 2013 21:53:11 GMT
+Content-Type: text/html; charset=UTF-8
+Transfer-Encoding: chunked
+Connection: keep-alive
+X-Powered-By: PHP/5.3.10-1ubuntu3.2
+Location: https://archive.org
+
+0
+
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Target-URI: https://archive.org/
+Content-Type: application/http;msgtype=request
+WARC-Date: 2013-10-21T21:53:12Z
+WARC-Record-ID:
+WARC-IP-Address: 207.241.224.2
+WARC-Warcinfo-ID:
+WARC-Block-Digest: sha1:HRBVH5XQCN2OWGMQ7THZ675AZ4L4SEWV
+Content-Length: 158
+
+GET / HTTP/1.1
+User-Agent: Wget/1.14 (darwin11.4.0)
+Accept: */*
+Host: archive.org
+Connection: Keep-Alive
+Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87
+
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID:
+WARC-Warcinfo-ID:
+WARC-Concurrent-To:
+WARC-Target-URI: https://archive.org/
+WARC-Date: 2013-10-21T21:53:12Z
+WARC-IP-Address: 207.241.224.2
+WARC-Block-Digest: sha1:24OHCKJGVHH4GDPS65MSGZAS2FWN6U44
+WARC-Payload-Digest: sha1:7DW5UIXJ5NGLWNQ5WYE7AB4E5L74X275
+Content-Type: application/http;msgtype=response
+Content-Length: 30679
+
+HTTP/1.1 200 OK
+Server: nginx/1.1.19
+Date: Mon, 21 Oct 2013 21:53:13 GMT
+Content-Type: text/html; charset=UTF-8
+Transfer-Encoding: chunked
+Connection: keep-alive
+X-Powered-By: PHP/5.3.10-1ubuntu3.2
+
+76fb
+
+
+
+
+ Internet Archive: Digital Library of Free Books, Movies, Music & Wayback Machine
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Announcements (more )
+
+
+
+
+
+
+
+ 361 billion pages
+
+ Web
+
+
+
+
+
+
+
+
+
+
+
+ Welcome to the Archive
+
+
+ The Internet Archive, a 501(c)(3) non-profit, is building a digital library of Internet sites and other cultural artifacts in digital form. Like a paper library, we provide free access to researchers, historians, scholars, the print disabled, and the general public.
+
+
+
+
+
+
+
+
+
+
+ Video
+
+
+
+ 1,411,240 movies
+
+
+
+
+
+
+
+ Curator's Choice
+
+ (more )
+
+
+
+
+
+
+
+
Baby nursery (reel 5)
+
+ Description: Amateur movie of the baby nursery at the Peoples Temple Agricultural Mission in...
+
+
+
+
Recent Review
+
+
+
+
+
+
+
+
+ Live Music
+
+
+
+ 121,538 concerts
+
+
+
+
+
+
+
+ Curator's Choice
+
+ (more )
+
+
+
+
+
+
+
+
+
+
Recent Review
+
+
+
+
+
+
+
+
+ Audio
+
+
+
+ 1,744,979 recordings
+
+
+
+
+
+
+
+ Curator's Choice
+
+ (more )
+
+
+
+
+
+
+
+
[Miga_v16] "Nice summer"
+
+ extra video for audio-release [Miga32] Rominger "Music for camping" [Miga_v16] "Nice summer" video:...
+
+
+
+
Recent Review
+
+
+
+
+
+
+
+
+ Texts
+
+
+
+ 5,325,972 texts
+
+
+
+
+
+
+
+ Curator's Choice
+
+ (more )
+
+
+
+
+
+
+
+
+
+
Recent Review
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Most recent posts (write a post by going to a forum) more ...
+
+
+
+
+
+
+
+
+
Institutional Support
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+0
+
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Target-URI: http://www.archive.org/images/logoc.jpg
+Content-Type: application/http;msgtype=request
+WARC-Date: 2013-10-21T21:53:14Z
+WARC-Record-ID:
+WARC-IP-Address: 207.241.224.2
+WARC-Warcinfo-ID:
+WARC-Block-Digest: sha1:6PZOFZFFZRY7XJOJ2325DNXHG7LEP3G6
+Content-Length: 178
+
+GET /images/logoc.jpg HTTP/1.1
+User-Agent: Wget/1.14 (darwin11.4.0)
+Accept: */*
+Host: www.archive.org
+Connection: Keep-Alive
+Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87
+
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID:
+WARC-Warcinfo-ID:
+WARC-Concurrent-To:
+WARC-Target-URI: http://www.archive.org/images/logoc.jpg
+WARC-Date: 2013-10-21T21:53:14Z
+WARC-IP-Address: 207.241.224.2
+WARC-Block-Digest: sha1:JN3EE5W7CY5PSNTEJ7A6ORMLNNMNWS3J
+WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4
+Content-Type: application/http;msgtype=response
+Content-Length: 441
+
+HTTP/1.1 302 Moved Temporarily
+Server: nginx/1.1.19
+Date: Mon, 21 Oct 2013 21:53:14 GMT
+Content-Type: text/html
+Content-Length: 161
+Connection: keep-alive
+Location: http://archive.org/images/logoc.jpg
+Expires: Tue, 22 Oct 2013 03:53:14 GMT
+Cache-Control: max-age=21600
+
+
+302 Found
+
+302 Found
+nginx/1.1.19
+
+
+
+
+WARC/1.0
+WARC-Type: request
+WARC-Target-URI: http://archive.org/images/logoc.jpg
+Content-Type: application/http;msgtype=request
+WARC-Date: 2013-10-21T21:53:14Z
+WARC-Record-ID:
+WARC-IP-Address: 207.241.224.2
+WARC-Warcinfo-ID:
+WARC-Block-Digest: sha1:Q6EXPKA6ECDPIEX3MXCWAH2S4JEO4ZHI
+Content-Length: 174
+
+GET /images/logoc.jpg HTTP/1.1
+User-Agent: Wget/1.14 (darwin11.4.0)
+Accept: */*
+Host: archive.org
+Connection: Keep-Alive
+Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87
+
+
+
+WARC/1.0
+WARC-Type: response
+WARC-Record-ID:
+WARC-Warcinfo-ID:
+WARC-Concurrent-To:
+WARC-Target-URI: http://archive.org/images/logoc.jpg
+WARC-Date: 2013-10-21T21:53:14Z
+WARC-IP-Address: 207.241.224.2
+WARC-Block-Digest: sha1:6ESWUQAIQPTXYPDSKA2NGLDTHEFS6FLK
+WARC-Payload-Digest: sha1:UZY6ND6CCHXETFVJD2MSS7ZENMWF7KQ2
+Content-Type: application/http;msgtype=response
+Content-Length: 1951
+
+HTTP/1.1 200 OK
+Server: nginx/1.1.19
+Date: Mon, 21 Oct 2013 21:53:14 GMT
+Content-Type: image/jpeg
+Content-Length: 1662
+Last-Modified: Wed, 13 Feb 2013 16:33:25 GMT
+Connection: keep-alive
+Expires: Mon, 28 Oct 2013 21:53:14 GMT
+Cache-Control: max-age=604800
+Accept-Ranges: bytes
+
+ JFIF d d Adobe ImageReady Ducky <