diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..0dfd3f7f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,26 @@ +language: java + +jdk: + - oraclejdk7 + +before_install: + - "git clone https://github.com/iipc/travis.git target/travis" + +before_script: + - "export JAVA_OPTS=-Xmx1024m" + - "export MAVEN_OPTS=-Xmx512m" + - "ulimit -u 2048" + +script: + - "target/travis/deploy-if.sh" + +# whitelist in the master branch only +branches: + only: + - master + +env: + global: + - secure: "qDKjVdoe4Qcz4WfXiQydU7tyl51T62FUJrjqu4FUPBcgeQhFQiggwhpaE6xCOzOpxbsuBi2R1c8gMQf5esE5iDL5jZMu+kz++dYbuzMTd13ttvZWMW5wRPH0H8iHk609FP/RDtVKKBr7WO0JvvIAZEhWNHZrLXBrrKgdTey171g=" + - secure: "FXGBKJNP9X7ePJfS4eYTZtoFo4RT1sxor34XxncSJr7uV6ggtZb4B4WNd16IlLcDk6E32sx8YoWdltaOGwQ5Vg/kux5Ko/wKZCoccS018Ln1bRT86dD1KoPY34rGoNJVQxe7J/1MPqpBKwmi2XCKfzpsEh3W7bbIqg8w9MEOOZA=" + diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 00000000..b872846d --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,28 @@ +1.1.5 +----- +* [Escape redirect URLs in RealCDXExtractorOutput](https://github.com/iipc/webarchive-commons/pull/36) +* [Tests fail on Windows](https://github.com/iipc/webarchive-commons/issues/2) +* [Test fails on Java 8](https://github.com/iipc/webarchive-commons/issues/31) +* [RecordingOutputStream can affect tcp packets sent in an undesirable way](https://github.com/iipc/webarchive-commons/issues/38) + +1.1.4 +----- +* [All dates should be independent of locale settings](https://github.com/iipc/webarchive-commons/pull/22) +* [Resolved fastutil conflict in dependencies](https://github.com/iipc/webarchive-commons/pull/24) + +1.1.3 +----- +* [Synchronised with IA fork](https://github.com/iipc/webarchive-commons/pull/18) +* [Updated to more recent Guava APIs](https://github.com/iipc/webarchive-commons/pull/17) +* [Fixed handling of uncompressed ARC files #13 and #14](https://github.com/iipc/webarchive-commons/pull/14) +* [Avoid pulling in the logback dependency 
IA#13](https://github.com/internetarchive/webarchive-commons/pull/13) + +1.1.2 +----- +* Fixed support for reading uncompressed WARCs, along with some unit testing. (https://github.com/iipc/webarchive-commons/pull/12) + +1.1.1 +----- +* Renamed from commons-webarchive to webarchive-commons (https://github.com/iipc/webarchive-commons/pull/8) +* Cope with malformed GZip extra fields as produced by wget 1.14 (https://github.com/iipc/webarchive-commons/pull/10) +* Switch to httpcomponents, and add IA deployment information. (https://github.com/iipc/webarchive-commons/pull/11) diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..37ec93a1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. 
+ +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." 
+ +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
+ +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. 
+ +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 00000000..72858a52 --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +IIPC Web Archive Commons +======================== + +[![Build Status](https://travis-ci.org/iipc/webarchive-commons.png?branch=master)](https://travis-ci.org/iipc/webarchive-commons/) + +This repository contains common utility code for [OpenWayback][1] and other projects. + +[1]: https://github.com/iipc/openwayback diff --git a/pom.xml b/pom.xml index d2004a27..222a4c78 100644 --- a/pom.xml +++ b/pom.xml @@ -1,19 +1,63 @@ - + 4.0.0 - org.archive - ia-web-commons - 1.1.1-SNAPSHOT + + org.sonatype.oss + oss-parent + 7 + + + org.netpreserve.commons + webarchive-commons + 1.1.5-IA jar - ia-web-commons - http://maven.apache.org + webarchive-commons + https://github.com/iipc/webarchive-commons + + + The International Internet Preservation Consortium + http://netpreserve.org/ + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + many-devs + Many Others Developers Proceed Me + many@dev.org + + + anjackson + Andrew Jackson + Andrew.Jackson@bl.uk + + + + GitHub Issues + https://github.com/iipc/webarchive-commons/issues + + + scm:git:git@github.com:iipc/webarchive-commons.git + scm:git:git@github.com:iipc/webarchive-commons.git + git@github.com:iipc/webarchive-commons.git + UTF-8 ${maven.build.timestamp} 
yyyyMMddhhmmss + + + sonatype-nexus-staging + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + sonatype-nexus-snapshots + https://oss.sonatype.org/content/repositories/snapshots/ @@ -21,13 +65,12 @@ junit junit 3.8.1 - test com.google.guava guava - 14.0.1 + 17.0 @@ -42,7 +85,7 @@ - org.mozilla + com.googlecode.juniversalchardet juniversalchardet 1.0.3 @@ -86,6 +129,10 @@ tomcat jasper-compiler + + hsqldb + hsqldb + @@ -115,9 +162,15 @@ it.unimi.dsi - mg4j - 1.0.1 + dsiutils + 2.0.12 compile + + + ch.qos.logback + logback-classic + + org.apache.httpcomponents @@ -129,12 +182,6 @@ joda-time 1.6 - - fastutil - fastutil - 5.0.7 - compile - @@ -155,7 +202,7 @@ jar-with-dependencies - ia-web-commons + webarchive-commons @@ -176,24 +223,6 @@ - - internetarchive - Internet Archive Maven Repository - http://builds.archive.org:8080/maven2 - default - - - true - daily - warn - - - true - daily - warn - - - cloudera Cloudera Hadoop @@ -216,10 +245,13 @@ - repository - + ${repository.id} ${repository.url} + + ${snapshotRepository.id} + ${snapshotRepository.url} + diff --git a/src/main/java/org/archive/extract/DumpingExtractorOutput.java b/src/main/java/org/archive/extract/DumpingExtractorOutput.java index a4151076..69591931 100644 --- a/src/main/java/org/archive/extract/DumpingExtractorOutput.java +++ b/src/main/java/org/archive/extract/DumpingExtractorOutput.java @@ -9,8 +9,8 @@ import org.archive.util.StreamCopy; import org.json.JSONException; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class DumpingExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -22,7 +22,7 @@ public DumpingExtractorOutput(OutputStream out) { } public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new 
CountingOutputStream(nullo); StreamCopy.copy(resource.getInputStream(), co); long bytes = co.getCount(); diff --git a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java index 306f67a3..8ca3ff82 100644 --- a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java +++ b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java @@ -1,8 +1,10 @@ package org.archive.extract; import java.io.IOException; +import java.io.OutputStream; import java.io.PrintWriter; import java.net.MalformedURLException; +import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.util.List; @@ -23,8 +25,8 @@ import org.json.JSONException; import org.json.JSONObject; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class RealCDXExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -72,7 +74,7 @@ public RealCDXExtractorOutput(PrintWriter out) { // SimpleJSONPathSpec gzFooterLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Footer-Length"); // SimpleJSONPathSpec gzHeaderLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Header-Length"); public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); try { StreamCopy.copy(resource.getInputStream(), co); @@ -306,12 +308,14 @@ private String extractHTMLMetaRefresh(String origUrl, MetaData m) { return "-"; } - private String resolve(String context, String spec) { + static String resolve(String context, String spec) { // TODO: test! 
try { URL cUrl = new URL(context); - URL resolved = new URL(cUrl,spec); - return resolved.toURI().toASCIIString(); + URL url = new URL(cUrl, spec); + // this constructor escapes its arguments, if necessary + URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), url.getRef()); + return uri.toASCIIString(); } catch (URISyntaxException e) { } catch (MalformedURLException e) { diff --git a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java index 0d564a6f..ff46a914 100644 --- a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java +++ b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java @@ -1,6 +1,7 @@ package org.archive.extract; import java.io.IOException; +import java.io.OutputStream; import java.io.PrintWriter; import java.net.MalformedURLException; import java.net.URISyntaxException; @@ -21,8 +22,8 @@ import org.json.JSONException; import org.json.JSONObject; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class WARCMetadataRecordExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -47,7 +48,7 @@ public WARCMetadataRecordExtractorOutput(PrintWriter out) { } public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); try { StreamCopy.copy(resource.getInputStream(), co); diff --git a/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java b/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java index a4ed6260..0a9a82e0 100644 --- a/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java +++ b/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java @@ -98,12 +98,17 @@ public void writeTo(OutputStream 
os) throws IOException { os.write(value); } } - public int read(InputStream is) throws IOException { + public int read(InputStream is, int maxRead) throws IOException { byte tmpName[] = null; byte tmpVal[] = null; int valLen = 0; tmpName = ByteOp.readNBytes(is, GZIP_FEXTRA_NAME_BYTES); valLen = ByteOp.readShort(is); + if (valLen > (maxRead - BYTES_IN_SHORT - GZIP_FEXTRA_NAME_BYTES)) { + /* read in what's left, but throw an exception */ + tmpVal = ByteOp.readNBytes(is, maxRead - BYTES_IN_SHORT - GZIP_FEXTRA_NAME_BYTES); + throw new GZIPFormatException.GZIPExtraFieldShortException(maxRead); + } if(valLen > 0) { tmpVal = ByteOp.readNBytes(is, valLen); } diff --git a/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java b/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java index 7dc0de44..e5920552 100755 --- a/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java +++ b/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java @@ -53,12 +53,17 @@ public void readRecords(InputStream is) ArrayList tmpList = new ArrayList(); while(bytesRemaining > 0) { GZIPFExtraRecord tmpRecord = new GZIPFExtraRecord(); - int bytesRead = tmpRecord.read(is); - bytesRemaining -= bytesRead; + try { + int bytesRead = tmpRecord.read(is, bytesRemaining); + bytesRemaining -= bytesRead; + tmpList.add(tmpRecord); + } catch (GZIPFormatException.GZIPExtraFieldShortException ex) { + /* not enough bytes for the extra field; move on */ + bytesRemaining -= ex.bytesRead; + } if(bytesRemaining < 0) { throw new GZIPFormatException("Invalid FExtra length/records"); } - tmpList.add(tmpRecord); } this.addAll(tmpList); } diff --git a/src/main/java/org/archive/format/gzip/GZIPFormatException.java b/src/main/java/org/archive/format/gzip/GZIPFormatException.java index ca627a88..3916dafa 100644 --- a/src/main/java/org/archive/format/gzip/GZIPFormatException.java +++ b/src/main/java/org/archive/format/gzip/GZIPFormatException.java @@ -21,4 +21,11 @@ public GZIPFormatException(Exception e) 
{ public GZIPFormatException(String message, IOException e) { super(message,e); } + public static class GZIPExtraFieldShortException extends GZIPFormatException { + int bytesRead; + public GZIPExtraFieldShortException(int bytesRead) { + super("Extra Field short."); + this.bytesRead = bytesRead; + } + } } diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java index bc773a58..a3d34a4b 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java +++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java @@ -21,6 +21,7 @@ import java.util.Date; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; import java.util.logging.Level; @@ -102,7 +103,7 @@ public void run() { public final static String LATEST_TIMESTAMP = "_LATEST"; public final static String OFF = "OFF"; - protected SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + protected SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH); protected Date startDate, endDate; class BlockSize diff --git a/src/main/java/org/archive/io/RecordingOutputStream.java b/src/main/java/org/archive/io/RecordingOutputStream.java index fe05701c..7d2ff212 100644 --- a/src/main/java/org/archive/io/RecordingOutputStream.java +++ b/src/main/java/org/archive/io/RecordingOutputStream.java @@ -242,6 +242,32 @@ public void write(int b) throws IOException { checkLimits(); } + /** + * Looks for a blank line ("\n\n" or "\n\r\n") in b[off..off+len), also letting + * the bytes remembered in lastTwoBytes complete one at the very start of the range. + * + * @return absolute index into b just past the blank line, or -1 if none was found + */ + private int findMessageBodyBeginMark(byte[] b, int off, int len) { + if ((lastTwoBytes[1] == '\n' || lastTwoBytes[0] == '\n' && lastTwoBytes[1] == '\r') + && len >= 1 && b[off] == '\n') { + return off + 1; + } else if (lastTwoBytes[1] == '\n' && len >= 2 && b[off] == '\r' && b[off+1] == '\n') { + return off + 2; + } + + for (int i = off; i < off + len - 1; i++) { + if (b[i] == '\n' && b[i+1] == '\n') { + return i + 
2; + } else if (b[i] == '\n' && b[i+1] == '\r' + && i + 2 < off + len && b[i+2] == '\n') { + return i + 3; + } + } + + return -1; + } + public void write(byte[] b, int off, int len) throws IOException { if(position < maxPosition) { if(position+len<=maxPosition) { @@ -255,20 +281,35 @@ public void write(byte[] b, int off, int len) throws IOException { off += consumeRange; len -= consumeRange; } - - // see comment on int[] lastTwoBytes - while (messageBodyBeginMark < 0 && len > 0) { - write(b[off]); - off++; - len--; + + if (messageBodyBeginMark < 0) { + // see comment on int[] lastTwoBytes + int mark = findMessageBodyBeginMark(b, off, len); + if (mark > 0) { + if(recording) { + record(b, off, mark - off); + } + if (this.out != null) { + this.out.write(b, off, mark - off); + } + markMessageBodyBegin(); + len = len - (mark - off); + off = mark; + } } - + if(recording) { record(b, off, len); } if (this.out != null) { this.out.write(b, off, len); } + if (len >= 1) { + lastTwoBytes[1] = b[off + len - 1]; + if (len >= 2) { + lastTwoBytes[0] = b[off + len - 2]; + } + } checkLimits(); } diff --git a/src/main/java/org/archive/io/arc/ARCReaderFactory.java b/src/main/java/org/archive/io/arc/ARCReaderFactory.java index e7dc1625..44437ed7 100644 --- a/src/main/java/org/archive/io/arc/ARCReaderFactory.java +++ b/src/main/java/org/archive/io/arc/ARCReaderFactory.java @@ -147,11 +147,11 @@ protected ArchiveReader getArchiveReader(final String arc, possiblyWrapped.mark(100); boolean compressed = testCompressedARCStream(possiblyWrapped); possiblyWrapped.reset(); - + if (compressed) { return new CompressedARCReader(arc, possiblyWrapped, atFirstRecord); } else { - return new UncompressedARCReader(arc, possiblyWrapped); + return new UncompressedARCReader(arc, possiblyWrapped, atFirstRecord); } } @@ -330,10 +330,11 @@ public UncompressedARCReader(final File f, final long offset) * @param f Uncompressed arc to read. * @param is InputStream. 
*/ - public UncompressedARCReader(final String f, final InputStream is) { + public UncompressedARCReader(final String f, final InputStream is, boolean atFirstRecord) { // Arc file has been tested for existence by time it has come // to here. setIn(new CountingInputStream(is)); + setAlignedOnFirstRecord(atFirstRecord); initialize(f); } } diff --git a/src/main/java/org/archive/io/warc/WARCReaderFactory.java b/src/main/java/org/archive/io/warc/WARCReaderFactory.java index 9c6c7e77..c3e5baa0 100644 --- a/src/main/java/org/archive/io/warc/WARCReaderFactory.java +++ b/src/main/java/org/archive/io/warc/WARCReaderFactory.java @@ -100,12 +100,20 @@ public static ArchiveReader get(final String s, final InputStream is, atFirstRecord); } + /* + * Note that the ARC companion does this differently, with quite a lot of duplication. + * + * @see org.archive.io.arc.ARCReaderFactory.getArchiveReader(String, InputStream, boolean) + */ protected ArchiveReader getArchiveReader(final String f, final InputStream is, final boolean atFirstRecord) throws IOException { - // For now, assume stream is compressed. Later add test of input - // stream or handle exception thrown when figure not compressed stream. - return new CompressedWARCReader(f, is, atFirstRecord); + // Check if it's compressed, based on file extension. + if( f.endsWith(".gz") ) { + return new CompressedWARCReader(f, is, atFirstRecord); + } else { + return new UncompressedWARCReader(f, is); + } } public static WARCReader get(final URL arcUrl, final long offset) diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java index b9558263..e2d28ee9 100644 --- a/src/main/java/org/archive/io/warc/WARCWriter.java +++ b/src/main/java/org/archive/io/warc/WARCWriter.java @@ -245,10 +245,11 @@ public void writeRecord(WARCRecordInfo recordInfo) write(bytes); totalBytes += bytes.length; + // Write out the header/body separator. 
+ write(CRLF_BYTES); + totalBytes += CRLF_BYTES.length; + if (recordInfo.getContentStream() != null && recordInfo.getContentLength() > 0) { - // Write out the header/body separator. - write(CRLF_BYTES); // TODO: should this be written even for zero-length? - totalBytes += CRLF_BYTES.length; contentBytes += copyFrom(recordInfo.getContentStream(), recordInfo.getContentLength(), recordInfo.getEnforceLength()); diff --git a/src/main/java/org/archive/resource/AbstractResource.java b/src/main/java/org/archive/resource/AbstractResource.java index 409e7408..301c53d4 100755 --- a/src/main/java/org/archive/resource/AbstractResource.java +++ b/src/main/java/org/archive/resource/AbstractResource.java @@ -5,7 +5,7 @@ import org.archive.util.StreamCopy; -import com.google.common.io.NullOutputStream; +import com.google.common.io.ByteStreams; public abstract class AbstractResource implements Resource { protected ResourceContainer container; @@ -44,7 +44,7 @@ public static void dumpShort(PrintStream out, Resource resource) throws IOExcept // out.println("Headers Before"); // out.print(m.toString()); - long bytes = StreamCopy.copy(resource.getInputStream(), new NullOutputStream()); + long bytes = StreamCopy.copy(resource.getInputStream(), ByteStreams.nullOutputStream()); out.println("Resource Was:"+bytes+" Long"); out.println("[\n]Headers After"); diff --git a/src/main/java/org/archive/resource/arc/ARCResource.java b/src/main/java/org/archive/resource/arc/ARCResource.java index 5d63fd4d..b6e0a1c1 100644 --- a/src/main/java/org/archive/resource/arc/ARCResource.java +++ b/src/main/java/org/archive/resource/arc/ARCResource.java @@ -18,8 +18,8 @@ import org.archive.util.io.EOFObserver; import org.archive.util.io.PushBackOneByteInputStream; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; public class ARCResource extends AbstractResource @@ -54,7 +54,7 @@ public ARCResource(MetaData metaData, 
ResourceContainer container, fields.putLong(DECLARED_LENGTH_KEY, arcMetaData.getLength()); countingIS = new CountingInputStream( - new LimitInputStream(raw, arcMetaData.getLength())); + ByteStreams.limit(raw, arcMetaData.getLength())); try { digIS = new DigestInputStream(countingIS, diff --git a/src/main/java/org/archive/resource/http/HTTPResponseResource.java b/src/main/java/org/archive/resource/http/HTTPResponseResource.java index b5d189bc..cc325427 100644 --- a/src/main/java/org/archive/resource/http/HTTPResponseResource.java +++ b/src/main/java/org/archive/resource/http/HTTPResponseResource.java @@ -7,7 +7,6 @@ import java.security.NoSuchAlgorithmException; import java.util.logging.Logger; - import org.archive.format.http.HttpHeader; import org.archive.format.http.HttpResponse; import org.archive.format.http.HttpResponseMessage; @@ -20,8 +19,8 @@ import org.archive.util.io.EOFNotifyingInputStream; import org.archive.util.io.EOFObserver; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; @@ -65,7 +64,7 @@ public HTTPResponseResource(MetaData metaData, headers.putString(h.getName(),h.getValue()); } if(forceCheck && (length >= 0)) { - LimitInputStream lis = new LimitInputStream(response, length); + InputStream lis = ByteStreams.limit(response, length); countingIS = new CountingInputStream(lis); } else { countingIS = new CountingInputStream(response); diff --git a/src/main/java/org/archive/resource/warc/WARCResource.java b/src/main/java/org/archive/resource/warc/WARCResource.java index ab9b6900..80929206 100644 --- a/src/main/java/org/archive/resource/warc/WARCResource.java +++ b/src/main/java/org/archive/resource/warc/WARCResource.java @@ -19,8 +19,8 @@ import org.archive.util.io.EOFObserver; import org.archive.util.io.PushBackOneByteInputStream; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import 
com.google.common.io.LimitInputStream; public class WARCResource extends AbstractResource implements EOFObserver, ResourceConstants { CountingInputStream countingIS; @@ -51,7 +51,7 @@ public WARCResource(MetaData metaData, ResourceContainer container, if(length >= 0) { countingIS = new CountingInputStream( - new LimitInputStream(response, length)); + ByteStreams.limit(response, length)); } else { throw new ResourceParseException(null); } diff --git a/src/main/java/org/archive/url/BasicURLCanonicalizer.java b/src/main/java/org/archive/url/BasicURLCanonicalizer.java index c09ad6e6..5f39ce76 100644 --- a/src/main/java/org/archive/url/BasicURLCanonicalizer.java +++ b/src/main/java/org/archive/url/BasicURLCanonicalizer.java @@ -74,15 +74,15 @@ public void canonicalize(HandyURL url) { url.setPath(escapeOnce(normalizePath(path))); } - private static final Pattern SINGLE_FORWARDSLASH_PATTERN = Pattern - .compile("/"); + private static final Pattern SINGLE_FORWARDANDBACKSLASH_PATTERN = Pattern + .compile("[/\\\\]"); public String normalizePath(String path) { if (path == null) { path = "/"; } else { // -1 gives an empty trailing element if path ends with '/': - String[] paths = SINGLE_FORWARDSLASH_PATTERN.split(path, -1); + String[] paths = SINGLE_FORWARDANDBACKSLASH_PATTERN.split(path, -1); ArrayList keptPaths = new ArrayList(); boolean first = true; for (String p : paths) { diff --git a/src/main/java/org/archive/url/LaxURI.java b/src/main/java/org/archive/url/LaxURI.java index 807333d3..e1cea9b7 100644 --- a/src/main/java/org/archive/url/LaxURI.java +++ b/src/main/java/org/archive/url/LaxURI.java @@ -211,7 +211,7 @@ protected void setURI() { if (_scheme.length == 4 && Arrays.equals(_scheme, HTTP_SCHEME)) { _scheme = HTTP_SCHEME; } else if (_scheme.length == 5 - && Arrays.equals(_scheme, HTTP_SCHEME)) { + && Arrays.equals(_scheme, HTTPS_SCHEME)) { _scheme = HTTPS_SCHEME; } } diff --git a/src/main/java/org/archive/url/URLRegexTransformer.java 
b/src/main/java/org/archive/url/URLRegexTransformer.java index 930f5b34..c5505a74 100644 --- a/src/main/java/org/archive/url/URLRegexTransformer.java +++ b/src/main/java/org/archive/url/URLRegexTransformer.java @@ -101,7 +101,7 @@ public static String hostToPublicSuffix(String host) { InternetDomainName idn; try { - idn = InternetDomainName.fromLenient(host); + idn = InternetDomainName.from(host); } catch(IllegalArgumentException e) { return host; } @@ -109,7 +109,7 @@ public static String hostToPublicSuffix(String host) { if(tmp == null) { return host; } - String pubSuff = tmp.name(); + String pubSuff = tmp.toString(); int idx = host.lastIndexOf(".", host.length() - (pubSuff.length()+2)); if(idx == -1) { return host; diff --git a/src/main/java/org/archive/url/UsableURI.java b/src/main/java/org/archive/url/UsableURI.java index b9c4ff9d..ed40f41a 100644 --- a/src/main/java/org/archive/url/UsableURI.java +++ b/src/main/java/org/archive/url/UsableURI.java @@ -18,6 +18,7 @@ */ package org.archive.url; +import gnu.inet.encoding.IDNA; import java.io.File; import java.io.IOException; import java.io.ObjectOutputStream; @@ -271,6 +272,55 @@ public String toString() { return toCustomString(); } + /** + * In the case of a puny encoded IDN, this method returns the decoded Unicode version. + *

+ * Most of this implementation is copied from {@link org.apache.commons.httpclient.URI#setURI()}. + * + * @return decoded IDN version of URI + */ + public String toUnicodeHostString() { + if (!_is_hostname) { + return toString(); + } + + try { + StringBuilder buf = new StringBuilder(); + + if (_scheme != null) { + buf.append(_scheme); + buf.append(':'); + } + if (_is_net_path) { + buf.append("//"); + if (_authority != null) { // has_authority + if (_userinfo != null) { + buf.append(_userinfo).append('@'); + } + buf.append(IDNA.toUnicode(getHost())); + if (_port >= 0) { + buf.append(':').append(_port); + } + } + } + if (_opaque != null && _is_opaque_part) { + buf.append(_opaque); + } else if (_path != null) { + // _is_hier_part or _is_relativeURI + if (_path.length != 0) { + buf.append(_path); + } + } + if (_query != null) { // has_query + buf.append('?'); + buf.append(_query); + } + return buf.toString(); + } catch (URIException ex) { + throw new RuntimeException(ex); + } + } + public synchronized String getEscapedURI() { if (this.cachedEscapedURI == null) { this.cachedEscapedURI = super.getEscapedURI(); diff --git a/src/main/java/org/archive/url/UsableURIFactory.java b/src/main/java/org/archive/url/UsableURIFactory.java index 46b8e119..9118b850 100644 --- a/src/main/java/org/archive/url/UsableURIFactory.java +++ b/src/main/java/org/archive/url/UsableURIFactory.java @@ -20,7 +20,7 @@ import gnu.inet.encoding.IDNA; import gnu.inet.encoding.IDNAException; -import it.unimi.dsi.mg4j.util.MutableString; +import it.unimi.dsi.lang.MutableString; import java.io.UnsupportedEncodingException; import java.util.BitSet; diff --git a/src/main/java/org/archive/util/ArchiveUtils.java b/src/main/java/org/archive/util/ArchiveUtils.java index c41c0bc0..e4224384 100644 --- a/src/main/java/org/archive/util/ArchiveUtils.java +++ b/src/main/java/org/archive/util/ArchiveUtils.java @@ -104,7 +104,7 @@ public class ArchiveUtils { private static ThreadLocal threadLocalDateFormat(final 
String pattern) { ThreadLocal tl = new ThreadLocal() { protected SimpleDateFormat initialValue() { - SimpleDateFormat df = new SimpleDateFormat(pattern); + SimpleDateFormat df = new SimpleDateFormat(pattern, Locale.ENGLISH); df.setTimeZone(TimeZone.getTimeZone("GMT")); return df; } @@ -393,9 +393,9 @@ public static Date getDate(String d) throws ParseException { } final static SimpleDateFormat dateToTimestampFormats[] = - {new SimpleDateFormat("MM/dd/yyyy"), - new SimpleDateFormat("MM/yyyy"), - new SimpleDateFormat("yyyy")}; + {new SimpleDateFormat("MM/dd/yyyy", Locale.ENGLISH), + new SimpleDateFormat("MM/yyyy", Locale.ENGLISH), + new SimpleDateFormat("yyyy", Locale.ENGLISH)}; /** * Convert a user-entered date into a timestamp diff --git a/src/main/java/org/archive/util/DateUtils.java b/src/main/java/org/archive/util/DateUtils.java index e7fe78b7..d01b63ce 100755 --- a/src/main/java/org/archive/util/DateUtils.java +++ b/src/main/java/org/archive/util/DateUtils.java @@ -65,7 +65,7 @@ public class DateUtils { private static ThreadLocal threadLocalDateFormat(final String pattern) { ThreadLocal tl = new ThreadLocal() { protected SimpleDateFormat initialValue() { - SimpleDateFormat df = new SimpleDateFormat(pattern); + SimpleDateFormat df = new SimpleDateFormat(pattern, Locale.ENGLISH); df.setTimeZone(TimeZone.getTimeZone("GMT")); return df; } diff --git a/src/main/java/org/archive/util/TextUtils.java b/src/main/java/org/archive/util/TextUtils.java index 707f93c7..9061a161 100644 --- a/src/main/java/org/archive/util/TextUtils.java +++ b/src/main/java/org/archive/util/TextUtils.java @@ -36,8 +36,9 @@ import org.apache.commons.lang.StringEscapeUtils; -import com.google.common.base.Function; -import com.google.common.collect.MapMaker; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; public class TextUtils { private static final String FIRSTWORD = "^([^\\s]*).*$"; @@ -51,11 +52,11 @@ 
protected Map initialValue() { }; /** global soft-cache of Patterns, by string key */ - private static final ConcurrentMap PATTERNS = new MapMaker() + private static final LoadingCache PATTERNS = CacheBuilder.newBuilder() .concurrencyLevel(16) .softValues() - .makeComputingMap(new Function() { - public Pattern apply(String regex) { + .build(new CacheLoader() { + public Pattern load(String regex) { return Pattern.compile(regex); } }); @@ -84,7 +85,7 @@ public static Matcher getMatcher(String pattern, CharSequence input) { final Map matchers = TL_MATCHER_MAP.get(); Matcher m = (Matcher)matchers.get(pattern); if(m == null) { - m = PATTERNS.get(pattern).matcher(input); + m = PATTERNS.getUnchecked(pattern).matcher(input); } else { matchers.put(pattern,null); m.reset(input); diff --git a/src/main/java/org/archive/util/TmpDirTestCase.java b/src/main/java/org/archive/util/TmpDirTestCase.java new file mode 100644 index 00000000..09ec345b --- /dev/null +++ b/src/main/java/org/archive/util/TmpDirTestCase.java @@ -0,0 +1,119 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.util; + +import java.io.File; +import java.io.IOException; + +import junit.framework.TestCase; + + +/** + * Base class for TestCases that want access to a tmp dir for the writing + * of files. + * + * @author stack + */ +public abstract class TmpDirTestCase extends TestCase +{ + /** + * Name of the system property that holds pointer to tmp directory into + * which we can safely write files. + */ + public static final String TEST_TMP_SYSTEM_PROPERTY_NAME = "testtmpdir"; + + /** + * Default test tmp. + */ + public static final String DEFAULT_TEST_TMP_DIR = File.separator + "tmp" + + File.separator + "heritrix-junit-tests"; + + /** + * Directory to write temporary files to. + */ + private File tmpDir = null; + + + public TmpDirTestCase() + { + super(); + } + + public TmpDirTestCase(String testName) + { + super(testName); + } + + /* + * @see TestCase#setUp() + */ + protected void setUp() throws Exception { + super.setUp(); + this.tmpDir = tmpDir(); + } + + /** + * @return Returns the tmpDir. + */ + public File getTmpDir() + { + return this.tmpDir; + } + + /** + * Delete any files left over from previous run. + * + * @param basename Base name of files we're to clean up. + */ + public void cleanUpOldFiles(String basename) { + cleanUpOldFiles(getTmpDir(), basename); + } + + /** + * Delete any files left over from previous run. + * + * @param prefix Base name of files we're to clean up. + * @param basedir Directory to start cleaning in. + */ + public void cleanUpOldFiles(File basedir, String prefix) { + File [] files = FileUtils.getFilesWithPrefix(basedir, prefix); + if (files != null) { + for (int i = 0; i < files.length; i++) { + org.apache.commons.io.FileUtils.deleteQuietly(files[i]); + } + } + } + + + public static File tmpDir() throws IOException { + String tmpDirStr = System.getProperty(TEST_TMP_SYSTEM_PROPERTY_NAME); + tmpDirStr = (tmpDirStr == null)? 
DEFAULT_TEST_TMP_DIR: tmpDirStr; + File tmpDir = new File(tmpDirStr); + FileUtils.ensureWriteableDirectory(tmpDir); + + if (!tmpDir.canWrite()) + { + throw new IOException(tmpDir.getAbsolutePath() + + " is unwriteable."); + } + + return tmpDir; + } +} diff --git a/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java index 621c6bce..93757a45 100644 --- a/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java +++ b/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java @@ -6,7 +6,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.archive.util.binsearch.AbstractSeekableLineReader; -import com.google.common.io.LimitInputStream; +import com.google.common.io.ByteStreams; public class HDFSSeekableLineReader extends AbstractSeekableLineReader { private FSDataInputStream fsdis; @@ -23,7 +23,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException { fsdis.seek(offset); if (maxLength >= 0) { - return new LimitInputStream(fsdis, maxLength); + return ByteStreams.limit(fsdis, maxLength); } else { return fsdis; } diff --git a/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java index b211db16..5131dd06 100644 --- a/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java +++ b/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java @@ -7,7 +7,7 @@ import org.archive.util.binsearch.AbstractSeekableLineReader; -import com.google.common.io.LimitInputStream; +import com.google.common.io.ByteStreams; public class RandomAccessFileSeekableLineReader extends AbstractSeekableLineReader { @@ -24,7 +24,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException { FileInputStream fis = new FileInputStream(raf.getFD()); if 
(maxLength > 0) { - return new LimitInputStream(fis, maxLength); + return ByteStreams.limit(fis, maxLength); } else { return fis; } diff --git a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java index 9bd7542b..bc5b83f4 100644 --- a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java +++ b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.Locale; import java.util.logging.Logger; import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; @@ -156,7 +157,7 @@ public boolean isStaleChecking() public long getModTime() { HTTPSeekableLineReader reader = null; - SimpleDateFormat lastModFormat = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz"); + SimpleDateFormat lastModFormat = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.ENGLISH); try { reader = get(); diff --git a/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java b/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java new file mode 100644 index 00000000..14f8489d --- /dev/null +++ b/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java @@ -0,0 +1,28 @@ +package org.archive.extract; + +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLEncoder; + +import junit.framework.TestCase; + + +public class RealCDXExtractorOutputTest extends TestCase { + + public void testEscapeResolvedUrl() throws Exception { + String context ="http://www.uni-giessen.de/cms/studium/dateien/informationberatung/merkblattpdf"; + String spec = "http://fss.plone.uni-giessen.de/fß/studium/dateien/informationberatung/merkblattpdf/file/Mérkblatt zur Gestaltung von Nachteilsausgleichen.pdf?föo=bar#änchor"; + String 
escaped = RealCDXExtractorOutput.resolve(context, spec); + assertTrue(escaped.indexOf(" ") < 0); + URI parsed = new URI(escaped); + assertEquals("änchor", parsed.getFragment()); + } + + public void testNoDoubleEscaping() throws Exception { + String spec = "https://www.google.com/search?q=java+escape+url+spaces&ie=utf-8&oe=utf-8"; + String resolved = RealCDXExtractorOutput.resolve(spec, spec); + assertEquals(spec, resolved); /* resolving this URL against itself must return it unchanged */ + } +} diff --git a/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java b/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java index 95c7e96f..2eec46ec 100644 --- a/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java +++ b/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java @@ -374,6 +374,9 @@ public void testAutoSkip() throws IOException { assertNull(m); assertTrue(s.gotEOF()); } - + public void testWgetProblem() throws IndexOutOfBoundsException, FileNotFoundException, IOException { + InputStream is = getClass().getResourceAsStream("IAH-urls-wget.warc.gz"); + new GZIPDecoder().parseHeader(is); + } } diff --git a/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java b/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java index 5cd75ccf..483d2baf 100644 --- a/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java +++ b/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java @@ -12,8 +12,8 @@ public class GZIPMemberWriterTest extends TestCase { public void testWrite() throws IOException { - String outPath = "/tmp/tmp.gz"; - GZIPMemberWriter gzw = new GZIPMemberWriter(new FileOutputStream(new File(outPath))); + File outFile = File.createTempFile("tmp", ".gz"); + GZIPMemberWriter gzw = new GZIPMemberWriter(new FileOutputStream(outFile)); gzw.write(new ByteArrayInputStream("Here is record 1".getBytes(IAUtils.UTF8))); gzw.write(new ByteArrayInputStream("Here is record 2".getBytes(IAUtils.UTF8))); } diff --git
a/src/test/java/org/archive/io/RecordingOutputStreamTest.java b/src/test/java/org/archive/io/RecordingOutputStreamTest.java new file mode 100644 index 00000000..f697ff31 --- /dev/null +++ b/src/test/java/org/archive/io/RecordingOutputStreamTest.java @@ -0,0 +1,360 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.archive.io; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; + +import org.archive.util.Base32; +import org.archive.util.TmpDirTestCase; + + +/** + * Test cases for RecordingOutputStream. + * + * @author stack + */ +public class RecordingOutputStreamTest extends TmpDirTestCase +{ + /** + * Size of buffer used in tests. + */ + private static final int BUFFER_SIZE = 5; + + /** + * Total number of bytes to write when testing RecordingOutputStream. + */ + private static final int WRITE_TOTAL = 10; + + + /* + * @see TmpDirTestCase#setUp() + */ + protected void setUp() throws Exception + { + super.setUp(); + } + + /** + * Test reusing instance of RecordingOutputStream. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording.
+ */ + public void testReuse() + throws IOException + { + final String BASENAME = "testReuse"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Bkg.txt")).getAbsolutePath()); + for (int i = 0; i < 3; i++) + { + reuse(BASENAME, ros, i); + } + } + + private void reuse(String baseName, RecordingOutputStream ros, int index) + throws IOException + { + final String BASENAME = baseName + Integer.toString(index); + File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Verify a second time to check that a new ReplayInputStream can be + // obtained from the same RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Method to test for void write(int). + * + * Uses small buffer size and small write size. Mark/reset is not exercised here. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording. + */ + public void testWriteint() + throws IOException + { + final String BASENAME = "testWriteint"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Verify a second time to check that a new ReplayInputStream can be + // obtained from the same RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Method to test for void write(byte []). + * + * Uses small buffer size and small write size. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording.
+ */ + public void testWritebytearray() + throws IOException + { + final String BASENAME = "testWritebytearray"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Do again to test that I can get a new ReplayInputStream on same + // RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Test that reset() on the replay stream returns to the position recorded by mark(). + * @throws IOException Failed writing the recorded file or reading the replay stream. + */ + public void testMarkReset() throws IOException + { + final String BASENAME = "testMarkReset"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + ReplayInputStream ris = ros.getReplayInputStream(); + ris.mark(10 /*Arbitrary value*/); + // Read from the stream. + ris.read(); + ris.read(); + ris.read(); + // Reset it. It should be back at zero. Assertions use (message, expected, actual) order. + ris.reset(); + assertEquals("Reset to zero", 0, ris.read()); + assertEquals("Reset to zero char 1", 1, ris.read()); + assertEquals("Reset to zero char 2", 2, ris.read()); + // Mark stream. Here. Next byte read should have value 3. + ris.mark(10 /* Arbitrary value*/); + ris.read(); + ris.read(); + ris.reset(); + assertEquals("Reset to zero char 3", 3, ris.read()); + } + + /** + * Record a file write. + * + * Write a file w/ characters that start at null and ascend to + * filesize. Record the writing w/ passed ros + * recordingoutputstream. Return the file recorded as result of method. + * The file output stream that is recorded is named + * basename + ".txt". + * + *

This method writes a character at a time. + * + * @param ros RecordingOutputStream to record with. + * @param basename Basename of file. + * @param size How many bytes to write. + * @return The file that the recorded output was written to. + */ + private File writeIntRecordedFile(RecordingOutputStream ros, + String basename, int size) + throws IOException + { + File f = new File(getTmpDir(), basename + ".txt"); + FileOutputStream fos = new FileOutputStream(f); + ros.open(fos); + for (int i = 0; i < size; i++) /* bounded by the size argument, which the assertion below checks */ + { + ros.write(i); + } + ros.close(); + fos.close(); + assertEquals("Content-Length test", size, + ros.getResponseContentLength()); + return f; + } + + /** + * Record a file byte array write. + * + * Write a file w/ characters that start at null and ascend to + * filesize. Record the writing w/ passed ros + * recordingoutputstream. Return the file recorded as result of method. + * The file output stream that is recorded is named + * basename + ".txt". + * + *

This method writes using a byte array. + * + * @param ros RecordingOutputStream to record with. + * @param basename Basename of file. + * @param size How many bytes to write. + * @return The file that the recorded output was written to. + */ + private File writeByteRecordedFile(RecordingOutputStream ros, + String basename, int size) + throws IOException + { + File f = new File(getTmpDir(), basename + ".txt"); + FileOutputStream fos = new FileOutputStream(f); + ros.open(fos); + byte [] b = new byte[size]; + for (int i = 0; i < size; i++) + { + b[i] = (byte)i; + } + ros.write(b); + ros.close(); + fos.close(); + assertEquals("Content-Length test", size, + ros.getResponseContentLength()); + return f; + } + + /** + * Verify what was written is both in the file written to and in the + * recording stream. + * + * @param ros Stream to check. + * @param f File that was recorded. Stream should have its content + * exactly. + * @param size Amount of bytes written. + * + * @exception IOException Failure reading streams.
+ */ + private void verifyRecording(RecordingOutputStream ros, File f, + int size) throws IOException + { + assertEquals("Recorded file size.", size, f.length()); + FileInputStream fis = new FileInputStream(f); + assertNotNull("FileInputStream not null", fis); + ReplayInputStream ris = ros.getReplayInputStream(); + assertNotNull("ReplayInputStream not null", ris); + for (int i = 0; i < size; i++) + { + assertEquals("ReplayInputStream content verification", i, + ris.read()); + assertEquals("Recorded file content verification", i, + fis.read()); + } + assertEquals("ReplayInputStream at EOF", -1, ris.read()); + fis.close(); + ris.close(); + } + + public void testMessageBodyBegin() throws IOException { + final String BASENAME = "testMessageBodyBegin"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + ros.setSha1Digest(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\nabcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\r\n\r\nabcdefghij".getBytes()); + assertEquals(14, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r\nabcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes()); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", 
Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes()); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\r\nabcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r".getBytes()); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes()); + ros.write('\n'); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes()); + ros.write('\n'); + ros.write('\n'); + for (int b: "abcdefghij".getBytes()) { + ros.write(b); + } + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes()); + ros.write('\n'); + ros.write('\r'); + ros.write('\n'); + for (int b: "abcdefghij".getBytes()) { + ros.write(b); + } + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes()); + ros.write('\n'); + ros.write("abcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + 
ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r".getBytes()); + ros.write('\n'); + ros.write("abcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + } +} diff --git a/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java new file mode 100644 index 00000000..0721f795 --- /dev/null +++ b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java @@ -0,0 +1,57 @@ +package org.archive.io.arc; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.io.RandomAccessFile; + +import org.archive.io.ArchiveReader; +import org.archive.io.ArchiveRecord; + +import junit.framework.TestCase; + +/** + * + * Based on https://github.com/iipc/openwayback/pull/104/files + * + * @author csr@statsbiblioteket.dk (Colin Rosenthal) + * + */ +public class ARCReaderFactoryTest extends TestCase { + + private File testfile1 = new File("src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc"); + + /** + * Test reading uncompressed arcfile for issue + * https://github.com/iipc/openwayback/issues/101 + * @throws Exception + */ + public void testGetResource() throws Exception { + this.offsetResourceTest(testfile1, 1515, "http://www.archive.org/robots.txt" ); + this.offsetResourceTest(testfile1, 36420, "http://www.archive.org/services/collection-rss.php" ); + } + + private void offsetResourceTest( File testfile, long offset, String uri ) throws Exception { + RandomAccessFile raf = new RandomAccessFile(testfile, "r"); + raf.seek(offset); + InputStream is = new FileInputStream(raf.getFD()); + String fPath = testfile.getAbsolutePath(); + ArchiveReader reader = ARCReaderFactory.get(fPath, is, false); + // This one works: + //ArchiveReader reader = 
ARCReaderFactory.get(testfile, offset); + ArchiveRecord record = reader.get(); + + final String url = record.getHeader().getUrl(); + assertEquals("URL of record is not as expected.", uri, url); + + final long position = record.getPosition(); + final long recordLength = record.getHeader().getLength(); + assertTrue("Position " + position + " is after end of record " + recordLength, position <= recordLength); + + // Clean up: + if( raf != null ) + raf.close(); + } + +} diff --git a/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java b/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java new file mode 100644 index 00000000..25028797 --- /dev/null +++ b/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java @@ -0,0 +1,34 @@ +package org.archive.io.warc; + +import java.io.FileInputStream; +import java.io.IOException; + +import org.archive.format.warc.WARCConstants; +import org.archive.format.warc.WARCConstants.WARCRecordType; +import org.archive.io.ArchiveReader; +import org.archive.io.ArchiveRecord; + +import junit.framework.TestCase; + +public class WARCReaderFactoryTest extends TestCase { + + // Test files: + String[] files = new String[] { + "src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz", + "src/test/resources/org/archive/format/warc/IAH-urls-wget.warc" + }; + + public void testGetStringInputstreamBoolean() throws IOException { + // Check the test files can be opened: + for( String file : files ) { + FileInputStream is = new FileInputStream(file); + ArchiveReader ar = WARCReaderFactory.get(file, is, true); + ArchiveRecord r = ar.get(); + String type = (String) r.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE); + // Check the first record comes out as a 'warcinfo' record. 
+ assertEquals(WARCRecordType.warcinfo.name(), type); + } + } + + +} diff --git a/src/test/java/org/archive/net/PublicSuffixesTest.java b/src/test/java/org/archive/net/PublicSuffixesTest.java index b88acb6d..ca6e6408 100644 --- a/src/test/java/org/archive/net/PublicSuffixesTest.java +++ b/src/test/java/org/archive/net/PublicSuffixesTest.java @@ -36,6 +36,7 @@ */ public class PublicSuffixesTest extends TestCase { // test of low level implementation + private final String NL = System.getProperty("line.separator"); public void testCompare() { Node n = new Node("hoge"); @@ -78,27 +79,26 @@ public void testTrie1() { Node alt = new Node(null, new ArrayList()); alt.addBranch("ac,"); // specifically, should not have empty string as match. - assertEquals("(null)\n" + - " \"ac,\"\n", dump(alt)); + assertEquals("(null)" + NL + " \"ac,\"" + NL, dump(alt)); alt.addBranch("ac,com,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"\"\n", dump(alt)); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"\"" + NL, dump(alt)); alt.addBranch("ac,edu,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"edu,\"\n" + - " \"\"\n", dump(alt)); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"edu,\"" + NL + + " \"\"" + NL, dump(alt)); } public void testTrie2() { Node alt = new Node(null, new ArrayList()); alt.addBranch("ac,"); alt.addBranch("*,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"*,\"\n", dump(alt)); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"*,\"" + NL, dump(alt)); } public void testTrie3() { @@ -107,11 +107,11 @@ public void testTrie3() { alt.addBranch("ac,!hoge,"); alt.addBranch("ac,*,"); // exception goes first. 
- assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"!hoge,\"\n" + - " \"*,\"\n" + - " \"\"\n", dump(alt)); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"!hoge,\"" + NL + + " \"*,\"" + NL + + " \"\"" + NL, dump(alt)); } // test of higher-level functionality diff --git a/src/test/java/org/archive/url/UsableURITest.java b/src/test/java/org/archive/url/UsableURITest.java index 2aec0e96..2a2f41f5 100644 --- a/src/test/java/org/archive/url/UsableURITest.java +++ b/src/test/java/org/archive/url/UsableURITest.java @@ -21,7 +21,6 @@ import java.net.URISyntaxException; import org.apache.commons.httpclient.URIException; -import org.archive.url.UsableURI; import junit.framework.TestCase; @@ -53,4 +52,31 @@ public void testSchemalessRelative() throws URIException { UsableURI test = new UsableURI(base, relative); assertEquals("http://www.facebook.com/?href=http://www.archive.org/a", test.toString()); } + + /** + * Test of toUnicodeHostString method, of class UsableURI. + */ + public void testToUnicodeHostString() throws URIException { + assertEquals("http://øx.dk", new UsableURI("http://xn--x-4ga.dk", true, "UTF-8").toUnicodeHostString()); + assertEquals("xn--x-4ga.dk", new UsableURI("xn--x-4ga.dk", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://user:pass@øx.dk:8080", new UsableURI("http://user:pass@xn--x-4ga.dk:8080", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://user@øx.dk:8080", new UsableURI("http://user@xn--x-4ga.dk:8080", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://øx.dk/foo/bar?query=q", new UsableURI("http://xn--x-4ga.dk/foo/bar?query=q", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://127.0.0.1/foo/bar?query=q", new UsableURI("http://127.0.0.1/foo/bar?query=q", true, "UTF-8").toUnicodeHostString()); + + // test idn round trip + // XXX fails because idn is not handled here (it is converted to punycode in UsableURIFactory.fixupDomainlabel()) + // assertEquals("http://øx.dk", new 
UsableURI("http://øx.dk", false, "UTF-8").toUnicodeHostString()); + // To check the round trip it is then necessary to use the factory method in UsableURIFactory. + assertEquals("http://øx.dk/", UsableURIFactory.getInstance("http://øx.dk/", "UTF-8").toUnicodeHostString()); + + // non-idn domain name + assertEquals("http://example.org", new UsableURI("http://example.org", true, "UTF-8").toUnicodeHostString()); + + // ensure a call to toUnicodeHostString() has no effect on toString() + UsableURI uri = new UsableURI("http://xn--x-4ga.dk", true, "UTF-8"); + assertEquals("http://øx.dk", uri.toUnicodeHostString()); + uri.setPath(uri.getPath()); // force toString() cached value to be recomputed + assertEquals("http://xn--x-4ga.dk", uri.toString()); + } } diff --git a/src/test/java/org/archive/util/ArchiveUtilsTest.java b/src/test/java/org/archive/util/ArchiveUtilsTest.java index 8251615a..586a1821 100644 --- a/src/test/java/org/archive/util/ArchiveUtilsTest.java +++ b/src/test/java/org/archive/util/ArchiveUtilsTest.java @@ -229,16 +229,19 @@ public void testByteArrayEquals() { /** test doubleToString() */ public void testDoubleToString(){ - double test = 12.345; - assertTrue( + double test = 12.121d; + assertEquals( "cecking zero precision", - ArchiveUtils.doubleToString(test, 0).equals("12")); - assertTrue( + "12", + ArchiveUtils.doubleToString(test, 0)); + assertEquals( "cecking 2 character precision", - ArchiveUtils.doubleToString(test, 2).equals("12.34")); - assertTrue( + "12.12", + ArchiveUtils.doubleToString(test, 2)); + assertEquals( "cecking precision higher then the double has", - ArchiveUtils.doubleToString(test, 65).equals("12.345")); + "12.121", + ArchiveUtils.doubleToString(test, 65)); } diff --git a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java index 2c9d19e8..8f812b75 100644 --- a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java +++ 
b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java @@ -25,7 +25,7 @@ private void createFile(File target, int max) throws FileNotFoundException { public void testGetRecordIteratorStringBoolean() throws IOException { - File test = new File("/tmp/test.tmp"); + File test = File.createTempFile("test", null); int max = 1000000; createFile(test,max); RandomAccessFileSeekableLineReaderFactory factory = diff --git a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java index f1c2a0ec..11ea1229 100644 --- a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java +++ b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java @@ -4,6 +4,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; +import java.io.IOException; import java.io.PrintWriter; import java.util.Comparator; @@ -11,21 +12,11 @@ public class SortedCompositeIteratorTest extends TestCase { - public void testHasNext() throws FileNotFoundException { + public void testHasNext() throws FileNotFoundException, IOException { - long t = 210000; - long c = 134; - float f = (float)c / (float)t; - System.err.format("F(%f)\n",f); + File a = File.createTempFile("filea", null); + File b = File.createTempFile("fileb", null); - File a = new File("/tmp/a"); - File b = new File("/tmp/b"); - if(a.isFile()) { - a.delete(); - } - if(b.isFile()) { - b.delete(); - } PrintWriter apw = new PrintWriter(a); PrintWriter bpw = new PrintWriter(b); apw.println("1"); @@ -38,6 +29,7 @@ public void testHasNext() throws FileNotFoundException { BufferedReader bbr = new BufferedReader(new FileReader(b)); SortedCompositeIterator sci = new SortedCompositeIterator(new Comparator() { + @Override public int compare(String o1, String o2) { return o1.compareTo(o2); } diff --git a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java 
b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java index d3dc1ff6..710ff069 100644 --- a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java +++ b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java @@ -30,7 +30,7 @@ import org.archive.util.ArchiveUtils; import org.archive.util.zip.GZIPMembersInputStream; -import com.google.common.io.NullOutputStream; +import com.google.common.io.ByteStreams; import com.google.common.primitives.Bytes; /** @@ -70,14 +70,14 @@ public static void main(String [] args) { public void testFullReadAllFour() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); - int count = IOUtils.copy(gzin, new NullOutputStream()); + int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong length uncompressed data", 1024+(32*1024)+1+5, count); } public void testFullReadSixSmall() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz)); - int count = IOUtils.copy(gzin, new NullOutputStream()); + int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong length uncompressed data", 1+5+1+5+1+5, count); } @@ -85,31 +85,31 @@ public void testReadPerMemberAllFour() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); - int count0 = IOUtils.copy(gzin, new NullOutputStream()); + int count0 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1k member count", 1024, count0); assertEquals("wrong member number", 0, gzin.getMemberNumber()); assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count1 = IOUtils.copy(gzin, new NullOutputStream()); + int count1 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); 
assertEquals("wrong 32k member count", (32*1024), count1); assertEquals("wrong member number", 1, gzin.getMemberNumber()); assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count2 = IOUtils.copy(gzin, new NullOutputStream()); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); assertEquals("wrong member number", 2, gzin.getMemberNumber()); assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); assertEquals("wrong member number", 3, gzin.getMemberNumber()); assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong eof count", 0, countEnd); } @@ -118,14 +118,14 @@ public void testReadPerMemberSixSmall() throws IOException { new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz)); gzin.setEofEachMember(true); for(int i = 0; i < 3; i++) { - int count2 = IOUtils.copy(gzin, new NullOutputStream()); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); + int count3 = IOUtils.copy(gzin, 
ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); gzin.nextMember(); } - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong eof count", 0, countEnd); } @@ -172,19 +172,19 @@ public void testMemberSeek() throws IOException { new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); gzin.compressedSeek(noise1k_gz.length+noise32k_gz.length); - int count2 = IOUtils.copy(gzin, new NullOutputStream()); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); // assertEquals("wrong Member number", 2, gzin.getMemberNumber()); assertEquals("wrong Member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong Member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); // assertEquals("wrong Member number", 3, gzin.getMemberNumber()); assertEquals("wrong Member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong Member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong eof count", 0, countEnd); } @@ -195,7 +195,7 @@ public void testMemberIterator() throws IOException { Iterator iter = gzin.memberIterator(); assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember0 = iter.next(); - int count0 = IOUtils.copy(gzMember0, new NullOutputStream()); + int count0 = IOUtils.copy(gzMember0, 
ByteStreams.nullOutputStream()); assertEquals("wrong 1k member count", 1024, count0); assertEquals("wrong member number", 0, gzin.getMemberNumber()); assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); @@ -203,7 +203,7 @@ public void testMemberIterator() throws IOException { assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember1 = iter.next(); - int count1 = IOUtils.copy(gzMember1, new NullOutputStream()); + int count1 = IOUtils.copy(gzMember1, ByteStreams.nullOutputStream()); assertEquals("wrong 32k member count", (32*1024), count1); assertEquals("wrong member number", 1, gzin.getMemberNumber()); assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); @@ -211,7 +211,7 @@ public void testMemberIterator() throws IOException { assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember2 = iter.next(); - int count2 = IOUtils.copy(gzMember2, new NullOutputStream()); + int count2 = IOUtils.copy(gzMember2, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); assertEquals("wrong member number", 2, gzin.getMemberNumber()); assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); @@ -219,7 +219,7 @@ public void testMemberIterator() throws IOException { assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember3 = iter.next(); - int count3 = IOUtils.copy(gzMember3, new NullOutputStream()); + int count3 = IOUtils.copy(gzMember3, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); assertEquals("wrong member number", 3, gzin.getMemberNumber()); assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); diff --git a/src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc b/src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc new file mode 100644 index 00000000..3cbffb81 --- 
/dev/null +++ b/src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc @@ -0,0 +1,1006 @@ +filedesc://IAH-20080430204825-00000-blackbook-truncated.arc 0.0.0.0 20080430204825 text/plain 1300 +1 1 InternetArchive +URL IP-address Archive-date Content-type Archive-length + + +Heritrix @VERSION@ http://crawler.archive.org +blackbook +192.168.1.13 +archive.org-shallow +archive.org shallow +Admin +2008-04-30T20:48:24+00:00 +Mozilla/5.0 (compatible; heritrix/1.14.0 +http://crawler.archive.org) +archive-crawler-agent@lists.sourceforge.net +classic +ARC file version 1.1 +http://www.archive.org/web/researcher/ArcFileFormat.php + +dns:www.archive.org 68.87.76.178 20080430204825 text/dns 56 +20080430204825 +www.archive.org. 589 IN A 207.241.229.39 +http://www.archive.org/robots.txt 207.241.229.39 20080430204825 text/plain 782 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:24 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Sat, 02 Feb 2008 19:40:44 GMT +ETag: "47c3-1d3-11134700" +Accept-Ranges: bytes +Content-Length: 467 +Connection: close +Content-Type: text/plain; charset=UTF-8 + +############################################## +# +# Welcome to the Archive! +# +############################################## +# Please crawl our files. +# We appreciate if you can crawl responsibly. +# Stay open! +############################################## +User-agent: * +Disallow: /nothing---please-crawl-us-- + +# slow down the ask jeeves crawler which was hitting our SE a little too fast +# via collection pages. 
--Feb2008 tracey-- +User-agent: Teoma +Crawl-Delay: 10 +http://www.archive.org/ 207.241.229.39 20080430204826 text/html 680 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:25 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Wed, 09 Jan 2008 23:18:29 GMT +ETag: "47ac-16e-4f9e5b40" +Accept-Ranges: bytes +Content-Length: 366 +Connection: close +Content-Type: text/html; charset=UTF-8 + + + + + + + +
+Please visit our website at: +http://www.archive.org + + +http://www.archive.org/index.php 207.241.229.39 20080430204826 text/html 29000 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:25 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +X-Powered-By: PHP/5.0.5-2ubuntu1.4 +Set-Cookie: PHPSESSID=657fa9749e9426f2ffa75f14b54ed4ac; path=/; domain=.archive.org +Connection: close +Content-Type: text/html; charset=UTF-8 + + + + + + + Internet Archive + + + + + + + + + + + + +
+ (logo) + + + + + + +
+ +Web | +Moving Images | +Texts | +Audio | +Software | +Education | +Patron Info | +About IA
Forums | FAQs | Contributions | Jobs | Donate
+ + + + + +
+

+ Search: + + + + + + + +

+
+
+
+ (navigation image)
+

+ + + + + + + + + + + + + + + + +
UploadAnonymous User (login or join us) 
   +
Announcements (more)
+
   +
Web85 billion pages
+
+
+
+ + + + + + + + +
+ (wayback logo) + + +
+ + + + Advanced Search + +
+
+
+
   +
Welcome to the ArchiveSee recent additions in RSS
+
+
+The Internet Archive is building a digital library of Internet + sites and other cultural artifacts in digital form. Like a paper + library, we provide free access to researchers, historians, + scholars, and the general public.
+
  
+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
   +
Moving Images
 115,646 movies
movies icon
+
+
+
Browse   + + (by keyword)
+
   +
Live Music Archive
 48,893 concerts
etree icon
+
+
+
Browse   + + (by band)
+
   +
Audio
 250,854 recordings
audio icon
+
+
+
Browse   + + (by keyword)
+
   +
Texts
 395,004 texts
texts icon
+
+
+
Browse   + + (by keyword)
+
  
   +
Curator's Choice (more)
+
(movies pick)
A Few Good G-Men
Randall Glass, the maker of "Warthog Jump," re-creates in "A Few Good G-Men" an entire scene from...
+
   +
Curator's Choice (more)
+
(etree pick)
Grateful Dead Live at Nashville Municipal...
Set 1 Sugaree Beat It On Down The Line Candyman Me And My Uncle -> Big River Stagger Lee Looks Like...
+
   +
Curator's Choice (more)
+
(audio pick)
Zanstones - Slaakhuis: Live in Rotterdam, Holland
Zanstones confuses the dutch masses with this live display of wacked rhythms, whacked vocals, and...
+
   +
Curator's Choice (more)
+
  
   +
Recent Reviews
+
   +
Recent Reviews
+
   +
Recent Reviews
+
   +
Recent Reviews
+
  
+ + +
+ + + +
   +
+ + + + + + + + + + + + + +
Most recent posts (write a post by going to a forum) more...
Subject Poster Forum RepliesViewsDate
Re: Making a mix for a chick I know... William Tell GratefulDead 0 6 20 minutes ago
Re: Bob's shorts not going into archives BobsShortShorts GratefulDead 0 9 26 minutes ago
Re: Thanks to All airgarcia416 GratefulDead 0 5 26 minutes ago
Re: Bob's shorts not going into archives sydthecat2 GratefulDead 0 8 36 minutes ago
Re: What is the worst-reviewed feature film on IA? RipJarvis feature_films 0 9 50 minutes ago
Re: Playin' In The Band...all day and all night sydthecat2 GratefulDead 0 11 58 minutes ago
Re: Playin' In The Band...all day and all night rastamon GratefulDead 0 16 1 hour ago
Re: Making a mix for a chick I know... caspersvapors GratefulDead 1 11 1 hour ago
Re: Bob's shorts not going into archives rastamon GratefulDead 0 11 1 hour ago
Re: Bob's shorts not going into archives bluedevil GratefulDead 1 13 1 hour ago
+
  
+ + +
  + 
+ + +
+

Skin: classic | columns | custom!
+ + Terms of Use (10 Mar 2001) +

+ +http://www.archive.org/images/logoc.jpg 207.241.229.39 20080430204829 image/jpeg 1963 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:28 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Mon, 16 Jun 2003 22:28:51 GMT +ETag: "34dc-67e-2ed02ec0" +Accept-Ranges: bytes +Content-Length: 1662 +Connection: close +Content-Type: image/jpeg + +JFIFddAdobe ImageReadyDucky<Adobed   + + + +     8F !1AQqa"B2R#Sc$T%'!1qAa2"B3 ?P@a@€ Pj,($@ %i Q6;eH0Yz[,3TRhL0AR:(cq ?0SBrJҋ$3&9BAPH +ƱUOAv_O77\Q]Ɣ,) +R7ŠU4ٗшeB:%n'Eq y- )H[%TR{;4*26n.IQp7;|-F8N}|tInތ}RDwPΡ1&L`{Ԋި'w Jb$ I>b] +-z;%ԭKY<*sjJ=}.?]Qn*bg?Fǟ/fi__:V۪?'\xdP5GKu:㶱罠~jcas&AsZdX +Pؑ̚G-,VoC/#%>TwIDmr9%'F $O?w}OFӋ*#{%Zy +W rs]2Ƅ&.5)ISd)7J[G}IYGMͪި,*4oP ʱjzJP17 p%]ɁiK31kAiP$90kVD1wmĞ"F2rXmˤFxp_ƩO.=ίsm|j}K~`e)Ru ^ַVPC%J> ʽ4}<6K +xfv"_2[?ۺ@ *4p3Q{rJ5yk'>c 艂]pCSjyH*O:`<)w@ݖnߟzᦃ寑Ĭf3d2c1c1b0a0^/\.Z-W+e3c2a1_0[.Y-W,V+h5h7l9l;n=p?pArCtFvIyLzL{N}R}R~SԀUՁW׆]׈`َiےn۔pܖsޚxޜ{ߝ|ࢃ⦈⧉岘洛縟躣ȵf5h7n?䮔!,@Coz0>Wq& y'HAFp@Ǔ(=.YB$ q'M/0q@A¨&QR48In``iC +1܄礞=V04Sn\"mxTQPA a$ +pI$[$R C㕐IUHYc̖-_Ȍ):l(q>)t' ׉ Dh3{% ,&d6D3H`(GC4JP@C;http://www.archive.org/images/star.png 207.241.229.39 20080430204830 image/png 564 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:29 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Sun, 27 Feb 2005 21:35:31 GMT +ETag: "358d-109-f15f4ec0" +Accept-Ranges: bytes +Content-Length: 265 +Connection: close +Content-Type: image/png + +PNG + + IHDR e^|3PLTEXtRNS@fbKGDH pHYs  tIME; BFoOIDATUQ0P霓?P1~@a 9?C31aѻ__ucTCԑJٕ4xW{2~aIENDB`http://www.archive.org/services/collection-rss.php 207.241.229.39 20080430204830 text/xml 50832 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:29 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +X-Powered-By: PHP/5.0.5-2ubuntu1.4 +Connection: close +Content-Type: text/xml + + + + + http://www.archive.org + Internet Archive + The most recent additions to the Internet Archive 
collections. This RSS feed is generated dynamically + tracey@archive.org + Wed, 30 Apr 2008 20:48:29 GMT + + http://www.archive.org/images/logo.jpg + Internet Archive + http://www.archive.org + + + ArtTECHtonic 5 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Arttechtonic5&mediatype=audio&collection=opensource_audio"/><p>An interview with Gretchen Wagner, General Counsel and Secretary of ARTstor on Fair Use.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/Arttechtonic5 + http://www.archive.org/details/Arttechtonic5 + Wed, 30 Apr 2008 20:44:20 GMT + http://creativecommons.org/licenses/publicdomain/ + audio/opensource_audio + + fair_use, ARTstor, libraries + + + ۩۞۩ جبريل يسأل والنبى يجيب - ترجمه الأمام مسلم (30-4-2008)۩۞۩ للشيخ محمد حسان ۩۞۩ + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=55322&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: 24Kbps MP3, Cinepack, Metadata</p> + http://www.archive.org/details/55322 + http://www.archive.org/details/55322 + Wed, 30 Apr 2008 20:43:16 GMT + movies/opensource_movies + + + + alsrdaab + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=alsrdaab_125&mediatype=Other&collection=ourmedia"/><p>alsrdaab.</p><p>This item belongs to: Other/ourmedia.</p><p>This item has files of the following types: Metadata, ZIP</p> + http://www.archive.org/details/alsrdaab_125 + http://www.archive.org/details/alsrdaab_125 + Wed, 30 Apr 2008 20:43:01 GMT + Other/ourmedia + + alsrdaab + + + DOC-DEBUT: Super Amigos + <img 
width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=linktv_superamigos20080430&mediatype=movies&collection=opensource_movies"/><p>This action filled documentary follows five real-life "social wrestlers" in Mexico City who have capitalized on the popularity of Mexico's larger than life Lucha Libre wrestlers to fight for social justice rather than trophies..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: FLV 400k, MPEG4 350Kb, MPEG4 60Kb, Metadata</p> + http://www.archive.org/details/linktv_superamigos20080430 + http://www.archive.org/details/linktv_superamigos20080430 + Wed, 30 Apr 2008 20:41:22 GMT + movies/opensource_movies + + + + erwews + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=reit987erfed&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Windows Media</p> + http://www.archive.org/details/reit987erfed + http://www.archive.org/details/reit987erfed + Wed, 30 Apr 2008 20:40:57 GMT + movies/opensource_movies + + + + quran + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=mohadart&mediatype=movies&collection=opensource_movies"/><p>walo had sa3a.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Unknown</p> + http://www.archive.org/details/mohadart + http://www.archive.org/details/mohadart + Wed, 30 Apr 2008 20:38:31 GMT + movies/opensource_movies + + + + asdas + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=asdas_725&mediatype=Image&collection=ourmedia"/><p>saa.</p><p>This item belongs to: Image/ourmedia.</p><p>This 
item has files of the following types: Metadata</p> + http://www.archive.org/details/asdas_725 + http://www.archive.org/details/asdas_725 + Wed, 30 Apr 2008 20:37:02 GMT + Image/ourmedia + + saas + + + nibrasukul + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=nibrasukul&mediatype=texts&collection=opensource"/><p>nibrasukul.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Metadata, PDF</p> + http://www.archive.org/details/nibrasukul + http://www.archive.org/details/nibrasukul + Wed, 30 Apr 2008 20:36:56 GMT + texts/opensource + + nibrasukul + + + rtyed + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=hyu121&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Windows Media Audio</p> + http://www.archive.org/details/hyu121 + http://www.archive.org/details/hyu121 + Wed, 30 Apr 2008 20:36:53 GMT + movies/opensource_movies + + + + remomberfiler58.info + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=uictfwt&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Unknown</p> + http://www.archive.org/details/uictfwt + http://www.archive.org/details/uictfwt + Wed, 30 Apr 2008 20:21:58 GMT + movies/opensource_movies + + + + The committing magistrate, a treatise on the arrest, examination, bailing, and commitment of offenders, including fugitives from justice, with the remedial features of the writs of habeas corpus, certiorari, mandamus, and prohibition + <img width="160" style="padding-right:3px;float:left;" 
src="http://www.archive.org/services/get-item-image.php?identifier=committingmagist00flam&mediatype=texts&collection=americana"/><p>No description available.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, Grayscale LuraTech PDF, Metadata, Single Page Library JP2 ZIP, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Single Page Watermark JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/committingmagist00flam + http://www.archive.org/details/committingmagist00flam + Wed, 30 Apr 2008 03:14:26 GMT + texts/americana + + Police magistrates -- New York (State), Habeas corpus, Mandamus, Prohibition (Law), Appellate procedure -- New York (State) + + + Papers and addresses + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=papersaddresses00bras&mediatype=texts&collection=americana"/><p>No description available.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, Grayscale LuraTech PDF, Metadata, Single Page Library JP2 ZIP, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Single Page Watermark JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/papersaddresses00bras + http://www.archive.org/details/papersaddresses00bras + Wed, 30 Apr 2008 01:11:05 GMT + texts/americana + + Imperial federation, Great Britain -- Colonies + + + European years; the letters of an idle man + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=europeanyearslet00warn&mediatype=texts&collection=americana"/><p>No description available.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, 
Grayscale LuraTech PDF, Metadata, Single Page Library JP2 ZIP, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Single Page Watermark JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/europeanyearslet00warn + http://www.archive.org/details/europeanyearslet00warn + Tue, 29 Apr 2008 16:43:31 GMT + texts/americana + + Europe -- Description and travel, United States -- Description and travel + + + Cable and satellite carrier compulsory licenses : hearing before the Subcommittee on Intellectual Property and Judicial Administration of the Committee on the Judiciary, House of Representatives, One Hundred Third Congress, first session, on H.R. 759 and H.R. 1103 ... March 17, 1993 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=cablesatelliteca00unit&mediatype=texts&collection=americana"/><p>Includes bibliographical references.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, Grayscale LuraTech PDF, Metadata, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/cablesatelliteca00unit + http://www.archive.org/details/cablesatelliteca00unit + Tue, 29 Apr 2008 16:28:47 GMT + texts/americana + + Cable television -- Licenses United States, Direct broadcast satellite television -- Licenses United States + + + Leinender + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Leinender&mediatype=Other&collection=ourmedia"/><p>TSOP Leinender.</p><p>This item belongs to: Other/ourmedia.</p><p>This item has files of the following types: Metadata, ZIP</p> + http://www.archive.org/details/Leinender + http://www.archive.org/details/Leinender + Tue, 29 Apr 2008 06:59:06 GMT + Other/ourmedia + + TSOP + + + tribute + <img width="160" 
style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=tribute_488&mediatype=audio&collection=opensource_audio"/><p>Tribute Yanni music Vocal: Nathan-Pacheco.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/tribute_488 + http://www.archive.org/details/tribute_488 + Tue, 29 Apr 2008 06:53:43 GMT + audio/opensource_audio + + Yanni, Tribute + + + La Voz de Brasil #012: Efemerides 2008 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=RodrigoDubLaVozdeBrasil_012_Efemerides2008&mediatype=Audio&collection=ourmedia"/><p>¡Ogum yê! En el año en que el Sobrevivendo no Inferno de los Racionais MC's completa su decimo cumpleaño, la Voz de Brasil presenta algunas otras efemérides - empezando por João Gilberto y la primera invención de Brasil (la segunda fué del maestro Jorge Ben, pero eso queda para otro programa....</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 160Kbps MP3, 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/RodrigoDubLaVozdeBrasil_012_Efemerides2008 + http://www.archive.org/details/RodrigoDubLaVozdeBrasil_012_Efemerides2008 + Tue, 29 Apr 2008 06:53:09 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + musica, brasil, brasileña, brazilian, music, psicodelia, psychodelic + + + Doubleknit Podcast #1 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=DoubleknitPodcast1&mediatype=audio&collection=opensource_audio"/><p>Debut podcast of the Doubleknit Twins..</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 
64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/DoubleknitPodcast1 + http://www.archive.org/details/DoubleknitPodcast1 + Tue, 29 Apr 2008 06:51:57 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/ + audio/opensource_audio + + knit, knitting, yarn, seattle + + + etceterapodcast37 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=etceterapodcast37_831&mediatype=audio&collection=opensource_audio"/><p>Restaurantes repetidos, los lenguages que se pierden, trencito de gendarmería, trailer de la hamburgesa perfecta, la película fué Shaun of The Dead, guerra en la convención de Taekwondo, pez globo, hara-kiris, océanos del mundo, el efecto Rocky y un Adam Sandler, el punto en que las a....</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/etceterapodcast37_831 + http://www.archive.org/details/etceterapodcast37_831 + Tue, 29 Apr 2008 06:49:31 GMT + http://creativecommons.org/licenses/by-nc-nd/2.5/ar/ + audio/opensource_audio + + Anhdres, Andres, Nahuel, Etcetera, Etc, Podcast, Español, Spanish, Buenos Aires, Argentina + + + midoz + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=22008-04-02.wwe0002&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Real Media</p> + http://www.archive.org/details/22008-04-02.wwe0002 + http://www.archive.org/details/22008-04-02.wwe0002 + Tue, 29 Apr 2008 06:47:41 GMT + movies/opensource_movies + + + + music9 + <img width="160" style="padding-right:3px;float:left;" 
src="http://www.archive.org/services/get-item-image.php?identifier=music9&mediatype=audio&collection=opensource_audio"/><p>music9.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/music9 + http://www.archive.org/details/music9 + Tue, 29 Apr 2008 06:45:18 GMT + audio/opensource_audio + + music9 + + + desire + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=desire_741&mediatype=audio&collection=opensource_audio"/><p>Desire YANNI music Vocal: Ender-Thomas.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/desire_741 + http://www.archive.org/details/desire_741 + Tue, 29 Apr 2008 06:44:53 GMT + audio/opensource_audio + + Desire * Yanni * Ender-Thomas + + + Live at Nelson Ledges Quarry Park on 2008-04-25 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=eh2008-04-25.dubsbd.16441&mediatype=etree&collection=EkoostikHookah"/><p>Set 1 (Disc 1): 01. Chicago-> 02. Hookahville-> 03. Chicago 04. Mississippi Steamboat 05. Sure Cure For the Blues* 06. Mexican Opera* 07. Sail Away 08. Washboard Annie 09. Serpentine 10. 
Tumblin' Set 2 (Disc 2): 01....</p><p>This item belongs to: etree/EkoostikHookah.</p><p>This item has files of the following types: Flac, Flac FingerPrint, Metadata, Text</p> + http://www.archive.org/details/eh2008-04-25.dubsbd.16441 + http://www.archive.org/details/eh2008-04-25.dubsbd.16441 + Tue, 29 Apr 2008 06:44:36 GMT + etree/EkoostikHookah + + + + Live at Nelson Ledges Quarry Park on 2008-04-26 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=eh2008-04-26.dubsbd.16441&mediatype=etree&collection=EkoostikHookah"/><p>Set 1 (Disc 1): 01. Right Back Out in the Streets 02. Utopia 03. The Devil & Me 04. When the Sun Goes Down 05. Rocketman 06. Stuck In the Snow 07. John Henry 08. Green 09. Shadane Set 2 (Disc 2): 01. Ecstasy 02....</p><p>This item belongs to: etree/EkoostikHookah.</p><p>This item has files of the following types: Flac, Flac FingerPrint, Metadata, Text</p> + http://www.archive.org/details/eh2008-04-26.dubsbd.16441 + http://www.archive.org/details/eh2008-04-26.dubsbd.16441 + Tue, 29 Apr 2008 06:44:23 GMT + etree/EkoostikHookah + + + + Burn C.C. + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=BurnC.c&mediatype=movies&collection=opensource_movies"/><p>Burn C.C..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash Video, Metadata, QuickTime, Thumbnail</p> + http://www.archive.org/details/BurnC.c + http://www.archive.org/details/BurnC.c + Tue, 29 Apr 2008 06:43:49 GMT + movies/opensource_movies + + + + + + + Burn C.C. 
+ + + TOTD 29 APRIL JAM 17 - DEWI SHINTAW ATY - ICHSANUDIN NOORSY - MANTAN ANGGOTA DPR - PEMERASAN OLEH JAKSA + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=visioning_641&mediatype=audio&collection=opensource_audio"/><p>visioning indonesia.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/visioning_641 + http://www.archive.org/details/visioning_641 + Tue, 29 Apr 2008 06:43:36 GMT + audio/opensource_audio + + pasfm + + + IrationVibrationShow-4-27-08-p3 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=IrationVibrationShow-4-27-08-p3&mediatype=audio&collection=opensource_audio"/><p>pt3 of.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/IrationVibrationShow-4-27-08-p3 + http://www.archive.org/details/IrationVibrationShow-4-27-08-p3 + Tue, 29 Apr 2008 06:43:04 GMT + http://creativecommons.org/licenses/publicdomain/ + audio/opensource_audio + + iration, vibration, luciano, toots, promo + + + Birthday Party + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=BirthdayParty&mediatype=movies&collection=opensource_movies"/><p>Home movie of a birthday party and travels through India from the late 1960s. 
From Lostinlight.org.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Video, 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash Video, MPEG1, MPEG2, Metadata, Thumbnail</p> + http://www.archive.org/details/BirthdayParty + http://www.archive.org/details/BirthdayParty + Tue, 29 Apr 2008 06:41:45 GMT + http://creativecommons.org/licenses/by-nc/3.0/ + movies/opensource_movies + + + + + + + + + + + rADIO + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=rADIO_538&mediatype=audio&collection=opensource_audio"/><p>rADIO rADIO rADIO rADIO rADIO.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/rADIO_538 + http://www.archive.org/details/rADIO_538 + Tue, 29 Apr 2008 06:40:23 GMT + audio/opensource_audio + + rADIO rADIO V + + + desire + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=desire_583&mediatype=audio&collection=opensource_audio"/><p>Desire Yanni music Vocal: Ender-Thomas.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/desire_583 + http://www.archive.org/details/desire_583 + Tue, 29 Apr 2008 06:39:11 GMT + audio/opensource_audio + + Yanni * Desire + + + TOTD 29 APRIL JAM 12 - DEWI SHINTAW ATY - HAMDAN ZULVA - POLITISI - PEMERASAN OLEH JAKSA + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=topic_253&mediatype=audio&collection=opensource_audio"/><p>visioning indonesia.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of 
the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/topic_253 + http://www.archive.org/details/topic_253 + Tue, 29 Apr 2008 06:39:11 GMT + audio/opensource_audio + + pasfm + + + The Not Doctor Laura Show_Mon Apr 28 2008 - how to handle stressful events + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=TheNotDoctorLauraShow_monApr282008-HowToHandleStressfulEvents&mediatype=audio&collection=opensource_audio"/><p>The Not Doctor Laura Show_Mon Apr 28 2008 - how to handle stressful events.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/TheNotDoctorLauraShow_monApr282008-HowToHandleStressfulEvents + http://www.archive.org/details/TheNotDoctorLauraShow_monApr282008-HowToHandleStressfulEvents + Tue, 29 Apr 2008 06:38:38 GMT + audio/opensource_audio + + a + + + beethoven 9th + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Beethovenbeethoven9th&mediatype=Audio&collection=ourmedia"/><p>sample symphony music, test file.</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata</p> + http://www.archive.org/details/Beethovenbeethoven9th + http://www.archive.org/details/Beethovenbeethoven9th + Tue, 29 Apr 2008 06:38:01 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + beethoven, 9th, symphony + + + KASDAMAM + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=KASDAMAM_444&mediatype=texts&collection=opensource"/><p>KASDAMAM.</p><p>This item belongs to: texts/opensource.</p><p>This item has 
files of the following types: Metadata, PDF</p> + http://www.archive.org/details/KASDAMAM_444 + http://www.archive.org/details/KASDAMAM_444 + Tue, 29 Apr 2008 06:37:53 GMT + texts/opensource + + KASDAMAM + + + James + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=James_278&mediatype=Other&collection=ourmedia"/><p>d.</p><p>This item belongs to: Other/ourmedia.</p><p>This item has files of the following types: Metadata, ZIP</p> + http://www.archive.org/details/James_278 + http://www.archive.org/details/James_278 + Tue, 29 Apr 2008 06:37:47 GMT + Other/ourmedia + + s + + + Mosaic News - 04/28/08: World News From The Middle East + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=linktv_mosaic20080428&mediatype=movies&collection=opensource_movies"/><p>The Peabody Award-winning daily compilation of television news reports from the Middle East, including Egypt, Lebanon, Israel, Syria, the Palestinian Authority, Iraq and Iran..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: FLV 400k, MPEG4 1.5Mbps, MPEG4 350Kb, MPEG4 60Kb, Metadata, iPod Video (MP4)</p> + http://www.archive.org/details/linktv_mosaic20080428 + http://www.archive.org/details/linktv_mosaic20080428 + Tue, 29 Apr 2008 06:37:38 GMT + movies/opensource_movies + + + + Fouth Wall Weekly #2 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Jesster_StateoftheArtandFredSoloFouthWallWeekly_2&mediatype=Audio&collection=ourmedia"/><p>In this weeks edition we go over : The end of countdown and Batman R.I.P preview Cloverfiled DVD Mortal Kombat aka the death of a franchise and GTA preview And other film news.</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 128Kbps MP3, 128kbps M3U, 64Kbps M3U, 64Kbps 
MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis</p> + http://www.archive.org/details/Jesster_StateoftheArtandFredSoloFouthWallWeekly_2 + http://www.archive.org/details/Jesster_StateoftheArtandFredSoloFouthWallWeekly_2 + Tue, 29 Apr 2008 06:36:06 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + Comics, film, videogames + + + John 11:20-44 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=AlanDisbrowJohn11_20-44_0&mediatype=Audio&collection=ourmedia"/><p>Bible Study of John 11:20-44, Arise to a New Life, by Alan Disbrow..</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 128Kbps MP3, 128kbps M3U, 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis</p> + http://www.archive.org/details/AlanDisbrowJohn11_20-44_0 + http://www.archive.org/details/AlanDisbrowJohn11_20-44_0 + Tue, 29 Apr 2008 06:34:13 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + Christianity, Jesus, Bible Study, Calvary Chapel, John + + + day 6 potok + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Joshleo-day6Potok652-3&mediatype=movies&collection=bliptv"/><p>me and this cat don't get along.</p><p>This item belongs to: movies/bliptv.</p><p>This item has files of the following types: 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash, Metadata, Quicktime, Thumbnail</p> + http://www.archive.org/details/Joshleo-day6Potok652-3 + http://www.archive.org/details/Joshleo-day6Potok652-3 + Tue, 29 Apr 2008 06:33:24 GMT + http://creativecommons.org/licenses/by-nc-sa/2.0/ + movies/bliptv + + + + + + + + + 4jkfhwjkl + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=4jkfhwjkl&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: 
movies/opensource_movies.</p><p>This item has files of the following types: Metadata, RAR</p> + http://www.archive.org/details/4jkfhwjkl + http://www.archive.org/details/4jkfhwjkl + Tue, 29 Apr 2008 06:32:22 GMT + movies/opensource_movies + + + + 5knfwk + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=5knfwk&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, RAR</p> + http://www.archive.org/details/5knfwk + http://www.archive.org/details/5knfwk + Tue, 29 Apr 2008 06:31:57 GMT + movies/opensource_movies + + + + SA11 - Upheaval - Descending in Motion + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Sa11-Upheaval-DescendingInMotion&mediatype=audio&collection=opensource_audio"/><p>SA11 - Upheaval - Descending in Motion -------------------------------------- Descending in Motion -------------------------------------- Tom Maggio (also of Turmoil and Domestic Turmoil) brings us an EP of subtle sinking sounds and vaguely disturbing industrial echos....</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, JPEG, Metadata, Ogg Vorbis, Text, VBR M3U, VBR MP3, VBR ZIP, ZIP</p> + http://www.archive.org/details/Sa11-Upheaval-DescendingInMotion + http://www.archive.org/details/Sa11-Upheaval-DescendingInMotion + Tue, 29 Apr 2008 06:31:51 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/us/ + audio/opensource_audio + + dark, ambient, industrial + + + Vespa ride to Ufomammut - Smoke (3) + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=VespaRideToUfomammut-Smoke3&mediatype=movies&collection=opensource_movies"/><p>riding through 
London on my vespa, listening to Ufomammut, Smoke..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Flash Video, Metadata, QuickTime, Thumbnail</p> + http://www.archive.org/details/VespaRideToUfomammut-Smoke3 + http://www.archive.org/details/VespaRideToUfomammut-Smoke3 + Tue, 29 Apr 2008 06:31:05 GMT + http://creativecommons.org/licenses/by-nc-nd/2.0/uk/ + movies/opensource_movies + + + + vespa, london, ufomammut + + + NaturesLead_OV_04__LockedInAGraveyard + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=NaturesLead_OV_04__LockedInAGraveyard&mediatype=audio&collection=opensource_audio"/><p>In this Open Valley, I share my experience of getting locked in Rome's Protestant Cemetery where Keats and Shelley are buried..</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/NaturesLead_OV_04__LockedInAGraveyard + http://www.archive.org/details/NaturesLead_OV_04__LockedInAGraveyard + Tue, 29 Apr 2008 06:31:03 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/us/ + audio/opensource_audio + + Keats, Shelley, cemetery + + + 20/20 Podcast #12A + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=2020Podcast12a&mediatype=audio&collection=opensource_audio"/><p>Discussion between Jamie and Scott about Google Android and the Open Handset Alliance. 
All things cellular are fair game..</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/2020Podcast12a + http://www.archive.org/details/2020Podcast12a + Tue, 29 Apr 2008 06:30:26 GMT + audio/opensource_audio + + google, android, cell phone, gadget, samsung, htc, t-mobile + + + Urdu Poetry podcast + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=ZAhmedUrduPoetrypodcast_2&mediatype=Audio&collection=ourmedia"/><p>An Urdu poem by N M Rashid with english translation.</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 256Kbps MP3, 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/ZAhmedUrduPoetrypodcast_2 + http://www.archive.org/details/ZAhmedUrduPoetrypodcast_2 + Tue, 29 Apr 2008 06:30:23 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + Urdu, Rashid, poetry + + + Beth Ann Turkey 2008 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=SteveEasomBethAnnTurkey2008&mediatype=MovingImage&collection=ourmedia"/><p>2008 Rio Turkey taken By Beth Ann in Oklahoma.</p><p>This item belongs to: MovingImage/ourmedia.</p><p>This item has files of the following types: Metadata, QuickTime</p> + http://www.archive.org/details/SteveEasomBethAnnTurkey2008 + http://www.archive.org/details/SteveEasomBethAnnTurkey2008 + Tue, 29 Apr 2008 06:30:21 GMT + http://creativecommons.org/licenses/by/2.5/ + MovingImage/ourmedia + + Beth Ann, Turkey + + + TOTD 29 APRIL JAM 06 - DEWI SHINTAW ATY - AS HIKAM - POLITISI - PEMERASAN OLEH JAKSA + <img width="160" style="padding-right:3px;float:left;" 
src="http://www.archive.org/services/get-item-image.php?identifier=topic_462&mediatype=audio&collection=opensource_audio"/><p>visioning indonesia.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/topic_462 + http://www.archive.org/details/topic_462 + Tue, 29 Apr 2008 06:30:12 GMT + audio/opensource_audio + + pasfm + + + hosam + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=hosam_244&mediatype=movies&collection=opensource_movies"/><p>6w7u45r7.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash Video, Metadata, Thumbnail, Windows Media</p> + http://www.archive.org/details/hosam_244 + http://www.archive.org/details/hosam_244 + Tue, 29 Apr 2008 06:30:05 GMT + movies/opensource_movies + + + + + + + 57uy436 + + + free - destiny's child + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Free-DestinysChild&mediatype=audio&collection=opensource_audio"/><p>from destiny fufilled.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/Free-DestinysChild + http://www.archive.org/details/Free-DestinysChild + Tue, 29 Apr 2008 06:30:01 GMT + audio/opensource_audio + + free, destiny's child + + + diff --git a/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz b/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz new file mode 100644 index 00000000..fa248f8d Binary files /dev/null and b/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz differ diff --git 
a/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc b/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc new file mode 100644 index 00000000..1125fe98 --- /dev/null +++ b/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc @@ -0,0 +1,3156 @@ +WARC/1.0 +WARC-Type: warcinfo +Content-Type: application/warc-fields +WARC-Date: 2013-10-21T21:53:06Z +WARC-Record-ID: +WARC-Filename: IAH-urls-wget.warc.gz +WARC-Block-Digest: sha1:I7UCIFZZDYO4O55ZOG6X5PRMVWMPZWMJ +Content-Length: 235 + +software: Wget/1.14 (darwin11.4.0) +format: WARC File Format 1.0 +conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf +robots: classic +wget-arguments: "-i" "urls.txt" "-O" "-" "--warc-file=IAH-urls-wget" + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/robots.txt +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:06Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:CPCUG5OU46Y5YHPTFCZLZV465AFPFJYY +Content-Length: 126 + +GET /robots.txt HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/robots.txt +WARC-Date: 2013-10-21T21:53:06Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:3L4DY55OVKT2IEHZEKOSIXRCQKJ7MNIE +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 435 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:06 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/robots.txt +Expires: Tue, 22 Oct 2013 03:53:06 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/robots.txt +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:07Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:RQBBTMHS45XDYLYGRCT7YQ7P3UORCEQU +Content-Length: 122 + +GET /robots.txt HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/robots.txt +WARC-Date: 2013-10-21T21:53:07Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:ORAXOWRNZAEDKBOJUW2PYNLDX2LRDCBK +WARC-Payload-Digest: sha1:ARS5OJBVROJW62M7JMB3BCHEUUEBVMJK +Content-Type: application/http;msgtype=response +Content-Length: 1014 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:07 GMT +Content-Type: text/plain +Content-Length: 727 +Last-Modified: Mon, 21 Oct 2013 18:55:18 GMT +Connection: keep-alive +Expires: Tue, 22 Oct 2013 03:53:07 GMT +Cache-Control: max-age=21600 +Accept-Ranges: bytes + + +Sitemap: http://archive.org/sitemap/sitemap.xml + +############################################## +# +# Welcome to the Archive! +# +############################################## +# Please crawl our files. +# We appreciate if you can crawl responsibly. +# Stay open! +############################################## + + +# slow down the ask jeeves crawler which was hitting our SE a little too fast +# via collection pages. 
--Feb2008 tracey-- +User-agent: Teoma +Disallow: /control/ +Disallow: /report/ + + +User-agent: * +Disallow: /control/ +Disallow: /report/ +Disallow: /details/goldenbull2007john/ +Disallow: /stream/goldenbull2007john/ +Disallow: /download/goldenbull2007john/ +Disallow: /14/items/goldenbull2007john/goldenbull2007john_djvu.txt + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/ +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:07Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:GCYSQOYQGB7JDB57XMUYWFQERAKMNEQQ +Content-Length: 116 + +GET / HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/ +WARC-Date: 2013-10-21T21:53:07Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:WDSM4DEMHGZEOPEG2HMQAIUBQJ6WRRN5 +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 434 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:07 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/index.php +Expires: Tue, 22 Oct 2013 03:53:07 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/index.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:07Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:CPMG7AGNNEDLYK5UOOZLLRHPI4JLEC3U +Content-Length: 121 + +GET /index.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/index.php +WARC-Date: 2013-10-21T21:53:07Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:RYQILVXCYAVUO7TRRO7CQ7VYKSD4COHM +WARC-Payload-Digest: sha1:63IMMQZVCWADA6ZOVJVHKYHHNFSUS26H +Content-Type: application/http;msgtype=response +Content-Length: 258 + +HTTP/1.1 301 Moved Permanently +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:08 GMT +Content-Type: text/html; charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 +Location: https://archive.org + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: https://archive.org/ +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:09Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:G6KJJNG7G7HVRFGJJZ7ELDMO2ZZEX4WR +Content-Length: 112 + +GET / HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: https://archive.org/ +WARC-Date: 2013-10-21T21:53:09Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:VRAITOLIHCUNC5A7LDUBFHDSYQCUO7JM +WARC-Payload-Digest: sha1:WDT537KNDSUIRPB7R56KBDX3K77IR7W3 +Content-Type: application/http;msgtype=response +Content-Length: 30849 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:09 GMT +Content-Type: text/html; 
charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 +Set-Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87; path=/; domain=.archive.org + +7756 + + + + + Internet Archive: Digital Library of Free Books, Movies, Music & Wayback Machine + + + + + + + + + + + + + + + + + + +
+ Universal Access To All Knowledge
+ + + + + + + + + + + + + +
+ Home + + Forums | +FAQs | +Contributions | +Volunteer Positions | +Jobs | +donate +
+ + + +
+ + + + + + + + + + + + + + + + + +
+
+ Search: + + + + + + + + Advanced Search +
+
+ + Anonymous User + + (login + or + + join us) + + +
Upload
+
+ +
+ + + + +
+ + + +
+
+

+
+ 361 billion pages +
+ Web +

+
+ + + + + + + + +
+ (wayback logo) + + +
+ + + more info +
+
+
+
+ + +
+

+
+ See recent additions in RSS +
+ Welcome to the Archive +

+
+ The Internet Archive, a 501(c)(3) non-profit, is building a digital library of Internet sites and other cultural artifacts in digital form. Like a paper library, we provide free access to researchers, historians, scholars, the print disabled, and the general public.
+
+
+ + + +
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Video +
+ + + 1,411,240 movies + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (movies pick) +
+
+ filmcollectief-00-060a
+ + Unknown movie, found in a cannister which should contain something elso. So if someone can help me... +
+
+ +

Recent Review

+
+
+ The Stars Look Down (1940)
Average rating: 4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars

+
+
+ +
+
+
+
+

+
+ Browse +
+ (by band) +
+
+ Live Music +
+ + + 121,538 concerts + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (etree pick) +
+
+ Grateful Dead Live at Jai-Alai Fronton on...
+ + Set 1 Ramble On Rose Black Throated Wind Mississippi Half-Step Uptown Toodeloo Beat It On Down The... +
+
+ +

Recent Review

+ + +
+
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Audio +
+ + + 1,744,979 recordings + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (audio pick) +
+
+ Various Artists - phase01 [hi001]
+ + Our first release! This compilation includes all the original Heavy Industries collaborators... +
+
+ +

Recent Review

+
+
+ IAA Top 40 Countdown
Average rating: 5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars

+
+
+ +
+
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Texts +
+ + + 5,325,972 texts + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (texts pick) +
+
+ Sommaire du plaidoyé. Pour les abbé, prieur,...
+ + 7 p. ; in-2 Cote du document : FOL Z 588 INV 518 FA (P.29) +
+
+ +

Recent Review

+ + +
+
+
+ + + + + + + +
+
+

Most recent posts (write a post by going to a forum) more...

+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SubjectPosterForumRepliesDate
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 0 + 19 minutes ago +
+ Re: What's in a name? 'The Grateful Dead' + + Diamondhead + + GratefulDead + + 0 + 23 minutes ago +
+ Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 2 + 23 minutes ago +
+ band called Last to Know from Taos, NM + + menudo505 + + etree + + 0 + 27 minutes ago +
+ Re: Woulda Coulda Shoulda + + micah6vs8 + + GratefulDead + + 0 + 34 minutes ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 1 + 41 minutes ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 0 + 59 minutes ago +
+ Re: Woulda Coulda Shoulda + + Diamondhead + + GratefulDead + + 1 + 1 hour ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 1 + 1 hour ago +
+ Re: What's in a name? 'The Grateful Dead' + + unclejohn52 + + GratefulDead + + 0 + 1 hour ago +
+
+
+
+ + + + + + + +

+

+ Terms of Use (10 Mar 2001) +

+ + + + + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/index.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:10Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:DRAV5TKA4765LYFANCFHVNKEWGLRKUMM +Content-Length: 171 + +GET /index.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/index.php +WARC-Date: 2013-10-21T21:53:10Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:YXATLZCFORQS33ZVB3M3SMJY3S2Z6QUD +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 434 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:11 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/index.php +Expires: Tue, 22 Oct 2013 03:53:11 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/index.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:11Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:D53DT5RU7NGDFBHOJOKLF56UG32P7AYF +Content-Length: 167 + +GET /index.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/index.php +WARC-Date: 2013-10-21T21:53:11Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:RS3Z4Z3NZ6BS6ANPCRKWA43E5O5YPVG6 +WARC-Payload-Digest: sha1:63IMMQZVCWADA6ZOVJVHKYHHNFSUS26H +Content-Type: application/http;msgtype=response +Content-Length: 258 + +HTTP/1.1 301 Moved Permanently +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:11 GMT +Content-Type: text/html; charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 +Location: https://archive.org + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: https://archive.org/ +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:12Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:HRBVH5XQCN2OWGMQ7THZ675AZ4L4SEWV +Content-Length: 158 + +GET / HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: https://archive.org/ +WARC-Date: 2013-10-21T21:53:12Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:24OHCKJGVHH4GDPS65MSGZAS2FWN6U44 +WARC-Payload-Digest: sha1:7DW5UIXJ5NGLWNQ5WYE7AB4E5L74X275 +Content-Type: application/http;msgtype=response +Content-Length: 30679 + +HTTP/1.1 200 OK 
+Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:13 GMT +Content-Type: text/html; charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 + +76fb + + + + + Internet Archive: Digital Library of Free Books, Movies, Music & Wayback Machine + + + + + + + + + + + + + + + + + + +
+ Universal Access To All Knowledge
+ + + + + + + + + + + + + +
+ Home + + Forums | +FAQs | +Contributions | +Volunteer Positions | +Jobs | +donate +
+ + + +
+ + + + + + + + + + + + + + + + + +
+
+ Search: + + + + + + + + Advanced Search +
+
+ + Anonymous User + + (login + or + + join us) + + +
Upload
+
+ +
+ + + + +
+ + + +
+
+

+
+ 361 billion pages +
+ Web +

+
+ + + + + + + + +
+ (wayback logo) + + +
+ + + more info +
+
+
+
+ + +
+

+
+ See recent additions in RSS +
+ Welcome to the Archive +

+
+ The Internet Archive, a 501(c)(3) non-profit, is building a digital library of Internet sites and other cultural artifacts in digital form. Like a paper library, we provide free access to researchers, historians, scholars, the print disabled, and the general public.
+
+
+ + + +
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Video +
+ + + 1,411,240 movies + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (movies pick) +
+
+ Baby nursery (reel 5)
+ + Description: Amateur movie of the baby nursery at the Peoples Temple Agricultural Mission in... +
+
+ +

Recent Review

+
+
+ The Stars Look Down (1940)
Average rating: 4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars

+
+
+ +
+
+
+
+

+
+ Browse +
+ (by band) +
+
+ Live Music +
+ + + 121,538 concerts + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (etree pick) +
+
+ Grateful Dead Live at Uptown Theater on 1979-12-05
+ + Alabama Getaway-> Greatest Story Ever Told, Dire Wolf, Me & My Uncle-> Big River, Cold Rain & Snow,... +
+
+ +

Recent Review

+ + +
+
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Audio +
+ + + 1,744,979 recordings + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (audio pick) +
+
+ [Miga_v16] "Nice summer"
+ + extra video for audio-release [Miga32] Rominger "Music for camping" [Miga_v16] "Nice summer" video:... +
+
+ +

Recent Review

+
+
+ IAA Top 40 Countdown
Average rating: 5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars

+
+
+ +
+
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Texts +
+ + + 5,325,972 texts + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (texts pick) +
+
+ Outlines of European history
+ + pt. 1. Earliest man...the Orient, Greece, and Rome; Europe from the break-up of the Roman Empire to... +
+
+ +

Recent Review

+ + +
+
+
+ + + + + + + +
+
+

Most recent posts (write a post by going to a forum) more...

+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SubjectPosterForumRepliesDate
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 0 + 19 minutes ago +
+ Re: What's in a name? 'The Grateful Dead' + + Diamondhead + + GratefulDead + + 0 + 23 minutes ago +
+ Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 2 + 23 minutes ago +
+ band called Last to Know from Taos, NM + + menudo505 + + etree + + 0 + 27 minutes ago +
+ Re: Woulda Coulda Shoulda + + micah6vs8 + + GratefulDead + + 0 + 34 minutes ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 1 + 41 minutes ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 0 + 59 minutes ago +
+ Re: Woulda Coulda Shoulda + + Diamondhead + + GratefulDead + + 1 + 1 hour ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 1 + 1 hour ago +
+ Re: What's in a name? 'The Grateful Dead' + + unclejohn52 + + GratefulDead + + 0 + 1 hour ago +
+
+
+
+ + + + + + + +

+

+ Terms of Use (10 Mar 2001) +

+ + + + + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/images/logoc.jpg +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:14Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:6PZOFZFFZRY7XJOJ2325DNXHG7LEP3G6 +Content-Length: 178 + +GET /images/logoc.jpg HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/images/logoc.jpg +WARC-Date: 2013-10-21T21:53:14Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:JN3EE5W7CY5PSNTEJ7A6ORMLNNMNWS3J +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 441 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:14 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/images/logoc.jpg +Expires: Tue, 22 Oct 2013 03:53:14 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/images/logoc.jpg +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:14Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:Q6EXPKA6ECDPIEX3MXCWAH2S4JEO4ZHI +Content-Length: 174 + +GET /images/logoc.jpg HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/images/logoc.jpg +WARC-Date: 2013-10-21T21:53:14Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:6ESWUQAIQPTXYPDSKA2NGLDTHEFS6FLK +WARC-Payload-Digest: sha1:UZY6ND6CCHXETFVJD2MSS7ZENMWF7KQ2 +Content-Type: application/http;msgtype=response +Content-Length: 1951 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:14 GMT +Content-Type: image/jpeg +Content-Length: 1662 +Last-Modified: Wed, 13 Feb 2013 16:33:25 GMT +Connection: keep-alive +Expires: Mon, 28 Oct 2013 21:53:14 GMT +Cache-Control: max-age=604800 +Accept-Ranges: bytes + +JFIFddAdobe ImageReadyDucky<Adobed   + + + +     8F !1AQqa"B2R#Sc$T%'!1qAa2"B3 ?P@a@€ Pj,($@ %i Q6;eH0Yz[,3TRhL0AR:(cq ?0SBrJҋ$3&9BAPH +ƱUOAv_O77\Q]Ɣ,) +R7ŠU4ٗшeB:%n'Eq y- )H[%TR{;4*26n.IQp7;|-F8N}|tInތ}RDwPΡ1&L`{Ԋި'w Jb$ I>b] +-z;%ԭKY<*sjJ=}.?]Qn*bg?Fǟ/fi__:V۪?'\xdP5GKu:㶱罠~jcas&AsZdX +Pؑ̚G-,VoC/#%>TwIDmr9%'F $O?w}OFӋ*#{%Zy +W rs]2Ƅ&.5)ISd)7J[G}IYGMͪި,*4oP ʱjzJP17 p%]ɁiK31kAiP$90kVD1wmĞ"F2rXmˤFxp_ƩO.=ίsm|j}K~`e)Ru ^ַVPC%J> ʽ4}<6K +xfv"_2[?ۺ@ *4p3Q{rJ5yk'>c 艂]pCSjyH*O:`<) +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:I7TKK5MVPSOGRVZYP6L37NTE35F25HJQ +Content-Length: 190 + +GET /images/go-button-gateway.gif HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 
+WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/images/go-button-gateway.gif +WARC-Date: 2013-10-21T21:53:14Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:H5UU46OLZY33AQRBCM7R4BKJBMMAPKHB +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 453 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:15 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/images/go-button-gateway.gif +Expires: Tue, 22 Oct 2013 03:53:15 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/images/go-button-gateway.gif +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:15Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:RDRO3REQIV4EDZDNVASSXC6W72SXUQSP +Content-Length: 186 + +GET /images/go-button-gateway.gif HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/images/go-button-gateway.gif +WARC-Date: 2013-10-21T21:53:15Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:OV6P7Y4LCKQ6R7B5EWRGUHFYUGNN2NV7 +WARC-Payload-Digest: sha1:72MRTMYOLSPFXGOTSETEJKAANDRDIE5O +Content-Type: application/http;msgtype=response +Content-Length: 1412 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:15 GMT +Content-Type: image/gif +Content-Length: 1124 +Last-Modified: Wed, 13 Feb 2013 16:33:26 GMT +Connection: keep-alive +Expires: Mon, 28 Oct 2013 21:53:15 GMT +Cache-Control: max-age=604800 +Accept-Ranges: bytes + +GIF89aXWSݡQ͗KʔJYWמOZ_ozăƄŊERNjFŒďӋE~?WvݑH͆Cj׆CۉEu;r9L컓Xߔ]}yw@ݖnߟzᦃ寑Ĭf3d2c1c1b0a0^/\.Z-W+e3c2a1_0[.Y-W,V+h5h7l9l;n=p?pArCtFvIyLzL{N}R}R~SԀUՁW׆]׈`َiےn۔pܖsޚxޜ{ߝ|ࢃ⦈⧉岘洛縟躣ȵf5h7n?䮔!,@Coz0>Wq& y'HAFp@Ǔ(=.YB$ q'M/0q@A¨&QR48In``iC +1܄礞=V04Sn\"mxTQPA a$ +pI$[$R C㕐IUHYc̖-_Ȍ):l(q>)t' ׉ Dh3{% ,&d6D3H`(GC4JP@C; + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/images/star.png +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:15Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:I5U6LAWZPGDDZOSTJEHZT2BWCOPFKDLV +Content-Length: 177 + +GET /images/star.png HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: 
PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/images/star.png +WARC-Date: 2013-10-21T21:53:15Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:5CUEYG4YEO3H5SKHN4UGZDLKCDXJTP2W +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 440 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:15 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/images/star.png +Expires: Tue, 22 Oct 2013 03:53:15 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/images/star.png +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:15Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:5OLRWKI5GCDS6JF4CCKLWJM23GJZBQOQ +Content-Length: 173 + +GET /images/star.png HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/images/star.png +WARC-Date: 2013-10-21T21:53:15Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:LUMN34VHUXETNH36JOWNHIBNR4DOO2I5 +WARC-Payload-Digest: sha1:CECJCMQ6SXDRBZX5COV7RTTQTHTY653H +Content-Type: application/http;msgtype=response +Content-Length: 1304 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:16 GMT +Content-Type: image/png +Content-Length: 1016 +Last-Modified: Wed, 13 Feb 2013 16:33:26 GMT +Connection: keep-alive +Expires: Mon, 28 Oct 2013 21:53:16 GMT +Cache-Control: max-age=604800 +Accept-Ranges: bytes + +PNG + + IHDRagAMA asRGB cHRMz&u0`:pQ<bKGDC pHYs   vpAg\ƭIDAT8˕yHAzo]4BJۥ\)[eJ̲RH5,(%K4R+s. 
="йNMWZ#3ٌQ}s"~GS.DN x.5B v~cQU2\(~_5s8jW)-a]|@['Ի[{^jOO%fCy $8f.?Z(&%мǮLT3Rv ;nlCy)E/ Һ`PTeru8|$>_@?J'"ڱQm~@&łǠOR 4Ųn]dBŶORPb%tEXtdate:create2012-03-28T02:07:14+00:005%tEXtdate:modify2012-03-25T15:54:33+00:00oIENDB` + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/services/collection-rss.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:16Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:CTALP42WLFIFYU44MXGJNNLYA45BUQVG +Content-Length: 189 + +GET /services/collection-rss.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/services/collection-rss.php +WARC-Date: 2013-10-21T21:53:16Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:A24EOC2MZA4SHKQTCFE5RWLN3EG3WSO2 +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 452 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:16 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/services/collection-rss.php +Expires: Tue, 22 Oct 2013 03:53:16 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/services/collection-rss.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:16Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:VD75DH7UF5EHTROCENIJAVOE6HWRMYAL +Content-Length: 185 + +GET /services/collection-rss.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/services/collection-rss.php +WARC-Date: 2013-10-21T21:53:16Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:GVOLA26JHJVANCRZ545PTRNG6HMT7PWW +WARC-Payload-Digest: sha1:CBRMZGMT7IQRUCDW23ABAL6RN7H6MGIE +Content-Type: application/http;msgtype=response +Content-Length: 78007 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:16 GMT +Content-Type: text/xml;charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 + +7fa0 + + + + https://archive.org + Internet Archive + The most recent additions to the Internet Archive collections. This RSS feed is generated dynamically + info@archive.org (Info Box) + Mon, 21 Oct 2013 21:48:07 GMT + + https://archive.org/images/glogo.png + Internet Archive + https://archive.org + + + gov.uscourts.mnd.126519 + gov.uscourts.mnd.126519 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.mnd.126519&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. 
If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.mnd.126519 + https://archive.org/details/gov.uscourts.mnd.126519 + Mon, 21 Oct 2013 19:50:15 GMT + texts/usfederalcourts + + 03337F0F2C418DC4A098F37A8F17A528536B75A2 + + + gov.uscourts.ded.45655 + gov.uscourts.ded.45655 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.ded.45655&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Image Container PDF, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.ded.45655 + https://archive.org/details/gov.uscourts.ded.45655 + Mon, 21 Oct 2013 18:08:34 GMT + texts/usfederalcourts + + 3E074A11E37C54C6725043593A22AAFC4A71EAB8 + + + PC Longplay 319 Spec Ops The Line + PC Longplay 319 Spec Ops The Line + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=PC_Longplay_319_Spec_Ops_The_Line&mediatype=movies&collection=opensource_movies"/><p>This game was a fairly decent shooter. 
Can't say I would play it again but it had a worth while story to play through with the ability to choose your own destiny..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Matroska, Metadata, Ogg Video, Thumbnail, h.264</p> + https://archive.org/details/PC_Longplay_319_Spec_Ops_The_Line + https://archive.org/details/PC_Longplay_319_Spec_Ops_The_Line + Mon, 21 Oct 2013 15:19:16 GMT + movies/opensource_movies + + + + + 38F8748912CF1483DA9505B3D41C65D76990B4A4 + + + gov.uscourts.ohsd.166725 + gov.uscourts.ohsd.166725 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.ohsd.166725&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata</p> + https://archive.org/details/gov.uscourts.ohsd.166725 + https://archive.org/details/gov.uscourts.ohsd.166725 + Mon, 21 Oct 2013 14:52:49 GMT + texts/usfederalcourts + + 1CFB2CEA490A23F960F63FE4B2996FC5073A752B + + + George Griffin Pt 5 + George Griffin Pt 5 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=scm-315234-georgegriffinpt5&mediatype=movies&collection=SeattleCommunityMedia"/><p>More information about this show available at: Seattle Community Media.</p><p>This item belongs to: movies/SeattleCommunityMedia.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/scm-315234-georgegriffinpt5 + https://archive.org/details/scm-315234-georgegriffinpt5 + Mon, 21 Oct 2013 08:32:22 GMT + 
http://creativecommons.org/licenses/by-nc-nd/3.0/ + movies/SeattleCommunityMedia + + + + + + + History + + A4CAB132D3017E520D5D2BCFC40AE162C8FFEA5A + + + عذب النسيل في تفسير كلام الوكيل / تفسير سورة العصر 6/6 + عذب النسيل في تفسير كلام الوكيل / تفسير سورة العصر 6/6 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=3dbo_nnassil_el3asr&mediatype=audio&collection=opensource_audio"/><p>No description available.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Ogg Vorbis, VBR MP3</p> + https://archive.org/details/3dbo_nnassil_el3asr + https://archive.org/details/3dbo_nnassil_el3asr + Mon, 21 Oct 2013 07:55:35 GMT + audio/opensource_audio + + + + 245CEC3E026E543DB855113D5DA639411258594F + + + gov.uscourts.dcd.153973 + gov.uscourts.dcd.153973 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.dcd.153973&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. 
If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.dcd.153973 + https://archive.org/details/gov.uscourts.dcd.153973 + Mon, 21 Oct 2013 06:45:41 GMT + texts/usfederalcourts + + 5C897BB4B02ADE078AE7C399EA38897EEC76C265 + + + فلم 19 + فلم 19 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=MezaaGe_234F067D-&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG4, Metadata, Ogg Video, Thumbnail</p> + https://archive.org/details/MezaaGe_234F067D- + https://archive.org/details/MezaaGe_234F067D- + Mon, 21 Oct 2013 05:36:21 GMT + movies/opensource_movies + + + + + EE2E9BF9883DC1DECD99A039F9B2CCB2EB87C56F + + + Katsaus Journal + Katsaus Journal + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=Katsaus_Journal&mediatype=movies&collection=opensource_movies"/><p>Finnish continuation war newsreel.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/Katsaus_Journal + https://archive.org/details/Katsaus_Journal + Mon, 21 Oct 2013 05:19:10 GMT + http://creativecommons.org/publicdomain/zero/1.0/ + movies/opensource_movies + + + + + + + 1941-1945 + + 71801FCE2832C222C013CF73A06054D012F17025 + + + AwPT - SHADE + AwPT - SHADE + <img width="160" style="padding-right:3px;float:left;" 
src="https://archive.org/services/get-item-image.php?identifier=scm-368707-awpt-shade&mediatype=movies&collection=SeattleCommunityMedia"/><p>  The first 57 minutes of a great  new 1:33:30 minute film  - available here: http://12160.info/video/shade-the-motion-picture-full-video-documentary More information about this show available at: Seattle Community Media.</p><p>This item belongs to: movies/SeattleCommunityMedia.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/scm-368707-awpt-shade + https://archive.org/details/scm-368707-awpt-shade + Mon, 21 Oct 2013 04:46:41 GMT + http://creativecommons.org/licenses/by-sa/3.0/ + movies/SeattleCommunityMedia + + + + + + + Documentary + + 96BAB900271137D5B85E4CD48744C6BF43F2FAC5 + + + When Trouble Comes My Way - Part 1 + When Trouble Comes My Way - Part 1 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=WhenTroubleComesMyWay-Part1_289&mediatype=movies&collection=opensource_movies"/><p>Sermon delivered by Pastor David Vos at Lake Palms Community Church, 380 Fulton Drive SE, Largo, FL 33771 on October 20, 2013..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Cinepack, Metadata, Ogg Video, Thumbnail, h.264</p> + https://archive.org/details/WhenTroubleComesMyWay-Part1_289 + https://archive.org/details/WhenTroubleComesMyWay-Part1_289 + Mon, 21 Oct 2013 04:34:48 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/ + movies/opensource_movies + + + + Pastor David Vos, Sermon, Lake Palms Community Church + + 02D291F021ED97C7B8AAC171695C5488C6E8B740 + + + Appreciation + Appreciation + <img width="160" style="padding-right:3px;float:left;" 
src="https://archive.org/services/get-item-image.php?identifier=scm-368703-appreciation&mediatype=movies&collection=SeattleCommunityMedia"/><p>More information about this show available at: Seattle Community Media.</p><p>This item belongs to: movies/SeattleCommunityMedia.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/scm-368703-appreciation + https://archive.org/details/scm-368703-appreciation + Mon, 21 Oct 2013 04:17:17 GMT + http://creativecommons.org/licenses/by-sa/3.0/ + movies/SeattleCommunityMedia + + + + + + + Self improvement + + F9CA049C2858C148EE3A81011A48AC5248708D4B + + + دروس عامة للشيخ مصطفى العدوي + دروس عامة للشيخ مصطفى العدوي + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=4-islamic-1151&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Cinepack, Metadata, Ogg Video, Ogg Vorbis, Thumbnail, VBR MP3, Windows Media, h.264</p> + https://archive.org/details/4-islamic-1151 + https://archive.org/details/4-islamic-1151 + Mon, 21 Oct 2013 04:15:51 GMT + movies/opensource_movies + + + + + + + + C9F0489AC735484AE388B39D1E509B5FC0AE2E9F + + + Chris Whitley Live at Hanbury Ballroom on 2003-09-15 + Chris Whitley Live at Hanbury Ballroom on 2003-09-15 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=cw2003-09-15.flac16&mediatype=etree&collection=ChrisWhitley"/><p>Chris Whitley Hanbury Ballroom, Brighton 15 September 2003 Bandridge BMC530 stereo condenser mic > Sony MZ-R91 MD > Philips CDR-760 > EAC >FLAC disc one: 01. new lost world 02. to joy 03. 
crystal ship 04....</p><p>This item belongs to: etree/ChrisWhitley.</p><p>This item has files of the following types: Archive BitTorrent, Checksums, Flac, Flac FingerPrint, Metadata, Ogg Vorbis, Text, VBR MP3</p> + https://archive.org/details/cw2003-09-15.flac16 + https://archive.org/details/cw2003-09-15.flac16 + Mon, 21 Oct 2013 04:12:23 GMT + etree/ChrisWhitley + + + + 0232B264B2349F7D97B6FD4656D8A42342918DC3 + + + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE CONTRACT (13452213099445)_.pdf + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE CONTRACT (13452213099445)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418097-collect-files-25456-political-file-2012-non&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418097-collect-files-25456-political-file-2012-non.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418097-collect-files-25456-political-file-2012-non + https://archive.org/details/418097-collect-files-25456-political-file-2012-non + Mon, 21 Oct 2013 04:02:20 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 67A8F0DB3FDC8602264446066F98925EBB5DDC46 + + + WBZ RNC R PRESIDENT ORD58090 FEDNATL INVOICE (13461642108071)_.pdf + WBZ RNC R PRESIDENT ORD58090 FEDNATL INVOICE (13461642108071)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418098-collect-files-25456-political-file-2012-non&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418098-collect-files-25456-political-file-2012-non.html.</p><p>This item belongs to: 
texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418098-collect-files-25456-political-file-2012-non + https://archive.org/details/418098-collect-files-25456-political-file-2012-non + Mon, 21 Oct 2013 04:02:08 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 344D9552AE1A4B7655275C3556123B8471722045 + + + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE ORDER (13452213088682)_.pdf + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE ORDER (13452213088682)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418099-collect-files-25456-political-file-2012-non&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418099-collect-files-25456-political-file-2012-non.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418099-collect-files-25456-political-file-2012-non + https://archive.org/details/418099-collect-files-25456-political-file-2012-non + Mon, 21 Oct 2013 04:01:56 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 68CE81DE137E079A4566C4FCF3ED4C5A8EC82082 + + + Wikimedia incremental dump files for the Swedish Wikisource on October 19, 2013 + Wikimedia incremental dump files for the Swedish Wikisource on October 19, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-svwikisource-20131019&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Swedish 
Wikisource that is generated by Wikimedia on October 19, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, BZIP2, GZIP, Metadata, Text</p> + https://archive.org/details/incr-svwikisource-20131019 + https://archive.org/details/incr-svwikisource-20131019 + Mon, 21 Oct 2013 04:01:54 GMT + web/wikimediadownloads + wiki, incremental, dumps, svwikisource, Swedish, Wikisource + + D5AAE1198D3729DF6C3D4FC61130F966863BA389 + + + Wikimedia incremental dump files for the Spanish Wiktionary on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wiktionary on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswiktionary-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wiktionary that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> + https://archive.org/details/incr-eswiktionary-20131020 + https://archive.org/details/incr-eswiktionary-20131020 + Mon, 21 Oct 2013 04:01:49 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswiktionary, Spanish, Wiktionary + + 333665884E6DA297C78B0A10A7B9A729210D8DF4 + + + WBZ JOE KENNEDY III D HOUSEMACD4 FED PIQ (13448697161381)_.pdf + WBZ JOE KENNEDY III D HOUSEMACD4 FED PIQ (13448697161381)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418100-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418100-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, 
Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418100-collect-files-25456-political-file-2012-federal + https://archive.org/details/418100-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:45 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + D6D73BBC1BFD646AF32AF7EC187565304C7FCA89 + + + mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + <img width="160" style="padding-rig +8000 +ht:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516&mediatype=image&collection=coverartarchive"/><p>No description available.</p><p>This item belongs to: image/coverartarchive.</p><p>This item has files of the following types: Archive BitTorrent, JPEG, JPEG 250px Thumb, JPEG 500px Thumb, JPEG Thumb, JSON, Metadata, Metadata Log, MusicBrainz Metadata</p> + https://archive.org/details/mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + https://archive.org/details/mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + Mon, 21 Oct 2013 04:01:38 GMT + image/coverartarchive + + 37636FB9D3094A1F605BB78DB6B786F8ABB29BCC + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED CONTRACT (13449762506021)_.pdf + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED CONTRACT (13449762506021)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418101-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418101-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, 
Single Page Processed JP2 ZIP</p> + https://archive.org/details/418101-collect-files-25456-political-file-2012-federal + https://archive.org/details/418101-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:34 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 258C467633F6C486084321E1E9542D3D1478A539 + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED ORDER (13448697139947)_.pdf + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED ORDER (13448697139947)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418102-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418102-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418102-collect-files-25456-political-file-2012-federal + https://archive.org/details/418102-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:20 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 8D8869D528E2E6622287400A580DC388A58CAEF4 + + + alexa20131017-24 + alexa20131017-24 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=alexa20131017-24&mediatype=web&collection=alexacrawls"/><p>Alexa crawl.</p><p>This item belongs to: web/alexacrawls.</p><p>This item has files of the following types: Metadata</p> + https://archive.org/details/alexa20131017-24 + https://archive.org/details/alexa20131017-24 + Mon, 21 Oct 2013 04:01:09 GMT + web/alexacrawls + crawldata + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED CONTRACT (13449762527174)_.pdf + 
WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED CONTRACT (13449762527174)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418103-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418103-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418103-collect-files-25456-political-file-2012-federal + https://archive.org/details/418103-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:06 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + F9687CF40B8065FB8AF30042BFBB2ED3F62ED6F4 + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED ORDER (13448697150704)_.pdf + WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED ORDER (13448697150704)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418104-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418104-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418104-collect-files-25456-political-file-2012-federal + https://archive.org/details/418104-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:53 GMT + texts/opensource + documentcloud, propublica, 
5290-jeremy-merrill + + CDCB99998DB6C9E281F90B69C7315F3192C9D8D9 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED CONTRACT (13449771554387)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED CONTRACT (13449771554387)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418105-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418105-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418105-collect-files-25456-political-file-2012-federal + https://archive.org/details/418105-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:39 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + F85C5F1543BCB96FF4CE64F33C704095934452AC + + + Wikimedia incremental dump files for the Spanish Wikivoyage on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wikivoyage on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswikivoyage-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wikivoyage that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> + https://archive.org/details/incr-eswikivoyage-20131020 + https://archive.org/details/incr-eswikivoyage-20131020 + Mon, 21 Oct 2013 04:00:33 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswikivoyage, Spanish, 
Wikivoyage + + F2E6A9CCC64109E4A1F1548F23A5AC55AE317E84 + + + Eso No 16 10 13 Nota Gari + Eso No 16 10 13 Nota Gari + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=EsoNo161013NotaGari&mediatype=audio&collection=opensource_audio"/><p>Programa Eso no!! nota con Gary de Mr. White!!!.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Ogg Vorbis, VBR MP3</p> + https://archive.org/details/EsoNo161013NotaGari + https://archive.org/details/EsoNo161013NotaGari + Mon, 21 Oct 2013 04:00:32 GMT + audio/opensource_audio + + + "audios eso no" + + 97ACF3820CD8DEE4DBE7AD0F46AA00EAE8F735BB + + + Wikimedia incremental dump files for the Swedish Wikiquote on October 19, 2013 + Wikimedia incremental dump files for the Swedish Wikiquote on October 19, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-svwikiquote-20131019&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Swedish Wikiquote that is generated by Wikimedia on October 19, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, BZIP2, GZIP, Metadata, Text</p> + https://archive.org/details/incr-svwikiquote-20131019 + https://archive.org/details/incr-svwikiquote-20131019 + Mon, 21 Oct 2013 04:00:29 GMT + web/wikimediadownloads + wiki, incremental, dumps, svwikiquote, Swedish, Wikiquote + + 43439D621A3C1BB74D39948590A9193B2D165D4B + + + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED INVOICE (13455816090527)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED INVOICE (13455816090527)_.pdf + <img width="160" style="padding-right:3px;float:left;" 
src="https://archive.org/services/get-item-image.php?identifier=418106-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418106-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418106-collect-files-25456-political-file-2012-federal + https://archive.org/details/418106-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:25 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 4A0A6C8BAECB9A545B3BDC40465ABB74442319A0 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED ORDER (13449696816301)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED ORDER (13449696816301)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418107-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418107-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418107-collect-files-25456-political-file-2012-federal + https://archive.org/details/418107-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:13 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + F76FD396A322CC62A4197DE088CA6DCF833558A7 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FED CONTRACT (13449777167988)_.pdf + WBZ 
ELIZABETH WARREN D SENATEMA ORD58029 FED CONTRACT (13449777167988)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418108-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418108-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418108-collect-files-25456-political-file-2012-federal + https://archive.org/details/418108-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:00 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + E2E238B9DF57CC47B4982E375A0F89E75CA9EAC2 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FEDNATL INVOICE (13461639080781)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FEDNATL INVOICE (13461639080781)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418109-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418109-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418109-collect-files-25456-political-file-2012-federal + https://archive.org/details/418109-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:47 GMT + texts/opensource + documentcloud, propublica, 
5290-jeremy-merrill + + 78B49781C5AADB75726B576A00313D2C145074AE + + + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FED ORDER (13449696816990)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FED ORDER (13449696816990)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418110-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418110-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418110-collect-files-25456-political-file-2012-federal + https://archive.org/details/418110-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:35 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 221C3ED39EA1046DA1442C242B2EDF35114991FE + + + Silo 2.2 + Silo 2.2 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=Silo2.2_201310&mediatype=texts&collection=opensource_media"/><p>Unbiased reviews of the Arc'teryx Silo 50 winter pack by real people. 
Silo 2.2 is now available If you have not already updated, you can follow the link below to download the latest version, which is a free upgrade for all Silo 2 owners....</p><p>This item belongs to: texts/opensource_media.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Windows Executable</p> + https://archive.org/details/Silo2.2_201310 + https://archive.org/details/Silo2.2_201310 + Mon, 21 Oct 2013 03:59:34 GMT + texts/opensource_media + Silo 2.2 + + 2E63B46A804C6EDF3A3200D0202E1DE357B2FF12 + + + Rocket Power 3x 17 Losers Weepers ~ Reggie The Movie [ Unknown Encoder] + Rocket Power 3x 17 Losers Weepers ~ Reggie The Movie [ Unknown Encoder] + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=RocketPower3x17LosersWeepersReggieTheMovieUnknownEncoder&mediatype=movies&collection=opensource_movies"/><p>Season 3 Episode 17.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Cinepack, Metadata, Ogg Video, Thumbnail, h.264</p> + https://archive.org/details/RocketPower3x17LosersWeepersReggieTheMovieUnknownEncoder + https://archive.org/details/RocketPower3x17LosersWeepersReggieTheMovieUnknownEncoder + Mon, 21 Oct 2013 03:59:26 GMT + movies/opensource_movies + + + + animation + + 7C9CAF48A8554A9E68AB6F7B25FD2EC2A93C8632 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58120 FED CONTRACT (13460805069185)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58120 FED CONTRACT (13460805069185)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418111-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418111-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the 
following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418111-collect-files-25456-political-file-2012-federal + https://archive.org/details/418111-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:21 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 0E9E3AAD85082031C2525F7A9568048F8B1E8E7E + + + Wikimedia incremental dump files for the Spanish Wikiversity on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wikiversity on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswikiversity-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wikiversity that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> + https://archive.org/details/incr-eswikiversity-20131020 + https://archive.org/details/incr-eswikiversity-20131020 + Mon, 21 Oct 2013 03:59:17 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswikiversity, Spanish, Wikiversity + + AD372EA58CF3006D8471BA41026C62BD1861078A + + + WBZ WARREN FOR SENATE D SENATEMA ORD58120 FED ORDER (13457355056808)_.pdf + WBZ WARREN FOR SENATE D SENATEMA ORD58120 FED ORDER (13457355056808)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418112-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418112-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following 
types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418112-collect-files-25456-political-file-2012-federal + https://archive.org/details/418112-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:08 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 012A94D6BD2802A88F233C09E8C70437682D2131 + + + Wikimedia incremental dump files for the Swedish Wikinews on October 19, 2013 + Wikimedia incremental dump files for the Swedish Wikinews on October 19, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-svwikinews-20131019&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Swedish Wikinews that is generated by Wikimedia on October 19, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, BZIP2, GZIP, Metadata, Text</p> + https://archive.org/details/incr-svwikinews-20131019 + https://archive.org/details/incr-svwikinews-20131019 + Mon, 21 Oct 2013 03:59:05 GMT + web/wikimediadownloads + wiki, incremental, dumps, svwikinews, Swedish, Wikinews + + E501281C8B708DE164F23964725A87773F582478 + + + Webwide Crawldata 2013-10-20T22:03:54PDT to 2013-10-20T16:43:21PDT + Webwide Crawldata 2013-10-20T22:03:54PDT to 2013-10-20T16:43:21PDT + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=WIDE-20131020220354-crawl422&mediatype=web&collection=wide00009"/><p>Internet Archive crawldata from Webwide Crawl, captured by crawl422.us.archive.org:wide from Sun Oct 20 22:03:54 PDT 2013 to Sun Oct 20 16:43:21 PDT 2013..</p><p>This item belongs to: web/wide00009.</p><p>This item has files of the following types: Item CDX Index, Item CDX 
Meta-Index, Metadata, Text, WARC CDX Index, Web ARChive GZ</p> + https://archive.org/details/WIDE-20131020220354-crawl422 + https://archive.org/details/WIDE-20131020220354-crawl422 + Mon, 21 Oct 2013 03:59:01 GMT + web/wide00009 + crawldata + + + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED CONTRACT (13460805058183)_.pdf + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED CONTRACT (13460805058183)_.pdf + <img width="160" style="padding-right:3px;floa +302d +t:left;" src="https://archive.org/services/get-item-image.php?identifier=418113-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418113-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418113-collect-files-25456-political-file-2012-federal + https://archive.org/details/418113-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:54 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 96238A946E099625F0281BEEF4A336EF1A1C447F + + + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED ORDER (13457355068812)_.pdf + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED ORDER (13457355068812)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418114-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418114-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, 
DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418114-collect-files-25456-political-file-2012-federal + https://archive.org/details/418114-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:41 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 67E3C7FAF4AC13E3E8AD70AEBFDBDED581CE4BA0 + + + gov.uscourts.flmd.283032 + gov.uscourts.flmd.283032 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.flmd.283032&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.flmd.283032 + https://archive.org/details/gov.uscourts.flmd.283032 + Mon, 21 Oct 2013 03:58:35 GMT + texts/usfederalcourts + + 6252A903C01875100635D5011543196896010583 + + + WBZ SCOTT BROWN R SENATEMA ORD57975 FED CONTRACT (13448868050432)_.pdf + WBZ SCOTT BROWN R SENATEMA ORD57975 FED CONTRACT (13448868050432)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418115-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418115-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + 
https://archive.org/details/418115-collect-files-25456-political-file-2012-federal + https://archive.org/details/418115-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:26 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 98F1A74C04D215AC2175D27B24C73C24E1F77A43 + + + WBZ SCOTT BROWN FOR US SENATE R SENATEMA ORD57975 FED INVOICE (13457541352753)_.pdf + WBZ SCOTT BROWN FOR US SENATE R SENATEMA ORD57975 FED INVOICE (13457541352753)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418116-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418116-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418116-collect-files-25456-political-file-2012-federal + https://archive.org/details/418116-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:12 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 1A93DFBE2B84324B987F7C3EF91FDC3299E5BB2A + + + Wikimedia incremental dump files for the Spanish Wikisource on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wikisource on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswikisource-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wikisource that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> 
+ https://archive.org/details/incr-eswikisource-20131020 + https://archive.org/details/incr-eswikisource-20131020 + Mon, 21 Oct 2013 03:58:01 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswikisource, Spanish, Wikisource + + BF69AA565071874D71ABB5D3D4FECFA85448C261 + + + WBZ SCOTT BROWN R SENATEMA ORD58022 FED CONTRACT_.pdf (13449774486241)_.pdf + WBZ SCOTT BROWN R SENATEMA ORD58022 FED CONTRACT_.pdf (13449774486241)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418117-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418117-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418117-collect-files-25456-political-file-2012-federal + https://archive.org/details/418117-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:00 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 6B645B4FBAD190864C10A471F96FA25AA67B88EE + + + Alain Le Bussy - Fata Care Se Temea De Apa 0.9 07 + Alain Le Bussy - Fata Care Se Temea De Apa 0.9 07 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=Alain_Le_Bussy-Fata_Care_Se_Temea_De_Apa_0_9_07__&mediatype=texts&collection=opensource"/><p>Alain Le Bussy - Fata Care Se Temea De Apa 0.9 07 Romanian Book. Aceasta carte face parte din Colectia 10.000 de carti. 
Puteti downloada aici un fisier zip cu intreaga colectie 10.000 de carti (2 GB) sau puteti alege doar cartea dorita....</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF, Word Document</p> + https://archive.org/details/Alain_Le_Bussy-Fata_Care_Se_Temea_De_Apa_0_9_07__ + https://archive.org/details/Alain_Le_Bussy-Fata_Care_Se_Temea_De_Apa_0_9_07__ + Mon, 21 Oct 2013 03:57:59 GMT + http://creativecommons.org/publicdomain/zero/1.0/ + texts/opensource + 10000 carti, Alain Le Bussy, Fata Care Se Temea De Apa 0.9 07, carti, carte, online, pdf, download, romana, carti in limba romana, romania, romanian, carti pdf, Books in Romanian language, ro-books, kjb, ftp.kjb.ro, 10000, 10000 carti + + 6F89BFC6BE691DC13FE417E2B09D81BE90DA24EA + + + + +0 + + + +WARC/1.0 +WARC-Type: resource +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Target-URI: metadata://gnu.org/software/wget/warc/MANIFEST.txt +WARC-Date: 2013-10-21T21:53:18Z +WARC-Block-Digest: sha1:MDYPVAS3DVOGNNDRGXW6RHSIQJHDUL6H +Content-Type: text/plain +Content-Length: 48 + + + + +WARC/1.0 +WARC-Type: resource +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Target-URI: metadata://gnu.org/software/wget/warc/wget_arguments.txt +WARC-Date: 2013-10-21T21:53:18Z +WARC-Block-Digest: sha1:4RRGL67PFUKCW27T2JYSPL7U2WU3UVAP +Content-Type: text/plain +Content-Length: 54 + +"-i" "urls.txt" "-O" "-" "--warc-file=IAH-urls-wget" + + +WARC/1.0 +WARC-Type: resource +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: metadata://gnu.org/software/wget/warc/wget.log +WARC-Date: 2013-10-21T21:53:18Z +WARC-Block-Digest: sha1:WTDCWU2CDUXK2GWFEVX7NWJNCTC25MVC +Content-Type: text/plain +Content-Length: 5647 + +Opening WARC file ‘IAH-urls-wget.warc.gz’. 
+ +urls.txt: Invalid URL dns:www.archive.org: Unsupported scheme ‘dns’ +--2013-10-21 22:53:06-- http://www.archive.org/robots.txt +Resolving www.archive.org... 207.241.224.2 +Connecting to www.archive.org|207.241.224.2|:80... connected. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/robots.txt [following] + + 0K 100% 9.03M=0s + +--2013-10-21 22:53:06-- http://archive.org/robots.txt +Resolving archive.org... 207.241.224.2 +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: 727 [text/plain] +Saving to: ‘STDOUT’ + + 0K 100% 826K=0.001s + +2013-10-21 22:53:07 (826 KB/s) - written to stdout [727/727] + +--2013-10-21 22:53:07-- http://www.archive.org/ +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/index.php [following] + + 0K 100% 19.2M=0s + +--2013-10-21 22:53:07-- http://archive.org/index.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 301 Moved Permanently +Location: https://archive.org [following] + + 0K 0.00 =0s + +--2013-10-21 22:53:08-- https://archive.org/ +Connecting to archive.org|207.241.224.2|:443... connected. +HTTP request sent, awaiting response... 200 OK +Length: unspecified [text/html] +Saving to: ‘STDOUT’ + + 0K .......... .......... ......... 9.59M=0.003s + +2013-10-21 22:53:10 (9.59 MB/s) - written to stdout [30550] + +--2013-10-21 22:53:10-- http://www.archive.org/index.php +Connecting to www.archive.org|207.241.224.2|:80... connected. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/index.php [following] + + 0K 100% 10.2M=0s + +--2013-10-21 22:53:11-- http://archive.org/index.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 
301 Moved Permanently +Location: https://archive.org [following] + + 0K 0.00 =0s + +--2013-10-21 22:53:11-- https://archive.org/ +Connecting to archive.org|207.241.224.2|:443... connected. +HTTP request sent, awaiting response... 200 OK +Length: unspecified [text/html] +Saving to: ‘STDOUT’ + + 0K .......... .......... ......... 96.9K=0.3s + +2013-10-21 22:53:14 (96.9 KB/s) - written to stdout [30459] + +--2013-10-21 22:53:14-- http://www.archive.org/images/logoc.jpg +Connecting to www.archive.org|207.241.224.2|:80... connected. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/images/logoc.jpg [following] + + 0K 100% 15.4M=0s + +--2013-10-21 22:53:14-- http://archive.org/images/logoc.jpg +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: 1662 (1.6K) [image/jpeg] +Saving to: ‘STDOUT’ + + 0K . 100% 122M=0s + +2013-10-21 22:53:14 (122 MB/s) - written to stdout [1662/1662] + +--2013-10-21 22:53:14-- http://www.archive.org/images/go-button-gateway.gif +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/images/go-button-gateway.gif [following] + + 0K 100% 11.0M=0s + +--2013-10-21 22:53:15-- http://archive.org/images/go-button-gateway.gif +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: 1124 (1.1K) [image/gif] +Saving to: ‘STDOUT’ + + 0K . 100% 97.4M=0s + +2013-10-21 22:53:15 (97.4 MB/s) - written to stdout [1124/1124] + +--2013-10-21 22:53:15-- http://www.archive.org/images/star.png +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/images/star.png [following] + + 0K 100% 17.1M=0s + +--2013-10-21 22:53:15-- http://archive.org/images/star.png +Reusing existing connection to www.archive.org:80. 
+HTTP request sent, awaiting response... 200 OK +Length: 1016 [image/png] +Saving to: ‘STDOUT’ + + 0K 100% 74.5M=0s + +2013-10-21 22:53:16 (74.5 MB/s) - written to stdout [1016/1016] + +--2013-10-21 22:53:16-- http://www.archive.org/services/collection-rss.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/services/collection-rss.php [following] + + 0K 100% 17.1M=0s + +--2013-10-21 22:53:16-- http://archive.org/services/collection-rss.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: unspecified [text/xml] +Saving to: ‘STDOUT’ + + 0K .......... .......... .......... .......... .......... 54.2K + 50K .......... .......... ..... 92.7K=1.2s + +2013-10-21 22:53:18 (63.2 KB/s) - written to stdout [77773] + +FINISHED --2013-10-21 22:53:18-- +Total wall clock time: 12s +Downloaded: 7 files, 140K in 1.5s (92.5 KB/s) + +