From 7a1a39b39b7e2dd886a5bfa15d5b65f2c05f5e18 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Tue, 23 Apr 2013 11:44:04 +0200 Subject: [PATCH 01/86] Added a suitable travis-ci config file. --- .travis.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..52ea3bf1 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,10 @@ +language: java +jdk: + - oraclejdk7 +before_script: + - "echo $JAVA_OPTS" + - "export JAVA_OPTS=-Xmx1024m" + - "echo $JAVA_OPTS" + - "ulimit -a" + - "ulimit -u 2048" + - "ulimit -a" From 299281f5ec54b007ea61b74edd690bd05db6399d Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 10 Oct 2013 09:18:00 +0100 Subject: [PATCH 02/86] Initial build under org.netpreserve Renamed the project, resolved some dependencies which were only held at IA. This did involve updating the code very slightly, as one of the dependant classes had moved package. --- LICENSE | 191 ++++++++++++++++++ README.md | 4 + pom.xml | 43 ++-- .../org/archive/url/UsableURIFactory.java | 2 +- 4 files changed, 212 insertions(+), 28 deletions(-) create mode 100644 LICENSE create mode 100644 README.md diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..37ec93a1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. 
+For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 00000000..b70f8318 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +OpenWayback Web Commons +======================= + +This repository contains common utility code for the OpenWayback project. diff --git a/pom.xml b/pom.xml index cbdb154b..c3a7a358 100644 --- a/pom.xml +++ b/pom.xml @@ -2,13 +2,20 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - org.archive - ia-web-commons - 1.0-SNAPSHOT + + org.sonatype.oss + oss-parent + 7 + + + + org.netpreserve.commons + web-commons + 1.0.0-SNAPSHOT jar - ia-web-commons - http://maven.apache.org + iipc-web-commons + https://github.com/iipc/iipc-web-commons UTF-8 @@ -42,7 +49,7 @@ - org.mozilla + com.googlecode.juniversalchardet juniversalchardet 1.0.3 @@ -115,8 +122,8 @@ it.unimi.dsi - mg4j - 1.0.1 + dsiutils + 2.0.12 compile @@ -139,7 +146,7 @@ jar-with-dependencies - ia-web-commons + iipc-web-commons @@ -160,24 +167,6 @@ - - internetarchive - Internet Archive Maven Repository - http://builds.archive.org:8080/maven2 - default - - - true - daily - warn - - - true - daily - warn - - - cloudera Cloudera Hadoop diff --git a/src/main/java/org/archive/url/UsableURIFactory.java b/src/main/java/org/archive/url/UsableURIFactory.java index 46b8e119..9118b850 100644 --- a/src/main/java/org/archive/url/UsableURIFactory.java +++ 
b/src/main/java/org/archive/url/UsableURIFactory.java @@ -20,7 +20,7 @@ import gnu.inet.encoding.IDNA; import gnu.inet.encoding.IDNAException; -import it.unimi.dsi.mg4j.util.MutableString; +import it.unimi.dsi.lang.MutableString; import java.io.UnsupportedEncodingException; import java.util.BitSet; From 7f7054287b86650adb6955f43a38311e913152f9 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 10 Oct 2013 13:08:36 +0100 Subject: [PATCH 03/86] Copied PublicSuffixes code into web-commons from heritrix-commons. --- .../java/org/archive/url/PublicSuffixes.java | 363 ++ src/main/resources/effective_tld_names.dat | 5229 +++++++++++++++++ .../org/archive/url/PublicSuffixesTest.java | 193 + 3 files changed, 5785 insertions(+) create mode 100644 src/main/java/org/archive/url/PublicSuffixes.java create mode 100644 src/main/resources/effective_tld_names.dat create mode 100644 src/test/java/org/archive/url/PublicSuffixesTest.java diff --git a/src/main/java/org/archive/url/PublicSuffixes.java b/src/main/java/org/archive/url/PublicSuffixes.java new file mode 100644 index 00000000..7c3df6b8 --- /dev/null +++ b/src/main/java/org/archive/url/PublicSuffixes.java @@ -0,0 +1,363 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.url; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.io.IOUtils; +import org.archive.util.TextUtils; + +/** + * Utility class for making use of the information about 'public suffixes' at + * http://publicsuffix.org. + * + * The public suffix list (once known as 'effective TLDs') was motivated by the + * need to decide on which broader domains a subdomain was allowed to set + * cookies. For example, a server at 'www.example.com' can set cookies for + * 'www.example.com' or 'example.com' but not 'com'. 'www.example.co.uk' can set + * cookies for 'www.example.co.uk' or 'example.co.uk' but not 'co.uk' or 'uk'. + * The number of rules for all top-level-domains and 2nd- or 3rd- level domains + * has become quite long; essentially the broadest domain a subdomain may assign + * to is the one that was sold/registered to a specific name registrant. + * + * This concept should be useful in other contexts, too. Grouping URIs (or + * queues of URIs to crawl) together with others sharing the same registered + * suffix may be useful for applying the same rules to all, such as assigning + * them to the same queue or crawler in a multi- machine setup. + * + * As of Heritrix3, we prefer the term 'Assignment Level Domain' (ALD) + * for such domains, by analogy to 'Top Level Domain' (TLD) or '2nd Level + * Domain' (2LD), etc. + * + * @author Gojomo + * + * this version of PublicSuffixes uses suffix-tree data structure for generating less + * redundant regular expression. 
It may even be possible to write a light-weight, + * thread-safe matcher based on this class. + * @author Kenji Nagahashi + */ +public class PublicSuffixes { + protected static Pattern topmostAssignedSurtPrefixPattern; + protected static String topmostAssignedSurtPrefixRegex; + + /** + * prefix tree node. each Node represents a sequence of letters (prefix) + * and the alternative sequences following it (list of Node's). Nodes in + * {@code branches} are sorted for skip list like lookup and for generating + * effective regular expression (see {@link #compareTo(Node)} and {@link #compareTo(char)}). + * + * as this class is intended for internal use only, there are no accessor methods. procedures for updating + * prefix tree with new input are defined within this class ({@link #addBranch(CharSequence)}). + * + * a terminal node can be represented in two different forms: 1) Node with zero branches, + * or 2) Node with zero-length {@code cs}. So, root node must be initialized with empty (not null) + * {@code branches} unless empty string matches the overall pattern. + * {@code cs} must not be null except for root node. 
+ */ + public static class Node implements Comparable { + protected CharSequence cs; + protected List branches; + public Node() { + this("", null); + } + protected Node(CharSequence cs) { + this(cs, null); + } + protected Node(CharSequence cs, List branches) { + this.cs = cs; + this.branches = branches; + } + public void addBranch(CharSequence s) { + if (branches == null) { + branches = new ArrayList(); + branches.add(new Node("", null)); + } + for (int i = 0; i < branches.size(); i++) { + Node alt = branches.get(i); + if (alt.add(s)) return; + if (alt.compareTo(s.charAt(0)) > 0) { + Node alt1 = new Node(s, null); + branches.add(i, alt1); + return; + } + } + Node alt2 = new Node(s, null); + branches.add(alt2); + } + public boolean add(CharSequence s) { + int l = Math.min(s.length(), cs.length()); + int i = 0; + while (i < l && s.charAt(i) == cs.charAt(i)) + i++; + // zero-length match holds only when both cs and s are empty. + if (i == 0) return cs.length() == 0 && s.length() == 0; + if (i < cs.length()) { + CharSequence cs0 = cs.subSequence(0, i); + CharSequence cs1 = cs.subSequence(i, cs.length()); + CharSequence cs2 = s.subSequence(i, s.length()); + cs = cs0; + Node alt1 = new Node(cs1, branches); + (branches = new ArrayList()).add(alt1); + addBranch(cs2); + } else { + assert i == cs.length(); + addBranch(s.subSequence(i, s.length())); + } + return true; + } + public int compareTo(Node other) { + if (other.cs == null || other.cs.length() == 0) + return (cs == null || cs.length() == 0) ? 0 : -1; + return compareTo(other.cs.charAt(0)); + } + public int compareTo(char oc) { + if (cs == null || cs.length() == 0) return 1; + // '!' and '*' must come after ordinary letters, in this order, for regexp + // to work as intended. + char c = cs.charAt(0); + if (c == oc) return 0; + if (c == '!') return oc == '*' ? 
-1 : 1; + if (c == '*') return 1; + if (oc == '*' || oc == '!') return -1; + return Character.valueOf(c).compareTo(oc); + // for generating the same regexp as previous version. + //return Character.valueOf(oc).compareTo(c); + } + } + + /** + * Utility method for dumping a regex String, based on a published public + * suffix list, which matches any SURT-form hostname up through the broadest + * 'private' (assigned/sold) domain-segment. That is, for any of the + * SURT-form hostnames... + * + * com,example, com,example,www, com,example,california,www + * + * ...the regex will match 'com,example,'. + * + * @param args args[0]: input list file (absent or "=" means the bundled list); args[1]: optional output file (default stdout) + * @throws IOException if the list cannot be read or the output cannot be written + */ + public static void main(String args[]) throws IOException { + InputStream is; + if (args.length == 0 || "=".equals(args[0])) { + // use bundled list + is = PublicSuffixes.class.getClassLoader().getResourceAsStream( + "effective_tld_names.dat"); + } else { + is = new FileInputStream(args[0]); + } + BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); + String regex = getTopmostAssignedSurtPrefixRegex(reader); + IOUtils.closeQuietly(is); + + boolean needsClose = false; + BufferedWriter writer; + if (args.length >= 2) { + // write to specified file + writer = new BufferedWriter(new FileWriter(args[1])); + needsClose = true; + } else { + // write to stdout + writer = new BufferedWriter(new OutputStreamWriter(System.out)); + } + writer.append(regex); + writer.flush(); + if (needsClose) { + writer.close(); + } + } + /** + * Reads a file of the format promulgated by publicsuffix.org, ignoring + * comments and trailing notations, converting domain segments to + * SURT-ordering. Leaves '!' exception markers and glob-style '*' wildcarding in place. Returns root + * node of SURT-ordered prefix tree. + * + * @param reader source of the publicsuffix.org-format list + * @return root of prefix tree node. 
+ * @throws IOException + */ + protected static Node readPublishedFileToSurtTrie(BufferedReader reader) throws IOException { + // initializing with empty Alt list prevents empty pattern from being + // created for the first addBranch() + Node alt = new Node(null, new ArrayList()); + String line; + while ((line = reader.readLine()) != null) { + // discard whitespace, empty lines and comments ('!' exception rules are kept) + line = line.trim(); + if (line.length() == 0 || line.startsWith("//")) continue; + // discard utf8 notation after entry + line = line.split("\\s+")[0]; + // TODO: maybe we don't need to create lower-cased String + line = line.toLowerCase(); + // SURT-order domain segments + String[] segs = line.split("\\."); + StringBuilder sb = new StringBuilder(); + for (int i = segs.length - 1; i >= 0; i--) { + if (segs[i].length() == 0) continue; + sb.append(segs[i]).append(','); + } + alt.addBranch(sb.toString()); + } + return alt; + } + /** + * utility function for dumping prefix tree structure. intended for debug use. + * @param alt root of prefix tree. + * @param lv indent level. 0 for root (no indent). + * @param out writer to send output to. + */ + public static void dump(Node alt, int lv, PrintWriter out) { + for (int i = 0; i < lv; i++) + out.print(" "); + out.println(alt.cs != null ? ('"'+alt.cs.toString()+'"') : "(null)"); + if (alt.branches != null) { + for (Node br : alt.branches) { + dump(br, lv + 1, out); + } + } + } + /** + * builds a regular expression from prefix-tree {@code alt} into buffer {@code sb}. + * @param alt prefix tree root. + * @param sb StringBuilder that receives the regular expression. + */ + protected static void buildRegex(Node alt, StringBuilder sb) { + String close = null; + if (alt.cs != null) { + // actually '!' will always be the first character, because it is + // always used along with '*'. 
+ for (int i = 0; i < alt.cs.length(); i++) { + char c = alt.cs.charAt(i); + if (c == '!') { + if (close != null) + throw new RuntimeException("more than one '!'"); + sb.append("(?="); + close = ")"; + } else if (c == '*') { + sb.append("[-\\w]+"); + } else { + sb.append(c); + } + } + } + if (alt.branches != null) { + // alt.branches.size() should always be > 1 + if (alt.branches.size() > 1) { + sb.append("(?:"); + } + String sep = ""; + for (Node alt1 : alt.branches) { + sb.append(sep); sep = "|"; + buildRegex(alt1, sb); + } + if (alt.branches.size() > 1) { + sb.append(")"); + } + } + if (close != null) + sb.append(close); + } + + /** + * Converts SURT-ordered list of public prefixes into a Java regex which + * matches the public-portion "plus one" segment, giving the domain on which + * cookies can be set or other policy grouping should occur. Also adds to + * regex a fallback matcher that for any new/unknown TLDs assumes the + * second-level domain is assignable. (Eg: 'zzz,example,'). 
+ * + * @param trie root of the SURT-ordered prefix tree + * @return the regex as a String + */ + private static String surtPrefixRegexFromTrie(Node trie) { + StringBuilder regex = new StringBuilder(); + regex.append("(?ix)^\n"); + trie.addBranch("*,"); // for new/unknown TLDs + buildRegex(trie, regex); + regex.append("\n([-\\w]+,)"); + return regex.toString(); + } + + public static synchronized Pattern getTopmostAssignedSurtPrefixPattern() { + if (topmostAssignedSurtPrefixPattern == null) { + topmostAssignedSurtPrefixPattern = Pattern + .compile(getTopmostAssignedSurtPrefixRegex()); + } + return topmostAssignedSurtPrefixPattern; + } + + public static synchronized String getTopmostAssignedSurtPrefixRegex() { + if (topmostAssignedSurtPrefixRegex == null) { + // use bundled list + try { + BufferedReader reader = new BufferedReader(new InputStreamReader( + PublicSuffixes.class.getClassLoader().getResourceAsStream( + "effective_tld_names.dat"), "UTF-8")); + topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader); + IOUtils.closeQuietly(reader); + } catch (UnsupportedEncodingException ex) { + // should never happen + throw new RuntimeException(ex); + } + } + return topmostAssignedSurtPrefixRegex; + } + + public static String getTopmostAssignedSurtPrefixRegex(BufferedReader reader) { + try { + Node trie = readPublishedFileToSurtTrie(reader); + return surtPrefixRegexFromTrie(trie); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Truncate SURT to its topmost assigned domain segment; that is, + * the public suffix plus one segment, but as a SURT-ordered prefix. + * + * If the pattern doesn't match, the passed-in SURT is returned. 
+ * + * @param surt SURT to truncate + * @return truncated-to-topmost-assigned SURT prefix + */ + public static String reduceSurtToAssignmentLevel(String surt) { + Matcher matcher = TextUtils.getMatcher( + getTopmostAssignedSurtPrefixRegex(), surt); + if (matcher.find()) { + surt = matcher.group(); + } + TextUtils.recycleMatcher(matcher); + return surt; + } +} diff --git a/src/main/resources/effective_tld_names.dat b/src/main/resources/effective_tld_names.dat new file mode 100644 index 00000000..2c201312 --- /dev/null +++ b/src/main/resources/effective_tld_names.dat @@ -0,0 +1,5229 @@ +// ***** BEGIN LICENSE BLOCK ***** +// Version: MPL 1.1/GPL 2.0/LGPL 2.1 +// +// The contents of this file are subject to the Mozilla Public License Version +// 1.1 (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// http://www.mozilla.org/MPL/ +// +// Software distributed under the License is distributed on an "AS IS" basis, +// WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +// for the specific language governing rights and limitations under the +// License. +// +// The Original Code is the Public Suffix List. +// +// The Initial Developer of the Original Code is +// Jo Hermans . +// Portions created by the Initial Developer are Copyright (C) 2007 +// the Initial Developer. All Rights Reserved. +// +// Contributor(s): +// Ruben Arakelyan +// Gervase Markham +// Pamela Greene +// David Triendl +// Jothan Frakes +// The kind representatives of many TLD registries +// +// Alternatively, the contents of this file may be used under the terms of +// either the GNU General Public License Version 2 or later (the "GPL"), or +// the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +// in which case the provisions of the GPL or the LGPL are applicable instead +// of those above. 
If you wish to allow use of your version of this file only +// under the terms of either the GPL or the LGPL, and not to allow others to +// use your version of this file under the terms of the MPL, indicate your +// decision by deleting the provisions above and replace them with the notice +// and other provisions required by the GPL or the LGPL. If you do not delete +// the provisions above, a recipient may use your version of this file under +// the terms of any one of the MPL, the GPL or the LGPL. +// +// ***** END LICENSE BLOCK ***** + +// ===BEGIN ICANN DOMAINS=== + +// ac : http://en.wikipedia.org/wiki/.ac +ac +com.ac +edu.ac +gov.ac +net.ac +mil.ac +org.ac + +// ad : http://en.wikipedia.org/wiki/.ad +ad +nom.ad + +// ae : http://en.wikipedia.org/wiki/.ae +// see also: "Domain Name Eligibility Policy" at http://www.aeda.ae/eng/aepolicy.php +ae +co.ae +net.ae +org.ae +sch.ae +ac.ae +gov.ae +mil.ae + +// aero : see http://www.information.aero/index.php?id=66 +aero +accident-investigation.aero +accident-prevention.aero +aerobatic.aero +aeroclub.aero +aerodrome.aero +agents.aero +aircraft.aero +airline.aero +airport.aero +air-surveillance.aero +airtraffic.aero +air-traffic-control.aero +ambulance.aero +amusement.aero +association.aero +author.aero +ballooning.aero +broker.aero +caa.aero +cargo.aero +catering.aero +certification.aero +championship.aero +charter.aero +civilaviation.aero +club.aero +conference.aero +consultant.aero +consulting.aero +control.aero +council.aero +crew.aero +design.aero +dgca.aero +educator.aero +emergency.aero +engine.aero +engineer.aero +entertainment.aero +equipment.aero +exchange.aero +express.aero +federation.aero +flight.aero +freight.aero +fuel.aero +gliding.aero +government.aero +groundhandling.aero +group.aero +hanggliding.aero +homebuilt.aero +insurance.aero +journal.aero +journalist.aero +leasing.aero +logistics.aero +magazine.aero +maintenance.aero +marketplace.aero +media.aero +microlight.aero +modelling.aero 
+navigation.aero +parachuting.aero +paragliding.aero +passenger-association.aero +pilot.aero +press.aero +production.aero +recreation.aero +repbody.aero +res.aero +research.aero +rotorcraft.aero +safety.aero +scientist.aero +services.aero +show.aero +skydiving.aero +software.aero +student.aero +taxi.aero +trader.aero +trading.aero +trainer.aero +union.aero +workinggroup.aero +works.aero + +// af : http://www.nic.af/help.jsp +af +gov.af +com.af +org.af +net.af +edu.af + +// ag : http://www.nic.ag/prices.htm +ag +com.ag +org.ag +net.ag +co.ag +nom.ag + +// ai : http://nic.com.ai/ +ai +off.ai +com.ai +net.ai +org.ai + +// al : http://www.ert.gov.al/ert_alb/faq_det.html?Id=31 +al +com.al +edu.al +gov.al +mil.al +net.al +org.al + +// am : http://en.wikipedia.org/wiki/.am +am + +// an : http://www.una.an/an_domreg/default.asp +an +com.an +net.an +org.an +edu.an + +// ao : http://en.wikipedia.org/wiki/.ao +// http://www.dns.ao/REGISTR.DOC +ao +ed.ao +gv.ao +og.ao +co.ao +pb.ao +it.ao + +// aq : http://en.wikipedia.org/wiki/.aq +aq + +// ar : http://en.wikipedia.org/wiki/.ar +*.ar +!congresodelalengua3.ar +!educ.ar +!gobiernoelectronico.ar +!mecon.ar +!nacion.ar +!nic.ar +!promocion.ar +!retina.ar +!uba.ar + +// arpa : http://en.wikipedia.org/wiki/.arpa +// Confirmed by registry 2008-06-18 +e164.arpa +in-addr.arpa +ip6.arpa +iris.arpa +uri.arpa +urn.arpa + +// as : http://en.wikipedia.org/wiki/.as +as +gov.as + +// asia : http://en.wikipedia.org/wiki/.asia +asia + +// at : http://en.wikipedia.org/wiki/.at +// Confirmed by registry 2008-06-17 +at +ac.at +co.at +gv.at +or.at + +// au : http://en.wikipedia.org/wiki/.au +// http://www.auda.org.au/ +// 2LDs +com.au +net.au +org.au +edu.au +gov.au +csiro.au +asn.au +id.au +// Historic 2LDs (closed to new registration, but sites still exist) +info.au +conf.au +oz.au +// CGDNs - http://www.cgdn.org.au/ +act.au +nsw.au +nt.au +qld.au +sa.au +tas.au +vic.au +wa.au +// 3LDs +act.edu.au +nsw.edu.au +nt.edu.au +qld.edu.au +sa.edu.au 
+tas.edu.au +vic.edu.au +wa.edu.au +act.gov.au +// Removed at request of Shae.Donelan@services.nsw.gov.au, 2010-03-04 +// nsw.gov.au +nt.gov.au +qld.gov.au +sa.gov.au +tas.gov.au +vic.gov.au +wa.gov.au + +// aw : http://en.wikipedia.org/wiki/.aw +aw +com.aw + +// ax : http://en.wikipedia.org/wiki/.ax +ax + +// az : http://en.wikipedia.org/wiki/.az +az +com.az +net.az +int.az +gov.az +org.az +edu.az +info.az +pp.az +mil.az +name.az +pro.az +biz.az + +// ba : http://en.wikipedia.org/wiki/.ba +ba +org.ba +net.ba +edu.ba +gov.ba +mil.ba +unsa.ba +unbi.ba +co.ba +com.ba +rs.ba + +// bb : http://en.wikipedia.org/wiki/.bb +bb +biz.bb +com.bb +edu.bb +gov.bb +info.bb +net.bb +org.bb +store.bb + +// bd : http://en.wikipedia.org/wiki/.bd +*.bd + +// be : http://en.wikipedia.org/wiki/.be +// Confirmed by registry 2008-06-08 +be +ac.be + +// bf : http://en.wikipedia.org/wiki/.bf +bf +gov.bf + +// bg : http://en.wikipedia.org/wiki/.bg +// https://www.register.bg/user/static/rules/en/index.html +bg +a.bg +b.bg +c.bg +d.bg +e.bg +f.bg +g.bg +h.bg +i.bg +j.bg +k.bg +l.bg +m.bg +n.bg +o.bg +p.bg +q.bg +r.bg +s.bg +t.bg +u.bg +v.bg +w.bg +x.bg +y.bg +z.bg +0.bg +1.bg +2.bg +3.bg +4.bg +5.bg +6.bg +7.bg +8.bg +9.bg + +// bh : http://en.wikipedia.org/wiki/.bh +bh +com.bh +edu.bh +net.bh +org.bh +gov.bh + +// bi : http://en.wikipedia.org/wiki/.bi +// http://whois.nic.bi/ +bi +co.bi +com.bi +edu.bi +or.bi +org.bi + +// biz : http://en.wikipedia.org/wiki/.biz +biz + +// bj : http://en.wikipedia.org/wiki/.bj +bj +asso.bj +barreau.bj +gouv.bj + +// bm : http://www.bermudanic.bm/dnr-text.txt +bm +com.bm +edu.bm +gov.bm +net.bm +org.bm + +// bn : http://en.wikipedia.org/wiki/.bn +*.bn + +// bo : http://www.nic.bo/ +bo +com.bo +edu.bo +gov.bo +gob.bo +int.bo +org.bo +net.bo +mil.bo +tv.bo + +// br : http://registro.br/dominio/dpn.html +// Updated by registry 2011-03-01 +br +adm.br +adv.br +agr.br +am.br +arq.br +art.br +ato.br +b.br +bio.br +blog.br +bmd.br +can.br +cim.br +cng.br +cnt.br 
+com.br +coop.br +ecn.br +edu.br +emp.br +eng.br +esp.br +etc.br +eti.br +far.br +flog.br +fm.br +fnd.br +fot.br +fst.br +g12.br +ggf.br +gov.br +imb.br +ind.br +inf.br +jor.br +jus.br +lel.br +mat.br +med.br +mil.br +mus.br +net.br +nom.br +not.br +ntr.br +odo.br +org.br +ppg.br +pro.br +psc.br +psi.br +qsl.br +radio.br +rec.br +slg.br +srv.br +taxi.br +teo.br +tmp.br +trd.br +tur.br +tv.br +vet.br +vlog.br +wiki.br +zlg.br + +// bs : http://www.nic.bs/rules.html +bs +com.bs +net.bs +org.bs +edu.bs +gov.bs + +// bt : http://en.wikipedia.org/wiki/.bt +bt +com.bt +edu.bt +gov.bt +net.bt +org.bt + +// bv : No registrations at this time. +// Submitted by registry 2006-06-16 + +// bw : http://en.wikipedia.org/wiki/.bw +// http://www.gobin.info/domainname/bw.doc +// list of other 2nd level tlds ? +bw +co.bw +org.bw + +// by : http://en.wikipedia.org/wiki/.by +// http://tld.by/rules_2006_en.html +// list of other 2nd level tlds ? +by +gov.by +mil.by +// Official information does not indicate that com.by is a reserved +// second-level domain, but it's being used as one (see www.google.com.by and +// www.yahoo.com.by, for example), so we list it here for safety's sake. 
+com.by + +// http://hoster.by/ +of.by + +// bz : http://en.wikipedia.org/wiki/.bz +// http://www.belizenic.bz/ +bz +com.bz +net.bz +org.bz +edu.bz +gov.bz + +// ca : http://en.wikipedia.org/wiki/.ca +ca +// ca geographical names +ab.ca +bc.ca +mb.ca +nb.ca +nf.ca +nl.ca +ns.ca +nt.ca +nu.ca +on.ca +pe.ca +qc.ca +sk.ca +yk.ca +// gc.ca: http://en.wikipedia.org/wiki/.gc.ca +// see also: http://registry.gc.ca/en/SubdomainFAQ +gc.ca + +// cat : http://en.wikipedia.org/wiki/.cat +cat + +// cc : http://en.wikipedia.org/wiki/.cc +cc + +// cd : http://en.wikipedia.org/wiki/.cd +// see also: https://www.nic.cd/domain/insertDomain_2.jsp?act=1 +cd +gov.cd + +// cf : http://en.wikipedia.org/wiki/.cf +cf + +// cg : http://en.wikipedia.org/wiki/.cg +cg + +// ch : http://en.wikipedia.org/wiki/.ch +ch + +// ci : http://en.wikipedia.org/wiki/.ci +// http://www.nic.ci/index.php?page=charte +ci +org.ci +or.ci +com.ci +co.ci +edu.ci +ed.ci +ac.ci +net.ci +go.ci +asso.ci +aéroport.ci +int.ci +presse.ci +md.ci +gouv.ci + +// ck : http://en.wikipedia.org/wiki/.ck +*.ck +!www.ck + +// cl : http://en.wikipedia.org/wiki/.cl +cl +gov.cl +gob.cl +co.cl +mil.cl + +// cm : http://en.wikipedia.org/wiki/.cm +cm +gov.cm + +// cn : http://en.wikipedia.org/wiki/.cn +// Submitted by registry 2008-06-11 +cn +ac.cn +com.cn +edu.cn +gov.cn +net.cn +org.cn +mil.cn +公司.cn +网络.cn +網絡.cn +// cn geographic names +ah.cn +bj.cn +cq.cn +fj.cn +gd.cn +gs.cn +gz.cn +gx.cn +ha.cn +hb.cn +he.cn +hi.cn +hl.cn +hn.cn +jl.cn +js.cn +jx.cn +ln.cn +nm.cn +nx.cn +qh.cn +sc.cn +sd.cn +sh.cn +sn.cn +sx.cn +tj.cn +xj.cn +xz.cn +yn.cn +zj.cn +hk.cn +mo.cn +tw.cn + +// co : http://en.wikipedia.org/wiki/.co +// Submitted by registry 2008-06-11 +co +arts.co +com.co +edu.co +firm.co +gov.co +info.co +int.co +mil.co +net.co +nom.co +org.co +rec.co +web.co + +// com : http://en.wikipedia.org/wiki/.com +com + +// coop : http://en.wikipedia.org/wiki/.coop +coop + +// cr : 
http://www.nic.cr/niccr_publico/showRegistroDominiosScreen.do +cr +ac.cr +co.cr +ed.cr +fi.cr +go.cr +or.cr +sa.cr + +// cu : http://en.wikipedia.org/wiki/.cu +cu +com.cu +edu.cu +org.cu +net.cu +gov.cu +inf.cu + +// cv : http://en.wikipedia.org/wiki/.cv +cv + +// cx : http://en.wikipedia.org/wiki/.cx +// list of other 2nd level tlds ? +cx +gov.cx + +// cy : http://en.wikipedia.org/wiki/.cy +*.cy + +// cz : http://en.wikipedia.org/wiki/.cz +cz + +// de : http://en.wikipedia.org/wiki/.de +// Confirmed by registry (with technical +// reservations) 2008-07-01 +de + +// dj : http://en.wikipedia.org/wiki/.dj +dj + +// dk : http://en.wikipedia.org/wiki/.dk +// Confirmed by registry 2008-06-17 +dk + +// dm : http://en.wikipedia.org/wiki/.dm +dm +com.dm +net.dm +org.dm +edu.dm +gov.dm + +// do : http://en.wikipedia.org/wiki/.do +do +art.do +com.do +edu.do +gob.do +gov.do +mil.do +net.do +org.do +sld.do +web.do + +// dz : http://en.wikipedia.org/wiki/.dz +dz +com.dz +org.dz +net.dz +gov.dz +edu.dz +asso.dz +pol.dz +art.dz + +// ec : http://www.nic.ec/reg/paso1.asp +// Submitted by registry 2008-07-04 +ec +com.ec +info.ec +net.ec +fin.ec +k12.ec +med.ec +pro.ec +org.ec +edu.ec +gov.ec +gob.ec +mil.ec + +// edu : http://en.wikipedia.org/wiki/.edu +edu + +// ee : http://www.eenet.ee/EENet/dom_reeglid.html#lisa_B +ee +edu.ee +gov.ee +riik.ee +lib.ee +med.ee +com.ee +pri.ee +aip.ee +org.ee +fie.ee + +// eg : http://en.wikipedia.org/wiki/.eg +eg +com.eg +edu.eg +eun.eg +gov.eg +mil.eg +name.eg +net.eg +org.eg +sci.eg + +// er : http://en.wikipedia.org/wiki/.er +*.er + +// es : https://www.nic.es/site_ingles/ingles/dominios/index.html +es +com.es +nom.es +org.es +gob.es +edu.es + +// et : http://en.wikipedia.org/wiki/.et +*.et + +// eu : http://en.wikipedia.org/wiki/.eu +eu + +// fi : http://en.wikipedia.org/wiki/.fi +fi +// aland.fi : http://en.wikipedia.org/wiki/.ax +// This domain is being phased out in favor of .ax. 
As there are still many +// domains under aland.fi, we still keep it on the list until aland.fi is +// completely removed. +// TODO: Check for updates (expected to be phased out around Q1/2009) +aland.fi + +// fj : http://en.wikipedia.org/wiki/.fj +*.fj + +// fk : http://en.wikipedia.org/wiki/.fk +*.fk + +// fm : http://en.wikipedia.org/wiki/.fm +fm + +// fo : http://en.wikipedia.org/wiki/.fo +fo + +// fr : http://www.afnic.fr/ +// domaines descriptifs : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-descriptifs +fr +com.fr +asso.fr +nom.fr +prd.fr +presse.fr +tm.fr +// domaines sectoriels : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-sectoriels +aeroport.fr +assedic.fr +avocat.fr +avoues.fr +cci.fr +chambagri.fr +chirurgiens-dentistes.fr +experts-comptables.fr +geometre-expert.fr +gouv.fr +greta.fr +huissier-justice.fr +medecin.fr +notaires.fr +pharmacien.fr +port.fr +veterinaire.fr + +// ga : http://en.wikipedia.org/wiki/.ga +ga + +// gb : This registry is effectively dormant +// Submitted by registry 2008-06-12 + +// gd : http://en.wikipedia.org/wiki/.gd +gd + +// ge : http://www.nic.net.ge/policy_en.pdf +ge +com.ge +edu.ge +gov.ge +org.ge +mil.ge +net.ge +pvt.ge + +// gf : http://en.wikipedia.org/wiki/.gf +gf + +// gg : http://www.channelisles.net/applic/avextn.shtml +gg +co.gg +org.gg +net.gg +sch.gg +gov.gg + +// gh : http://en.wikipedia.org/wiki/.gh +// see also: http://www.nic.gh/reg_now.php +// Although domains directly at second level are not possible at the moment, +// they have been possible for some time and may come back. 
+gh +com.gh +edu.gh +gov.gh +org.gh +mil.gh + +// gi : http://www.nic.gi/rules.html +gi +com.gi +ltd.gi +gov.gi +mod.gi +edu.gi +org.gi + +// gl : http://en.wikipedia.org/wiki/.gl +// http://nic.gl +gl + +// gm : http://www.nic.gm/htmlpages%5Cgm-policy.htm +gm + +// gn : http://psg.com/dns/gn/gn.txt +// Submitted by registry 2008-06-17 +ac.gn +com.gn +edu.gn +gov.gn +org.gn +net.gn + +// gov : http://en.wikipedia.org/wiki/.gov +gov + +// gp : http://www.nic.gp/index.php?lang=en +gp +com.gp +net.gp +mobi.gp +edu.gp +org.gp +asso.gp + +// gq : http://en.wikipedia.org/wiki/.gq +gq + +// gr : https://grweb.ics.forth.gr/english/1617-B-2005.html +// Submitted by registry 2008-06-09 +gr +com.gr +edu.gr +net.gr +org.gr +gov.gr + +// gs : http://en.wikipedia.org/wiki/.gs +gs + +// gt : http://www.gt/politicas.html +*.gt +!www.gt + +// gu : http://gadao.gov.gu/registration.txt +*.gu + +// gw : http://en.wikipedia.org/wiki/.gw +gw + +// gy : http://en.wikipedia.org/wiki/.gy +// http://registry.gy/ +gy +co.gy +com.gy +net.gy + +// hk : https://www.hkdnr.hk +// Submitted by registry 2008-06-11 +hk +com.hk +edu.hk +gov.hk +idv.hk +net.hk +org.hk +公司.hk +教育.hk +敎育.hk +政府.hk +個人.hk +个人.hk +箇人.hk +網络.hk +网络.hk +组織.hk +網絡.hk +网絡.hk +组织.hk +組織.hk +組织.hk + +// hm : http://en.wikipedia.org/wiki/.hm +hm + +// hn : http://www.nic.hn/politicas/ps02,,05.html +hn +com.hn +edu.hn +org.hn +net.hn +mil.hn +gob.hn + +// hr : http://www.dns.hr/documents/pdf/HRTLD-regulations.pdf +hr +iz.hr +from.hr +name.hr +com.hr + +// ht : http://www.nic.ht/info/charte.cfm +ht +com.ht +shop.ht +firm.ht +info.ht +adult.ht +net.ht +pro.ht +org.ht +med.ht +art.ht +coop.ht +pol.ht +asso.ht +edu.ht +rel.ht +gouv.ht +perso.ht + +// hu : http://www.domain.hu/domain/English/sld.html +// Confirmed by registry 2008-06-12 +hu +co.hu +info.hu +org.hu +priv.hu +sport.hu +tm.hu +2000.hu +agrar.hu +bolt.hu +casino.hu +city.hu +erotica.hu +erotika.hu +film.hu +forum.hu +games.hu +hotel.hu +ingatlan.hu +jogasz.hu +konyvelo.hu 
+lakas.hu +media.hu +news.hu +reklam.hu +sex.hu +shop.hu +suli.hu +szex.hu +tozsde.hu +utazas.hu +video.hu + +// id : http://en.wikipedia.org/wiki/.id +// see also: https://register.pandi.or.id/ +id +ac.id +co.id +go.id +mil.id +net.id +or.id +sch.id +web.id + +// ie : http://en.wikipedia.org/wiki/.ie +ie +gov.ie + +// il : http://en.wikipedia.org/wiki/.il +*.il + +// im : https://www.nic.im/pdfs/imfaqs.pdf +im +co.im +ltd.co.im +plc.co.im +net.im +gov.im +org.im +nic.im +ac.im + +// in : http://en.wikipedia.org/wiki/.in +// see also: http://www.inregistry.in/policies/ +// Please note, that nic.in is not an offical eTLD, but used by most +// government institutions. +in +co.in +firm.in +net.in +org.in +gen.in +ind.in +nic.in +ac.in +edu.in +res.in +gov.in +mil.in + +// info : http://en.wikipedia.org/wiki/.info +info + +// int : http://en.wikipedia.org/wiki/.int +// Confirmed by registry 2008-06-18 +int +eu.int + +// io : http://www.nic.io/rules.html +// list of other 2nd level tlds ? +io +com.io + +// iq : http://www.cmc.iq/english/iq/iqregister1.htm +iq +gov.iq +edu.iq +mil.iq +com.iq +org.iq +net.iq + +// ir : http://www.nic.ir/Terms_and_Conditions_ir,_Appendix_1_Domain_Rules +// Also see http://www.nic.ir/Internationalized_Domain_Names +// Two .ir entries added at request of , 2010-04-16 +ir +ac.ir +co.ir +gov.ir +id.ir +net.ir +org.ir +sch.ir +// xn--mgba3a4f16a.ir (.ir, Persian YEH) +ایران.ir +// xn--mgba3a4fra.ir (.ir, Arabic YEH) +ايران.ir + +// is : http://www.isnic.is/domain/rules.php +// Confirmed by registry 2008-12-06 +is +net.is +com.is +edu.is +gov.is +org.is +int.is + +// it : http://en.wikipedia.org/wiki/.it +it +gov.it +edu.it +// list of reserved geo-names : +// http://www.nic.it/documenti/regolamenti-e-linee-guida/regolamento-assegnazione-versione-6.0.pdf +// (There is also a list of reserved geo-names corresponding to Italian +// municipalities : http://www.nic.it/documenti/appendice-c.pdf , but it is +// not included here.) 
+agrigento.it +ag.it +alessandria.it +al.it +ancona.it +an.it +aosta.it +aoste.it +ao.it +arezzo.it +ar.it +ascoli-piceno.it +ascolipiceno.it +ap.it +asti.it +at.it +avellino.it +av.it +bari.it +ba.it +andria-barletta-trani.it +andriabarlettatrani.it +trani-barletta-andria.it +tranibarlettaandria.it +barletta-trani-andria.it +barlettatraniandria.it +andria-trani-barletta.it +andriatranibarletta.it +trani-andria-barletta.it +traniandriabarletta.it +bt.it +belluno.it +bl.it +benevento.it +bn.it +bergamo.it +bg.it +biella.it +bi.it +bologna.it +bo.it +bolzano.it +bozen.it +balsan.it +alto-adige.it +altoadige.it +suedtirol.it +bz.it +brescia.it +bs.it +brindisi.it +br.it +cagliari.it +ca.it +caltanissetta.it +cl.it +campobasso.it +cb.it +carboniaiglesias.it +carbonia-iglesias.it +iglesias-carbonia.it +iglesiascarbonia.it +ci.it +caserta.it +ce.it +catania.it +ct.it +catanzaro.it +cz.it +chieti.it +ch.it +como.it +co.it +cosenza.it +cs.it +cremona.it +cr.it +crotone.it +kr.it +cuneo.it +cn.it +dell-ogliastra.it +dellogliastra.it +ogliastra.it +og.it +enna.it +en.it +ferrara.it +fe.it +fermo.it +fm.it +firenze.it +florence.it +fi.it +foggia.it +fg.it +forli-cesena.it +forlicesena.it +cesena-forli.it +cesenaforli.it +fc.it +frosinone.it +fr.it +genova.it +genoa.it +ge.it +gorizia.it +go.it +grosseto.it +gr.it +imperia.it +im.it +isernia.it +is.it +laquila.it +aquila.it +aq.it +la-spezia.it +laspezia.it +sp.it +latina.it +lt.it +lecce.it +le.it +lecco.it +lc.it +livorno.it +li.it +lodi.it +lo.it +lucca.it +lu.it +macerata.it +mc.it +mantova.it +mn.it +massa-carrara.it +massacarrara.it +carrara-massa.it +carraramassa.it +ms.it +matera.it +mt.it +medio-campidano.it +mediocampidano.it +campidano-medio.it +campidanomedio.it +vs.it +messina.it +me.it +milano.it +milan.it +mi.it +modena.it +mo.it +monza.it +monza-brianza.it +monzabrianza.it +monzaebrianza.it +monzaedellabrianza.it +monza-e-della-brianza.it +mb.it +napoli.it +naples.it +na.it +novara.it +no.it +nuoro.it +nu.it 
+oristano.it +or.it +padova.it +padua.it +pd.it +palermo.it +pa.it +parma.it +pr.it +pavia.it +pv.it +perugia.it +pg.it +pescara.it +pe.it +pesaro-urbino.it +pesarourbino.it +urbino-pesaro.it +urbinopesaro.it +pu.it +piacenza.it +pc.it +pisa.it +pi.it +pistoia.it +pt.it +pordenone.it +pn.it +potenza.it +pz.it +prato.it +po.it +ragusa.it +rg.it +ravenna.it +ra.it +reggio-calabria.it +reggiocalabria.it +rc.it +reggio-emilia.it +reggioemilia.it +re.it +rieti.it +ri.it +rimini.it +rn.it +roma.it +rome.it +rm.it +rovigo.it +ro.it +salerno.it +sa.it +sassari.it +ss.it +savona.it +sv.it +siena.it +si.it +siracusa.it +sr.it +sondrio.it +so.it +taranto.it +ta.it +tempio-olbia.it +tempioolbia.it +olbia-tempio.it +olbiatempio.it +ot.it +teramo.it +te.it +terni.it +tr.it +torino.it +turin.it +to.it +trapani.it +tp.it +trento.it +trentino.it +tn.it +treviso.it +tv.it +trieste.it +ts.it +udine.it +ud.it +varese.it +va.it +venezia.it +venice.it +ve.it +verbania.it +vb.it +vercelli.it +vc.it +verona.it +vr.it +vibo-valentia.it +vibovalentia.it +vv.it +vicenza.it +vi.it +viterbo.it +vt.it + +// je : http://www.channelisles.net/applic/avextn.shtml +je +co.je +org.je +net.je +sch.je +gov.je + +// jm : http://www.com.jm/register.html +*.jm + +// jo : http://www.dns.jo/Registration_policy.aspx +jo +com.jo +org.jo +net.jo +edu.jo +sch.jo +gov.jo +mil.jo +name.jo + +// jobs : http://en.wikipedia.org/wiki/.jobs +jobs + +// jp : http://en.wikipedia.org/wiki/.jp +// http://jprs.co.jp/en/jpdomain.html +// Submitted by registry 2008-06-11 +// Updated by registry 2008-12-04 +jp +// jp organizational type names +ac.jp +ad.jp +co.jp +ed.jp +go.jp +gr.jp +lg.jp +ne.jp +or.jp +// jp geographic type names +// http://jprs.jp/doc/rule/saisoku-1.html +*.aichi.jp +*.akita.jp +*.aomori.jp +*.chiba.jp +*.ehime.jp +*.fukui.jp +*.fukuoka.jp +*.fukushima.jp +*.gifu.jp +*.gunma.jp +*.hiroshima.jp +*.hokkaido.jp +*.hyogo.jp +*.ibaraki.jp +*.ishikawa.jp +*.iwate.jp +*.kagawa.jp +*.kagoshima.jp +*.kanagawa.jp 
+*.kawasaki.jp +*.kitakyushu.jp +*.kobe.jp +*.kochi.jp +*.kumamoto.jp +*.kyoto.jp +*.mie.jp +*.miyagi.jp +*.miyazaki.jp +*.nagano.jp +*.nagasaki.jp +*.nagoya.jp +*.nara.jp +*.niigata.jp +*.oita.jp +*.okayama.jp +*.okinawa.jp +*.osaka.jp +*.saga.jp +*.saitama.jp +*.sapporo.jp +*.sendai.jp +*.shiga.jp +*.shimane.jp +*.shizuoka.jp +*.tochigi.jp +*.tokushima.jp +*.tokyo.jp +*.tottori.jp +*.toyama.jp +*.wakayama.jp +*.yamagata.jp +*.yamaguchi.jp +*.yamanashi.jp +*.yokohama.jp +!metro.tokyo.jp +!pref.aichi.jp +!pref.akita.jp +!pref.aomori.jp +!pref.chiba.jp +!pref.ehime.jp +!pref.fukui.jp +!pref.fukuoka.jp +!pref.fukushima.jp +!pref.gifu.jp +!pref.gunma.jp +!pref.hiroshima.jp +!pref.hokkaido.jp +!pref.hyogo.jp +!pref.ibaraki.jp +!pref.ishikawa.jp +!pref.iwate.jp +!pref.kagawa.jp +!pref.kagoshima.jp +!pref.kanagawa.jp +!pref.kochi.jp +!pref.kumamoto.jp +!pref.kyoto.jp +!pref.mie.jp +!pref.miyagi.jp +!pref.miyazaki.jp +!pref.nagano.jp +!pref.nagasaki.jp +!pref.nara.jp +!pref.niigata.jp +!pref.oita.jp +!pref.okayama.jp +!pref.okinawa.jp +!pref.osaka.jp +!pref.saga.jp +!pref.saitama.jp +!pref.shiga.jp +!pref.shimane.jp +!pref.shizuoka.jp +!pref.tochigi.jp +!pref.tokushima.jp +!pref.tottori.jp +!pref.toyama.jp +!pref.wakayama.jp +!pref.yamagata.jp +!pref.yamaguchi.jp +!pref.yamanashi.jp +!city.chiba.jp +!city.fukuoka.jp +!city.hiroshima.jp +!city.kawasaki.jp +!city.kitakyushu.jp +!city.kobe.jp +!city.kyoto.jp +!city.nagoya.jp +!city.niigata.jp +!city.okayama.jp +!city.osaka.jp +!city.saitama.jp +!city.sapporo.jp +!city.sendai.jp +!city.shizuoka.jp +!city.yokohama.jp + +// ke : http://www.kenic.or.ke/index.php?option=com_content&task=view&id=117&Itemid=145 +*.ke + +// kg : http://www.domain.kg/dmn_n.html +kg +org.kg +net.kg +com.kg +edu.kg +gov.kg +mil.kg + +// kh : http://www.mptc.gov.kh/dns_registration.htm +*.kh + +// ki : http://www.ki/dns/index.html +ki +edu.ki +biz.ki +net.ki +org.ki +gov.ki +info.ki +com.ki + +// km : http://en.wikipedia.org/wiki/.km +// 
http://www.domaine.km/documents/charte.doc +km +org.km +nom.km +gov.km +prd.km +tm.km +edu.km +mil.km +ass.km +com.km +// These are only mentioned as proposed suggestions at domaine.km, but +// http://en.wikipedia.org/wiki/.km says they're available for registration: +coop.km +asso.km +presse.km +medecin.km +notaires.km +pharmaciens.km +veterinaire.km +gouv.km + +// kn : http://en.wikipedia.org/wiki/.kn +// http://www.dot.kn/domainRules.html +kn +net.kn +org.kn +edu.kn +gov.kn + +// kp : http://www.kcce.kp/en_index.php +com.kp +edu.kp +gov.kp +org.kp +rep.kp +tra.kp + +// kr : http://en.wikipedia.org/wiki/.kr +// see also: http://domain.nida.or.kr/eng/registration.jsp +kr +ac.kr +co.kr +es.kr +go.kr +hs.kr +kg.kr +mil.kr +ms.kr +ne.kr +or.kr +pe.kr +re.kr +sc.kr +// kr geographical names +busan.kr +chungbuk.kr +chungnam.kr +daegu.kr +daejeon.kr +gangwon.kr +gwangju.kr +gyeongbuk.kr +gyeonggi.kr +gyeongnam.kr +incheon.kr +jeju.kr +jeonbuk.kr +jeonnam.kr +seoul.kr +ulsan.kr + +// kw : http://en.wikipedia.org/wiki/.kw +*.kw + +// ky : http://www.icta.ky/da_ky_reg_dom.php +// Confirmed by registry 2008-06-17 +ky +edu.ky +gov.ky +com.ky +org.ky +net.ky + +// kz : http://en.wikipedia.org/wiki/.kz +// see also: http://www.nic.kz/rules/index.jsp +kz +org.kz +edu.kz +net.kz +gov.kz +mil.kz +com.kz + +// la : http://en.wikipedia.org/wiki/.la +// Submitted by registry 2008-06-10 +la +int.la +net.la +info.la +edu.la +gov.la +per.la +com.la +org.la + +// lb : http://en.wikipedia.org/wiki/.lb +// Submitted by registry 2008-06-17 +com.lb +edu.lb +gov.lb +net.lb +org.lb + +// lc : http://en.wikipedia.org/wiki/.lc +// see also: http://www.nic.lc/rules.htm +lc +com.lc +net.lc +co.lc +org.lc +edu.lc +gov.lc + +// li : http://en.wikipedia.org/wiki/.li +li + +// lk : http://www.nic.lk/seclevpr.html +lk +gov.lk +sch.lk +net.lk +int.lk +com.lk +org.lk +edu.lk +ngo.lk +soc.lk +web.lk +ltd.lk +assn.lk +grp.lk +hotel.lk + +// lr : http://psg.com/dns/lr/lr.txt +// Submitted by registry 
2008-06-17 +com.lr +edu.lr +gov.lr +org.lr +net.lr + +// ls : http://en.wikipedia.org/wiki/.ls +ls +co.ls +org.ls + +// lt : http://en.wikipedia.org/wiki/.lt +lt +// gov.lt : http://www.gov.lt/index_en.php +gov.lt + +// lu : http://www.dns.lu/en/ +lu + +// lv : http://www.nic.lv/DNS/En/generic.php +lv +com.lv +edu.lv +gov.lv +org.lv +mil.lv +id.lv +net.lv +asn.lv +conf.lv + +// ly : http://www.nic.ly/regulations.php +ly +com.ly +net.ly +gov.ly +plc.ly +edu.ly +sch.ly +med.ly +org.ly +id.ly + +// ma : http://en.wikipedia.org/wiki/.ma +// http://www.anrt.ma/fr/admin/download/upload/file_fr782.pdf +ma +co.ma +net.ma +gov.ma +org.ma +ac.ma +press.ma + +// mc : http://www.nic.mc/ +mc +tm.mc +asso.mc + +// md : http://en.wikipedia.org/wiki/.md +md + +// me : http://en.wikipedia.org/wiki/.me +me +co.me +net.me +org.me +edu.me +ac.me +gov.me +its.me +priv.me + +// mg : http://www.nic.mg/tarif.htm +mg +org.mg +nom.mg +gov.mg +prd.mg +tm.mg +edu.mg +mil.mg +com.mg + +// mh : http://en.wikipedia.org/wiki/.mh +mh + +// mil : http://en.wikipedia.org/wiki/.mil +mil + +// mk : http://en.wikipedia.org/wiki/.mk +// see also: http://dns.marnet.net.mk/postapka.php +mk +com.mk +org.mk +net.mk +edu.mk +gov.mk +inf.mk +name.mk + +// ml : http://www.gobin.info/domainname/ml-template.doc +// see also: http://en.wikipedia.org/wiki/.ml +ml +com.ml +edu.ml +gouv.ml +gov.ml +net.ml +org.ml +presse.ml + +// mm : http://en.wikipedia.org/wiki/.mm +*.mm + +// mn : http://en.wikipedia.org/wiki/.mn +mn +gov.mn +edu.mn +org.mn + +// mo : http://www.monic.net.mo/ +mo +com.mo +net.mo +org.mo +edu.mo +gov.mo + +// mobi : http://en.wikipedia.org/wiki/.mobi +mobi + +// mp : http://www.dot.mp/ +// Confirmed by registry 2008-06-17 +mp + +// mq : http://en.wikipedia.org/wiki/.mq +mq + +// mr : http://en.wikipedia.org/wiki/.mr +mr +gov.mr + +// ms : http://en.wikipedia.org/wiki/.ms +ms + +// mt : https://www.nic.org.mt/dotmt/ +*.mt + +// mu : http://en.wikipedia.org/wiki/.mu +mu +com.mu +net.mu +org.mu 
+gov.mu +ac.mu +co.mu +or.mu + +// museum : http://about.museum/naming/ +// http://index.museum/ +museum +academy.museum +agriculture.museum +air.museum +airguard.museum +alabama.museum +alaska.museum +amber.museum +ambulance.museum +american.museum +americana.museum +americanantiques.museum +americanart.museum +amsterdam.museum +and.museum +annefrank.museum +anthro.museum +anthropology.museum +antiques.museum +aquarium.museum +arboretum.museum +archaeological.museum +archaeology.museum +architecture.museum +art.museum +artanddesign.museum +artcenter.museum +artdeco.museum +arteducation.museum +artgallery.museum +arts.museum +artsandcrafts.museum +asmatart.museum +assassination.museum +assisi.museum +association.museum +astronomy.museum +atlanta.museum +austin.museum +australia.museum +automotive.museum +aviation.museum +axis.museum +badajoz.museum +baghdad.museum +bahn.museum +bale.museum +baltimore.museum +barcelona.museum +baseball.museum +basel.museum +baths.museum +bauern.museum +beauxarts.museum +beeldengeluid.museum +bellevue.museum +bergbau.museum +berkeley.museum +berlin.museum +bern.museum +bible.museum +bilbao.museum +bill.museum +birdart.museum +birthplace.museum +bonn.museum +boston.museum +botanical.museum +botanicalgarden.museum +botanicgarden.museum +botany.museum +brandywinevalley.museum +brasil.museum +bristol.museum +british.museum +britishcolumbia.museum +broadcast.museum +brunel.museum +brussel.museum +brussels.museum +bruxelles.museum +building.museum +burghof.museum +bus.museum +bushey.museum +cadaques.museum +california.museum +cambridge.museum +can.museum +canada.museum +capebreton.museum +carrier.museum +cartoonart.museum +casadelamoneda.museum +castle.museum +castres.museum +celtic.museum +center.museum +chattanooga.museum +cheltenham.museum +chesapeakebay.museum +chicago.museum +children.museum +childrens.museum +childrensgarden.museum +chiropractic.museum +chocolate.museum +christiansburg.museum +cincinnati.museum +cinema.museum 
+circus.museum +civilisation.museum +civilization.museum +civilwar.museum +clinton.museum +clock.museum +coal.museum +coastaldefence.museum +cody.museum +coldwar.museum +collection.museum +colonialwilliamsburg.museum +coloradoplateau.museum +columbia.museum +columbus.museum +communication.museum +communications.museum +community.museum +computer.museum +computerhistory.museum +comunicações.museum +contemporary.museum +contemporaryart.museum +convent.museum +copenhagen.museum +corporation.museum +correios-e-telecomunicações.museum +corvette.museum +costume.museum +countryestate.museum +county.museum +crafts.museum +cranbrook.museum +creation.museum +cultural.museum +culturalcenter.museum +culture.museum +cyber.museum +cymru.museum +dali.museum +dallas.museum +database.museum +ddr.museum +decorativearts.museum +delaware.museum +delmenhorst.museum +denmark.museum +depot.museum +design.museum +detroit.museum +dinosaur.museum +discovery.museum +dolls.museum +donostia.museum +durham.museum +eastafrica.museum +eastcoast.museum +education.museum +educational.museum +egyptian.museum +eisenbahn.museum +elburg.museum +elvendrell.museum +embroidery.museum +encyclopedic.museum +england.museum +entomology.museum +environment.museum +environmentalconservation.museum +epilepsy.museum +essex.museum +estate.museum +ethnology.museum +exeter.museum +exhibition.museum +family.museum +farm.museum +farmequipment.museum +farmers.museum +farmstead.museum +field.museum +figueres.museum +filatelia.museum +film.museum +fineart.museum +finearts.museum +finland.museum +flanders.museum +florida.museum +force.museum +fortmissoula.museum +fortworth.museum +foundation.museum +francaise.museum +frankfurt.museum +franziskaner.museum +freemasonry.museum +freiburg.museum +fribourg.museum +frog.museum +fundacio.museum +furniture.museum +gallery.museum +garden.museum +gateway.museum +geelvinck.museum +gemological.museum +geology.museum +georgia.museum +giessen.museum +glas.museum +glass.museum 
+gorge.museum +grandrapids.museum +graz.museum +guernsey.museum +halloffame.museum +hamburg.museum +handson.museum +harvestcelebration.museum +hawaii.museum +health.museum +heimatunduhren.museum +hellas.museum +helsinki.museum +hembygdsforbund.museum +heritage.museum +histoire.museum +historical.museum +historicalsociety.museum +historichouses.museum +historisch.museum +historisches.museum +history.museum +historyofscience.museum +horology.museum +house.museum +humanities.museum +illustration.museum +imageandsound.museum +indian.museum +indiana.museum +indianapolis.museum +indianmarket.museum +intelligence.museum +interactive.museum +iraq.museum +iron.museum +isleofman.museum +jamison.museum +jefferson.museum +jerusalem.museum +jewelry.museum +jewish.museum +jewishart.museum +jfk.museum +journalism.museum +judaica.museum +judygarland.museum +juedisches.museum +juif.museum +karate.museum +karikatur.museum +kids.museum +koebenhavn.museum +koeln.museum +kunst.museum +kunstsammlung.museum +kunstunddesign.museum +labor.museum +labour.museum +lajolla.museum +lancashire.museum +landes.museum +lans.museum +läns.museum +larsson.museum +lewismiller.museum +lincoln.museum +linz.museum +living.museum +livinghistory.museum +localhistory.museum +london.museum +losangeles.museum +louvre.museum +loyalist.museum +lucerne.museum +luxembourg.museum +luzern.museum +mad.museum +madrid.museum +mallorca.museum +manchester.museum +mansion.museum +mansions.museum +manx.museum +marburg.museum +maritime.museum +maritimo.museum +maryland.museum +marylhurst.museum +media.museum +medical.museum +medizinhistorisches.museum +meeres.museum +memorial.museum +mesaverde.museum +michigan.museum +midatlantic.museum +military.museum +mill.museum +miners.museum +mining.museum +minnesota.museum +missile.museum +missoula.museum +modern.museum +moma.museum +money.museum +monmouth.museum +monticello.museum +montreal.museum +moscow.museum +motorcycle.museum +muenchen.museum +muenster.museum +mulhouse.museum 
+muncie.museum +museet.museum +museumcenter.museum +museumvereniging.museum +music.museum +national.museum +nationalfirearms.museum +nationalheritage.museum +nativeamerican.museum +naturalhistory.museum +naturalhistorymuseum.museum +naturalsciences.museum +nature.museum +naturhistorisches.museum +natuurwetenschappen.museum +naumburg.museum +naval.museum +nebraska.museum +neues.museum +newhampshire.museum +newjersey.museum +newmexico.museum +newport.museum +newspaper.museum +newyork.museum +niepce.museum +norfolk.museum +north.museum +nrw.museum +nuernberg.museum +nuremberg.museum +nyc.museum +nyny.museum +oceanographic.museum +oceanographique.museum +omaha.museum +online.museum +ontario.museum +openair.museum +oregon.museum +oregontrail.museum +otago.museum +oxford.museum +pacific.museum +paderborn.museum +palace.museum +paleo.museum +palmsprings.museum +panama.museum +paris.museum +pasadena.museum +pharmacy.museum +philadelphia.museum +philadelphiaarea.museum +philately.museum +phoenix.museum +photography.museum +pilots.museum +pittsburgh.museum +planetarium.museum +plantation.museum +plants.museum +plaza.museum +portal.museum +portland.museum +portlligat.museum +posts-and-telecommunications.museum +preservation.museum +presidio.museum +press.museum +project.museum +public.museum +pubol.museum +quebec.museum +railroad.museum +railway.museum +research.museum +resistance.museum +riodejaneiro.museum +rochester.museum +rockart.museum +roma.museum +russia.museum +saintlouis.museum +salem.museum +salvadordali.museum +salzburg.museum +sandiego.museum +sanfrancisco.museum +santabarbara.museum +santacruz.museum +santafe.museum +saskatchewan.museum +satx.museum +savannahga.museum +schlesisches.museum +schoenbrunn.museum +schokoladen.museum +school.museum +schweiz.museum +science.museum +scienceandhistory.museum +scienceandindustry.museum +sciencecenter.museum +sciencecenters.museum +science-fiction.museum +sciencehistory.museum +sciences.museum +sciencesnaturelles.museum 
+scotland.museum +seaport.museum +settlement.museum +settlers.museum +shell.museum +sherbrooke.museum +sibenik.museum +silk.museum +ski.museum +skole.museum +society.museum +sologne.museum +soundandvision.museum +southcarolina.museum +southwest.museum +space.museum +spy.museum +square.museum +stadt.museum +stalbans.museum +starnberg.museum +state.museum +stateofdelaware.museum +station.museum +steam.museum +steiermark.museum +stjohn.museum +stockholm.museum +stpetersburg.museum +stuttgart.museum +suisse.museum +surgeonshall.museum +surrey.museum +svizzera.museum +sweden.museum +sydney.museum +tank.museum +tcm.museum +technology.museum +telekommunikation.museum +television.museum +texas.museum +textile.museum +theater.museum +time.museum +timekeeping.museum +topology.museum +torino.museum +touch.museum +town.museum +transport.museum +tree.museum +trolley.museum +trust.museum +trustee.museum +uhren.museum +ulm.museum +undersea.museum +university.museum +usa.museum +usantiques.museum +usarts.museum +uscountryestate.museum +usculture.museum +usdecorativearts.museum +usgarden.museum +ushistory.museum +ushuaia.museum +uslivinghistory.museum +utah.museum +uvic.museum +valley.museum +vantaa.museum +versailles.museum +viking.museum +village.museum +virginia.museum +virtual.museum +virtuel.museum +vlaanderen.museum +volkenkunde.museum +wales.museum +wallonie.museum +war.museum +washingtondc.museum +watchandclock.museum +watch-and-clock.museum +western.museum +westfalen.museum +whaling.museum +wildlife.museum +williamsburg.museum +windmill.museum +workshop.museum +york.museum +yorkshire.museum +yosemite.museum +youth.museum +zoological.museum +zoology.museum +ירושלים.museum +иком.museum + +// mv : http://en.wikipedia.org/wiki/.mv +// "mv" included because, contra Wikipedia, google.mv exists. 
+mv +aero.mv +biz.mv +com.mv +coop.mv +edu.mv +gov.mv +info.mv +int.mv +mil.mv +museum.mv +name.mv +net.mv +org.mv +pro.mv + +// mw : http://www.registrar.mw/ +mw +ac.mw +biz.mw +co.mw +com.mw +coop.mw +edu.mw +gov.mw +int.mw +museum.mw +net.mw +org.mw + +// mx : http://www.nic.mx/ +// Submitted by registry 2008-06-19 +mx +com.mx +org.mx +gob.mx +edu.mx +net.mx + +// my : http://www.mynic.net.my/ +my +com.my +net.my +org.my +gov.my +edu.my +mil.my +name.my + +// mz : http://www.gobin.info/domainname/mz-template.doc +*.mz + +// na : http://www.na-nic.com.na/ +// http://www.info.na/domain/ +na +info.na +pro.na +name.na +school.na +or.na +dr.na +us.na +mx.na +ca.na +in.na +cc.na +tv.na +ws.na +mobi.na +co.na +com.na +org.na + +// name : has 2nd-level tlds, but there's no list of them +name + +// nc : http://www.cctld.nc/ +nc +asso.nc + +// ne : http://en.wikipedia.org/wiki/.ne +ne + +// net : http://en.wikipedia.org/wiki/.net +net + +// nf : http://en.wikipedia.org/wiki/.nf +nf +com.nf +net.nf +per.nf +rec.nf +web.nf +arts.nf +firm.nf +info.nf +other.nf +store.nf + +// ng : http://psg.com/dns/ng/ +// Submitted by registry 2008-06-17 +ac.ng +com.ng +edu.ng +gov.ng +net.ng +org.ng + +// ni : http://www.nic.ni/dominios.htm +*.ni + +// nl : http://www.domain-registry.nl/ace.php/c,728,122,,,,Home.html +// Confirmed by registry (with technical +// reservations) 2008-06-08 +nl + +// BV.nl will be a registry for dutch BV's (besloten vennootschap) +bv.nl + +// no : http://www.norid.no/regelverk/index.en.html +// The Norwegian registry has declined to notify us of updates. The web pages +// referenced below are the official source of the data. 
There is also an +// announce mailing list: +// https://postlister.uninett.no/sympa/info/norid-diskusjon +no +// Norid generic domains : http://www.norid.no/regelverk/vedlegg-c.en.html +fhs.no +vgs.no +fylkesbibl.no +folkebibl.no +museum.no +idrett.no +priv.no +// Non-Norid generic domains : http://www.norid.no/regelverk/vedlegg-d.en.html +mil.no +stat.no +dep.no +kommune.no +herad.no +// no geographical names : http://www.norid.no/regelverk/vedlegg-b.en.html +// counties +aa.no +ah.no +bu.no +fm.no +hl.no +hm.no +jan-mayen.no +mr.no +nl.no +nt.no +of.no +ol.no +oslo.no +rl.no +sf.no +st.no +svalbard.no +tm.no +tr.no +va.no +vf.no +// primary and lower secondary schools per county +gs.aa.no +gs.ah.no +gs.bu.no +gs.fm.no +gs.hl.no +gs.hm.no +gs.jan-mayen.no +gs.mr.no +gs.nl.no +gs.nt.no +gs.of.no +gs.ol.no +gs.oslo.no +gs.rl.no +gs.sf.no +gs.st.no +gs.svalbard.no +gs.tm.no +gs.tr.no +gs.va.no +gs.vf.no +// cities +akrehamn.no +åkrehamn.no +algard.no +ålgård.no +arna.no +brumunddal.no +bryne.no +bronnoysund.no +brønnøysund.no +drobak.no +drøbak.no +egersund.no +fetsund.no +floro.no +florø.no +fredrikstad.no +hokksund.no +honefoss.no +hønefoss.no +jessheim.no +jorpeland.no +jørpeland.no +kirkenes.no +kopervik.no +krokstadelva.no +langevag.no +langevåg.no +leirvik.no +mjondalen.no +mjøndalen.no +mo-i-rana.no +mosjoen.no +mosjøen.no +nesoddtangen.no +orkanger.no +osoyro.no +osøyro.no +raholt.no +råholt.no +sandnessjoen.no +sandnessjøen.no +skedsmokorset.no +slattum.no +spjelkavik.no +stathelle.no +stavern.no +stjordalshalsen.no +stjørdalshalsen.no +tananger.no +tranby.no +vossevangen.no +// communities +afjord.no +åfjord.no +agdenes.no +al.no +ål.no +alesund.no +ålesund.no +alstahaug.no +alta.no +áltá.no +alaheadju.no +álaheadju.no +alvdal.no +amli.no +åmli.no +amot.no +åmot.no +andebu.no +andoy.no +andøy.no +andasuolo.no +ardal.no +årdal.no +aremark.no +arendal.no +ås.no +aseral.no +åseral.no +asker.no +askim.no +askvoll.no +askoy.no +askøy.no +asnes.no +åsnes.no 
+audnedaln.no +aukra.no +aure.no +aurland.no +aurskog-holand.no +aurskog-høland.no +austevoll.no +austrheim.no +averoy.no +averøy.no +balestrand.no +ballangen.no +balat.no +bálát.no +balsfjord.no +bahccavuotna.no +báhccavuotna.no +bamble.no +bardu.no +beardu.no +beiarn.no +bajddar.no +bájddar.no +baidar.no +báidár.no +berg.no +bergen.no +berlevag.no +berlevåg.no +bearalvahki.no +bearalváhki.no +bindal.no +birkenes.no +bjarkoy.no +bjarkøy.no +bjerkreim.no +bjugn.no +bodo.no +bodø.no +badaddja.no +bådåddjå.no +budejju.no +bokn.no +bremanger.no +bronnoy.no +brønnøy.no +bygland.no +bykle.no +barum.no +bærum.no +bo.telemark.no +bø.telemark.no +bo.nordland.no +bø.nordland.no +bievat.no +bievát.no +bomlo.no +bømlo.no +batsfjord.no +båtsfjord.no +bahcavuotna.no +báhcavuotna.no +dovre.no +drammen.no +drangedal.no +dyroy.no +dyrøy.no +donna.no +dønna.no +eid.no +eidfjord.no +eidsberg.no +eidskog.no +eidsvoll.no +eigersund.no +elverum.no +enebakk.no +engerdal.no +etne.no +etnedal.no +evenes.no +evenassi.no +evenášši.no +evje-og-hornnes.no +farsund.no +fauske.no +fuossko.no +fuoisku.no +fedje.no +fet.no +finnoy.no +finnøy.no +fitjar.no +fjaler.no +fjell.no +flakstad.no +flatanger.no +flekkefjord.no +flesberg.no +flora.no +fla.no +flå.no +folldal.no +forsand.no +fosnes.no +frei.no +frogn.no +froland.no +frosta.no +frana.no +fræna.no +froya.no +frøya.no +fusa.no +fyresdal.no +forde.no +førde.no +gamvik.no +gangaviika.no +gáŋgaviika.no +gaular.no +gausdal.no +gildeskal.no +gildeskål.no +giske.no +gjemnes.no +gjerdrum.no +gjerstad.no +gjesdal.no +gjovik.no +gjøvik.no +gloppen.no +gol.no +gran.no +grane.no +granvin.no +gratangen.no +grimstad.no +grong.no +kraanghke.no +kråanghke.no +grue.no +gulen.no +hadsel.no +halden.no +halsa.no +hamar.no +hamaroy.no +habmer.no +hábmer.no +hapmir.no +hápmir.no +hammerfest.no +hammarfeasta.no +hámmárfeasta.no +haram.no +hareid.no +harstad.no +hasvik.no +aknoluokta.no +ákŋoluokta.no +hattfjelldal.no +aarborte.no +haugesund.no +hemne.no +hemnes.no 
+hemsedal.no +heroy.more-og-romsdal.no +herøy.møre-og-romsdal.no +heroy.nordland.no +herøy.nordland.no +hitra.no +hjartdal.no +hjelmeland.no +hobol.no +hobøl.no +hof.no +hol.no +hole.no +holmestrand.no +holtalen.no +holtålen.no +hornindal.no +horten.no +hurdal.no +hurum.no +hvaler.no +hyllestad.no +hagebostad.no +hægebostad.no +hoyanger.no +høyanger.no +hoylandet.no +høylandet.no +ha.no +hå.no +ibestad.no +inderoy.no +inderøy.no +iveland.no +jevnaker.no +jondal.no +jolster.no +jølster.no +karasjok.no +karasjohka.no +kárášjohka.no +karlsoy.no +galsa.no +gálsá.no +karmoy.no +karmøy.no +kautokeino.no +guovdageaidnu.no +klepp.no +klabu.no +klæbu.no +kongsberg.no +kongsvinger.no +kragero.no +kragerø.no +kristiansand.no +kristiansund.no +krodsherad.no +krødsherad.no +kvalsund.no +rahkkeravju.no +ráhkkerávju.no +kvam.no +kvinesdal.no +kvinnherad.no +kviteseid.no +kvitsoy.no +kvitsøy.no +kvafjord.no +kvæfjord.no +giehtavuoatna.no +kvanangen.no +kvænangen.no +navuotna.no +návuotna.no +kafjord.no +kåfjord.no +gaivuotna.no +gáivuotna.no +larvik.no +lavangen.no +lavagis.no +loabat.no +loabát.no +lebesby.no +davvesiida.no +leikanger.no +leirfjord.no +leka.no +leksvik.no +lenvik.no +leangaviika.no +leaŋgaviika.no +lesja.no +levanger.no +lier.no +lierne.no +lillehammer.no +lillesand.no +lindesnes.no +lindas.no +lindås.no +lom.no +loppa.no +lahppi.no +láhppi.no +lund.no +lunner.no +luroy.no +lurøy.no +luster.no +lyngdal.no +lyngen.no +ivgu.no +lardal.no +lerdal.no +lærdal.no +lodingen.no +lødingen.no +lorenskog.no +lørenskog.no +loten.no +løten.no +malvik.no +masoy.no +måsøy.no +muosat.no +muosát.no +mandal.no +marker.no +marnardal.no +masfjorden.no +meland.no +meldal.no +melhus.no +meloy.no +meløy.no +meraker.no +meråker.no +moareke.no +moåreke.no +midsund.no +midtre-gauldal.no +modalen.no +modum.no +molde.no +moskenes.no +moss.no +mosvik.no +malselv.no +målselv.no +malatvuopmi.no +málatvuopmi.no +namdalseid.no +aejrie.no +namsos.no +namsskogan.no +naamesjevuemie.no 
+nååmesjevuemie.no +laakesvuemie.no +nannestad.no +narvik.no +narviika.no +naustdal.no +nedre-eiker.no +nes.akershus.no +nes.buskerud.no +nesna.no +nesodden.no +nesseby.no +unjarga.no +unjárga.no +nesset.no +nissedal.no +nittedal.no +nord-aurdal.no +nord-fron.no +nord-odal.no +norddal.no +nordkapp.no +davvenjarga.no +davvenjárga.no +nordre-land.no +nordreisa.no +raisa.no +ráisa.no +nore-og-uvdal.no +notodden.no +naroy.no +nærøy.no +notteroy.no +nøtterøy.no +odda.no +oksnes.no +øksnes.no +oppdal.no +oppegard.no +oppegård.no +orkdal.no +orland.no +ørland.no +orskog.no +ørskog.no +orsta.no +ørsta.no +os.hedmark.no +os.hordaland.no +osen.no +osteroy.no +osterøy.no +ostre-toten.no +østre-toten.no +overhalla.no +ovre-eiker.no +øvre-eiker.no +oyer.no +øyer.no +oygarden.no +øygarden.no +oystre-slidre.no +øystre-slidre.no +porsanger.no +porsangu.no +porsáŋgu.no +porsgrunn.no +radoy.no +radøy.no +rakkestad.no +rana.no +ruovat.no +randaberg.no +rauma.no +rendalen.no +rennebu.no +rennesoy.no +rennesøy.no +rindal.no +ringebu.no +ringerike.no +ringsaker.no +rissa.no +risor.no +risør.no +roan.no +rollag.no +rygge.no +ralingen.no +rælingen.no +rodoy.no +rødøy.no +romskog.no +rømskog.no +roros.no +røros.no +rost.no +røst.no +royken.no +røyken.no +royrvik.no +røyrvik.no +rade.no +råde.no +salangen.no +siellak.no +saltdal.no +salat.no +sálát.no +sálat.no +samnanger.no +sande.more-og-romsdal.no +sande.møre-og-romsdal.no +sande.vestfold.no +sandefjord.no +sandnes.no +sandoy.no +sandøy.no +sarpsborg.no +sauda.no +sauherad.no +sel.no +selbu.no +selje.no +seljord.no +sigdal.no +siljan.no +sirdal.no +skaun.no +skedsmo.no +ski.no +skien.no +skiptvet.no +skjervoy.no +skjervøy.no +skierva.no +skiervá.no +skjak.no +skjåk.no +skodje.no +skanland.no +skånland.no +skanit.no +skánit.no +smola.no +smøla.no +snillfjord.no +snasa.no +snåsa.no +snoasa.no +snaase.no +snåase.no +sogndal.no +sokndal.no +sola.no +solund.no +songdalen.no +sortland.no +spydeberg.no +stange.no +stavanger.no +steigen.no 
+steinkjer.no +stjordal.no +stjørdal.no +stokke.no +stor-elvdal.no +stord.no +stordal.no +storfjord.no +omasvuotna.no +strand.no +stranda.no +stryn.no +sula.no +suldal.no +sund.no +sunndal.no +surnadal.no +sveio.no +svelvik.no +sykkylven.no +sogne.no +søgne.no +somna.no +sømna.no +sondre-land.no +søndre-land.no +sor-aurdal.no +sør-aurdal.no +sor-fron.no +sør-fron.no +sor-odal.no +sør-odal.no +sor-varanger.no +sør-varanger.no +matta-varjjat.no +mátta-várjjat.no +sorfold.no +sørfold.no +sorreisa.no +sørreisa.no +sorum.no +sørum.no +tana.no +deatnu.no +time.no +tingvoll.no +tinn.no +tjeldsund.no +dielddanuorri.no +tjome.no +tjøme.no +tokke.no +tolga.no +torsken.no +tranoy.no +tranøy.no +tromso.no +tromsø.no +tromsa.no +romsa.no +trondheim.no +troandin.no +trysil.no +trana.no +træna.no +trogstad.no +trøgstad.no +tvedestrand.no +tydal.no +tynset.no +tysfjord.no +divtasvuodna.no +divttasvuotna.no +tysnes.no +tysvar.no +tysvær.no +tonsberg.no +tønsberg.no +ullensaker.no +ullensvang.no +ulvik.no +utsira.no +vadso.no +vadsø.no +cahcesuolo.no +čáhcesuolo.no +vaksdal.no +valle.no +vang.no +vanylven.no +vardo.no +vardø.no +varggat.no +várggát.no +vefsn.no +vaapste.no +vega.no +vegarshei.no +vegårshei.no +vennesla.no +verdal.no +verran.no +vestby.no +vestnes.no +vestre-slidre.no +vestre-toten.no +vestvagoy.no +vestvågøy.no +vevelstad.no +vik.no +vikna.no +vindafjord.no +volda.no +voss.no +varoy.no +værøy.no +vagan.no +vågan.no +voagat.no +vagsoy.no +vågsøy.no +vaga.no +vågå.no +valer.ostfold.no +våler.østfold.no +valer.hedmark.no +våler.hedmark.no + +// np : http://www.mos.com.np/register.html +*.np + +// nr : http://cenpac.net.nr/dns/index.html +// Confirmed by registry 2008-06-17 +nr +biz.nr +info.nr +gov.nr +edu.nr +org.nr +net.nr +com.nr + +// nu : http://en.wikipedia.org/wiki/.nu +nu + +// nz : http://en.wikipedia.org/wiki/.nz +*.nz + +// om : http://en.wikipedia.org/wiki/.om +*.om +!mediaphone.om +!nawrastelecom.om +!nawras.om +!omanmobile.om +!omanpost.om +!omantel.om 
+!rakpetroleum.om +!siemens.om +!songfest.om +!statecouncil.om + +// org : http://en.wikipedia.org/wiki/.org +org + +// pa : http://www.nic.pa/ +// Some additional second level "domains" resolve directly as hostnames, such as +// pannet.pa, so we add a rule for "pa". +pa +ac.pa +gob.pa +com.pa +org.pa +sld.pa +edu.pa +net.pa +ing.pa +abo.pa +med.pa +nom.pa + +// pe : https://www.nic.pe/InformeFinalComision.pdf +pe +edu.pe +gob.pe +nom.pe +mil.pe +org.pe +com.pe +net.pe + +// pf : http://www.gobin.info/domainname/formulaire-pf.pdf +pf +com.pf +org.pf +edu.pf + +// pg : http://en.wikipedia.org/wiki/.pg +*.pg + +// ph : http://www.domains.ph/FAQ2.asp +// Submitted by registry 2008-06-13 +ph +com.ph +net.ph +org.ph +gov.ph +edu.ph +ngo.ph +mil.ph +i.ph + +// pk : http://pk5.pknic.net.pk/pk5/msgNamepk.PK +pk +com.pk +net.pk +edu.pk +org.pk +fam.pk +biz.pk +web.pk +gov.pk +gob.pk +gok.pk +gon.pk +gop.pk +gos.pk +info.pk + +// pl : http://www.dns.pl/english/ +pl +// NASK functional domains (nask.pl / dns.pl) : http://www.dns.pl/english/dns-funk.html +aid.pl +agro.pl +atm.pl +auto.pl +biz.pl +com.pl +edu.pl +gmina.pl +gsm.pl +info.pl +mail.pl +miasta.pl +media.pl +mil.pl +net.pl +nieruchomosci.pl +nom.pl +org.pl +pc.pl +powiat.pl +priv.pl +realestate.pl +rel.pl +sex.pl +shop.pl +sklep.pl +sos.pl +szkola.pl +targi.pl +tm.pl +tourism.pl +travel.pl +turystyka.pl +// ICM functional domains (icm.edu.pl) +6bone.pl +art.pl +mbone.pl +// Government domains (administred by ippt.gov.pl) +gov.pl +uw.gov.pl +um.gov.pl +ug.gov.pl +upow.gov.pl +starostwo.gov.pl +so.gov.pl +sr.gov.pl +po.gov.pl +pa.gov.pl +// other functional domains +ngo.pl +irc.pl +usenet.pl +// NASK geographical domains : http://www.dns.pl/english/dns-regiony.html +augustow.pl +babia-gora.pl +bedzin.pl +beskidy.pl +bialowieza.pl +bialystok.pl +bielawa.pl +bieszczady.pl +boleslawiec.pl +bydgoszcz.pl +bytom.pl +cieszyn.pl +czeladz.pl +czest.pl +dlugoleka.pl +elblag.pl +elk.pl +glogow.pl +gniezno.pl +gorlice.pl 
+grajewo.pl +ilawa.pl +jaworzno.pl +jelenia-gora.pl +jgora.pl +kalisz.pl +kazimierz-dolny.pl +karpacz.pl +kartuzy.pl +kaszuby.pl +katowice.pl +kepno.pl +ketrzyn.pl +klodzko.pl +kobierzyce.pl +kolobrzeg.pl +konin.pl +konskowola.pl +kutno.pl +lapy.pl +lebork.pl +legnica.pl +lezajsk.pl +limanowa.pl +lomza.pl +lowicz.pl +lubin.pl +lukow.pl +malbork.pl +malopolska.pl +mazowsze.pl +mazury.pl +mielec.pl +mielno.pl +mragowo.pl +naklo.pl +nowaruda.pl +nysa.pl +olawa.pl +olecko.pl +olkusz.pl +olsztyn.pl +opoczno.pl +opole.pl +ostroda.pl +ostroleka.pl +ostrowiec.pl +ostrowwlkp.pl +pila.pl +pisz.pl +podhale.pl +podlasie.pl +polkowice.pl +pomorze.pl +pomorskie.pl +prochowice.pl +pruszkow.pl +przeworsk.pl +pulawy.pl +radom.pl +rawa-maz.pl +rybnik.pl +rzeszow.pl +sanok.pl +sejny.pl +siedlce.pl +slask.pl +slupsk.pl +sosnowiec.pl +stalowa-wola.pl +skoczow.pl +starachowice.pl +stargard.pl +suwalki.pl +swidnica.pl +swiebodzin.pl +swinoujscie.pl +szczecin.pl +szczytno.pl +tarnobrzeg.pl +tgory.pl +turek.pl +tychy.pl +ustka.pl +walbrzych.pl +warmia.pl +warszawa.pl +waw.pl +wegrow.pl +wielun.pl +wlocl.pl +wloclawek.pl +wodzislaw.pl +wolomin.pl +wroclaw.pl +zachpomor.pl +zagan.pl +zarow.pl +zgora.pl +zgorzelec.pl +// TASK geographical domains (www.task.gda.pl/uslugi/dns) +gda.pl +gdansk.pl +gdynia.pl +med.pl +sopot.pl +// other geographical domains +gliwice.pl +krakow.pl +poznan.pl +wroc.pl +zakopane.pl + +// pm : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf +pm + +// pn : http://www.government.pn/PnRegistry/policies.htm +pn +gov.pn +co.pn +org.pn +edu.pn +net.pn + +// pr : http://www.nic.pr/index.asp?f=1 +pr +com.pr +net.pr +org.pr +gov.pr +edu.pr +isla.pr +pro.pr +biz.pr +info.pr +name.pr +// these aren't mentioned on nic.pr, but on http://en.wikipedia.org/wiki/.pr +est.pr +prof.pr +ac.pr + +// pro : http://www.nic.pro/support_faq.htm +pro +aca.pro +bar.pro +cpa.pro +jur.pro +law.pro +med.pro +eng.pro + +// ps : http://en.wikipedia.org/wiki/.ps +// 
http://www.nic.ps/registration/policy.html#reg +ps +edu.ps +gov.ps +sec.ps +plo.ps +com.ps +org.ps +net.ps + +// pt : http://online.dns.pt/dns/start_dns +pt +net.pt +gov.pt +org.pt +edu.pt +int.pt +publ.pt +com.pt +nome.pt + +// pw : http://en.wikipedia.org/wiki/.pw +pw +co.pw +ne.pw +or.pw +ed.pw +go.pw +belau.pw + +// py : http://www.nic.py/faq_a.html#faq_b +*.py + +// qa : http://domains.qa/en/ +qa +com.qa +edu.qa +gov.qa +mil.qa +name.qa +net.qa +org.qa +sch.qa + +// re : http://www.afnic.re/obtenir/chartes/nommage-re/annexe-descriptifs +re +com.re +asso.re +nom.re + +// ro : http://www.rotld.ro/ +ro +com.ro +org.ro +tm.ro +nt.ro +nom.ro +info.ro +rec.ro +arts.ro +firm.ro +store.ro +www.ro + +// rs : http://en.wikipedia.org/wiki/.rs +rs +co.rs +org.rs +edu.rs +ac.rs +gov.rs +in.rs + +// ru : http://www.cctld.ru/ru/docs/aktiv_8.php +// Industry domains +ru +ac.ru +com.ru +edu.ru +int.ru +net.ru +org.ru +pp.ru +// Geographical domains +adygeya.ru +altai.ru +amur.ru +arkhangelsk.ru +astrakhan.ru +bashkiria.ru +belgorod.ru +bir.ru +bryansk.ru +buryatia.ru +cbg.ru +chel.ru +chelyabinsk.ru +chita.ru +chukotka.ru +chuvashia.ru +dagestan.ru +dudinka.ru +e-burg.ru +grozny.ru +irkutsk.ru +ivanovo.ru +izhevsk.ru +jar.ru +joshkar-ola.ru +kalmykia.ru +kaluga.ru +kamchatka.ru +karelia.ru +kazan.ru +kchr.ru +kemerovo.ru +khabarovsk.ru +khakassia.ru +khv.ru +kirov.ru +koenig.ru +komi.ru +kostroma.ru +krasnoyarsk.ru +kuban.ru +kurgan.ru +kursk.ru +lipetsk.ru +magadan.ru +mari.ru +mari-el.ru +marine.ru +mordovia.ru +mosreg.ru +msk.ru +murmansk.ru +nalchik.ru +nnov.ru +nov.ru +novosibirsk.ru +nsk.ru +omsk.ru +orenburg.ru +oryol.ru +palana.ru +penza.ru +perm.ru +pskov.ru +ptz.ru +rnd.ru +ryazan.ru +sakhalin.ru +samara.ru +saratov.ru +simbirsk.ru +smolensk.ru +spb.ru +stavropol.ru +stv.ru +surgut.ru +tambov.ru +tatarstan.ru +tom.ru +tomsk.ru +tsaritsyn.ru +tsk.ru +tula.ru +tuva.ru +tver.ru +tyumen.ru +udm.ru +udmurtia.ru +ulan-ude.ru +vladikavkaz.ru +vladimir.ru +vladivostok.ru 
+volgograd.ru +vologda.ru +voronezh.ru +vrn.ru +vyatka.ru +yakutia.ru +yamal.ru +yaroslavl.ru +yekaterinburg.ru +yuzhno-sakhalinsk.ru +// More geographical domains +amursk.ru +baikal.ru +cmw.ru +fareast.ru +jamal.ru +kms.ru +k-uralsk.ru +kustanai.ru +kuzbass.ru +magnitka.ru +mytis.ru +nakhodka.ru +nkz.ru +norilsk.ru +oskol.ru +pyatigorsk.ru +rubtsovsk.ru +snz.ru +syzran.ru +vdonsk.ru +zgrad.ru +// State domains +gov.ru +mil.ru +// Technical domains +test.ru + +// rw : http://www.nic.rw/cgi-bin/policy.pl +rw +gov.rw +net.rw +edu.rw +ac.rw +com.rw +co.rw +int.rw +mil.rw +gouv.rw + +// sa : http://www.nic.net.sa/ +sa +com.sa +net.sa +org.sa +gov.sa +med.sa +pub.sa +edu.sa +sch.sa + +// sb : http://www.sbnic.net.sb/ +// Submitted by registry 2008-06-08 +sb +com.sb +edu.sb +gov.sb +net.sb +org.sb + +// sc : http://www.nic.sc/ +sc +com.sc +gov.sc +net.sc +org.sc +edu.sc + +// sd : http://www.isoc.sd/sudanic.isoc.sd/billing_pricing.htm +// Submitted by registry 2008-06-17 +sd +com.sd +net.sd +org.sd +edu.sd +med.sd +gov.sd +info.sd + +// se : http://en.wikipedia.org/wiki/.se +// Submitted by registry 2008-06-24 +se +a.se +ac.se +b.se +bd.se +brand.se +c.se +d.se +e.se +f.se +fh.se +fhsk.se +fhv.se +g.se +h.se +i.se +k.se +komforb.se +kommunalforbund.se +komvux.se +l.se +lanbib.se +m.se +n.se +naturbruksgymn.se +o.se +org.se +p.se +parti.se +pp.se +press.se +r.se +s.se +sshn.se +t.se +tm.se +u.se +w.se +x.se +y.se +z.se + +// sg : http://www.nic.net.sg/sub_policies_agreement/2ld.html +sg +com.sg +net.sg +org.sg +gov.sg +edu.sg +per.sg + +// sh : http://www.nic.sh/rules.html +// list of 2nd level domains ? +sh + +// si : http://en.wikipedia.org/wiki/.si +si + +// sj : No registrations at this time. +// Submitted by registry 2008-06-16 + +// sk : http://en.wikipedia.org/wiki/.sk +// list of 2nd level domains ? 
+sk + +// sl : http://www.nic.sl +// Submitted by registry 2008-06-12 +sl +com.sl +net.sl +edu.sl +gov.sl +org.sl + +// sm : http://en.wikipedia.org/wiki/.sm +sm + +// sn : http://en.wikipedia.org/wiki/.sn +sn +art.sn +com.sn +edu.sn +gouv.sn +org.sn +perso.sn +univ.sn + +// so : http://www.soregistry.com/ +so +com.so +net.so +org.so + +// sr : http://en.wikipedia.org/wiki/.sr +sr + +// st : http://www.nic.st/html/policyrules/ +st +co.st +com.st +consulado.st +edu.st +embaixada.st +gov.st +mil.st +net.st +org.st +principe.st +saotome.st +store.st + +// su : http://en.wikipedia.org/wiki/.su +su + +// sv : http://www.svnet.org.sv/svpolicy.html +*.sv + +// sy : http://en.wikipedia.org/wiki/.sy +// see also: http://www.gobin.info/domainname/sy.doc +sy +edu.sy +gov.sy +net.sy +mil.sy +com.sy +org.sy + +// sz : http://en.wikipedia.org/wiki/.sz +// http://www.sispa.org.sz/ +sz +co.sz +ac.sz +org.sz + +// tc : http://en.wikipedia.org/wiki/.tc +tc + +// td : http://en.wikipedia.org/wiki/.td +td + +// tel: http://en.wikipedia.org/wiki/.tel +// http://www.telnic.org/ +tel + +// tf : http://en.wikipedia.org/wiki/.tf +tf + +// tg : http://en.wikipedia.org/wiki/.tg +// http://www.nic.tg/nictg/index.php implies no reserved 2nd-level domains, +// although this contradicts wikipedia. +tg + +// th : http://en.wikipedia.org/wiki/.th +// Submitted by registry 2008-06-17 +th +ac.th +co.th +go.th +in.th +mi.th +net.th +or.th + +// tj : http://www.nic.tj/policy.htm +tj +ac.tj +biz.tj +co.tj +com.tj +edu.tj +go.tj +gov.tj +int.tj +mil.tj +name.tj +net.tj +nic.tj +org.tj +test.tj +web.tj + +// tk : http://en.wikipedia.org/wiki/.tk +tk + +// tl : http://en.wikipedia.org/wiki/.tl +tl +gov.tl + +// tm : http://www.nic.tm/rules.html +// list of 2nd level tlds ? 
+tm + +// tn : http://en.wikipedia.org/wiki/.tn +// http://whois.ati.tn/ +tn +com.tn +ens.tn +fin.tn +gov.tn +ind.tn +intl.tn +nat.tn +net.tn +org.tn +info.tn +perso.tn +tourism.tn +edunet.tn +rnrt.tn +rns.tn +rnu.tn +mincom.tn +agrinet.tn +defense.tn +turen.tn + +// to : http://en.wikipedia.org/wiki/.to +// Submitted by registry 2008-06-17 +to +com.to +gov.to +net.to +org.to +edu.to +mil.to + +// tr : http://en.wikipedia.org/wiki/.tr +*.tr +!nic.tr +// Used by government in the TRNC +// http://en.wikipedia.org/wiki/.nc.tr +gov.nc.tr + +// travel : http://en.wikipedia.org/wiki/.travel +travel + +// tt : http://www.nic.tt/ +tt +co.tt +com.tt +org.tt +net.tt +biz.tt +info.tt +pro.tt +int.tt +coop.tt +jobs.tt +mobi.tt +travel.tt +museum.tt +aero.tt +name.tt +gov.tt +edu.tt + +// tv : http://en.wikipedia.org/wiki/.tv +// Not listing any 2LDs as reserved since none seem to exist in practice, +// Wikipedia notwithstanding. +tv + +// tw : http://en.wikipedia.org/wiki/.tw +tw +edu.tw +gov.tw +mil.tw +com.tw +net.tw +org.tw +idv.tw +game.tw +ebiz.tw +club.tw +網路.tw +組織.tw +商業.tw + +// tz : http://en.wikipedia.org/wiki/.tz +// Submitted by registry 2008-06-17 +// Updated from http://www.tznic.or.tz/index.php/domains.html 2010-10-25 +ac.tz +co.tz +go.tz +mil.tz +ne.tz +or.tz +sc.tz + +// ua : http://www.nic.net.ua/ +ua +com.ua +edu.ua +gov.ua +in.ua +net.ua +org.ua +// ua geo-names +cherkassy.ua +chernigov.ua +chernovtsy.ua +ck.ua +cn.ua +crimea.ua +cv.ua +dn.ua +dnepropetrovsk.ua +donetsk.ua +dp.ua +if.ua +ivano-frankivsk.ua +kh.ua +kharkov.ua +kherson.ua +khmelnitskiy.ua +kiev.ua +kirovograd.ua +km.ua +kr.ua +ks.ua +kv.ua +lg.ua +lugansk.ua +lutsk.ua +lviv.ua +mk.ua +nikolaev.ua +od.ua +odessa.ua +pl.ua +poltava.ua +rovno.ua +rv.ua +sebastopol.ua +sumy.ua +te.ua +ternopil.ua +uzhgorod.ua +vinnica.ua +vn.ua +zaporizhzhe.ua +zp.ua +zhitomir.ua +zt.ua + +// Private registries in .ua +co.ua +pp.ua + +// ug : http://www.registry.co.ug/ +ug +co.ug +ac.ug +sc.ug +go.ug +ne.ug 
+or.ug + +// uk : http://en.wikipedia.org/wiki/.uk +*.uk +*.sch.uk +!bl.uk +!british-library.uk +!icnet.uk +!jet.uk +!mod.uk +!nel.uk +!nhs.uk +!nic.uk +!nls.uk +!national-library-scotland.uk +!parliament.uk +!police.uk + +// us : http://en.wikipedia.org/wiki/.us +us +dni.us +fed.us +isa.us +kids.us +nsn.us +// us geographic names +ak.us +al.us +ar.us +as.us +az.us +ca.us +co.us +ct.us +dc.us +de.us +fl.us +ga.us +gu.us +hi.us +ia.us +id.us +il.us +in.us +ks.us +ky.us +la.us +ma.us +md.us +me.us +mi.us +mn.us +mo.us +ms.us +mt.us +nc.us +nd.us +ne.us +nh.us +nj.us +nm.us +nv.us +ny.us +oh.us +ok.us +or.us +pa.us +pr.us +ri.us +sc.us +sd.us +tn.us +tx.us +ut.us +vi.us +vt.us +va.us +wa.us +wi.us +wv.us +wy.us +// The registrar notes several more specific domains available in each state, +// such as state.*.us, dst.*.us, etc., but resolution of these is somewhat +// haphazard; in some states these domains resolve as addresses, while in others +// only subdomains are available, or even nothing at all. We include the +// most common ones where it's clear that different sites are different +// entities. 
+k12.ak.us +k12.al.us +k12.ar.us +k12.as.us +k12.az.us +k12.ca.us +k12.co.us +k12.ct.us +k12.dc.us +k12.de.us +k12.fl.us +k12.ga.us +k12.gu.us +// k12.hi.us Hawaii has a state-wide DOE login: bug 614565 +k12.ia.us +k12.id.us +k12.il.us +k12.in.us +k12.ks.us +k12.ky.us +k12.la.us +k12.ma.us +k12.md.us +k12.me.us +k12.mi.us +k12.mn.us +k12.mo.us +k12.ms.us +k12.mt.us +k12.nc.us +k12.nd.us +k12.ne.us +k12.nh.us +k12.nj.us +k12.nm.us +k12.nv.us +k12.ny.us +k12.oh.us +k12.ok.us +k12.or.us +k12.pa.us +k12.pr.us +k12.ri.us +k12.sc.us +k12.sd.us +k12.tn.us +k12.tx.us +k12.ut.us +k12.vi.us +k12.vt.us +k12.va.us +k12.wa.us +k12.wi.us +k12.wv.us +k12.wy.us + +cc.ak.us +cc.al.us +cc.ar.us +cc.as.us +cc.az.us +cc.ca.us +cc.co.us +cc.ct.us +cc.dc.us +cc.de.us +cc.fl.us +cc.ga.us +cc.gu.us +cc.hi.us +cc.ia.us +cc.id.us +cc.il.us +cc.in.us +cc.ks.us +cc.ky.us +cc.la.us +cc.ma.us +cc.md.us +cc.me.us +cc.mi.us +cc.mn.us +cc.mo.us +cc.ms.us +cc.mt.us +cc.nc.us +cc.nd.us +cc.ne.us +cc.nh.us +cc.nj.us +cc.nm.us +cc.nv.us +cc.ny.us +cc.oh.us +cc.ok.us +cc.or.us +cc.pa.us +cc.pr.us +cc.ri.us +cc.sc.us +cc.sd.us +cc.tn.us +cc.tx.us +cc.ut.us +cc.vi.us +cc.vt.us +cc.va.us +cc.wa.us +cc.wi.us +cc.wv.us +cc.wy.us + +lib.ak.us +lib.al.us +lib.ar.us +lib.as.us +lib.az.us +lib.ca.us +lib.co.us +lib.ct.us +lib.dc.us +lib.de.us +lib.fl.us +lib.ga.us +lib.gu.us +lib.hi.us +lib.ia.us +lib.id.us +lib.il.us +lib.in.us +lib.ks.us +lib.ky.us +lib.la.us +lib.ma.us +lib.md.us +lib.me.us +lib.mi.us +lib.mn.us +lib.mo.us +lib.ms.us +lib.mt.us +lib.nc.us +lib.nd.us +lib.ne.us +lib.nh.us +lib.nj.us +lib.nm.us +lib.nv.us +lib.ny.us +lib.oh.us +lib.ok.us +lib.or.us +lib.pa.us +lib.pr.us +lib.ri.us +lib.sc.us +lib.sd.us +lib.tn.us +lib.tx.us +lib.ut.us +lib.vi.us +lib.vt.us +lib.va.us +lib.wa.us +lib.wi.us +lib.wv.us +lib.wy.us + +// k12.ma.us contains school districts in Massachusetts. The 4LDs are +// managed indepedently except for private (PVT), charter (CHTR) and +// parochial (PAROCH) schools. 
Those are delegated dorectly to the +// 5LD operators. +pvt.k12.ma.us +chtr.k12.ma.us +paroch.k12.ma.us + +// uy : http://www.antel.com.uy/ +*.uy + +// uz : http://www.reg.uz/registerr.html +// are there other 2nd level tlds ? +uz +com.uz +co.uz + +// va : http://en.wikipedia.org/wiki/.va +va + +// vc : http://en.wikipedia.org/wiki/.vc +// Submitted by registry 2008-06-13 +vc +com.vc +net.vc +org.vc +gov.vc +mil.vc +edu.vc + +// ve : http://registro.nic.ve/nicve/registro/index.html +*.ve + +// vg : http://en.wikipedia.org/wiki/.vg +vg + +// vi : http://www.nic.vi/newdomainform.htm +// http://www.nic.vi/Domain_Rules/body_domain_rules.html indicates some other +// TLDs are "reserved", such as edu.vi and gov.vi, but doesn't actually say they +// are available for registration (which they do not seem to be). +vi +co.vi +com.vi +k12.vi +net.vi +org.vi + +// vn : https://www.dot.vn/vnnic/vnnic/domainregistration.jsp +vn +com.vn +net.vn +org.vn +edu.vn +gov.vn +int.vn +ac.vn +biz.vn +info.vn +name.vn +pro.vn +health.vn + +// vu : http://en.wikipedia.org/wiki/.vu +// list of 2nd level tlds ? 
+vu + +// wf : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf +wf + +// ws : http://en.wikipedia.org/wiki/.ws +// http://samoanic.ws/index.dhtml +ws +com.ws +net.ws +org.ws +gov.ws +edu.ws + +// yt : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf +yt + +// IDN ccTLDs +// Please sort by ISO 3166 ccTLD, then punicode string +// when submitting patches and follow this format: +// ("" ) : +// [optional sponsoring org] +// + +// xn--mgbaam7a8h ("Emerat" Arabic) : AE +//http://nic.ae/english/arabicdomain/rules.jsp +امارات + +// xn--54b7fta0cc ("Bangla" Bangla) : BD +বাংলা + +// xn--fiqs8s ("China" Chinese-Han-Simplified <.Zhonggou>) : CN +// CNNIC +// http://cnnic.cn/html/Dir/2005/10/11/3218.htm +中国 + +// xn--fiqz9s ("China" Chinese-Han-Traditional <.Zhonggou>) : CN +// CNNIC +// http://cnnic.cn/html/Dir/2005/10/11/3218.htm +中國 + +// xn--lgbbat1ad8j ("Algeria / Al Jazair" Arabic) : DZ +الجزائر + +// xn--wgbh1c ("Egypt" Arabic .masr) : EG +// http://www.dotmasr.eg/ +مصر + +// xn--node ("ge" Georgian (Mkhedruli)) : GE +გე + +// xn--j6w193g ("Hong Kong" Chinese-Han) : HK +// https://www2.hkirc.hk/register/rules.jsp +香港 + +// xn--h2brj9c ("Bharat" Devanagari) : IN +// India +भारत + +// xn--mgbbh1a71e ("Bharat" Arabic) : IN +// India +بھارت + +// xn--fpcrj9c3d ("Bharat" Telugu) : IN +// India +భారత్ + +// xn--gecrj9c ("Bharat" Gujarati) : IN +// India +ભારત + +// xn--s9brj9c ("Bharat" Gurmukhi) : IN +// India +ਭਾਰਤ + +// xn--45brj9c ("Bharat" Bengali) : IN +// India +ভারত + +// xn--xkc2dl3a5ee0h ("India" Tamil) : IN +// India +இந்தியா + +// xn--mgba3a4f16a ("Iran" Persian) : IR +ایران + +// xn--mgba3a4fra ("Iran" Arabic) : IR +ايران + +//xn--mgbayh7gpa ("al-Ordon" Arabic) JO +//National Information Technology Center (NITC) +//Royal Scientific Society, Al-Jubeiha +الاردن + +// xn--3e0b707e ("Republic of Korea" Hangul) : KR +한국 + +// xn--fzc2c9e2c ("Lanka" Sinhalese-Sinhala) : LK +// http://nic.lk +ලංකා + +// xn--xkc2al3hye2a ("Ilangai" 
Tamil) : LK +// http://nic.lk +இலங்கை + +// xn--mgbc0a9azcg ("Morocco / al-Maghrib" Arabic) : MA +المغرب + +// xn--mgb9awbf ("Oman" Arabic) : OM +عمان + +// xn--ygbi2ammx ("Falasteen" Arabic) : PS +// The Palestinian National Internet Naming Authority (PNINA) +// http://www.pnina.ps +فلسطين + +// xn--90a3ac ("srb" Cyrillic) : RS +срб + +// xn--p1ai ("rf" Russian-Cyrillic) : RU +// http://www.cctld.ru/en/docs/rulesrf.php +рф + +// xn--wgbl6a ("Qatar" Arabic) : QA +// http://www.ict.gov.qa/ +قطر + +// xn--mgberp4a5d4ar ("AlSaudiah" Arabic) : SA +// http://www.nic.net.sa/ +السعودية + +// xn--mgberp4a5d4a87g ("AlSaudiah" Arabic) variant : SA +السعودیة + +// xn--mgbqly7c0a67fbc ("AlSaudiah" Arabic) variant : SA +السعودیۃ + +// xn--mgbqly7cvafr ("AlSaudiah" Arabic) variant : SA +السعوديه + +// xn--ogbpf8fl ("Syria" Arabic) : SY +سورية + +// xn--mgbtf8fl ("Syria" Arabic) variant : SY +سوريا + +// xn--yfro4i67o Singapore ("Singapore" Chinese-Han) : SG +新加坡 + +// xn--clchc0ea0b2g2a9gcd ("Singapore" Tamil) : SG +சிங்கப்பூர் + +// xn--o3cw4h ("Thai" Thai) : TH +// http://www.thnic.co.th +ไทย + +// xn--pgbs0dh ("Tunis") : TN +// http://nic.tn +تونس + +// xn--kpry57d ("Taiwan" Chinese-Han-Traditional) : TW +// http://www.twnic.net/english/dn/dn_07a.htm +台灣 + +// xn--kprw13d ("Taiwan" Chinese-Han-Simplified) : TW +// http://www.twnic.net/english/dn/dn_07a.htm +台湾 + +// xn--nnx388a ("Taiwan") variant : TW +臺灣 + +// xn--j1amh ("ukr" Cyrillic) : UA +укр + +// xn--mgb2ddes ("AlYemen" Arabic) : YE +اليمن + +// xxx : http://icmregistry.com +xxx + +// ye : http://www.y.net.ye/services/domain_name.htm +*.ye + +// za : http://www.zadna.org.za/slds.html +*.za + +// zm : http://en.wikipedia.org/wiki/.zm +*.zm + +// zw : http://en.wikipedia.org/wiki/.zw +*.zw + +// ===END ICANN DOMAINS=== +// ===BEGIN PRIVATE DOMAINS=== + +// info.at : http://www.info.at/ +biz.at +info.at + +// priv.at : http://www.nic.priv.at/ +// Submitted by registry 2008-06-09 +priv.at + +// co.ca : 
http://registry.co.ca +co.ca + +// CentralNic : http://www.centralnic.com/names/domains +// Confirmed by registry 2008-06-09 +ar.com +br.com +cn.com +de.com +eu.com +gb.com +gr.com +hu.com +jpn.com +kr.com +no.com +qc.com +ru.com +sa.com +se.com +uk.com +us.com +uy.com +za.com +gb.net +jp.net +se.net +uk.net +ae.org +us.org +com.de + +// Opera Software, A.S.A. +// Requested by Yngve Pettersen 2009-11-26 +operaunite.com + +// Google, Inc. +// Requested by Eduardo Vela 2010-09-06 +appspot.com + +// iki.fi : Submitted by Hannu Aronsson 2009-11-05 +iki.fi + +// c.la : http://www.c.la/ +c.la + +// ZaNiC : http://www.za.net/ +// Confirmed by registry 2009-10-03 +za.net +za.org + +// CoDNS B.V. +// Added 2010-05-23. +co.nl +co.no + +// Mainseek Sp. z o.o. : http://www.co.pl/ +co.pl + +// DynDNS.com : http://www.dyndns.com/services/dns/dyndns/ +dyndns-at-home.com +dyndns-at-work.com +dyndns-blog.com +dyndns-free.com +dyndns-home.com +dyndns-ip.com +dyndns-mail.com +dyndns-office.com +dyndns-pics.com +dyndns-remote.com +dyndns-server.com +dyndns-web.com +dyndns-wiki.com +dyndns-work.com +dyndns.biz +dyndns.info +dyndns.org +dyndns.tv +at-band-camp.net +ath.cx +barrel-of-knowledge.info +barrell-of-knowledge.info +better-than.tv +blogdns.com +blogdns.net +blogdns.org +blogsite.org +boldlygoingnowhere.org +broke-it.net +buyshouses.net +cechire.com +dnsalias.com +dnsalias.net +dnsalias.org +dnsdojo.com +dnsdojo.net +dnsdojo.org +does-it.net +doesntexist.com +doesntexist.org +dontexist.com +dontexist.net +dontexist.org +doomdns.com +doomdns.org +dvrdns.org +dyn-o-saur.com +dynalias.com +dynalias.net +dynalias.org +dynathome.net +dyndns.ws +endofinternet.net +endofinternet.org +endoftheinternet.org +est-a-la-maison.com +est-a-la-masion.com +est-le-patron.com +est-mon-blogueur.com +for-better.biz +for-more.biz +for-our.info +for-some.biz +for-the.biz +forgot.her.name +forgot.his.name +from-ak.com +from-al.com +from-ar.com +from-az.net +from-ca.com +from-co.net +from-ct.com 
+from-dc.com +from-de.com +from-fl.com +from-ga.com +from-hi.com +from-ia.com +from-id.com +from-il.com +from-in.com +from-ks.com +from-ky.com +from-la.net +from-ma.com +from-md.com +from-me.org +from-mi.com +from-mn.com +from-mo.com +from-ms.com +from-mt.com +from-nc.com +from-nd.com +from-ne.com +from-nh.com +from-nj.com +from-nm.com +from-nv.com +from-ny.net +from-oh.com +from-ok.com +from-or.com +from-pa.com +from-pr.com +from-ri.com +from-sc.com +from-sd.com +from-tn.com +from-tx.com +from-ut.com +from-va.com +from-vt.com +from-wa.com +from-wi.com +from-wv.com +from-wy.com +ftpaccess.cc +fuettertdasnetz.de +game-host.org +game-server.cc +getmyip.com +gets-it.net +go.dyndns.org +gotdns.com +gotdns.org +groks-the.info +groks-this.info +ham-radio-op.net +here-for-more.info +hobby-site.com +hobby-site.org +home.dyndns.org +homedns.org +homeftp.net +homeftp.org +homeip.net +homelinux.com +homelinux.net +homelinux.org +homeunix.com +homeunix.net +homeunix.org +iamallama.com +in-the-band.net +is-a-anarchist.com +is-a-blogger.com +is-a-bookkeeper.com +is-a-bruinsfan.org +is-a-bulls-fan.com +is-a-candidate.org +is-a-caterer.com +is-a-celticsfan.org +is-a-chef.com +is-a-chef.net +is-a-chef.org +is-a-conservative.com +is-a-cpa.com +is-a-cubicle-slave.com +is-a-democrat.com +is-a-designer.com +is-a-doctor.com +is-a-financialadvisor.com +is-a-geek.com +is-a-geek.net +is-a-geek.org +is-a-green.com +is-a-guru.com +is-a-hard-worker.com +is-a-hunter.com +is-a-knight.org +is-a-landscaper.com +is-a-lawyer.com +is-a-liberal.com +is-a-libertarian.com +is-a-linux-user.org +is-a-llama.com +is-a-musician.com +is-a-nascarfan.com +is-a-nurse.com +is-a-painter.com +is-a-patsfan.org +is-a-personaltrainer.com +is-a-photographer.com +is-a-player.com +is-a-republican.com +is-a-rockstar.com +is-a-socialist.com +is-a-soxfan.org +is-a-student.com +is-a-teacher.com +is-a-techie.com +is-a-therapist.com +is-an-accountant.com +is-an-actor.com +is-an-actress.com +is-an-anarchist.com 
+is-an-artist.com +is-an-engineer.com +is-an-entertainer.com +is-by.us +is-certified.com +is-found.org +is-gone.com +is-into-anime.com +is-into-cars.com +is-into-cartoons.com +is-into-games.com +is-leet.com +is-lost.org +is-not-certified.com +is-saved.org +is-slick.com +is-uberleet.com +is-very-bad.org +is-very-evil.org +is-very-good.org +is-very-nice.org +is-very-sweet.org +is-with-theband.com +isa-geek.com +isa-geek.net +isa-geek.org +isa-hockeynut.com +issmarterthanyou.com +isteingeek.de +istmein.de +kicks-ass.net +kicks-ass.org +knowsitall.info +land-4-sale.us +lebtimnetz.de +leitungsen.de +likes-pie.com +likescandy.com +merseine.nu +mine.nu +misconfused.org +mypets.ws +myphotos.cc +neat-url.com +office-on-the.net +on-the-web.tv +podzone.net +podzone.org +readmyblog.org +saves-the-whales.com +scrapper-site.net +scrapping.cc +selfip.biz +selfip.com +selfip.info +selfip.net +selfip.org +sells-for-less.com +sells-for-u.com +sells-it.net +sellsyourhome.org +servebbs.com +servebbs.net +servebbs.org +serveftp.net +serveftp.org +servegame.org +shacknet.nu +simple-url.com +space-to-rent.com +stuff-4-sale.org +stuff-4-sale.us +teaches-yoga.com +thruhere.net +traeumtgerade.de +webhop.biz +webhop.info +webhop.net +webhop.org +worse-than.tv +writesthisblog.com + +// ===END PRIVATE DOMAINS=== diff --git a/src/test/java/org/archive/url/PublicSuffixesTest.java b/src/test/java/org/archive/url/PublicSuffixesTest.java new file mode 100644 index 00000000..e2bb288a --- /dev/null +++ b/src/test/java/org/archive/url/PublicSuffixesTest.java @@ -0,0 +1,193 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.url; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.regex.Matcher; + +import junit.framework.TestCase; + +import org.archive.url.PublicSuffixes.Node; + +/** + * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches + * from constructed regex. + * + * @author gojomo + */ +public class PublicSuffixesTest extends TestCase { + // test of low level implementation + + public void testCompare() { + Node n = new Node("hoge"); + assertTrue(n.compareTo('a') > 0); + assertEquals(-1, n.compareTo('*')); + assertEquals(-1, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(-1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("*,"); + assertEquals(1, n.compareTo('a')); + assertEquals(0, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, n.compareTo(new Node("*,"))); + assertEquals(1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("!hoge"); + assertEquals(1, n.compareTo('a')); + assertEquals(-1, n.compareTo('*')); + assertEquals(0, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(0, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node(""); + assertEquals(1, n.compareTo('a')); + assertEquals(1, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, n.compareTo(new Node(""))); + } + + protected String dump(Node 
alt) { + StringWriter w = new StringWriter(); + PublicSuffixes.dump(alt, 0, new PrintWriter(w)); + return w.toString(); + } + public void testTrie1() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + // specifically, should not have empty string as match. + assertEquals("(null)\n" + + " \"ac,\"\n", dump(alt)); + alt.addBranch("ac,com,"); + assertEquals("(null)\n" + + " \"ac,\"\n" + + " \"com,\"\n" + + " \"\"\n", dump(alt)); + alt.addBranch("ac,edu,"); + assertEquals("(null)\n" + + " \"ac,\"\n" + + " \"com,\"\n" + + " \"edu,\"\n" + + " \"\"\n", dump(alt)); + } + public void testTrie2() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("*,"); + assertEquals("(null)\n" + + " \"ac,\"\n" + + " \"*,\"\n", dump(alt)); + } + + public void testTrie3() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("ac,!hoge,"); + alt.addBranch("ac,*,"); + // exception goes first. + assertEquals("(null)\n" + + " \"ac,\"\n" + + " \"!hoge,\"\n" + + " \"*,\"\n" + + " \"\"\n", dump(alt)); + } + + // test of higher-level functionality + + Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() + .matcher(""); + + public void testBasics() { + matchPrefix("com,example,www,", "com,example,"); + matchPrefix("com,example,", "com,example,"); + matchPrefix("org,archive,www,", "org,archive,"); + matchPrefix("org,archive,", "org,archive,"); + matchPrefix("fr,yahoo,www,", "fr,yahoo,"); + matchPrefix("fr,yahoo,", "fr,yahoo,"); + matchPrefix("au,com,foobar,www,", "au,com,foobar,"); + matchPrefix("au,com,foobar,", "au,com,foobar,"); + matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); + matchPrefix("uk,co,virgin,", "uk,co,virgin,"); + matchPrefix("au,com,example,www,", "au,com,example,"); + matchPrefix("au,com,example,", "au,com,example,"); + matchPrefix("jp,tokyo,public,assigned,www,", + "jp,tokyo,public,assigned,"); + matchPrefix("jp,tokyo,public,assigned,", "jp,tokyo,public,assigned,"); + } + + 
public void testDomainWithDash() { + matchPrefix("de,bad-site,www", "de,bad-site,"); + } + + public void testDomainWithNumbers() { + matchPrefix("de,archive4u,www", "de,archive4u,"); + } + + public void testIPV4() { + assertEquals("unexpected reduction", + "1.2.3.4", + PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); + } + + public void testIPV6() { + assertEquals("unexpected reduction", + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + PublicSuffixes.reduceSurtToAssignmentLevel( + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); + } + + public void testExceptions() { + matchPrefix("uk,bl,www,", "uk,bl,"); + matchPrefix("uk,bl,", "uk,bl,"); + matchPrefix("jp,tokyo,metro,subdomain,", "jp,tokyo,metro,"); + matchPrefix("jp,tokyo,metro,", "jp,tokyo,metro,"); + } + + public void testFakeTLD() { + // we assume any new/unknonwn TLD should be assumed as 2-level; + // this is preferable for our grouping purpose but might not be + // for a cookie-assigning browser (original purpose of publicsuffixlist) + matchPrefix("zzz,example,www,", "zzz,example,"); + } + + public void testUnsegmentedHostname() { + m.reset("example"); + assertFalse("unexpected match found in 'example'", m.find()); + } + + public void testTopmostAssignedCaching() { + assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); + assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); + } + + // TODO: test UTF domains? 
+ + protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { + m.reset(surtDomain); + assertTrue("expected match not found in '" + surtDomain, m.find()); + assertEquals("expected match not found", expectedAssignedPrefix, m + .group()); + } +} From 041d79fb20471f4028430fedfefcdc4c830dd45b Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 10 Oct 2013 13:46:27 +0100 Subject: [PATCH 04/86] Had to prevent old version of hsqldb being brought in by hadoop-commons. --- pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pom.xml b/pom.xml index a0389be1..f96bd1c6 100644 --- a/pom.xml +++ b/pom.xml @@ -93,6 +93,10 @@ tomcat jasper-compiler + + hsqldb + hsqldb + From b10a06f3c76e2ef0edd47a04c157cf5e60001549 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 25 Oct 2013 10:19:36 +0100 Subject: [PATCH 05/86] Updated to preferred name. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f96bd1c6..abe0fd25 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons - web-commons + commons-web 1.0.0-SNAPSHOT jar From 5cd5696f9e9c337a08dbfe6489cf750c7d6759f0 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:13:31 +0000 Subject: [PATCH 06/86] Added necessary info to POM. 
--- pom.xml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pom.xml b/pom.xml index abe0fd25..90297de8 100644 --- a/pom.xml +++ b/pom.xml @@ -17,6 +17,40 @@ iipc-web-commons https://github.com/iipc/iipc-web-commons + + The International Internet Preservation Consortium + http://netpreserve.org/ + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + many-devs + Many Others Developers Proceed Me + many@dev.org + + + anjackson + Andrew Jackson + Andrew.Jackson@bl.uk + + + + GitHub Issues + https://github.com/iipc/iipc-web-commons/issues + + + scm:git:git@github.com:iipc/iipc-web-commons.git + scm:git:git@github.com:iipc/iipc-web-commons.git + git@github.com:iipc/iipc-web-commons.git + + + UTF-8 ${maven.build.timestamp} From 0d4739a879d83226a39d72998b1710120ff385e0 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:18:35 +0000 Subject: [PATCH 07/86] [maven-release-plugin] prepare release commons-web-1.0.0 --- pom.xml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 90297de8..1f73b8e9 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,4 @@ - + 4.0.0 @@ -11,7 +10,7 @@ org.netpreserve.commons commons-web - 1.0.0-SNAPSHOT + 1.0.0 jar iipc-web-commons From 33910fcf562a29f7a3ca1a96dcd64039c4a22101 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:18:42 +0000 Subject: [PATCH 08/86] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1f73b8e9..b0258bc9 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons commons-web - 1.0.0 + 1.0.1-SNAPSHOT jar iipc-web-commons From 5c13423f99b7e11aaae97074a78cb8c6db48e575 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:26:30 +0000 Subject: [PATCH 09/86] [maven-release-plugin] rollback the release of 
commons-web-1.0.0 --- pom.xml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index b0258bc9..90297de8 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 @@ -10,7 +11,7 @@ org.netpreserve.commons commons-web - 1.0.1-SNAPSHOT + 1.0.0-SNAPSHOT jar iipc-web-commons From 940bc1b14c7bbcf5493e318122ab3915932f5f2d Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:27:48 +0000 Subject: [PATCH 10/86] Removed erroneous repository declaration. --- pom.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pom.xml b/pom.xml index 90297de8..6ea15e57 100644 --- a/pom.xml +++ b/pom.xml @@ -225,12 +225,4 @@ - - - repository - - ${repository.url} - - - From da9f80e191a6986c681d17c97667488e50752ad9 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:30:21 +0000 Subject: [PATCH 11/86] [maven-release-plugin] prepare release commons-web-1.0.0 --- pom.xml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 6ea15e57..3949245a 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,4 @@ - + 4.0.0 @@ -11,7 +10,7 @@ org.netpreserve.commons commons-web - 1.0.0-SNAPSHOT + 1.0.0 jar iipc-web-commons From 33d0559d5c9011fe3a46c9acb0635b273ed5c698 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:32:08 +0000 Subject: [PATCH 12/86] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3949245a..f285a382 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons commons-web - 1.0.0 + 1.0.1-SNAPSHOT jar iipc-web-commons From a732a9ee939fe44031fb6a493641598ca120b6dc Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 16:03:57 +0000 Subject: [PATCH 13/86] Removed older PublicSuffix code. 
--- .../java/org/archive/url/PublicSuffixes.java | 363 ------------------ .../org/archive/url/PublicSuffixesTest.java | 193 ---------- 2 files changed, 556 deletions(-) delete mode 100644 src/main/java/org/archive/url/PublicSuffixes.java delete mode 100644 src/test/java/org/archive/url/PublicSuffixesTest.java diff --git a/src/main/java/org/archive/url/PublicSuffixes.java b/src/main/java/org/archive/url/PublicSuffixes.java deleted file mode 100644 index 7c3df6b8..00000000 --- a/src/main/java/org/archive/url/PublicSuffixes.java +++ /dev/null @@ -1,363 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.url; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.FileInputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.commons.io.IOUtils; -import org.archive.util.TextUtils; - -/** - * Utility class for making use of the information about 'public suffixes' at - * http://publicsuffix.org. 
- * - * The public suffix list (once known as 'effective TLDs') was motivated by the - * need to decide on which broader domains a subdomain was allowed to set - * cookies. For example, a server at 'www.example.com' can set cookies for - * 'www.example.com' or 'example.com' but not 'com'. 'www.example.co.uk' can set - * cookies for 'www.example.co.uk' or 'example.co.uk' but not 'co.uk' or 'uk'. - * The number of rules for all top-level-domains and 2nd- or 3rd- level domains - * has become quite long; essentially the broadest domain a subdomain may assign - * to is the one that was sold/registered to a specific name registrant. - * - * This concept should be useful in other contexts, too. Grouping URIs (or - * queues of URIs to crawl) together with others sharing the same registered - * suffix may be useful for applying the same rules to all, such as assigning - * them to the same queue or crawler in a multi- machine setup. - * - * As of Heritrix3, we prefer the term 'Assignment Level Domain' (ALD) - * for such domains, by analogy to 'Top Level Domain' (TLD) or '2nd Level - * Domain' (2LD), etc. - * - * @author Gojomo - * - * this version of PublicSuffixes uses suffix-tree data structure for generating less - * redundant regular expression. It may be even possible to write a light-weight, - * thread-safe matcher based on this class. - * @author Kenji Nagahashi - */ -public class PublicSuffixes { - protected static Pattern topmostAssignedSurtPrefixPattern; - protected static String topmostAssignedSurtPrefixRegex; - - /** - * prefix tree node. each Node represents sequence of letters (prefix) - * and alternative sequences following it (list of Node's). Nodes in - * {@code branches} are sorted for skip list like lookup and for generating - * effective regular expression (see {@link #compareTo(Node)} and {@link #compareTo(char).) - * - * as is intended for internal use only, there's no access methods. 
procedures for updating - * prefix tree with new input are defined within this class ({@link #addBranch(CharSequence)}). - * - * terminal node could be represented in two different form: 1) Node with zero branches, - * or 2) Node with zero-length {@code cs}. So, root node must be initialized with empty (not null) - * {@code branches} unless empty string matches the overall pattern. - * {@code cs} must not be null except for root node. - */ - public static class Node implements Comparable { - protected CharSequence cs; - protected List branches; - public Node() { - this("", null); - } - protected Node(CharSequence cs) { - this(cs, null); - } - protected Node(CharSequence cs, List branches) { - this.cs = cs; - this.branches = branches; - } - public void addBranch(CharSequence s) { - if (branches == null) { - branches = new ArrayList(); - branches.add(new Node("", null)); - } - for (int i = 0; i < branches.size(); i++) { - Node alt = branches.get(i); - if (alt.add(s)) return; - if (alt.compareTo(s.charAt(0)) > 0) { - Node alt1 = new Node(s, null); - branches.add(i, alt1); - return; - } - } - Node alt2 = new Node(s, null); - branches.add(alt2); - } - public boolean add(CharSequence s) { - int l = Math.min(s.length(), cs.length()); - int i = 0; - while (i < l && s.charAt(i) == cs.charAt(i)) - i++; - // zero-length match holds only when both cs and s are empty. - if (i == 0) return cs.length() == 0 && s.length() == 0; - if (i < cs.length()) { - CharSequence cs0 = cs.subSequence(0, i); - CharSequence cs1 = cs.subSequence(i, cs.length()); - CharSequence cs2 = s.subSequence(i, s.length()); - cs = cs0; - Node alt1 = new Node(cs1, branches); - (branches = new ArrayList()).add(alt1); - addBranch(cs2); - } else { - assert i == cs.length(); - addBranch(s.subSequence(i, s.length())); - } - return true; - } - public int compareTo(Node other) { - if (other.cs == null || other.cs.length() == 0) - return (cs == null || cs.length() == 0) ? 
0 : -1; - return compareTo(other.cs.charAt(0)); - } - public int compareTo(char oc) { - if (cs == null || cs.length() == 0) return 1; - // '!' and '*' must come after ordinary letters, in this order, for regexp - // to work as intended. - char c = cs.charAt(0); - if (c == oc) return 0; - if (c == '!') return oc == '*' ? -1 : 1; - if (c == '*') return 1; - if (oc == '*' || oc == '!') return -1; - return Character.valueOf(c).compareTo(oc); - // for generating the same regexp as previous version. - //return Character.valueOf(oc).compareTo(c); - } - } - - /** - * Utility method for dumping a regex String, based on a published public - * suffix list, which matches any SURT-form hostname up through the broadest - * 'private' (assigned/sold) domain-segment. That is, for any of the - * SURT-form hostnames... - * - * com,example, com,example,www, com,example,california,www - * - * ...the regex will match 'com,example,'. - * - * @param args - * @throws IOException - */ - public static void main(String args[]) throws IOException { - InputStream is; - if (args.length == 0 || "=".equals(args[0])) { - // use bundled list - is = PublicSuffixes.class.getClassLoader().getResourceAsStream( - "effective_tld_names.dat"); - } else { - is = new FileInputStream(args[0]); - } - BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); - String regex = getTopmostAssignedSurtPrefixRegex(reader); - IOUtils.closeQuietly(is); - - boolean needsClose = false; - BufferedWriter writer; - if (args.length >= 2) { - // write to specified file - writer = new BufferedWriter(new FileWriter(args[1])); - needsClose = true; - } else { - // write to stdout - writer = new BufferedWriter(new OutputStreamWriter(System.out)); - } - writer.append(regex); - writer.flush(); - if (needsClose) { - writer.close(); - } - } - /** - * Reads a file of the format promulgated by publicsuffix.org, ignoring - * comments and '!' exceptions/notations, converting domain segments to - * SURT-ordering. 
Leaves glob-style '*' wildcarding in place. Returns root - * node of SURT-ordered prefix tree. - * - * @param reader - * @return root of prefix tree node. - * @throws IOException - */ - protected static Node readPublishedFileToSurtTrie(BufferedReader reader) throws IOException { - // initializing with empty Alt list prevents empty pattern from being - // created for the first addBranch() - Node alt = new Node(null, new ArrayList()); - String line; - while ((line = reader.readLine()) != null) { - // discard whitespace, empty lines, comments, exceptions - line = line.trim(); - if (line.length() == 0 || line.startsWith("//")) continue; - // discard utf8 notation after entry - line = line.split("\\s+")[0]; - // TODO: maybe we don't need to create lower-cased String - line = line.toLowerCase(); - // SURT-order domain segments - String[] segs = line.split("\\."); - StringBuilder sb = new StringBuilder(); - for (int i = segs.length - 1; i >= 0; i--) { - if (segs[i].length() == 0) continue; - sb.append(segs[i]).append(','); - } - alt.addBranch(sb.toString()); - } - return alt; - } - /** - * utility function for dumping prefix tree structure. intended for debug use. - * @param alt root of prefix tree. - * @param lv indent level. 0 for root (no indent). - * @param out writer to send output to. - */ - public static void dump(Node alt, int lv, PrintWriter out) { - for (int i = 0; i < lv; i++) - out.print(" "); - out.println(alt.cs != null ? ('"'+alt.cs.toString()+'"') : "(null)"); - if (alt.branches != null) { - for (Node br : alt.branches) { - dump(br, lv + 1, out); - } - } - } - /** - * bulids regular expression from prefix-tree {@code alt} into buffer {@code sb}. - * @param alt prefix tree root. - * @param sb StringBuffer to store regular expression. - */ - protected static void buildRegex(Node alt, StringBuilder sb) { - String close = null; - if (alt.cs != null) { - // actually '!' always be the first character, because it is - // always used along with '*'. 
- for (int i = 0; i < alt.cs.length(); i++) { - char c = alt.cs.charAt(i); - if (c == '!') { - if (close != null) - throw new RuntimeException("more than one '!'"); - sb.append("(?="); - close = ")"; - } else if (c == '*') { - sb.append("[-\\w]+"); - } else { - sb.append(c); - } - } - } - if (alt.branches != null) { - // alt.branches.size() should always be > 1 - if (alt.branches.size() > 1) { - sb.append("(?:"); - } - String sep = ""; - for (Node alt1 : alt.branches) { - sb.append(sep); sep = "|"; - buildRegex(alt1, sb); - } - if (alt.branches.size() > 1) { - sb.append(")"); - } - } - if (close != null) - sb.append(close); - } - - /** - * Converts SURT-ordered list of public prefixes into a Java regex which - * matches the public-portion "plus one" segment, giving the domain on which - * cookies can be set or other policy grouping should occur. Also adds to - * regex a fallback matcher that for any new/unknown TLDs assumes the - * second-level domain is assignable. (Eg: 'zzz,example,'). 
- * - * @param list - * @return - */ - private static String surtPrefixRegexFromTrie(Node trie) { - StringBuilder regex = new StringBuilder(); - regex.append("(?ix)^\n"); - trie.addBranch("*,"); // for new/unknown TLDs - buildRegex(trie, regex); - regex.append("\n([-\\w]+,)"); - return regex.toString(); - } - - public static synchronized Pattern getTopmostAssignedSurtPrefixPattern() { - if (topmostAssignedSurtPrefixPattern == null) { - topmostAssignedSurtPrefixPattern = Pattern - .compile(getTopmostAssignedSurtPrefixRegex()); - } - return topmostAssignedSurtPrefixPattern; - } - - public static synchronized String getTopmostAssignedSurtPrefixRegex() { - if (topmostAssignedSurtPrefixRegex == null) { - // use bundled list - try { - BufferedReader reader = new BufferedReader(new InputStreamReader( - PublicSuffixes.class.getClassLoader().getResourceAsStream( - "effective_tld_names.dat"), "UTF-8")); - topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader); - IOUtils.closeQuietly(reader); - } catch (UnsupportedEncodingException ex) { - // should never happen - throw new RuntimeException(ex); - } - } - return topmostAssignedSurtPrefixRegex; - } - - public static String getTopmostAssignedSurtPrefixRegex(BufferedReader reader) { - try { - Node trie = readPublishedFileToSurtTrie(reader); - return surtPrefixRegexFromTrie(trie); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - /** - * Truncate SURT to its topmost assigned domain segment; that is, - * the public suffix plus one segment, but as a SURT-ordered prefix. - * - * if the pattern doesn't match, the passed-in SURT is returned. 
- * - * @param surt SURT to truncate - * @return truncated-to-topmost-assigned SURT prefix - */ - public static String reduceSurtToAssignmentLevel(String surt) { - Matcher matcher = TextUtils.getMatcher( - getTopmostAssignedSurtPrefixRegex(), surt); - if (matcher.find()) { - surt = matcher.group(); - } - TextUtils.recycleMatcher(matcher); - return surt; - } -} diff --git a/src/test/java/org/archive/url/PublicSuffixesTest.java b/src/test/java/org/archive/url/PublicSuffixesTest.java deleted file mode 100644 index e2bb288a..00000000 --- a/src/test/java/org/archive/url/PublicSuffixesTest.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.url; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.ArrayList; -import java.util.regex.Matcher; - -import junit.framework.TestCase; - -import org.archive.url.PublicSuffixes.Node; - -/** - * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches - * from constructed regex. 
- * - * @author gojomo - */ -public class PublicSuffixesTest extends TestCase { - // test of low level implementation - - public void testCompare() { - Node n = new Node("hoge"); - assertTrue(n.compareTo('a') > 0); - assertEquals(-1, n.compareTo('*')); - assertEquals(-1, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(-1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("*,"); - assertEquals(1, n.compareTo('a')); - assertEquals(0, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node("*,"))); - assertEquals(1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("!hoge"); - assertEquals(1, n.compareTo('a')); - assertEquals(-1, n.compareTo('*')); - assertEquals(0, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(0, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node(""); - assertEquals(1, n.compareTo('a')); - assertEquals(1, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node(""))); - } - - protected String dump(Node alt) { - StringWriter w = new StringWriter(); - PublicSuffixes.dump(alt, 0, new PrintWriter(w)); - return w.toString(); - } - public void testTrie1() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - // specifically, should not have empty string as match. 
- assertEquals("(null)\n" + - " \"ac,\"\n", dump(alt)); - alt.addBranch("ac,com,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"\"\n", dump(alt)); - alt.addBranch("ac,edu,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"edu,\"\n" + - " \"\"\n", dump(alt)); - } - public void testTrie2() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("*,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"*,\"\n", dump(alt)); - } - - public void testTrie3() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("ac,!hoge,"); - alt.addBranch("ac,*,"); - // exception goes first. - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"!hoge,\"\n" + - " \"*,\"\n" + - " \"\"\n", dump(alt)); - } - - // test of higher-level functionality - - Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() - .matcher(""); - - public void testBasics() { - matchPrefix("com,example,www,", "com,example,"); - matchPrefix("com,example,", "com,example,"); - matchPrefix("org,archive,www,", "org,archive,"); - matchPrefix("org,archive,", "org,archive,"); - matchPrefix("fr,yahoo,www,", "fr,yahoo,"); - matchPrefix("fr,yahoo,", "fr,yahoo,"); - matchPrefix("au,com,foobar,www,", "au,com,foobar,"); - matchPrefix("au,com,foobar,", "au,com,foobar,"); - matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); - matchPrefix("uk,co,virgin,", "uk,co,virgin,"); - matchPrefix("au,com,example,www,", "au,com,example,"); - matchPrefix("au,com,example,", "au,com,example,"); - matchPrefix("jp,tokyo,public,assigned,www,", - "jp,tokyo,public,assigned,"); - matchPrefix("jp,tokyo,public,assigned,", "jp,tokyo,public,assigned,"); - } - - public void testDomainWithDash() { - matchPrefix("de,bad-site,www", "de,bad-site,"); - } - - public void testDomainWithNumbers() { - matchPrefix("de,archive4u,www", "de,archive4u,"); - } - - public void testIPV4() { - assertEquals("unexpected reduction", - "1.2.3.4", - 
PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); - } - - public void testIPV6() { - assertEquals("unexpected reduction", - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", - PublicSuffixes.reduceSurtToAssignmentLevel( - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); - } - - public void testExceptions() { - matchPrefix("uk,bl,www,", "uk,bl,"); - matchPrefix("uk,bl,", "uk,bl,"); - matchPrefix("jp,tokyo,metro,subdomain,", "jp,tokyo,metro,"); - matchPrefix("jp,tokyo,metro,", "jp,tokyo,metro,"); - } - - public void testFakeTLD() { - // we assume any new/unknonwn TLD should be assumed as 2-level; - // this is preferable for our grouping purpose but might not be - // for a cookie-assigning browser (original purpose of publicsuffixlist) - matchPrefix("zzz,example,www,", "zzz,example,"); - } - - public void testUnsegmentedHostname() { - m.reset("example"); - assertFalse("unexpected match found in 'example'", m.find()); - } - - public void testTopmostAssignedCaching() { - assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); - assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); - } - - // TODO: test UTF domains? - - protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { - m.reset(surtDomain); - assertTrue("expected match not found in '" + surtDomain, m.find()); - assertEquals("expected match not found", expectedAssignedPrefix, m - .group()); - } -} From 5226f18045b2f6eb01e3526198f79e8149964cc1 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 20:21:21 +0000 Subject: [PATCH 14/86] Added config for deploying snapshots. 
--- .travis.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 52ea3bf1..c2fc63fd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,20 @@ language: java jdk: - oraclejdk7 +before_install: "git clone git@github.com:iipc/travis.git target/travis" before_script: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" - "echo $JAVA_OPTS" - - "ulimit -a" - - "ulimit -u 2048" - - "ulimit -a" +script: "target/travis/deploy-if.sh" + +# whitelist +branches: + only: + - master + +env: + global: + - secure: "qDKjVdoe4Qcz4WfXiQydU7tyl51T62FUJrjqu4FUPBcgeQhFQiggwhpaE6xCOzOpxbsuBi2R1c8gMQf5esE5iDL5jZMu+kz++dYbuzMTd13ttvZWMW5wRPH0H8iHk609FP/RDtVKKBr7WO0JvvIAZEhWNHZrLXBrrKgdTey171g=" + - secure: "FXGBKJNP9X7ePJfS4eYTZtoFo4RT1sxor34XxncSJr7uV6ggtZb4B4WNd16IlLcDk6E32sx8YoWdltaOGwQ5Vg/kux5Ko/wKZCoccS018Ln1bRT86dD1KoPY34rGoNJVQxe7J/1MPqpBKwmi2XCKfzpsEh3W7bbIqg8w9MEOOZA=" + From 70a49689f3db72649079208e5fe1947b25f90f5b Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 21:03:53 +0000 Subject: [PATCH 15/86] Switched to HTTPS. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c2fc63fd..9d7d2e50 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: java jdk: - oraclejdk7 -before_install: "git clone git@github.com:iipc/travis.git target/travis" +before_install: "git clone https://github.com/iipc/travis.git target/travis" before_script: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" From 8e98a2a32069e1e6b606cf1a6fed7ae130b8e888 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 21:27:33 +0000 Subject: [PATCH 16/86] Attempting rebuild. 
--- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 9d7d2e50..fc98b3c9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,9 @@ language: java jdk: - oraclejdk7 + before_install: "git clone https://github.com/iipc/travis.git target/travis" + before_script: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" From 1f246c9257d6a8828150a165a2b3cb43a07508b2 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 22:01:59 +0000 Subject: [PATCH 17/86] whitespace tidy up --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fc98b3c9..88c5f0fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ language: java + jdk: - oraclejdk7 @@ -10,7 +11,7 @@ before_script: - "echo $JAVA_OPTS" script: "target/travis/deploy-if.sh" -# whitelist +# whitelist in the master branch only branches: only: - master From 59045f16438881a0e67ba3846c8420e56c27c74b Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 22:47:39 +0000 Subject: [PATCH 18/86] One more tweak. --- .travis.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 88c5f0fb..c1dbf348 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,13 +3,16 @@ language: java jdk: - oraclejdk7 -before_install: "git clone https://github.com/iipc/travis.git target/travis" +before_install: + - "git clone https://github.com/iipc/travis.git target/travis" before_script: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" - "echo $JAVA_OPTS" -script: "target/travis/deploy-if.sh" + +script: + - "target/travis/deploy-if.sh" # whitelist in the master branch only branches: From ac864906fc365c7812e2209f3db18722517d141e Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 09:09:09 +0000 Subject: [PATCH 19/86] Upped number of open files allowed. 
--- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index c1dbf348..1cdf7339 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,9 @@ before_script: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" - "echo $JAVA_OPTS" + - "ulimit -a" + - "ulimit -u 2048" + - "ulimit -a" script: - "target/travis/deploy-if.sh" From a3e17dd37d9472c1f5d6c29b3b0feeb44cf22efd Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 09:33:08 +0000 Subject: [PATCH 20/86] Added build status. --- .travis.yml | 5 +---- README.md | 2 ++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1cdf7339..0dfd3f7f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,12 +7,9 @@ before_install: - "git clone https://github.com/iipc/travis.git target/travis" before_script: - - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" - - "echo $JAVA_OPTS" - - "ulimit -a" + - "export MAVEN_OPTS=-Xmx512m" - "ulimit -u 2048" - - "ulimit -a" script: - "target/travis/deploy-if.sh" diff --git a/README.md b/README.md index b70f8318..ae865f7e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ OpenWayback Web Commons ======================= +[![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) + This repository contains common utility code for the OpenWayback project. From 7653cc0dbc5dc75167761a98de01bec79bf530bd Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 09:44:14 +0000 Subject: [PATCH 21/86] Added link to parent project. 
--- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ae865f7e..9bd2e12a 100644 --- a/README.md +++ b/README.md @@ -3,4 +3,6 @@ OpenWayback Web Commons [![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) -This repository contains common utility code for the OpenWayback project. +This repository contains common utility code for the [OpenWayback][1] project. + +[1]: https://github.com/iipc/openwayback From 3f397e63873d5c7ed38c2190d55f2870498c6ea5 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 09:49:11 +0000 Subject: [PATCH 22/86] Bumped version number due to large number of changes adding functionality. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a1d3de27..0c1a06d4 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons commons-web - 1.0.1-SNAPSHOT + 1.1.0-SNAPSHOT jar iipc-web-commons From a75863022c02659e6eb590b586a4822757dc8d8f Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 11:12:12 +0000 Subject: [PATCH 23/86] [maven-release-plugin] prepare release commons-web-1.1.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0c1a06d4..cfb3a0cb 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons commons-web - 1.1.0-SNAPSHOT + 1.1.0 jar iipc-web-commons From d898af145b827d4ebf08fdc858d90dc24b702485 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 11:12:16 +0000 Subject: [PATCH 24/86] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index cfb3a0cb..b6c8994d 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons commons-web - 1.1.0 + 1.1.1-SNAPSHOT jar iipc-web-commons From 
c94e14b4f0ce990a147b306a9f6986f9f79183b1 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 16 Jan 2014 18:45:02 -0800 Subject: [PATCH 25/86] use iipc-web-commons consistently as the project name --- README.md | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9bd2e12a..ee3f62ce 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -OpenWayback Web Commons +IIPC Web Commons ======================= [![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) -This repository contains common utility code for the [OpenWayback][1] project. +This repository contains common utility code for [OpenWayback][1] and other projects. [1]: https://github.com/iipc/openwayback diff --git a/pom.xml b/pom.xml index b6c8994d..21bca897 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons - commons-web + iipc-web-commons 1.1.1-SNAPSHOT jar From 2d5ab076e8d440f2352c8fed0874f5eb5c548383 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Tue, 28 Jan 2014 16:18:31 +0000 Subject: [PATCH 26/86] Switched to webarchive-commons naming. --- README.md | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ee3f62ce..fc45fdb3 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -IIPC Web Commons -======================= +IIPC Web Archive Commons +======================== [![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) diff --git a/pom.xml b/pom.xml index 21bca897..5855e09b 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons - iipc-web-commons + webarchive-commons 1.1.1-SNAPSHOT jar From d05415f1a20dc42d091ed9b84bc70d14a234cd25 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 12 Feb 2014 16:34:07 +0000 Subject: [PATCH 27/86] Updated to new name. 
--- README.md | 2 +- pom.xml | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index fc45fdb3..72858a52 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ IIPC Web Archive Commons ======================== -[![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) +[![Build Status](https://travis-ci.org/iipc/webarchive-commons.png?branch=master)](https://travis-ci.org/iipc/webarchive-commons/) This repository contains common utility code for [OpenWayback][1] and other projects. diff --git a/pom.xml b/pom.xml index 5855e09b..cfd201b0 100644 --- a/pom.xml +++ b/pom.xml @@ -13,8 +13,8 @@ 1.1.1-SNAPSHOT jar - iipc-web-commons - https://github.com/iipc/iipc-web-commons + webarchive-commons + https://github.com/iipc/webarchive-commons The International Internet Preservation Consortium @@ -41,12 +41,12 @@ GitHub Issues - https://github.com/iipc/iipc-web-commons/issues + https://github.com/iipc/webarchive-commons/issues - scm:git:git@github.com:iipc/iipc-web-commons.git - scm:git:git@github.com:iipc/iipc-web-commons.git - git@github.com:iipc/iipc-web-commons.git + scm:git:git@github.com:iipc/webarchive-commons.git + scm:git:git@github.com:iipc/webarchive-commons.git + git@github.com:iipc/webarchive-commons.git @@ -199,7 +199,7 @@ jar-with-dependencies - iipc-web-commons + webarchive-commons From 2465516d2f077e20bf147fdafa022ba72ee41dce Mon Sep 17 00:00:00 2001 From: Erik Hetzner Date: Thu, 20 Feb 2014 19:56:58 -0800 Subject: [PATCH 28/86] do not read past end of extra fields when reading extra field --- .../archive/format/gzip/GZIPFExtraRecord.java | 7 ++++++- .../archive/format/gzip/GZIPFExtraRecords.java | 11 ++++++++--- .../format/gzip/GZIPFormatException.java | 7 +++++++ .../format/gzip/GZIPMemberSeriesTest.java | 5 ++++- .../archive/format/gzip/IAH-urls-wget.warc.gz | Bin 0 -> 43582 bytes 5 files changed, 25 insertions(+), 5 deletions(-) create 
mode 100644 src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz diff --git a/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java b/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java index a4ed6260..0a9a82e0 100644 --- a/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java +++ b/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java @@ -98,12 +98,17 @@ public void writeTo(OutputStream os) throws IOException { os.write(value); } } - public int read(InputStream is) throws IOException { + public int read(InputStream is, int maxRead) throws IOException { byte tmpName[] = null; byte tmpVal[] = null; int valLen = 0; tmpName = ByteOp.readNBytes(is, GZIP_FEXTRA_NAME_BYTES); valLen = ByteOp.readShort(is); + if (valLen > (maxRead - BYTES_IN_SHORT - GZIP_FEXTRA_NAME_BYTES)) { + /* read in what's left, but throw an exception */ + tmpVal = ByteOp.readNBytes(is, maxRead - BYTES_IN_SHORT - GZIP_FEXTRA_NAME_BYTES); + throw new GZIPFormatException.GZIPExtraFieldShortException(maxRead); + } if(valLen > 0) { tmpVal = ByteOp.readNBytes(is, valLen); } diff --git a/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java b/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java index 7dc0de44..e5920552 100755 --- a/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java +++ b/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java @@ -53,12 +53,17 @@ public void readRecords(InputStream is) ArrayList tmpList = new ArrayList(); while(bytesRemaining > 0) { GZIPFExtraRecord tmpRecord = new GZIPFExtraRecord(); - int bytesRead = tmpRecord.read(is); - bytesRemaining -= bytesRead; + try { + int bytesRead = tmpRecord.read(is, bytesRemaining); + bytesRemaining -= bytesRead; + tmpList.add(tmpRecord); + } catch (GZIPFormatException.GZIPExtraFieldShortException ex) { + /* not enough bytes for the extra field; move on */ + bytesRemaining -= ex.bytesRead; + } if(bytesRemaining < 0) { throw new GZIPFormatException("Invalid FExtra 
length/records"); } - tmpList.add(tmpRecord); } this.addAll(tmpList); } diff --git a/src/main/java/org/archive/format/gzip/GZIPFormatException.java b/src/main/java/org/archive/format/gzip/GZIPFormatException.java index ca627a88..3916dafa 100644 --- a/src/main/java/org/archive/format/gzip/GZIPFormatException.java +++ b/src/main/java/org/archive/format/gzip/GZIPFormatException.java @@ -21,4 +21,11 @@ public GZIPFormatException(Exception e) { public GZIPFormatException(String message, IOException e) { super(message,e); } + public static class GZIPExtraFieldShortException extends GZIPFormatException { + int bytesRead; + public GZIPExtraFieldShortException(int bytesRead) { + super("Extra Field short."); + this.bytesRead = bytesRead; + } + } } diff --git a/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java b/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java index 95c7e96f..2eec46ec 100644 --- a/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java +++ b/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java @@ -374,6 +374,9 @@ public void testAutoSkip() throws IOException { assertNull(m); assertTrue(s.gotEOF()); } - + public void testWgetProblem() throws IndexOutOfBoundsException, FileNotFoundException, IOException { + InputStream is = getClass().getResourceAsStream("IAH-urls-wget.warc.gz"); + new GZIPDecoder().parseHeader(is); + } } diff --git a/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz b/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa248f8d52a693075d745270d0013059b0afc6ff GIT binary patch literal 43582 zcmZU)V~}K9*M(cQZQHhO+gi<$K-PsNSa93S3$Z7+M_}e4reD`COa~Zk8YK~utK|*wGavXr(*(e)$uWdA38-~ z`a`tuU2-Y6l>1Ad@E{O!xBbp-6pVn50cm%r9ZMY85I8G{}MNG08{j zZ*ooTbMN?QNnVi1iRI=HMOip}GxmFJ1<~I(5>{Zb|hPig#^IAzHO( zUM$3IDOW#3$O$t!z)oNXi^sY#7OQQ?30+mnEOq@yy2_@*Kby_}Zg9LS{>Av582HKH 
z7R&X>zdi571ZX$2p-V)Eq1;hdT#SW{Q4Z=`?&6ekElC?q1mz@I*<{^eu%-+JJt-pF zF`keM+L@R(ZY^xUn#l*zbmJ^Lll6@vm}BDWWGeYH^1`Mq-*k$O50~}c`<%ijTLgVV zQOSPoGYt(?fsgyT50Pm&)K=>@q~G$jI<8=UW|zVfWyIuV+0^MU_PT@@Gy)_$5>G82V7K&FYLO!@9;b)qf}8!_4Z{e72O zy*UcA<=JLXM*?f`N9`l0)+Yfwq{d&q)6vc)t946A&U17~$YTit@#A~!MF95$ zBM$bu_>vF1oyb-CvJ@*9e~WCD7hb%tYWo)4EM3<-v+EQo;ooN}tK(6!A?N{SvKEqz zGU)i#FA=Ttp%sj37=$ZDx1BQ8?K?6?UEPb4KbS zvj7dn`HN&?5oVGQ5~to~zg)i;^DXUK-BdF<6c5p#aF#Tz85YvJdEBIQ%8_I*izI?s zN4#`zQ`3XJF6GSWB^!LjinwELr*_&Ju&w?Zf~sjz;;kz#kjl@WjOIs`z&1~Yjm zp}$!CG168qnT8JUHJ`(XnzH6vc{4hq0oSy(o&F49E~NTGf+)0nIKf_?b;Y8a{3viQ z_-rC@VU_If#rz{l6-SW0d3+BkjUp`mPy7EXisN@>4ipH;RYl*qkO|G#eduq(={-e>{5A`UZ^_ z?1D*kS&DV;>O1wS60>4Sg2gn~Q>@!&CL%l8tlxrQz4QX{xSaG9h>M<>oM{mt_@ad+ zSNBGZHvaaHW~ARhe&pjEaEQQ%puX@U~}Wmj_06cov2Zw#4u z2uTW4p%^~nh#Vx~%8%uxAtgHPUQ_OGmBf_k44xjH5g<|#mT_d)%-Yyi=(8V|&`LcR zt%NIjsXiQ;F&Ae9yNaZZR0KxKQX6?F;2z+C@JTnHW~-B7VFZbd#S3wDCQLr3iIvK| zO$EXME;`WVTxb~F+aJFUHtg~yXHS^Bmlu6=`!tljIoxP6wOW8e_dcP&#s!mhPt;|g zV4ovtwU7I}2CLC)oCWs0aQfV+es(hNnEu-sdEplX z69{4jj~Apub>O-7oqFM(cS+NqD*qB9O#Eba#dT;XkHe0@-*e|@&sPKQdj9FJ-Qsh; zs%MVSQUJa2KEiss_hh;>j1FHbnW(4$_I1&;d)XMhaVvc6W)jNFyapQ!y5Bz-Nq7U&`Os4n(lOf$)jkR*%e=%GDEC2rHMp{Iw5xcBL%7Vg_8%iP zbnbYZgfwjvu6dTH{9~>E8ZQ1j1yKG?fl-D2e-m2p1xiX`kKEN7gW3_~572CwTFHip z@mvH7)iDWk)$)L5ghS!skupViSVpJNz(SD7*dbsN zBygl6JW>J_g4%a}qU4e}^c-@$oC}w6_x94gV-u1-=sqTFNe~re6fDxLKA{kiT4@?8 z>FFo12JV);o zty607pr0%kvVhb>BUs3E6*s1}wY7Y{4n;P_mHvMk|6e&ozXd{p00K%?TaR1kK=Q@h z@z2P&7q^sdv{|)M?P;@VnBw8^iJb31Yh(ko%0`1oZ+~^AFizUl3$@J!7q{kQayyrc z`zN*d&0BN9zDt;w!H_DI(`s9F;814Bswcd&1qr%U#<=ksi22oQUpEF5+#0Wu3sb^L z`M6tb_t8D}#S95^e%`~>8qpjd%J@Ofo|^qwk)}GG>me29g_gzxsJvsl|13*wLOI(e zLeW0rEvaQQrN|53aMhJ=KUPh0dxwTja*H1DOF(9{q?BG6Ps&f-6f&3lGn?>wAP^MG ztbWi2Xn@}~QLSvMI#9vcWRrA`Rm`ljQJS_#-X>khQx>#pCSm;Wf^Q*A_EXksbHT^p zu-xIKhR=F9iw~X1lNwttmk0Ce|MW@uNk=OOq@LN4a_e5@!&-9+`_e@sC>QqJ1y@lznWb= zES#}B&U}FwCueh5JHB4S{?%uBY(AW3l$dlu4MfWo2Z9U^Q!3c5J*^IqEB7Kd-n;g{ 
zf$15`q!KqFTF4X^0=NopzgzHGgH{o21wPZRn+({7@8Fm2SY)E002fj#INPe-`>Ud} z)6T!)GHadmP3RRNT;=0g4HgtSCt#X$D^w-DW0 zF6&4MQu?K+*>Y(%kkAM|!ylV=Bed~bu?W`X^0TJ2C$dQ;yUb*#$q-vg5YFz4EnhoT zEon2o^~d`K!@u@xtg3-wjs=@dXy9q%zmNb;H_CQnm_OF2OxN?o7dIBWL-Ph-19h#h z7b^v`zNiVF+uxdx7v{b&>l&t3{~4D5j!@KhqCxzdX!?r5-;YqrUBL%sd6>pVm!)ggq0aPM#+x zZD{c2DsD*K5+1wt_s^{{v${Wmp^IZ<9I>^wDL)s4iLTr9^$#lTaJ_w;ZnrdLNHzUa zi}Wm6+!A0DL78Qk_ZAKjyt$Dy3V)hGA0tDi2iv?bEs#^Xi12S0B^YCf1c5?Aq_9L# z9*_JGh$O+#BLq8=JxMP?^A&t!f<-m`AxG!a!`dmn*#Y=V*Kwc0%sAi>6bT9qj}4cB zduqM&V>)&dsi;W2);gLKYcX{zI@m9pQ~;h+ade*|6XJpchRVCk(7ryK%-}WpJ)BG0 zulrnqEp^Q)Ai)BslsQ(_C|Em-!Iff!ZK*S1riLv!ST3B&4VsJExwQs2gwB||taKMM z^n`7HLVz_^n>X&3F`uws0?mX7k1pgSzk7b=gge*15df9+PtX6Wgxz;#_&dzq2L!cz4L~^{P#9we=+i4L=ToRuwy8XTQ;iO|n!62?lWf<}5k1VO@bb9#W4mGC zi?i2^iWHc*n`1zF-RSMzek)dM4T&QhBZF@lu(@>CILS~5)Q6FUi3-LLGmC;r5@QRG zKZnl_)JveQ<2{3AtC_0bhyTrYkf=N7Ar2Fjlasob2@SLsr-8$}(5gc>b}51KOuh^? ziVcDOTiQ>s7I(ceGh_z^jUb1$0)V}B2$5T+R6c;vOd>-EPj=|cQdN>!Rq5vtCIktw+Y&K?OG3Be-AQeH1CeP06omhZTg#cH`%4<-(ks-gmcWlNt*f1C^Xb8!OIAz}CRsTL^=2h*P<$tZO%LTJEZX}>T zO)w&BvkPf)29a&VE*n&q@e|Z6p`*ol)o2ainp#r77dtEe=G{(PT)E-@=V|R`BugX> z4N11{XRvk>-D0p%4-2!_4d}RVAAlaSqWr9Z!^3Q^2Mk<-xi)+YpHK zCTBF-edi1(gCQvog`8Kq#8&q&BRKa-l8x4qyihNs3B80XxZ(6FUyS{)bDhueYU^}dTeDz|QtV0hnXm=BV1P3n`~>kt8+M4I zq$*$yF%N9vgQC++&`eG1XL?hL4_P54dl&)UUUSrWT={H>P*jyM5#9+2feK5^caRAl z5GZ4g3}!EJ?6vh&rgDzx`3i@-7IE(+^KC zxuD-45On+{&iQq$sb^R%lV(B#rzg8A+SRb~lP_11NljVxpT_@J4)*T~-nU5lDwlTI zOlY6+1H3Q5=Ek7Fd)3K3A&CI(pCU=6f3aG?*qaiW_3xipsTY^Vk`CjYyQd0iNT6^a zScmc)DdLjyivfEVNrU%CF{J8P-FW7=>hEzwb}3s3PrM#@!|?yE!o#h%8aHVBbFR;OQ<%alxbOoRlG_27S=zMk$Uzfj_Q*XngZP@cc5r1r5a>f`x z{M?Vb5~Y82Y-(REX%1)mOtk^vuFJ}cwih#C=o{0mJuRe!T7l)kIn@}n>73@B`@j03 zZ7>^D6=^WT%k{I(@l8Ld8SN0krR}Av6PetV?BICwWDkXnYEnS&xb|;DVxsIF7@pdv$DlPW<(0g)x$_wfYh^P%h30u2Pu=ap&6!Z&v#uMpu*V0f@OKwN9hERYwRge_Rj7=_Wljy*}R# z;~gTiW!Cu#amVuQ*4e1B1@>-RGW&xH-s?6K{eyZtT;~JB&Zkthk;&~kVFvU}A{*ft zqF(4;8!KkEX@!ss%s*7SG)6mKNbtB}ZKe!^ftRGPUX`dSh?Dsi;RJ9ZGN9hf*(8LU 
zxe&kIX6R*m-~ARG;Y8|$q1Z>D6&KN6?>D}ZYmW=|R|EoCMUabCw*zxcz-bG;%sk0g45@&$;CkBt*R26|B^hwVV}W%tz2ceC@ol{;`4DXj?9EgjWmu@rh1TP4 zdi&|z+s5#W60BG&4qL*{<+aN8F^nNF=du6v`@bsO;e2mEynui%^>v;1M_m0Z7C*ub z*4mR$fWdmQ#q0={n&gL0 zdVV7fe?yMgyUC7SK#6*?xp=d*J^8crp~2b|^8|bu4BZLY4A~fU$n_Tq&{pDN1&GmtuCKX_pzCsg!AO-B5|~lCQ$0 zY}b*m!XE-NP}*+X6v!j5^VX|b)(NXOQDZAnv0Y3p@@j=AjJJ~b(#zAH#U}7a4K^swXPgzH7qIf-lAf;tl(CuBP$!>QZ8szaCfoSC>-|V z1Uyy+S_^k=S?=&F9}Uxb9w`LqU@WSdI&6#iL8576#;rRBHYb->ejJ1vTFyvgGA>-dD@x#qX~o zO2<1O+NF(y(~1 zT$60J##Y@vyQ|nBVJJm}()}zokBJRoE}MjUh@zMLoBW&dd2UGGnF2!`?)N?6-K5v$ z340`WqxPDc$VyRR>O#tmO^PsusR^ZsXtE#@#5@=CR+!Z)q!ny#?*Tj&L8IN?hN;N< z`}~>P3fzax8oYLjW*hgJ0F*@UK|0}nGYYp7;cBdVw6mU{{8S%e+{Y$Itg_r$Y$LBl z_bTf2RZMzb3(*@)H2X$+eMZe(-SwGNPM$uVo@)Xp>QxHms+()r7f&@<-EC20Vg0WP zzVRMxcmyd=5rdKoZiovpZuu@}k5-q$KmH~^X;<5Bq*OF!CZ&Coz(WLCNx~vC$N#X!-Kq;gf9@sU*;-)Lht%I?XgP43E}i|z-=^5 z-q@q9P`t);@&&f!ii8#v{42A(lr%iYJ3}vqqM7iZx0o+zodU=10((Yc+1zRSbSi|>D73Wq$?gM zA*EUq=(5qz$#qwl&c%qNZhFY)q7^rW1haC`y!F{?7BwCeaaAb@nhR=65{Tk(Du^TO zakk3Z!I2t~R*do{8-RBrDClU0h9Ak`M);y%cA0CyheXR6&@0%fwsU7^d|a4Zg__CqS) znhEA`ria)a_|cnC$5f!1wFRS6X-*VYY^4@E`E+|<)R51|US)9Y~1Ikf2JC-B5v5_6jT0Ce_&n zGE#xu=VAOH@{NAo6G*!+Hyb=G#wG!+V+sEvmWHEj6hj?xX0D@Xkxy0#Oj;`vW-)Gn z(IR77fwe0OIr0kuKNK@7&yB8MW1L;cku7k|*uBBgJL}=6dh)=!Q&xrvHF$>5PAlnH zN|I(*Wn_%E%)Zs)zR2Q!BgGOsI<}hpd!hT}ssf*(t>i_cQA_w?NOivpyEN`1K(7bl z<-S}9J~ikTl+^gJx9hxa{L#-I4FLP&b~7Drohp$A@3lD~c-^h;(8t)T3~t1Be0%tA zpYr5^CK4_G@xGO<!>&t z;0D*wCGTFp#r1Vz@nI*;wPjY#dlhKvAqg5LJ>iJYD*}`Nq*EA2qUU2`s+-pT1cnxG zA8Am7oSj>8Qc685AD&Dhl)<=|MvH zLK|#HL$VtF8ene<;>QqR7Mzd>Ez+>-!{9XtJpaQtKur2YY@{yYz=SRp^`_9ROhxje zo=gW519B!sLIJ-v-rIC{W{qk5wm?DY%vlgedcoa;q&jrMFc{0i$0*M)HO2bNLzp^b zwrew5ghI9{X*z%|9i{UKfs19xCms?&MNKQHc$IvA(943t=*d|9K-!YUXyynu&J<4}Zi9%I36bvb1Om>27rReF z=elFn?)$M>?lLunE7mg2SJGL$i`>u?WQ4+s>iN_fZY+aU7zbL1<|8NN*ys23fGC5Ya5KQLDQpmI2~#fMnNbTXb>4=jL7! 
z^2B~WbR_VH)ffd~V487@^Phyw{`D3+^25MKgcSE)MmUOQ07w}6>q4MJIrI^4{bbi= zj7Agca)5c3BfL|Px}9~DrznT9fWE>T7sKvvNu@8~Df?Y52B}R6_*>GMyF!9$kB9e# zC(RkL0B960%o1r;jP5_s(-sI=;3R=&F4M^*qahy}g@G3DZdkxdP?SA_ON2rdhw^&n zG(wmRdoaz-{hv%);uv|dn_a1=NPLG)_`eStlG9pa*MRtrZI6gw`hu*TJm8laqVNQ| zVu!$(25M>f-kf9v2Sw&$r{`s%hMaFYJO+Gxpdkx`U^X9xGdIyEIrm9!t*&T96WK(s zL=X08@xU4(e`)6#x3yqT7p5iWDgez%&&rIcUU$Ul8b}s`I@H<4@3PsqLPvvx-7!&n z!AfUmO~E{c0YD`#?ZB*`S}mMqrJ&myb1sxi(LBLukVJ9}8o&qaNVIr2PeHCLJ+jg$ z|5&~rju9|^9!QOt<&)b)!c@L&qF3z^SnSg9qZ}Ogf%7;jzhRtZ3Uvo`55hDFEMx6l z(>2QWA^K)SJdl>bm-}#^7NhkVH!Fcr*P(|!U^TNJlq%x2FP*gon9>HhcSMKeUtoAK zz~Zl&OpVpzV`@jUhk&_*1J}F!*y}UW=?-FZPc?DhS?Rr@PZ`+<%5_9(TwomqX=XkR zem}~ch7`~TuVSdh;*KBxGq{T%lpMm>K+-ChTIQLvVKAntql^#U)hPl@9dbOp(@2sL zmee&Zc`$l7c#7jf>MW(7{>R~JTBm*hXJBB1&S<^u~~5LTTcg-a{V)`<5hRTdIgq0Te#@b zAs{+1f$=$4571<@orF!_sIF$9YuOsWcy^-bis5>W<4?u#S3Wb*V_GDGK4t)PM-;KV z9B>9Q2Fs02))2&UUGrJ?%8GQoZ1|Lacz^W1H5D18?K=E~G?fD-xelMmDrj`d9%w7A z9;F*i)C+5~#g=2>M@P(d)F-h&(71hejC5)ADM7*n)wH!^uzJ?+@gZ&hO4EI_YTNh~ znT(7wJlEdptnSouZm9zP0YmTA6fit9x-qeMg zgP@?s^+MDsD^tKrt_>^YQTm3PG+CN=GmA;6lXjy+E`k<|R*N65XNHyd8vvAng@gC- zW>YeG>{r4#af<}Vy)0q6Onzh?yfO_Q3rqJjp?!UaeClCRN`3!l_JmX8#kTV8P3B3H zUYJ4CWUX6e^l1cD=Z;6FMGr^v?iMZE%m;oebWG0vKEN_FA-<1s1KM4?5C0d8DvwQbc%~~@DcjhH$`~szLD;P&AHutIDPoA-3{i4yPb%b~W%zUW z+r$%Lf}|wE21kQy+GmIN5)2AFI9+fhy)NGYV1U5Jqn#716<(?xGYYzrd? 
z1;AK+ixK`YV>gCvh=5|;$lcBsy5UZyO=B)5>C&JdPKE5vfmJO=hARX^7m3B2tCxsP zuiWzB@7I@MWHmOr<1DtwO!Ent>X%v&Tf(MR_EzQ~aCrVjA(IyZd(FYpJF-ELvp_J_ z9TUBfK`lW(MQ!2{dlP>R_PcOgL+~#M!!%V(G3KMKe5e(Qp(P4NAF`(UJ&3e}iIaP8sfK zXcJu#hY5D-uLYKRk{&#^j}ow=#y%;Vz9H|I2pg4PX?^e{DL9ZEX)4adhIR6`uyu0P zyo^aF)JWS<@b>-ad-wWMNTSI)s0;(CT-NPEhD`|erqEThd7o1AWE(qjT&-xgDyMc; zt>|`?Ks+>hJBe~z8Ys;^lipnk>Du%~WYbrB8C?JDC9K+;7345qHtr@4&Y^wf172L+ zN+Div0np#qNdm4QX;L&>XfeDUXc1$vo7gWnn82=67qI5;HLRx~*kfb_Z3Fh>WKbk{ z#BIh8B2ZcL1$s%Q)n&uE0_VLQ8G6m- zLWmOoEK1wJwxvngGS5s)*gz{27m^pHWHPDMzeY9a#yiQ@>&Y|Qjc`g_qYuz72zRP|OAYf$gp?{kfs?3}8w$R>!bq7^YIcG+WEagM`7k zA_4@qpkcee1#y?kg`zw1BOE(_IhOWU$hmb^=>C+#!dyO~ z6M|k3NvbTh&(h64{3AE@z(Jw>OtLP_qGT=*>Lnu+<_s5vhxH^~e=TifXZ7409IZf! zrlhGB@hiU??d1U}Rkm;)X&#C#eqEl0-8(A*KF%M0(>zoQW5o>E=_IOgI|IRAK-lI!gioILue8VONK4%*B8Q+r39F==nu6-`EoRt)x z;5jW5?Ti&pEPr7EIh0KwPi4EQzWE&%S0~TBrNsl9{&kq2x4z7q()6-4DjOJ3 zz(A&aKGPetwLdkjzBsUCLr^I(_X40%tsKsfV0%6$iJ`;zK&*#1#9+Wd;GDMhP0cqQ*#X_ye|i@xR5CZ zHph`f&=la;XZ(GeLH34!(%D5*llDTIq7vGc?CBAW4&HWotbln0wNDuBU_F}vOHn4M z^eojU2ny4`5P|B_oGE1X;1LA-q>ku}$}AUJl*E%=Cl$N+s{r5a(r})pMSVsVrl=Jp z&6CgcT{s8wr^Ikav;!F>@jT-#oA$^Jkw?+rPxplLEcKQRL3dI7oK5H}(ULm+@&@PO z4@ldlAB$9lD6If~hCju6b8G4dR@ew>oqxtTji_r07qT!qZGVITvmB(V5}sC(WP7;O zl$l|D$hAVKI1c;_BCNc)sPqlb@}dt966+QKf_lU7nIb8+QJ8|GNlLq$%0JPCW=#hZ z3#pq+_=s4OnJ?hrMuzrh*kuq(cXeDKmuqu%5|(T^^EgU^C14^+TOU;6Vf&eg$zfDQ z>4vo9pKFmUxo}FGPJBPY$|zPlySrhGaC`vOLCy`Im?T6TAa_L`_m=dkx4pTk-WXMA zZN=mwOya?cevW#!eo|B0fS@c?X{xi^9;U9YKA$EWC_XJiD7{5K8ee;6n;yNVH*x)L zH9OZ_aNTdL+^Mp<_2+tzg#`-6XXY74BC?<*oXKkB<0XaRd%6Y3yrC>1TmZuIR(yLs==|nlP5o$72|qQI>)qM`OVNh!)74pyFo+*jX3`P56ZkJQ z#=uPl@hygk#7|k1J(v}D*KmdyV@FBx5(`h*btHS-3xJn{>2B3gZu3WD1;)$Aua9<9C(aZu?tY)d4gFdyjve2@<$qlg@~(W6$Y3%gChVkh=f zcgq>vyQ_@9NpC3_h}$3g+4fr>SsNe2lX-Su_eb5^ALEnu&>wGk*@7D%Pe z26|t&NA3rcHQPR-c3%%idAs+2`;N1>yI#vU{}K`W74~U){r;j_IyBXh>}{ad?eALz zUKxvh?%z4LJM8%$dO_`S7rsdn1q9&>$!aE4hq3e-Vt?nVn<|j5qQm3@zknL03ZrTo z0!pq`7t-NX3c-yHC1y(85o20hU8TNtIVfAZPns#?k6ntc_PP?8`>l;PFW7lP&St<< 
zexN6bV_gBlG%7-jIv!TfZaW*nvf>&$%VnA-V0|&+(%q`9%1npBLZ|w-owomTT7z$O zPjU_H_fM$Y?&OV{{{|aTtPqQHNOgzne!)^L-jU9sBd<2n0K3n%hFCPiCJTzMkC9)r ziuZvxWi7L(XjOW32SWnu2!e+BkYR{&uCs<{5?u z=^Yqe09YHHRzc}?hr;C$BonZzC-Hm0sJb(gG8hq$1|GON`IVtWAR;E!BX^!h@H*=5 zH3v0lAZjJ%$z6%i!Ou;LPM8{c?I;mcPvl&2d0Wl?`0@48#Pk@J(L*~W$a27?cXLa6 z+jif_I{PGge#3IMJN|lqeQgT+%USMTOJAZxucd7S{2tHot+!Id)=_U^59M`#?G8oD zg$2tfQ4OK>o;|%ukpC}|mtIgHi(8`=GM0Tp+0MkETf#J6o%XzkO*l;f#vXNHQrU#t zh$xK17&#r=BKYw9WamQK^=luNfU}pDF@K*EA2S^!>3VVhnrkg6MvC54hempd0g*Ll zE>Oe;$0i~e;XFUfYwpL}pGmgTO_fUx>ByFv>!id>36c`mp5wr`kAUgQITrN2w8twN zwM3c+W2ydFAqvGG0x!p=pr}z!?XkFo3ifLp_<=<@Fp!C_H=3oq60g3eQ}``;UidD@ zUv0T=rrb7n6$PWB><5x_U$QDB`?<8Uzqp8sJR({Z2a1{en@3DwYT;*aa#3lfX-||0 zY?8|q1&kJq?M!cJ!j>QI>{2C;o(I-Q1bm*mO8w$}ClRh7Be_C7JJ6n4Zl+75&Pxby zCm4M~6Sg zk6XNx1knmgecLbz9~6rct|zZ~N3O9l2-27|3q3r}APVp$ZtLpO6T{1783pQoX1%cX zOL5=hqc?0{7`;u@9}?3G<|BBE-?I6>B;8PeL ze}FK`PC^ZwC!zYZvkOq%cd{gKzxvEVA73}wa-Lu!=FN$v%4N4AJqTzm7tnM8r4vZc z?p*<@k-a7c7P|69>6$s>JvuvPZ^W81LGpd?E+Z*+dYHqsR zph&)H_l%TQ@8Sx+#Fn*rShm#~-tEnwNevV13e;Ctf(w~fhZ=Ir+>A>J1Kz4}z*kWn zV91PGW^s_lubyv3&srq&6*9O)n26^))&^7|ag9uWCteiTqa$=(BD!d*-qK=7AN2(j zMM|gd*(ko?f%`$n*Ez}DqN>HK%;fp8D-^Y@N4VFvztkU&G45u)d*atJ-Knj!<4RoW zpAhL%5cbNIkV%tRAL{(sy$L+2D8Ozfb0+rlnhnsJ9nq~RYcsHJv56UO=p=W?wzOP? zsKmYBP8!fVhU?rA+S*Qw!h@$hRP^5(Q z10YA#d|t){#Tw!TWIQib2kxMrp<%8{-k$TbOv0`*4pIt@)=c{<&D_ws-ERK{H3jud z_~&fvzb9UR?=vuof7t%YG8F3Kq*jOUq~zczp6lqJT}4YN7n_g-vMVPyH%Z%~f0d#7#kluej@2sm+&F!{j2SVpYSA-eF z5C*9FBb3vLkB8Zb(Mxh@TB5OE+T=iW(*VX(L-)xEMEd>sQpPAf0drUs!dN6C->v%V zW?RcEVxXR!ajZVv%p{C4G|;ChuzcRw-|#`SYm2h|Oaccg*9aEtgY{{4G9r5x32~J%h0x zLrMK@fAC)pfB(_vzpB}OS7yG?kX=>Qoz}kjB8xEpnS<7P`cX_Jd~+XJ5ehaiIZF>! 
zo5iOAEdpYY4_~uAwseJI#HFiTejaa)b0g%;k=n4}CV}3E1t%D??WV%jTY=V{%mnV7 zA+#6o)tS$(v5HVspHwacymbX2TS#ptC`T}Q7M$V8OJun|X~kM|ma44nJ=joQ1-|K6 zCcGc(b7+Z>qVWSfaQYWXA!1ma%3_71Pbzm3y=p#l%x;}zjmTme`O?PTV3KjD{ibMP zQsW{DXdQ{gau|BOi^ej-6>75b_YPcGI{Mn34x%G{zq4`(V*xhrX&KbEPynP^OraQ+ zm~v67dJcXgo^at@>tQ$i3D(+JxENc|=f_%%Ro0lQpldYn_9WuB7h0)nj@iY}y&uve zM;21%rrhnmWU?KOgc^?9+!kdzwI9ycQ9E3Ell`ZUXGcqZQczv4HJuLl40dxjlE#RT zYt*iQ(+-)XZ_J2UJ#y$m;;U;DT}G2(+x=t7VK`eP6>UYV0S=PWl4qF_6xjG1lxF0E zq}~R6gVKv-yGEmju<2-Q;){5^`GG_Iy;X*+vp%3$f$ws^Izc<{R-FHOzO70PUtcbA~;o|Obq)wPIN-yN=Yg8S@27y9FQD<;`zGwX`YD_g8a-N#V z%yh;GUlEV9(6vDA{dlJ0&Nzq{pL(O$lxteJaanPm`N{9JOkY}W=`SNG-?Wa9>f<m8#Mz>AvF(4B;s; zxbAuf){o<#e0Wm~+AJ3CF@_Vpz=gIVD-0kz!eyS2k};0iA}cgV#g|)KyBO^Kjt9yV zGH(8yAjWl5)JuYEf@?yKlwSZai@=vQPr>D2JgZHI04*b3K$E=ljh}k$`27S|h|M|M zICRunih~13+HY+_pYo_vOmXaE$aeim*|}m{9FBrf4tu))AxNu7*UJZmn;L0!hDR||6FKrN zF_0sUw$>&JOuQU9z=P`&QE7$*=^Y7`Aq^pc*S}DHpPwboGC!dPp>R3rgLOux_e6vU z#7L|lO5wE{Tzs`4**K{$?5X>eV8sRp3}GSE9g9rIgJOY`F)v3L%M6hXoyZ`|D!a%y z7W|<2ODWui8D_^7f+`0uZ6wuKkk{iVRl!MyETQZ}KI)|I2%neE3gM>1X~V?y>+%fL zy|@yhiRv|c{vr#OZ?uUSv^_%TKqQ7GCPNWRWF6Kht*@3YR@W907Vm->(qy_f{qIl4 zXwHlUbbEHtO9n}sRuWx%*<1jd&g>0!Y&v5045^9#Zg=rHuUCcK{l3A`;jxa4`Tgtk zCC^rfv{ZBSj89w@(dt zyq=nuh1!egdi@{Ev=Va~Bq9b7XU`pSsaCJ)(B7+fK-1n?4?1XJ<)9>@%ghL1%5Spd z<@*haXpy`QP|f9@O}_WF!Xv`9ho`P?;6VmW`sL>7Rj@1ymelB{R9Y^KP=HQb7 zTt-E_Ak0n%h%GLAqb^SDwI#-kF+sgs6bVU)IZK`!{Jo3|rRR3kHd0@f_&b|XPq4LV z)|qJc_dkaIz=szEj{krBsOo#h>HohO|DR)4Bv^KEw_LMjHc&L=*C!dE7r$2lT3~|< z85*ze8CFFG2kHCV`K-xI4=$>&<-5tS)tdO zZ36CsC1oz>?Fxkwq>|2_`ttKLi>apzCp91L+s8{QQ;*&CmS%L=kP$*?LNPEgV5A_s z^Pvt2O>^)YB_JJ!4^(L-)&uYbWH7W7uK1eZ~<=7?9M80On}{Juo+KEcV#6 z`;IpUMAd^Ia{NB0>2A?>J}Q5;L)K&Nnd`%!e5vz46l=?Ut~SsPjfL}sSoIQW_i6P#)bvAT`7l`a=?Xb_IawEv(bAX zJ(VwPiAG!r%qxB@=#&DL$Qy}Md(5l7X>yVrC^LGT^C;_&kFr#M&@PWI@(da7esJKL zb%qi5NK#W`!2_E2Dz9yT-hKQh;)eCTK4a^8^I_Pt-QE{u7oTB!gH>?56q<7}dxbp^ z{}JWC!yWa_D4l!$k5R(qcJs4H{RloAl$e@W4Vce_YuG|4!Q^#Vj4&zpP$C1&!)Ml=<=vnW3gN#J`Uj-J&^V+8?b)NjiNibFJW>%GXsd#5wBWA 
z#9b3E@&apy)E&}zZ1MVe!W=1jm6dqS%DKS9*Du+mVQa5WU>)1IOFp9 zlJB&@WkbLgXF`I1hNxBE*}`R;_yfyJxJnOVDeMWJ*3n-{V>Iq3ln4`zm-;+yR)ITQ zr+N**WxtH3_q25hK$oBrFp5K7RGJP~q`|WA4jP`lyq3ssB)me>n{Y;7$q{tsqT)|jUPB|@17hyDIZ<=5Ur72#rD9$%-|*Z$k0 zVjS*e@S5YNWzkdWY`Gv!fQtw*&aBVEJ>Gh8!Q>{3hO-44oQOvQr|mLVzv>Y}-;ZcD zTHV;AqQ0WOWQAkA2jOrp^^sM~c$oBw?jL^9EuibZq4qL7WGN(GL!0u(CkGR7p)C<8 zYN|9_x|i zLg12@+KUYDzn~ItM&rn9^1V~18ojXdsWPKee|c2wZTZZ~*)svUn;1y#Sxb<}Vu9emfnRZG|sA2)x}la-{fw$kE-3 zMGT&UAHGdcvAs>$wz5(70jrZZCBKzHvc>l~nN2g@DU4fwO3#BWBUzyR75XuC+hvx5 zXm~Y~l)VAl}9zU*7X&f|g9V0*TrT z%M9b3d)FgELO@AX-lf)`*vKmtYG4A$T& zLb6~yEy|3zP=DC1rZC^3|wDie-N?B54jpXUoZV~Sz&Qx)?=>a48Mwk6`i3rEZSOzbJz(T4CI zlu3-SrNz5K4}4XOo=NB$^tr{G_gtt~<5`7I1TL8<=vLf|L)9sSI%vgurtpy4m-&}E z+A#J6r$hO9!5xn(!s=fLX(nct!iQGz2#qF6k-@u0FytT zQ#Jl<^0^hAef-{&=^#X8b}|8=WpAS=^c8w+Qx20^`Rv?-3aEkq`iDA7 z`=*YN-3m(Vdk+&L%e*$d)hE`9ZidYEg;tBE(x-Qi{*OAMsfiWweP6SgTnOW>q_7au z?forS#$##y`|2}glODFeeZ-{-M=GqDwuea>kF%Bp3<)5JiH{fHvMSS<@@rR(> zbEgy}Nv;{LFA`(B7I<1~(Ad<2+7_hY|g@NdUK z?DaFQg-~&@U3+-05?bZIiO=eC^py9FcnOrpOF@ifk)s;n7;fB^GQ;81HNo6um>-Sy z&xHx2YT>v1MB|}i_AQ`cG0fZhf%Gb46NW?6qn;ne$dTzCWh|di)#j?&c)cue#B@(G zrOArsVmWEI#^lMC=BdAFQs~Sw#KzUk#_^oO-yzW&X#tQj_}i#QIsM-+{2i#HG#aHe zh5Z?eqd~0Bw~qe)|B&cR9rxNu+yyq!v;BuA#d>5+4>(R95P$H@dw}RH4IJ=IGdwHFF^euZ_GkuQdWW4tE9eIJjj`Rc z4sipwV>}Bwdj~d?3R(;u-;b*pBzT8-q8p9N(O9W#}(RuDR! 
zBx^gJPP^scKy-%FJpLko`2jRAi1i9_d(ZNgfFTwA8r^$nL3g@9wjLDg7-L^27BHj zv%_*cB{|(gqIl+UeQ{>{5s8ViXlcSKv?(jFLre!h$YCduGX|@OZ;T1^(P5})yA04B zLIEO|(9Z*VmeAPW*F0fh$SVzO@OMQz4!$nQ)up-?usVOdLZDz z&@~_=<8bf*%m+>J1m}8Qor+Aw)#!IL9ls zw&0Rh%h`y3vT&tSYy8Wbqd6ocUnY8#pjai5G9QHa;{U_eI|T< z*VAt=%7z4Gfob%njLIS9YBNw@*`v`e5DUmutL7o1mtE)OI6OGMWY)-4I7Y*LZEJOq zc3Qmw9Zw?i~A7y&- z;=edyR<`#V9cUG6h7WjxkTbZOh&XstN#3*jbjHVctDkbp|`E0?ADc?J2RE#Hal$j76vd4R4IUo?8;+r~J z{Wa0|Iqx21!MHgy6+OOGGr9R^%cT(FOmOEuT8UF!JVCMy>nv>|5p%%2sJ)=) za8}m3qv9U!6_-A=GekK*x0f!M1*6s6%2Bh44tO8^@S$mPR&nXz%D5-bPrsEEt5mSn z3w*)Lw6%02D7T!C%Lq%3sy=aOnCp?QUjDfG;mE@E-MI&q)H+%9gB)$BTr2$`M<)Ph z=`>8l|3!{UJlPbigB{Y@>~omaHr3dDvE4DS{~lA z@4T=&Se<{!QO9%*n))``gn`8GwZCuD7MvQ(*Lkhqf{Pk5LUfuYYW34ZCj}Dcb$ze9 z^Gv0Z@D}rGiOY>YV6l_|Nle;<%v45hi)@ViYVc%M+l=$xJ;O_|pi6Sih_1eP=;aeV zN=$E@D7Lx=OUs@hRrd?oi5?nTrAO7kcln3TS= zO?9M@x#Y*+Z3ao!&tRf)M6;op`6i3~gh_4?v<;!aFoFYZ8KPg;*I&UWsD)&-oI-YQ zXxVyEI^hJcMylouTn+5IyGhz8qBHqNOZm=h~l$-hMg(&(i5O{AktS+X$eY+tCO+fz(J{lSEgr~Fmr zLYYQIu`=UiL;eNR5L>d*>TNKRc@AyHE$f3uAlRbV~xu%QFi8=9TfWvuz zL(!rVV2JA=Dwb~$*GO>57*NHDt`PL31lz*0bZFw8aR3Re8{&R z0g5=#{HxIu=+bTFQ*8n=)(bX@2fRwQ$aFR%&})yXRE$+h>Cs(Df!3%sHlyS=eiLXx z#)5`g8u#gFh{YZDP}Y1}Cm8nj8;Uty1df$8b+h8d678sOP(U^|>@)Sg(Vi-j>(v!4 z7UY%R2og}CyExt`lL9)+R6&=h)+qi3q8t}y(Y9(aMgTz!L6Bz9AOx9`P^5*ZPq;;a zFh)(lo#0&^!eEx8^EXU9oXXHq;`gXgt09rb892p7m8`7}Lm;WI;|{ZySu!Y_0)I1) z{Y6OS*4Ub?034#hsl(9oK#@vP@dz?d?v_YtCw-e9anahLo$y3c2~boDop6deco)#g zaFOCp*FexIRDmylAnx`jXq0a-~lx!Xb`-2?$ zqEU>B+n>EC9+ju8wgn$q+Yk)ZQ>;rx$rr3gHZpL*p)haypMj_DGI)hx3GBIV0TfBm z_p_}~DjYpTaFoCVqxxvK-DAF|RoD?Fmawrv|AO3CikZeqs+FK9Q+;rDH0)}Rk4WDh zBskMC;t~g&pA3<5lfsB&AT>Yaf~at;uMgcdH$4n$VS;S497v*T#_BHGuQNI*QcadqBsb z^-?W=nyH0_LD}QDD#EzSN@Sg)A=lRPO?G2_&yr6^-V4i8c2_rTnMsMf$64^Ou$-C1 zQqNneH-EQuY_RDSGML=7$dra9JNKC#-D~MlmdvR92>>{>SSV=Vv%41N(@&;-JxI9} zy|AS~cUMoJb=K({b|#aPh4@P;jeH&;RB%0rary#U3T)mq0ht3E`O{1m?Hy(d)TTB9 zF8CBIuzl~09;Y2y_0Gm%W=7{R)tES;ljK?ZWgrLk1HG?$Yj)qxMma!nT2=>ddAUb; 
z?hWD2oT&Kvp(Cp7n&v;Pb>i}-bt*WbYgroI>chPc&i1B(+j7e~{b)^Fz^_|rplrl% zzZLo6Zk-k8Q+l=LVM%;IpHuzuVwwM92=jp$JN4nMPwr%Zht_&zlX_&cXvj?tw#FC47WsNRxPrRzeoR zn+N1pigp}4XvVD*tKbR9H$RXh6|?nMX-y@o38^}c@?` z%D;|-RV;f{>lS9Y)AjHNI!Y{Jwp0MDx1nH4`v)DBBufgCmMUfje|Y*+HY&Ly$O5Jf zy5q6yNcbluL83_h&GpZAs_c@D9%=oYDIg4%WDpqte1_8V0Kn|gd?*kEERp!Cpb#&ojBPj&Kr4gro?FfglR=o zeg#-HVOhRwCC8Od1m>*g>tvc%S2jxjgG=N=$e)a^6BdJ$#nt%;>4(&XxuA(k(1g2Q zNc>yIuaF_z?EOHv@&$wt01Phf1j96wOOJweEdG+oVJQ2HOyG)H_ z04NZimEmB52z_zb>{61REfXGz{4?oQ%a%1$qqyW0mDI#5P5c@2(YjIW;K!7pP~vtN zv)gi2iB6d+QtR{o{@a1r>O=DaH&w{pJMTn;9s0dc+JX6^MdmJn-5WXxDC>zQY@)Ct zH-8#)J$R#Mp{=oBoZb^KKz{2D_hR2MM;Nj4Yc}C@V>_Q`39YX{#Eicxl5ip7ePo3o zDng5F`0BGi@{@3N>ieWVv!1YA4OR~Ux)%xCB8o8u0~t%c1uNEv2)B5B6*#1_%*G|$ zvmF+AH9&-WkxHML6k8*}m$xtxe*CBy$sL-wuyxg2H*tJ$X{*<>cu}`H%bA}r%v{Ni zB?1xE3xEIw1`_x)%nfg(*HvAFO$YDw8m1gk+BZO+5h9a&98kbWlUIu+mkc@^;t@im z(yOZ|O{B47$#xzrFWNP~?m@Tc7(QO^5d% zatP%48~VAW$MZhc_DiSpE3mcg^M@j0_(ihtLlFVB?QDPPbpNA>-hU{fZrjR-zHIxH zMKw@?5pvWgVpS&``qfl9LSOH~n{yzWqPsD`ZYknwO9hp`hZ3WqAGDH~M;8`nxsEeTX@8B}e}okpKqk}@l8V7A?qA9mh_L8bRn%gIOt ze20u2wSDbTe*@2`~fI$#Nd+k9h!YnJUT(>JpcoEz@IiFBfM1Ff>`VxWOFa0tawuz$Yb%Iq+R&KnJY+m3 zDdKNV6j3!pwM~A(B*1y9kdxWJ`Ll0`jfpK*5M`Cr_Y|Q85%_9}uBEO5B4HEW2}3gJ zxKSyfjUB}3k7Ci*=_c_K(8!h%3ay3+MMA7Xp?Q6Xo=9eRTCLa`pXdtluJUrXj1py> z;9y5S4$TMQdJiRN(~iT4p;AaWkxV(KWaD`he^L9pkGR%hr;HLhLL3#`m4>1SMo-Mj z4Z9TUfDCIMa9}vrfgCV_t2K(Bm%+LPXh1ZGhjW`Y3(5S6w(|=%F`veCVB(soXURPu za1`?z>Jodvk!7CP9%VhXJ11KW!9&qKdg?ANLmwRv9swZg!g>bo8*$Q>vN6%^YShwC z=BnSKr2PB*NQSv)L$W6W?u9lu!|JA!j8=xwQt7IipP`grK?u7dd{eQx{J^0;aFRC( zwZ9&^kV%{w(<=uX>@U9ck$f5>@2%VJ2!B*M2e9SdT5?QLkHh-9_-8bc?%)^4+dqKF zpL(tfgP^cA)?exeAS!4XRs0V?gm*+4+Kf9MD1G;_2L7Ni8BL=j_MD-$T{Azbhg z@2I1NB^unyvYER5PLbPzCgL3D#hC+T=fC*S&y;6o;+fD7J_OuTv=d3R^Z;&cM}?1i zN;gQfgntK7F=xdKG4qJdSI~}MSeQtN=Y0ntrZ_b`GWH*Q2%V|nlYrcTCf z*u5;Xr&*%cbs_VD&Z!cC5oyQ7HEjP(;^`ouYP^)|jM)b(*vr&qOW+Hk_dz%?9cAcK zkqzRKUct0OM-&eakmp-1-&{^ECf?#Kl;n=D=Rfq21l51(p;6D5-_4K5s!!K#Qru>L zA#2B&bWB5C1-6x4x6in5-RlyWS5@)zVKDtsa!9MrC9I+}A 
z%$d-$w!tenjfS?Y#@Y}=H*t4uO}eiQglHaB7h7)BcHU{EvWpn;VkP|t%BX$QPmt(# z8kF{PP5yRsmLDvFW;CM;bd=~$3m=l0YD&%cHJIn#BV0wicL?#B0C_<^-r6PpjS0Jo z7c+D#!+~}Sxsys*C+z#v^E|0{W4c@9RjY%TL#oiBX|7YVKj-%g2H#@H@&r4$i|-k} zT$A!IoO?q1O|OIje+j!E@>O9k-M+e_jCz;ABL(>9>GewE*;I-~-__pqSaDAh%l$K_ zp8i3y)ru9S=CvUB8sjU3D3!4Ul6r-WbrTLR0?SgSvbv%(;n)AMLscd1-AfK@7&CI7 zl2;1z<%^nsY$kmrk(iR19xR~UeI+@;=lH`q+I|TW`;du){;)$e1}p}j7i4^P?E*g1 zXkCe7VW51P(1Wq3E@FM_bWi+~T-3+>H<0cT_itjinYfJ6&}+Xc3^NXFH+iX)Zd~pD zu|tK_!3zK0oc{MEs{7~BQ0eD=%2;08_Q&Yr2MpkQv9B`f#n;=bN_>%;SE2s>XZ|oc zKoSFmi9Yh}+O$Mzf%A2!`}rlybA|(?V^lQ75DVNp@O6WL9b;%lUFxh$C8|m8#=hln zw`j4~`rV6Hb4V>GL$^P{(3-7Ad?gaOfS59Vx7z%0KIl@V347_>2 z0tSL$eoFyixO$R@&%>i&?UbIJa$#DDNT_-m5wxMjU&-&h7K=CZSAl*_(kOSt5Z)5F zBAY?IQSr*$AA%?$4x-RCyf)DodS4W~fIUqHbWg{{OFY)xW7FGIwgtW=O^@yCsZqV* zorKu(x<>oCpk^r?c+hKsKH51U@Ou(8lY7R=WRw|?oS{E&nU910Brb&tG`HM<=B7Ogo6s=$}sitDeKpm+c?e!&^bycJIIBmzp;(`7Jo) zivmeP8k!P?Sm5lIX2y$PHDOE?<<0f^tX(nBX;P@0tLaEXgHCKpel$(#hI%G)lx&k8 zf4KIqifpq)$@t&NeG-FyJ*uc1*O5_(oL7ItJn0E!Ak6vfcKtwbka+jfLbm;oZC8;h zk(1GuY+r->aqVUtg<#&(IOCoKg^uD!gZv}4`z6BWf{Bup+f{!nVGZA9kQ9Z9@r3dh z=D|}w58DYt1Yh}qk=*syZNSr!*cUkD|Fz)I*DG|MvU;~;R}iP~VHpXoWQkMj9+}?02`l4CeEUEOnnO-5&MUn#Mc2U-SADF0no);z(v3<9)7%hzVFCO53cu zDt!PacRSm5@8eY49_h<)FAAqMGGr6KfxhQQAb<c& zu5tHg&dsIcT5pcA3($-^m1LQXEnKExWF}c|hCndZ8@AJyHPMcZ

q z0*%OiPfOa`OLBMP@uug@B?l@O8Lb2NM)@Rc3ZIF9Ka#F$-HBToJUEu zX2sFGFLI1h_6FZlw%Vq=OTB&fWVSV_!@R-#ivA<_kMZicj?C5a*O~cgqrT0ziiD+g zL|v#?7<+9Zq4@kRz}oaZ!CsUY zc!*&_0^16pJD~aoRUJ*H%z<72PE>ecjhC^{l5Ff9ez@I8A%zru6vHh6WCFw$F(1o} zcbHJ_JEZ6g(z`q>u#~4^SczWSVwexqYg?-!q3k@-vFY~!N10lP>;dk#I9~lni}v-< z;wdyJ1yQrHrmZ{V7ZbXXv3go(ob?u+pulpv3=|deYzWd#LDkZ;kKia_%=3J++Ctcx z(q=1LbMlyV6<_~5DQ~z5bZ7t<+eULxSD9Y|~$nI!YT7LcnfWn|j6a<2EaaUTi zT4W@If8*kWPsjCCXBLiCs_99%v z>tPG8E*7uw-`m^E-*I=|x1W8qaVL^P-UNA_pg7PxfezO^Q1UV53Ux|Z3Usp-l**X? z@H61&gPLG8ii?Jb2H&hn!G-o2`QxAo0WhB8(4Nc~-wOEp0z`XTIagG`VJMiO$_KoPcTcu4Q&d-j7=rjUW;9RS-Hp*e zGUc!+W-9k?F^^)gc#>JJc%H;N=JB{f)!iRt4zxfB1~DgqkuZSJZ$N;+bY&T!2IQbj zCB?k2hBX3iBFIZW`QYF4b6?{$h8kVb+a(J!ky7A?&?Jb=Mjt>O5kG)N`~6Z6LCeiX zlR$?K8N}F3&$wq<+$X2vKGp_bmanCm_jhK@}5PG4+Y z?jG|{`qXW2SX)Jihbn}OnlEFXVEzM@KU6?viv>m{jO;5Gx(X5K-&=WoqM^jWN-QWT zAkXthd}#SMA$0u%mesWF#m5&pS1=<}FH(c_AA701xy(XfKj~IE8W1}-06Beh`Zv(p!*Zu^3TbZEA{6C6^FR14@nb1Q^~upo zt!rxPvW@9x*2bo$wzj6n7Y7Fi7k68BM@L5})Z-}FnUpb!e-d%DzjU#A0Ofw%qxi*p zIwqaVMGq??QlZ`wJ7v5JYp?y8$8Q zC0I?F@E>iaIdORot*3WN1TRJ3j8V^TR#Y<!yzxB3z2u0;_=F49D_=3i{0;?cOvdggRcJIEwoX(0-J$q#$WA?($%>b2nNR zY)JRxME;GylQiJDs(#O0$-o*PPGz`|0Q~bOy}P#3c_?K5#d|wS;ur3n(EK)kJabTI zamV@Wzz|g|j`J63sV_?OWQAAJd#i6;tzh;{+v7IR!ESSluXz$^Q_Hi<&SuJNIHzb* zV@LgpdyaO!VRV=4#P5FfzB<~S421nRV z2g#-@T|*HG8=Cl5wbY{(PafVL`!FpbYLxDb?c>Bd)HQW@w=}Qsn(q_d>OW4Wo_xx- zcBomoZ7>4k3jx%LME9qGFQ!M@oA4t=qgIFh2@h|K<8&^BzaGfaiEGAgGTzv;Je(z? 
zu?<_II32FvNYGlQ*p(ok4$HB)KOcXgQc3;zsBy$JK2t#p5anO zotumQzzQG-WX=Ti+w`zZJGO8;Pp{RDleqAfbTv5Bm#UA?B<+?utHKLrnq-G*Wpp?c zxjG(ZcfrEFc2IGb;C5t~i*TZJbInzm_Y-gMEDZM1Awl<$#Pvx~`*@)3ezmvc!}Ty2 z=NuuM7Vg!se#V+s%)BGc^s2H z-m(R?O*KtHQA!y6!2dlD^L2%n+#f5#mb<9l&JzYVcF~%M{A+Sc(dVy`#0(nU3UrsP zr%L+>>NUPi#T40y2JR#a69%$R;7nEK;XwNl$DAn5hEDa}K_g=Se)fRFAT(Wwi1f$b zhhp#L+^dVnoC~ME$g)H^A8THZo`$)<*@B^gA2qS0C@;%$1?)^he84zGf ziSvWbr}Y#s3Kj7`cJ!wYeB!dN?q5&XANnWCA|~tU<^G-Nf~&OzRTa%TX~iu%4hLGF z#V4Zp=XqB`(!-Q1^D3mr!j(D^9>1ArR_MC7GuV2Bk2p_oj2a|0k7pw}$47I1&R?DE zh3xc|$Efd%_~(;USACTiAwiGmm;<%`m#f#E9;K4 zt;g+B3TI$hTs}Gt%mQ1r3c=T6;09|p{D}73~Zf) zU3u23KnWU;Z8<#Yer`J6@{&JuFBqM~ zxAT)`;r|9- z;NHL{Qgh_#Yju&am#|e}vnbtqCMek{mlQX0j@QaV-bj3yB;hvEW>8HFr%1grk`iza zk_{Xm4eafiqlH}}oX?1K9zWF=lacBh(A#8Zh*dM%zE)V4piuO{5yNo)-Af-KM6ULY z^+qf>nCWFHU$|j$v&rUW*(Gpsj6o78B=WF1Awnp02AH9dVh1U8$Sdg3{X6~S7htS# z&~tnHY3u1-etG?ce+w<(8h1teqJjqE>!Tm62G*v7)WeBW;xokmDNP|oZGXtl;d7Cy}9iax_pC->YFAX=g5;s+oVJ*YkN9VCU_FMHa7094w#TyLKnUx^l zuvk{(^|RVU3$6HT0L0bG6znH=Xp5lu85U_Gp>c;A(_#x@XBm$kd*j`V(a~BeUaA-8 zjJOr4^%Sd&%L6BsZJ?=Dv!JM%2Dz8d?jD~9?xw+1i$Pw_R_Rt7;kyk<`n6U)S$sH+ zMV|iz`(GJ-e!l#)@ByL~<{bXJHNbcD!5x@HAns>t0D#IydlPL;gSRB1h)Fy>gUaVS zY>JKbvvDZ6!!^(IW};&QT<4)+}LiqPy$5MSDz|ww~yn;fPGiB9?q8E+`fdios zg_x57rA-!Buh~yO2OHV-*JMj=Bu$hyw5zkq*acgZB$K#e?S`FjY2ecMBi>c64p(RqPM&{ek|*A)JHPv;y?AQ573MV8Fg88 z4P2%|WCd$(x+F9Up7pG8OSq_us)Z>AKf1Pu4`kg2;IR;S<>wcGtaNYMzc~Bf=Wgg{ZFjWVIfz+S23T@; z-<>rQ%H-Y%720FWZ5t{S-d$2aq5^W0HSbF4m57>y&E-{ki$R?xWUfl_0fc7tAhmD9 z`jr`$RLt+FGn&a#^An3|PLB1=#nR(q<&3ts4Tj}^38d{`Xdn@vb)dY)9{X!8I@su8 zDc-nei?vs43n4-pMD2qZ&>74|R%5~d2M`j$nS{lU%2xMefzHE-N;5?ouX%+9^BC!0 zsJ~3AzJ+eNC4H-bq(Tw`4$u$g0Q2M`wi4S^=5_#p+w3t$ZU1r!tn|ci<-`%`bJzUJO!PBdi2mLM+uz+HpE-uhUcZB;K~Wj$z^B3-B=eQbne z7_b=|c5ANfSys8I67og2ONrO;Q1**Kmxp!e_1CKC@CqSD8(v&|fSk5b?Ekd-Uj_3( z0RZa3e_;9~b2(&FjFCXbl#-I<^1=nq%jSKK`mrmM#9$Hn2{M1X5QH$ddJ@S5EEl1A z0zGFMIjHeb!gZN*e{|(Ylv{(v7{o#aXl6e+4H5o6<}-x*-IFVS=C_ ziKsY=Bw`|sag!nrdY!_4K;#j1`}onzNu%k!;L0fq#!>~*by~)HJqON_IvBGeb`Q*C 
zQsxZB5Y&S!JRE)kHJE;cuwD(2VNwH}lu3NNJcYoZzFjc0XPAK?2qgQ8D5<$W^5FFo{k30WpHNjzB-?t5hzxDQi1uAi}~CZuZSI!K?=Whs{hFc@PP4 z2zGmjP=*+Ne)cypc-VxY(f30_M9+8IScG+$LEQ0?g(52T#ui=RU<_eRQkty$0dubJ zg153HJKa&+t3^Z(u@ij?W#vESAld6tN^1&?@9F3R>c*S1bWGqd9DmmwQFMhrS+P$K zYuFJakArM|1r#Bi!MDvo=C3TMz%9x}%@L<$4b8d0=gK9|^Z8|rW{!Gtk2*HsQ`kYL9yK!DGS`205W-wVM z0Y8cWPwzFeMC?-^>+Do^XeTnf+=+;pt10rjTdt5l$DlSHC#FK7iY)-cuK~4362He( zcc}K9uD`iwA4Yn()|icF?2F&?4gC%MHJ?+}tQq=Sm)mpR0Q!#x%^6qQv1z1}Y(ee- z1+NAF*~85j$!IcKtt-P~3Z9owdZi)oLK|sE2V=|g`+UcA`7nxoARtFosO;(AH=`2} z(9c>mr4~6iJKk1Rws&$3FVWbnqTPMTyaDK-cjP{c?46vTwP-SlLP*PI5N1uF%yfcO z=jw^ytyV>mip?f}G#ZFB8;Cbj#Bw$B%{B8mS{Sc3%U|wuPuK6!o6t$LwJf`phP9|# zbhh<+?wpU3qbIo7yw{t4JzE&}E!jeQTK2U2>}Fy+Yuezg`Ha-P+YI#SZe+#QxN={f zTT8_=XVRlHwUoAhd>y8ke!AxJ;kdnSH1zceY=2+&nBnt0KHYVGa>LK0$GyVbb$XR- zRuZgujhd`D39xJh_qAt{n-?#5vjVBpb>&uPBC^KFRafn*8|!ANvoVRMVK6>U5nan) z-_OfTuVO0GU=0P%Q&G2!hN)V45Ccvcg@T%z}(`c6}R+$t=CF0t?$Myn!% zuCAhc^{Owoa+2Y`&Zajl@R8aFvy%{-tMrc9aBnSk=`T!gQ-<4VY%kwccjxQw%iJ7z z+{ucEo+Qkg@x~7lO_7>V=V>cG*I1;hH$G!%+RwJ_h&UvzuC99hyHD1%i6zNA#u*bd zJ{ke-e3sO*I<3rc9Tu<3n*vU>~zV`>-B!qJ$-mK-h zODf+v>~-w zn~iRyAQTzTgw`?WGq8{<{PLcnswj`wy>J|6I-Xp-C2Wd6OcSlLt$kx+03vyBs#W)1 z{Bw~OP%0$QD^eD|`t<10^v8eW>oN9jFBYMunh}VX_Q8)`bqk3da@M;hh<%dS z>_jtQlmS~X;W`OLRI_{5(T)v>2;_cqvQ>_1_G~|1J^d4n*ZGPEC2x0xXE+7?0Kt%5 zAP=~Q4^}^k6Vs;@5Ke1brojVYJWRo<{%{k1gx{+mBISo-ZS1+@Ym)JCs^QPVwQTCF z#Z$>P@98JLueByq!%|w1JbGr>1-Ef(yX#-8%F1QPLM8SY=|T3Td`NQVVaw&OUjuVVNP)Shsq7X~Qy=id#d^)qM3PQ=xvQ!(tD=daLxCETO`H2PoVY=6FN{^xmV?cWTlcWjbx?Hf0k z6$}YXG)fAMHWQ_MiML}P@E;K3ede*96*)$2QLJ0PYloN_o|TPL&e?bq7;sJJfZH+? 
zT7=3n_oZ6iHWNs6O05vuo2LSY=XWCKsxs?!_Fg+t9Au;bhAb=s>$PGsR_qiI2zkMp2yNlg{!p#uVnXLoza=2}J`b%mIg z526bBOUlc@LR87WR{6<4U{@Qw{oMr*A_W|%`8BS9(4aj9iYnrc1&5dBX50^|tfDm1EHAYGcXz8sQ|A-l zXG&ezW!t~OQqKq5H3mBH3%nOWgAOSl*3~HW6F8 z6S%J0Cr{u=Ry3}Ef%U)P*7lPqe*b@o;{S%!e-g#;=D$QCA+BJ}svq2FOX~(1@Na9MiK#SVrtDuiPlelew=zf>_wn9u`WV8n<^Y;I4GOASOlGkmwY+i# z-V$VWd6nGc)oeOc#1dU)USy!;rPQKhl@U^HQjT2MrQ++6ty!G@{w=~-;`D!8aHtpx zawOxQy(HQ1HAcbEFhO(CIp&Q|u`=3ygtMaf*>?2AI|)ML@kQyQ6G(2xh9&wBCaMJN z1MC;Xd1oqQ^z;L0qQZl1y^2%MhS6_j`eW}1ZWLjJq;P)4gV957O8Fw=zuR1uV01~4 z1pe_%bAH2%2zVWTfP)`#bf)OE&d~n&3@JMOt9IFA;M(CUgNKjzopU}qqM}X7?{A4! zQa2@e^VO`UnnHew#u#0iE{W079vATYCl7*nnWcTKMMJ0$I`C$QYOQ$9W4bwN>sGcjhbm-h)fdrkzfh! z5riq>#t;hV!xJIF7d09UmJFe*odqn>2SHK)6r(kYm78g9{@$|QI#Z~l3ezxeatTnY z;K_FU>U{nFbUp5V-+3>iiqwb@*Q=vYehK&^h%V%Zv+u&8&bH8Gre7&D4R*=`!oEIG z7c{b8ix?&tV~$D!W3F6_)sM;!M`@1sRL3DQ@bpaKrA0U=5>9>AlU&d9jmpe7tKUZQ&k7Ybr{)~1^ZY_g6> z=$6+|^Rt}<@(lWj<<+CFY{=#66Mvf$-*Ww*OB(QX?1=WcK2}g{~ zp)_);pCgFyBct9aLt8CQ#F1iFs7W|W(paaw^YwuvcBnJszC0oGParn z`bF<^dKn3JF6zi@r-^_*f$Dw}3eeI@i{pA0e_1lqw{C&%tr{olscC5!Ev6Hk~f% z=k5eZaub<2(29hnA!OXdNQ+lLTe$_DqCCoEQ^OmXkcc!?K#vFjV9+IFh-ek(`tX3h z9e*in-sjX%5k&>?hxYU_?TU8Ndox8CY1N+jql6x?iZc=szyuD|uizEnCbU-Rff~R~ z55O!P3SiIz0Kz$WDP0U2?z_@H94qhsRBok*I1M1xCIK73BiJnzzP7ot$CY8HxSo$3 zNQ$G?n-Ab6XU#h8A)8F~sTRB9k_P5e{{upJuFr?wSd=Hr!LwxL{5_oTk zzV&*>-3Xbl&aHrxPn|zv{{U(to-4 zv<)2D0-GE)X~Q)tiXO7M_?l6;*G;b|J7i&?Cg{v|2zpXdv56PmzHm-$JZH;xdXawc z7X7)@hxgpV%VmkI!f~$`NFXrpnw59zZGMpIx}FYO4A}M={uqJAPDz@EHRIOQ)?Q$) zzdnqhp6Xy5^3dTb(_T$bUao)MPg2HI(wi0A%rl@m9Oz$ue$i2|kACSZq}z`3O8XUekfet~LqBJ+3;) zOHYLcKEz5Jb>qJ7)nH`pVp6<~jOW{7A*Fcj?RqsF3}5}g^|j>si5^P2)nO(j?R$^1 zd?{%3U5{&;>XfG1g{#$}Kjig%65cW#1ZQ)#z({T%AbkI#pg^rgP?nTU0uO#LQ|D5_ zsG4mlD^Lpad~2D}K<21fHQ6#%mDRk|!qW5}24yy8OX@V7%mwSyanu}wo2kv~;;>YU z{FrzRm&IcC-q!-=W!E&)XxVp3rl|D9;;di$@;6S_B55rlq&WUIeq^3a$p&ZJ@#IYr zxS*S_yYyf-=GIzitC@9jJ_~<#ZvVOKnl9GZ7G5K_Lr%N3y~#^>E=9bGSx>}K+kAKN zs5ZrjDqyWG;GRYis3A0J>;V=B+jp7X>Ur033HNBe^UYsG_NcPG!Rlgm*gcdrgGxp7 
z41d|L*zItBI0J#T(CWVQD>cB{XwZ3Qc>PXJ@tpr#IprYwdD$3p2ye)8!7cNds@@?= z4qdb;=Q)Sc3z47@0h4eb82d=w&vB+?YP);E zGLe~UwCujK>zhq+mZ#+3Ir6`!%izzoz~bl3@%~xE{8K;tx6QB0sCR$qLY*E<(zu2y zEq?kW+6PU6qG-r^)}y;%qU;QxOgD2KZF1B_c}mC{uYamhMy2+VfdLrsskKU@>yd1h z9#kqM%1?eMEOqbDhLQZI<0~9ftMO=ZP*JyGt@4$=R^!Pl$RH=WEYs=NQK3;`r-SMO z^MiAf;AHaUR%~0cg~NM|nSuo9Fj_rfJ%K2w=wK7%UAHzMxQgRkFAd@DCy{79jkgfZ zZ{&%s)!6}kC|tI1!e}A*5c7*tfxR_}1MgNOBaR|z;#KY%&TfBPBfSfZ7f!mRfX|mv zT?se+vv-{+eLq}Di9y~dFE06=(Se-obp}y7q`BU6CaE`-OOf79<*-4}tz$Ua=S-`6 zJBO@}nvK}!=Xdb4*16OB)XN>m%eB?<7yy)@_7cmDMu6`Qrd3pt>IC6-7X^B8O%Xx< zvI5NxxF5%hz4=%O%ozQyB^HepD}^xZd(jmaFXtDkk9o5_TY>uN4>rF=58ff1(t<9maB|NCCushVrd{0!CRe`@`&hOR$f9{$tg|G#B= z|0;=2Fe^vOZDynvvQt*;3?z{=Y*4y9q^Kyz_jeY^wUVvW|8JdMQwxk!5#~UjMz)@p z6$WTP6Ksv>lqEp8&rZcV_OZ|aBv?m%fu%F?qa0aK%6IKWqK^nTB zZF=XzMaG|*zJ;}0qxKg71DaM?vyDLDZ*Y*xCmX9|h>-N{av+^DErPWg{b{<17&+)z z%RK(9F2o{8V6)I#*zNMW!Y%pIf;-)DgW;p;K^2El0fOTpXKy||D@pAIn+(yBTDIw{ z$&{9lNvf!l#$%Qg>nfHJCFGFEWig#5RwQvY#g-jDUxW)F)q8k#Inl@J_9UK~ zTt)0oYS6x{>N9xL=n(thp9XJNz8tXLW#c(;D6S92S{ED=OUlNXPstXzkbP{{waUl0 zj+g0RV=JeP1^mq^K?nMnW6;m42>4-Oj=V+~5gMciM2N=E*l{|kuG8}JrYGKJ0a*^< zz)gFOI==wvfo96M-N9+A{l~b!XyxkzmDX98dKBWKWyan*v7vY|EcF!1K4HOQKe$(R zF29~l3WXHQUn#>{q)rEOIbrWaOcT&5J);zEA_Alx{3NEY=zq|m2D8O7JXhZoqy2aq zSq4}?0>`VAKhy3pVXlpoBY#yApf^b^M_WuYg=LgT&F%c z+qP?ye1Oe|1}U%!*4lhVy#N=)Z4EpHXKHsUCeKgFl%q00go!)NO*KUV+6 zRlVCb3VD*O5oWzoe;P_|1G%tuz@ib5@{9H^I#O zC{yQJtg5>rV}5XCE2Kd!Jq+hS_`@)giVnI$LhB~U6b&dTzn9U-wIl)Dd zqA1Xb&Kg1t3<{x$QowMpxKi@A+R_5OA`V+qzqViT=Ko)3KS0%^Pu~aOr?bC80~3fc z0O1Mu@WC7;abf|T1;TEZ{?mT2ikB+DB*f+HLIIf<#;B)sjc8=Q*9ZNlc8)K!fB7e!B&fwhBK?7|&cK5o?ukxbr=XG$Hj39GmJ9yj= zJ(CApEQ)SD4vY@5+tB6&h4u5ieE9%~*Go41)9HWJQzQleIAjC_xb)U^#Fj|e?jU-= zuYaWHLY4$@@5v*LKBuY^L#G|HiQf3DhvHA9P7DxS1te^b$7;}TJ>upQymRpwxPp8R zJJS88D*io84h;Yr0HD?iwV0p~$FHn%G?tjY<9erbH0C;;uS%@^@iNEtqQudCHtd`< zSDcqAgD>g7Jd-7t|)$@=!?&zaipD@XUdzm3r-7 zAMCoIGTv!=KAffqGARMC%52;|7+O|YYsl;6>^GHqFZXXzbvN_UxtaRbg$kU~{dD@^ 
z0Rp?vh#6dkzkvCu6nKn;za7ZZWCtucVhS&~uatebH){$bb!s$gmiX%<4}`R&8}2pm zOz{&$vsSRoD^S@yB-YD}+;uK>oI){Q<}lEWXhB7sBpJkHO=?Ehh(SsJ5|MAMTF1jd_3OA^Y21a`92OhZ{d8=v~Y0nyWI(O zyLnZ>ueV`yxx=b8%ojUfE06KIzg|I$HoiLs@Oi#*jbqL`6GCklC12l$d~RM4PpVWY zs?1jZY*>>5#+jNv?%I4<^pDUf&E^q?^w>B-Jrg7rA&hfTDA)O@Q+;2x@!n|- ztLMJrdB0_^JQ<0NY50?vJETr}!aQ#>&rwr2EYfq&YlFt&1SYK{cD>)K8LlxK6^sAb z?GUFktiD;cpOQ!`_w$n8_~Gkkp~>kC~>^hX%(mD>|&U>7nX z)R@OpLHzchWd@%oSmuTlwW(~kO-9Goh~eZ;=%V<>|}^}KP-cBpW2-JU7Dx?$9CgQ-GwP9y^@wnZRpJ@Gge!2`-7V7%qR^kw-s z=XLHz>0Rd5t9ex`llY>fN7c(KuNlwzAkP6z0#6leHT*pDerG(XcE!T+2s(pBsXKgL z9w2ORfeBL`o5P}{JiLvJNpN>mdS}G!tw=)QKq0Xc+K4nr?##YSmcWWx!I3h#g%G(_ zQ+3Yr@C1&yJ!WoqjKkVdn;g)2`j)8Ro0aw^5TXg!TvHc@9YhvI{;5Gn$6Ba`ND0b-Y=VkY;WI{T7&#gb$2Lm0$zzB7($OiM*%O_fnZ)U>({(8# zm*;eSdX}TcXzW#_X1X1}hY#53Y)SK0{n}GJnFXwiJr}eFEztUrO2qqD=NGHFAf4oN z0=p-x(1EUqwI4OLGoTt8)7{>BYRq#byxClk%`z7EXVFu|{q}P+nf7}h@QG(EEdY)c z`Sd=92!25kYId{1B8XbZa)UOlCHhO4-sbwmXY=Lv)I+`DUpBEg05E`u6Vap*UrL?RU4ZTP?#kf>Z@pnP3o&7bn~92AkXa1owI!45okDhCjYu6OKs%}m41!qkjkN# zI!?h-d+N)q4SP0#@lICxiPu=&A8>s>!Z9fa6)4BQeO}Ux&*%|?nFXuYW}Zt1I%G?T zE>^c^U8v^&3AZIk+T&i-jC#=?I);MLfggS;YRb@^)6F4q4^$0JKdV1}83vHeIS~7D z*%VKXvYCkq#MA|@6EOu7EWE;(^EA79K2_aMYuEQj4*+Gf(!3A-3V>mH-yij#S(4AX zelp6WH)xtCMXNn~GEU?lE2l~C`hKh6?PpIo=SC>3p429Q0eZv*idyICvnOCfRLGO( zm@%e898!-H?R(t8Mu$u}IwZ;wR0}3uB^)|J4V%L?WL?A5oru+pn^i8P5TyZY?1_7V zSJx6hz}42_YTvdFvZiH}fU#lmYV#O2)jwl z+s%=+k6%GAn+Ihj9^PG4JMd0M!k>WfCsz0q5dH*&Ke57309-r$ki@qTF73vh9J)j! 
zJ{csKz_?=IpGWN&UBbYR&{>pZtt-?VOgaFEQZTEl$cs9E17AUp`{=U3rb4L+%uCK; ztfSL@&wjaxnvln3n)NQw-V^&LU_xs9^0@l~)G54m6ORIK9{hJevl8ucKa;=I^6%w0nDgj(&pnOab^V z`t8#WfX*Jes8Y#S2EaS}#-dnQBEyofDC%I_4ZI9lZ$3$O*Lsl1iL=&X=*_ar2rdXcf2xWvwBbjDpI8KLJacv0o zLc%rk9V4)Cna_zQg|DPB*yRpi(MC@%a+rEZOQEH$^c&CI6FYrO<_Amtt2q|dYl`3K zhbtK4H}WaTJG?Gvotv+1j*p2x^TzUsMH!W(1Xd9*bg36O++c^Y&`2Cg76#m<#>mhU zSTRQl>Mo8|FJh>Bj9z5P*%{b<2R$?wrluy}U4+UT?Fu7;GF~|^BtIVr-tCD}*7i{x z^`KVv1w+3BcI|7Megdmrr_ifzpwVdx)^h0>zL}~641i%AKOeq|8r^OT zZWpo*ljOD2t4gO)=`?Gsifd16v$C3Hf=}%iYca#;XzgwT{2V5V-O;rr%<>Y2Z`>f? zjM&A9x+!+M8=sdD`&vqlZ_FENQ&heX1FoX%nbU}YQ19cb%QS1|&tE2mD`6z;_4Rej zJLldCGYvt6)LaT&g1jB>IWA$;XPBr!Q0xnzx?bpU>;yzoPlsXP2xoEf{`9Y({`aT< zfj<5B&%b>BFQ5MT^Ityyf7^DmI#KMzv<8NS+7weckI z=g*)1`sp8wB#`I7z%^Nu^X!04m%O_s1#6zBC!J0NBe8W7w^)>C6Q)GeZzky+zg=;E z!iad2qOt0IF&WC61x2p4$znD!$qqVzsBi;}di>O$1n zUGFicD|;r(mgkEpqK^<46#TbV!M0;1=N+&*pM9*hyX+dZ3Yl(F$Dpe%#CU}2C4E0@ zl6@_HsA;lHxwgW~6sC!0RFxqB%7qtrEWlI-f-~RMLJ-1$1{}!@Cxi+u3=da*8{B{i zsbEI2@~?x(9=R{?gRez?a5H^#>sQA6<0>zI$FeDj`?AhOos+Nr_V-%yS=U!sze2%i zg?}XwOL*ES*X1szjvrv9oj`fan80Bg?gW8nR~bvOIAVd3_zReX^VisoCE8t7dL92U zieAJgFtZ4;RDNYG?{8N(1i#{2Q<%x}yL>|{1I8`AFA5b?GAIcWU>V9fC13Mh=q=&b z`R;RO=0(e{#kI1WIk&iAYq2747klf5@jDwj8 zjY~bxrGcxJC*%?)<0X4~~Ecx`@q$O2L|({_|}YJZHg)b90K z!cV^$dYI7C=KbiQ%LIzrZq=XEY;J8@QfC!a^V`9gkDSzCG)KdacCeF;BH;9Y(FzOaJJ8Ug+n zY+T>V_gm6Vy}HWYJJ7M9oa+)MfswA`8jPue>9z8Ot3|*)pKHarC*6>)-uID ziZj4e)QoNDfed)=^a?a%7m6T1g3gY9N^@Xo^eTEE!;MxuALD870y4hXLrh~70**jN zerEacDA^>=7(oi5;#MeWw+msN!>OP!NYw1)<9Aq46BdjYqZ69^pC&n4<|M#Niqy zz!av$So?g5ePf^g>C<07|M}BDqfh^8*LZ*WNA&rZ&%b>7KcD~d>3<>k{x6^Y@8>^# z{;#8j-T(CY&!7J?cR~45`Ry*j^GcGpY*e-iA%gsyUmA4Bv~_$T`8Z(@)|Bs|cS#3E zT0XE!*2Ud7n`lwWo_xAIQ;WLzs!ND&O1^p0oL=58a^-YM*She2#yb0zNpnFZuVs1E zM430gF3)h1YeuOex(vg4Cd6DD$5RYunA{1N)-E$X2?RE-2tDlBrBh2RjPforK)`nw z_RAG|Wf3jvp_ftTw4YreH)^%AG;Lc)BE`V&*i^{et<%fpx4R72xcnk#^{f>&h@7Cg zxR=H8B+C_Q7;0g1IsOd|O=@NjxkXJtK}5iewF{CB+_#uPqE0hQn&-eeuRXyqvVlxt 
zvIk?oX(uTef_{l?A?PUW!1VU#%Aq0XwLiFaaFMij=@r7PiW=Ef-iul=(N4O6_|@tHp5;PkwQ(hF zVl}^a5jWoF{Wj2T2rW1GjY4=T&JMC<_AW2G?0z{)`TTgCE^j`=0X#y>X8B-@b8PA@A+$*ZapofM7iS`098cy*>1W#s;JP zqoW@Kxc}z((D9DZuL<(x0iI|lS(auNz=tlm%gf8^XbBUH zg?@9`=?7O}?y<0_)r(%cwdl68FS4)9L8qrzSLnshFgYBCJ%?(Kwvg?Wm_Pb|_|Qkc zw_DI`pS&|N=}`mhfx#z`qGeyrK=Okug>H~TIX@gw^z;6~>Ql&t8)D&U<`N~8rauhy zhf8gF`_dI=lHaxa>_*SsL)|PLteJ+^5Y*txAHXteUsc+1mL;v$TD)(IYiWr`Rv;0= zl_;~ARoW2I*mqgzds<=&WWu?`QietwDgxIPzVC6lB!j&10JaYfgQI;vcys&(y2HQ+ zfPaPd-~9A;-wO)tmP7*WVT~0j$rdZ-zT?cG?+-KR`^gL?NZ_ab;P>(Gk8g9r!tlSw zUCfK?eJLW)n<;&4|<_^5=(gphxP#P%{ARerEY2Nf zJKO;n2&_&yEy^sYy_k^PZPgAUyIDF<^@;)h^K}n~P6^kce z2sZoe?r`W1LgB>d=n_(aT=s!Mi;pk3CT6hUdMfJnR?(_<2 z9#Dt1`ESNBS{c1(x89;CJujz)(D<%1Y^E#zZcMh%?2Y%Ly`mfCduUwhy-`Qn8|z7H z0qEHZnsaM~l~x8&vIO4x!T~d#80|AfjUP&n`2e?)fY}-vDYay-!?f8X-gcwTaOf6x zQFb+LgUfpGwq#K|j<<1sd0l~FU!i!*czauupbg&*r^Jdb8CedNLXQ7#vQ_#ltODBNZ+A$Yl(KTL$IS<;{fPV zf_?3Wd^vZ>4{rnXyWq{6!2kDXe}5nO=;hlVkAl~Z=X2}5MhE+USGXd$mPQdtm@erP zUnS?Ac5)DmOV4_S!4kiwqhkqi@4vHSuCsi1d>>1+%@Y2PrAk?d62v#+16e{-OFp)O zADwc}Oc~o;`ii-(N8HetTsk46PN)KpXeFdzn0qSpoMmkD_G!|roDoXQscu#{82c3- z5nVB-oI?c}q2lZ;5@c=|{RJ{v4;?&yq%p>iA8mpbN>^rr35v`nnV^w=fG(2thH|WZ zD@0LfZK9M-*up;?1TO}^5f!-maj?H(ctzmJE2GaxZ{LjL@#qtJm7c@5ex5Yk43PVe z>pS^tfxfsH`Yw?1B7n>8dSN-WiB}BPT)HmhQ?B5kAcRX^=KL_GB$J_Qe1;8oNGP#z zU8Q|j!fzRB$91W3J#TgU`G$o%c&EpUB_KCf26Ce$3uwx56WIvpn+6EOK8$Q%gR! 
zO2jCJIVt0u(Y^*~^##=d4Wy)$YQ-^j0EB&yY2Xm;Il(es{iqdxjD~N3RfY0-lyE!k z?xOpotarb#ES_j&(PeA$O{HV+pyJkI?-0kbTop;z=DfJwh>M}n?dKQ47hL9b;F#L( zhL<9p)y#C{Sce_OXU&+YB8soalqy%oj$2dgepyrO!J67xYl>~fnqr%^rm*~GtSPpf zHO00#E7yi9Zc4@al3IBo$F9awgsyf%hB2pZ=(;rUVXh9p+y!tWQ4@CP2*SsvkA;Ss(`9>I0Nz8N0j%kc=`=B!*xY04~T_I%GZ@MGT&dM2Qn zga*6Pc#07b+OwHF<7<6;2VG<|y0?6tD zxyUf&ULbkMm<*&>sCNU!Wb+QJr@xkcTFEi%IjeElslBTYVRk(s}- zWwODR-y(B6r-hK1eVr{bL#c0Wkr`fG^A?#&1B6WCRLfVVoN z%n^SS01ef z(KIJZ-z-{<2ZLQ#7}zOlUlg^bX0Z@l(L3O3y0X_03U-R<_a+qVnq)6~a>wAI9Dv>q zq$g{E^J!I>a&3jU9a3S;Fe`O(rETi-hgC*>)>L?U>AH5&og(D+TQ+gr11mQCwSh z$c~%ZnSEykQ<-s7CO{HroEcwI;xLc6uHwF3+^dDbjvP z*WAI5^t9^)o@*SC$YrkYam=M~EP-%N88HstFk_6ZelgNW0CR@Pn)zaG89UAv>VC0> zdUzYUvu{JyR@g*smQBdFv%*F#$0llvD{KfcvQ%*IP{|Oc9MWNwdY+^XCr0@Jkdfy| z-wQQkV5vK5V{BrkFMUx&Dx>w<2&G&xJI*QkesPL^;FO)^6ulKr(VOKI{O#jJjVo%M>oqU)6V%}#vb#d$$Dv} zOi4S=EarYOi+S*EeKp^f*$Q)*jWGwS3sdLnn_&*K9CMiMF^ABCOQlLMhtj|mfSR6g z9N$$!QRxaL9gG=aKJ^F}SaB!V@NDF^-^m!f=BdcsGIpCS$o-19OCI9w?(95-Y$e_< z!JB0h_RYL0Ak5i za+I`EM5}Zj8+=K#yJ&7@1QAaL>m3$D>60SmEv!CGA`22({2_@iuQ3i#>Th~+cJ&bz zY7rPJ+kp$H8#UhB6+u}H7}t%{ZY!>$k?QQap-y`FC8jRgKSNh(A21XUuaowmz2U*; z73z1J783W0yMSLqP%X-~cv0Xrd`;1)yAN)`LwOHu7aSvTyB#jGOYJ|?R7zR!E zJd2ufvAij2wT6Nr$O4|IR;$Z%#6WwST_5`{=|F9B6nu;u{hrm|%9wq#!z_x$lTR;% zv5UrE9BBA%q@5h6D`Du(8cuv&!S_O`;f$3rp=ltQ%cY|+_oRh6hf$1uOfe0JA26X@ z8ir&E=gVQ*cprlel3v2NWgihy=v4|plcRTW+_iuI0UgHY=Shr?p|ieD0Rhl|KtFcg z!x-+MpwmbJ`v2oR%v@U$y9>F4Di$?lABh@NZ&sKhp<78wOeJZ3}ScMvmThKmMCs5{s+CLdxJRT9YB~pbWx>} zuZ(ibw$Y+^xO_Q_ASb;mM$W8;or#^jk-|$L;<9xdt=+$F+*{aJep|z;G8DaPgw`Ky zr>k8QS4JBhiffB)bWj*CxzS-`wJE#xAu0xJVozpW17$8uE7EX?O9CCJU|XRptlOk_wVl`%^UAs(!7Bze*oEj7LtQ?EuedAd#6**=62IbBR*T>< zQC};7#X#)I2o{5@YY$;Dp`&^CRtjQK3Tz#Fgmz%#zT4QP($`!O*WpmO^dxnCVoxYh z+F8zkAKKI5+ryznJ=3)Q@}gKMCIq%vUvgdSnhM!*>5wgt_Mu;&?l{xniigOJ2rR!6TsiSQ;B<5igXd8HrDXFPXf!wwfPoq=3 zo!vq>joAg=i)b2F{2d6UQE2fYmc}BnG;Xxq3ZXP6@>vLq#aD`?VP&2Oq+u_uJC4Q; z)y%xHS`>{k=M4k`&{Y#py9#SRAd&;P8p#zEj#f&sP-x1PUIJU=Q-B|^PezB}6wO;9 
z7;1M+Y2CInKk$$3(vq<|MAkX0Qv!G90_qxa-(3`ax9B~B6OCeZFEDYd#Dk0d(f{7s2mku?dR38`qYc`#luJa&TtH) zk@}I!dN@3->MM1Z%<6*mOS@+Ntt#G zmjO2LS%mN$af2Y_4r4M1i9XJ_!$d1VFqTaC0uy^o6ERX7i3t{( zEoTNFd2f%8k$d>|-5Ydx(>7H`O*?!~YiS)HoRzeJ*HOZb8^HJLct7%Ryx;B`z*{-q zkBH40!1S9rdYmkGydT--@qV0BEz9hGH991Y;vVKcCBpG2QNpLnrQB1@qdXv?QVw4O zlURqxk4Gb)$1K#p5%XRNPMDxdu*GR*Eye9P$K0Y&9K-z34XCTZKIg8{DpVc|LPJ8|W7L>HxGr_Jw9jVH^jq0x zGq0$7vCC#qfgRXnGt}h69-BKh#!NKW3R`S$<+KPAms}dM{v9?0sV6qrjKF@q`)ei? zH0Rc8N5qzaST~f$afB3D5v{ndh~v7#XFd-DFLbovTzgcKwL5$?ru2q8rm&2!1HwbG zl5(=L*JrU}cHAt#UuT(+hqFv}*Y&fNvrLHEtl9qU#GNP0on=DQHrLV+N@y9^k8~v$ zE)QMDCkAWDJ%SysNN8l}P~kXIxmpKHY>#(ZFmli<7CH`x)H)vgRq*%97|-jyZlg8=Hyj;QdFj5t8!wia&i=(6jeF#;}fXL2|h6=Se=}# zL=6k84jra#u&bgDa?|vE6*-MK%I$&YQPu+_j=)0i79T-J2q3bMAVLIJF%0r%TqI1Y z=%C+=d&%`%*a)Er7z*_}0PQ*G0sS%QqDCL=yj~VZ=k`SPBCB{ZEA(ra^1Hu5dDKVU zL8ehFKDV!Hqii5n1fR2{7p)Xu^4o9~fi(>9hWsHou1rA-_tr((XNdyJ6OXEkSts*C zr|W?%UfOP_oxBc^3LPPSKD3%fw;O}og~S7TX~OVGI<-H>Sz04iTzguZmDL(Jj@V`O zgJA$2>BdmryK^M(jUFt<$R5m`p=IDN*w1^{<8lvWcDzT!#4sFDD`}rb*O#*a99)0s zj8YcaSd7T+L)WO$iaO_g=%n_6%Bj7*hu;6;bPt{Nx>fj4&C)$o)NA->klkANdEvga z_!BEQ)3~Na;WDfoYU75UbOULK?-3RToTz}h*!Xs-tHHiQc`(U84P^G8o> zSq}Pq{0KjOH2C-b0pl3*p5-zD03VA81ONa400ILF0CQ|V0RRA}0RRA%kY7)OFcih# zN%#(Yfv%K4rE81{rMO|{RK|w-*07EdXSMGO1IDrl_~|^yC?Ro1 z*zE+Ug=so<(0>qko+k;;0ziVu9}!ssSQ7GaWH`WK9%cdhK~%!D(t>SSQ13%pu@_cQ z2wOGGv}s|l5B4IAgqP6;N7cZGbY0x#n`&mSc7kwW$8>>@LV$b_pejH#4k;pUlYyOd z^A32|%HC41KMF)2`0LXy-GqGixLB`wxp=8@x!e@b_e;KA;ic=s@5K<7ZKNSoqrbKeE)m_I`S{{rvU%}ABzYC000000s{&Fb8K4y006TA003Q( zZ)<}v5XRpNeh2aWRTKZjhHYT9+qxB2oKChkBiPi+tdWm1!Z~Y~qg8cPpSo&1icSp+bx|`aYX~pe2euT*b(=@ha zOqD+wEbia40RR9WiwFb&000020}23hY|#V&0Ld2s0PR>^Z`(E$eHW1bz?T6jP*NoI zEvy44u_6zu%*g6wQIn`R9s1CpvHiXNl3glxv$~ejWlk1g6JUY1uCC5K zJcqjGbZ{J*jLcG*u-Eg(vc_NgR&`!;x>}k>d{`$zX(s!^p;T5FEJ7^PR{au9HmSDisQ! 
z93O^gAyhnzRUDdTS~NsfsaeZ2wTvraE+NWM;_~6xR@7eUHXVE0$&e5xA&w$!g zTUblJ5*NxW^EfN6t~?b*s^%ebT-)0-_KfGHD6%3&`kCk~%LV%M@%O{Q6WmmJja$$f zK1qN3^zjd382V?TE&$SDaUSQ{44oW5MYEy~m&+xA$a!`yGzTF%De9&yWu?Rn)yZ5e z+T0KdnGB4tYDb*w+Np~%LUW}`U~(gyS;@CjwNEyVK~wp;2E7qP2mwYt!fi%4w+Yu( z7a~zQR>{rq4`0N72z{y(B$u~{88MzrCNDt2zcw%hs0Q{PqIel+Z8{KkDT`X@TP(`a zkvxYoCt?8w$5ocUH=fEQR+%iqbxGUHL2u4vp35a9Jv9uZ4?0FSI~b+kp@2~9=n<`r zYdOA_-(GHyMN{h_ago(6B`fBjkRx{bad;(k(KBwC! z!0QqDZE|`8k!HnAT##~J?p&iRAiP_nUWZ0Mie%;ea<7H~vZ+s}*E8Os z;abHNDU0-e%65%z_e%8$yRlO5g0oF=Qs%BzSr+N-{*(4>;&iFfMf>`4uPOnnsZSlM z?AtA>MRlGfz;OceeaR%Q>iYKE(Z8+G9;A1y)~=nQe~HU$``3$Iw69%52fD#=o5bl) z(nos0i8p=(z2#O5_-Nm?2=CoVg2RHje|-4t@a2<(5!yCyKE}pGsyIi>IL}d{_l=Rt z76R-JPaer-k;`}{W+C$09p3sOVm4Hj6^Ie1hW7&pC$=yQy(Ro7)K=0z$rk_s0E2|; literal 0 HcmV?d00001 From 7c15265b89769d00569c7cfa9284e37461c3009d Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Fri, 21 Feb 2014 10:43:54 -0800 Subject: [PATCH 29/86] set version for final build as ia-web-commons --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d2004a27..d91de709 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ org.archive ia-web-commons - 1.1.1-SNAPSHOT + 1.1.1-LOC jar ia-web-commons From b82f48888276cbddd61f67b0243c0781b5daedb4 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 28 Feb 2014 09:39:14 -0800 Subject: [PATCH 30/86] remove sonatype parent, add distributionManagement back in --- pom.xml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pom.xml b/pom.xml index 918a434c..68523d98 100644 --- a/pom.xml +++ b/pom.xml @@ -1,12 +1,6 @@ 4.0.0 - - org.sonatype.oss - oss-parent - 7 - - org.netpreserve.commons webarchive-commons 1.1.1-SNAPSHOT @@ -239,4 +233,11 @@ - + + + + repository + + ${repository.url} + + From ee3105132883cfcd6e2a1b8b9781897e9417a6f5 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 28 Feb 2014 09:45:39 -0800 Subject: [PATCH 31/86] add missing newline back --- pom.xml | 4 
+++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 68523d98..7616a17b 100644 --- a/pom.xml +++ b/pom.xml @@ -240,4 +240,6 @@ ${repository.url} - + + + From ffb68b8a87b6656f08a31f5bc2438a5d9d360645 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Fri, 28 Feb 2014 12:07:14 -0800 Subject: [PATCH 32/86] pom.xml customizations to allow IA and other 3rd parties to tag builds of their branches with a special version number, and deploy to a custom repository with the maven command line switch -Drepository.url=... --- pom.xml | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 7616a17b..53095b83 100644 --- a/pom.xml +++ b/pom.xml @@ -1,9 +1,15 @@ 4.0.0 + + org.sonatype.oss + oss-parent + 7 + + org.netpreserve.commons webarchive-commons - 1.1.1-SNAPSHOT + 1.1.1-${build.tag}SNAPSHOT jar webarchive-commons @@ -47,6 +53,7 @@ UTF-8 ${maven.build.timestamp} yyyyMMddhhmmss + @@ -233,13 +240,17 @@ - repository ${repository.url} + + repository + + ${repository.url} + - + From d81936cb8e4ab06f9ec7a05a763cfaa5d543b527 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Fri, 28 Feb 2014 12:24:51 -0800 Subject: [PATCH 33/86] configure properties so sonatype repositories are defaults for distributionManagement --- pom.xml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 53095b83..9c8698c7 100644 --- a/pom.xml +++ b/pom.xml @@ -54,6 +54,9 @@ ${maven.build.timestamp} yyyyMMddhhmmss + + https://oss.sonatype.org/service/local/staging/deploy/maven2/ + https://oss.sonatype.org/content/repositories/snapshots/ @@ -243,13 +246,11 @@ repository - ${repository.url} - repository - - ${repository.url} + snapshotRepository + ${snapshotRepository.url} From dbb2ba5ac8a6001afc11baf911a7ac4dcfaaca27 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 6 Mar 2014 21:25:43 +0000 Subject: [PATCH 34/86] [maven-release-plugin] prepare release webarchive-commons-1.1.1 
--- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 9c8698c7..740f30b7 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.1-${build.tag}SNAPSHOT + 1.1.1 jar webarchive-commons @@ -53,7 +53,7 @@ UTF-8 ${maven.build.timestamp} yyyyMMddhhmmss - + https://oss.sonatype.org/service/local/staging/deploy/maven2/ https://oss.sonatype.org/content/repositories/snapshots/ From 45bc3f5827ee528e71be86f6c214fd46f466dfbf Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 6 Mar 2014 21:25:48 +0000 Subject: [PATCH 35/86] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 740f30b7..0be3ebe1 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.1 + 1.1.2-SNAPSHOT jar webarchive-commons From e564b044ed1f70f67ef89a2da9dd6fe433de955d Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 6 Mar 2014 21:34:26 +0000 Subject: [PATCH 36/86] [maven-release-plugin] rollback the release of webarchive-commons-1.1.1 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 0be3ebe1..9c8698c7 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.2-SNAPSHOT + 1.1.1-${build.tag}SNAPSHOT jar webarchive-commons @@ -53,7 +53,7 @@ UTF-8 ${maven.build.timestamp} yyyyMMddhhmmss - + https://oss.sonatype.org/service/local/staging/deploy/maven2/ https://oss.sonatype.org/content/repositories/snapshots/ From 13db0ccce48ac92773c06833552a54ebab6e8ea7 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 6 Mar 2014 21:42:12 +0000 Subject: [PATCH 37/86] Added a changes file. 
--- CHANGES.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 CHANGES.md diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 00000000..d5c720ab --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,6 @@ +1.1.1 +----- + +* Renamed from commons-webarchive to webarchive-commons (https://github.com/iipc/webarchive-commons/pull/8) +* Cope with malformed GZip extra fields as produced by wget 1.14 (https://github.com/iipc/webarchive-commons/pull/10) +* Switch to httpcomponents, and add IA deployment information. (https://github.com/iipc/webarchive-commons/pull/11) From 9f749399b1eaf3488ed5379126d2e7d479c3b7f6 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 6 Mar 2014 22:04:03 +0000 Subject: [PATCH 38/86] Added aliases for ID's too, but this would probably be much neater as a build profile. --- pom.xml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 9c8698c7..172b6310 100644 --- a/pom.xml +++ b/pom.xml @@ -55,7 +55,9 @@ yyyyMMddhhmmss + sonatype-nexus-staging https://oss.sonatype.org/service/local/staging/deploy/maven2/ + sonatype-nexus-snapshots https://oss.sonatype.org/content/repositories/snapshots/ @@ -245,11 +247,11 @@ - repository + ${repository.id} ${repository.url} - snapshotRepository + ${snapshotRepository.id} ${snapshotRepository.url} From 4623d26af6131d9f161fb1d32e19ec8b4705eddc Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 7 Mar 2014 10:13:18 +0000 Subject: [PATCH 39/86] [maven-release-plugin] prepare release webarchive-commons-1.1.1 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index cfd201b0..85c30fc9 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons webarchive-commons - 1.1.1-SNAPSHOT + 1.1.1 jar webarchive-commons From 9a77285299d54b8e0c73a9006fab729d8c920dd3 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 7 Mar 2014 10:14:22 +0000 Subject: [PATCH 40/86] [maven-release-plugin] rollback the 
release of webarchive-commons-1.1.1 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 85c30fc9..cfd201b0 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons webarchive-commons - 1.1.1 + 1.1.1-SNAPSHOT jar webarchive-commons From e1b40a47147c89e66dfd62d9330e211e62de1ec2 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 7 Mar 2014 10:20:37 +0000 Subject: [PATCH 41/86] [maven-release-plugin] prepare release webarchive-commons-1.1.1 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index cfd201b0..85c30fc9 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons webarchive-commons - 1.1.1-SNAPSHOT + 1.1.1 jar webarchive-commons From ede9097dc04e35bb3e034c2f35aea96e5f5f2761 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 7 Mar 2014 13:35:01 +0000 Subject: [PATCH 42/86] [maven-release-plugin] prepare release webarchive-commons-1.1.1 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 172b6310..d4de13a6 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.1-${build.tag}SNAPSHOT + 1.1.1 jar webarchive-commons @@ -53,7 +53,7 @@ UTF-8 ${maven.build.timestamp} yyyyMMddhhmmss - + sonatype-nexus-staging https://oss.sonatype.org/service/local/staging/deploy/maven2/ From 08175bfbb124a7f6ebaeb1b2095804174f5fde88 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 7 Mar 2014 13:37:26 +0000 Subject: [PATCH 43/86] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d4de13a6..52cef01e 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.1 + 1.1.2-SNAPSHOT jar webarchive-commons From 08b944291d5faed4001714a8ced78ca354933207 Mon Sep 17 00:00:00 2001 From: Andrew Jackson 
Date: Fri, 7 Mar 2014 21:53:35 +0000 Subject: [PATCH 44/86] Added tests and initial fix for opening uncompressed WARCs This initial unit test just tests that the first record from compressed and uncompressed WARCs can be accessed. --- .../archive/io/warc/WARCReaderFactory.java | 10 +- .../io/warc/WARCReaderFactoryTest.java | 34 + .../archive/format/warc/IAH-urls-wget.warc | 3156 +++++++++++++++++ 3 files changed, 3197 insertions(+), 3 deletions(-) create mode 100644 src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java create mode 100644 src/test/resources/org/archive/format/warc/IAH-urls-wget.warc diff --git a/src/main/java/org/archive/io/warc/WARCReaderFactory.java b/src/main/java/org/archive/io/warc/WARCReaderFactory.java index 9c6c7e77..a02adf03 100644 --- a/src/main/java/org/archive/io/warc/WARCReaderFactory.java +++ b/src/main/java/org/archive/io/warc/WARCReaderFactory.java @@ -103,9 +103,13 @@ public static ArchiveReader get(final String s, final InputStream is, protected ArchiveReader getArchiveReader(final String f, final InputStream is, final boolean atFirstRecord) throws IOException { - // For now, assume stream is compressed. Later add test of input - // stream or handle exception thrown when figure not compressed stream. - return new CompressedWARCReader(f, is, atFirstRecord); + // Check if it's compressed: + // TODO Currently relies on the file extension, but this should all really sniff the content properly. 
+ if( f.endsWith(".gz") ) { + return new CompressedWARCReader(f, is, atFirstRecord); + } else { + return new UncompressedWARCReader(f, is); + } } public static WARCReader get(final URL arcUrl, final long offset) diff --git a/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java b/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java new file mode 100644 index 00000000..25028797 --- /dev/null +++ b/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java @@ -0,0 +1,34 @@ +package org.archive.io.warc; + +import java.io.FileInputStream; +import java.io.IOException; + +import org.archive.format.warc.WARCConstants; +import org.archive.format.warc.WARCConstants.WARCRecordType; +import org.archive.io.ArchiveReader; +import org.archive.io.ArchiveRecord; + +import junit.framework.TestCase; + +public class WARCReaderFactoryTest extends TestCase { + + // Test files: + String[] files = new String[] { + "src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz", + "src/test/resources/org/archive/format/warc/IAH-urls-wget.warc" + }; + + public void testGetStringInputstreamBoolean() throws IOException { + // Check the test files can be opened: + for( String file : files ) { + FileInputStream is = new FileInputStream(file); + ArchiveReader ar = WARCReaderFactory.get(file, is, true); + ArchiveRecord r = ar.get(); + String type = (String) r.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE); + // Check the first record comes out as a 'warcinfo' record. 
+ assertEquals(WARCRecordType.warcinfo.name(), type); + } + } + + +} diff --git a/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc b/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc new file mode 100644 index 00000000..1125fe98 --- /dev/null +++ b/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc @@ -0,0 +1,3156 @@ +WARC/1.0 +WARC-Type: warcinfo +Content-Type: application/warc-fields +WARC-Date: 2013-10-21T21:53:06Z +WARC-Record-ID: +WARC-Filename: IAH-urls-wget.warc.gz +WARC-Block-Digest: sha1:I7UCIFZZDYO4O55ZOG6X5PRMVWMPZWMJ +Content-Length: 235 + +software: Wget/1.14 (darwin11.4.0) +format: WARC File Format 1.0 +conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf +robots: classic +wget-arguments: "-i" "urls.txt" "-O" "-" "--warc-file=IAH-urls-wget" + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/robots.txt +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:06Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:CPCUG5OU46Y5YHPTFCZLZV465AFPFJYY +Content-Length: 126 + +GET /robots.txt HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/robots.txt +WARC-Date: 2013-10-21T21:53:06Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:3L4DY55OVKT2IEHZEKOSIXRCQKJ7MNIE +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 435 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:06 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/robots.txt +Expires: Tue, 22 Oct 2013 03:53:06 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/robots.txt +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:07Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:RQBBTMHS45XDYLYGRCT7YQ7P3UORCEQU +Content-Length: 122 + +GET /robots.txt HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/robots.txt +WARC-Date: 2013-10-21T21:53:07Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:ORAXOWRNZAEDKBOJUW2PYNLDX2LRDCBK +WARC-Payload-Digest: sha1:ARS5OJBVROJW62M7JMB3BCHEUUEBVMJK +Content-Type: application/http;msgtype=response +Content-Length: 1014 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:07 GMT +Content-Type: text/plain +Content-Length: 727 +Last-Modified: Mon, 21 Oct 2013 18:55:18 GMT +Connection: keep-alive +Expires: Tue, 22 Oct 2013 03:53:07 GMT +Cache-Control: max-age=21600 +Accept-Ranges: bytes + + +Sitemap: http://archive.org/sitemap/sitemap.xml + +############################################## +# +# Welcome to the Archive! +# +############################################## +# Please crawl our files. +# We appreciate if you can crawl responsibly. +# Stay open! +############################################## + + +# slow down the ask jeeves crawler which was hitting our SE a little too fast +# via collection pages. 
--Feb2008 tracey-- +User-agent: Teoma +Disallow: /control/ +Disallow: /report/ + + +User-agent: * +Disallow: /control/ +Disallow: /report/ +Disallow: /details/goldenbull2007john/ +Disallow: /stream/goldenbull2007john/ +Disallow: /download/goldenbull2007john/ +Disallow: /14/items/goldenbull2007john/goldenbull2007john_djvu.txt + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/ +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:07Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:GCYSQOYQGB7JDB57XMUYWFQERAKMNEQQ +Content-Length: 116 + +GET / HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/ +WARC-Date: 2013-10-21T21:53:07Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:WDSM4DEMHGZEOPEG2HMQAIUBQJ6WRRN5 +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 434 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:07 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/index.php +Expires: Tue, 22 Oct 2013 03:53:07 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/index.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:07Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:CPMG7AGNNEDLYK5UOOZLLRHPI4JLEC3U +Content-Length: 121 + +GET /index.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/index.php +WARC-Date: 2013-10-21T21:53:07Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:RYQILVXCYAVUO7TRRO7CQ7VYKSD4COHM +WARC-Payload-Digest: sha1:63IMMQZVCWADA6ZOVJVHKYHHNFSUS26H +Content-Type: application/http;msgtype=response +Content-Length: 258 + +HTTP/1.1 301 Moved Permanently +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:08 GMT +Content-Type: text/html; charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 +Location: https://archive.org + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: https://archive.org/ +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:09Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:G6KJJNG7G7HVRFGJJZ7ELDMO2ZZEX4WR +Content-Length: 112 + +GET / HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: https://archive.org/ +WARC-Date: 2013-10-21T21:53:09Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:VRAITOLIHCUNC5A7LDUBFHDSYQCUO7JM +WARC-Payload-Digest: sha1:WDT537KNDSUIRPB7R56KBDX3K77IR7W3 +Content-Type: application/http;msgtype=response +Content-Length: 30849 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:09 GMT +Content-Type: text/html; 
charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 +Set-Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87; path=/; domain=.archive.org + +7756 + + + + + Internet Archive: Digital Library of Free Books, Movies, Music & Wayback Machine + + + + + + + + + + + + + + + + + + +
+ Universal Access To All Knowledge
+ + + + + + + + + + + + + +
+ Home + + Forums | +FAQs | +Contributions | +Volunteer Positions | +Jobs | +donate +
+ + + +
+ + + + + + + + + + + + + + + + + +
+
+ Search: + + + + + + + + Advanced Search +
+
+ + Anonymous User + + (login + or + + join us) + + +
Upload
+
+ +
+ + + + +
+ + + +
+
+

+
+ 361 billion pages +
+ Web +

+
+ + + + + + + + +
+ (wayback logo) + + +
+ + + more info +
+
+
+
+ + +
+

+
+ See recent additions in RSS +
+ Welcome to the Archive +

+
+ The Internet Archive, a 501(c)(3) non-profit, is building a digital library of Internet sites and other cultural artifacts in digital form. Like a paper library, we provide free access to researchers, historians, scholars, the print disabled, and the general public.
+
+
+ + + +
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Video +
+ + + 1,411,240 movies + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (movies pick) +
+
+ filmcollectief-00-060a
+ + Unknown movie, found in a cannister which should contain something elso. So if someone can help me... +
+
+ +

Recent Review

+
+
+ The Stars Look Down (1940)
Average rating: 4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars

+
+
+ +
+
+
+
+

+
+ Browse +
+ (by band) +
+
+ Live Music +
+ + + 121,538 concerts + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (etree pick) +
+
+ Grateful Dead Live at Jai-Alai Fronton on...
+ + Set 1 Ramble On Rose Black Throated Wind Mississippi Half-Step Uptown Toodeloo Beat It On Down The... +
+
+ +

Recent Review

+ + +
+
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Audio +
+ + + 1,744,979 recordings + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (audio pick) +
+
+ Various Artists - phase01 [hi001]
+ + Our first release! This compilation includes all the original Heavy Industries collaborators... +
+
+ +

Recent Review

+
+
+ IAA Top 40 Countdown
Average rating: 5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars

+
+
+ +
+
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Texts +
+ + + 5,325,972 texts + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (texts pick) +
+
+ Sommaire du plaidoyé. Pour les abbé, prieur,...
+ + 7 p. ; in-2 Cote du document : FOL Z 588 INV 518 FA (P.29) +
+
+ +

Recent Review

+ + +
+
+
+ + + + + + + +
+
+

Most recent posts (write a post by going to a forum) more...

+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SubjectPosterForumRepliesDate
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 0 + 19 minutes ago +
+ Re: What's in a name? 'The Grateful Dead' + + Diamondhead + + GratefulDead + + 0 + 23 minutes ago +
+ Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 2 + 23 minutes ago +
+ band called Last to Know from Taos, NM + + menudo505 + + etree + + 0 + 27 minutes ago +
+ Re: Woulda Coulda Shoulda + + micah6vs8 + + GratefulDead + + 0 + 34 minutes ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 1 + 41 minutes ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 0 + 59 minutes ago +
+ Re: Woulda Coulda Shoulda + + Diamondhead + + GratefulDead + + 1 + 1 hour ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 1 + 1 hour ago +
+ Re: What's in a name? 'The Grateful Dead' + + unclejohn52 + + GratefulDead + + 0 + 1 hour ago +
+
+
+
+ + + + + + + +

+

+ Terms of Use (10 Mar 2001) +

+ + + + + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/index.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:10Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:DRAV5TKA4765LYFANCFHVNKEWGLRKUMM +Content-Length: 171 + +GET /index.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/index.php +WARC-Date: 2013-10-21T21:53:10Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:YXATLZCFORQS33ZVB3M3SMJY3S2Z6QUD +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 434 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:11 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/index.php +Expires: Tue, 22 Oct 2013 03:53:11 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/index.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:11Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:D53DT5RU7NGDFBHOJOKLF56UG32P7AYF +Content-Length: 167 + +GET /index.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/index.php +WARC-Date: 2013-10-21T21:53:11Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:RS3Z4Z3NZ6BS6ANPCRKWA43E5O5YPVG6 +WARC-Payload-Digest: sha1:63IMMQZVCWADA6ZOVJVHKYHHNFSUS26H +Content-Type: application/http;msgtype=response +Content-Length: 258 + +HTTP/1.1 301 Moved Permanently +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:11 GMT +Content-Type: text/html; charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 +Location: https://archive.org + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: https://archive.org/ +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:12Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:HRBVH5XQCN2OWGMQ7THZ675AZ4L4SEWV +Content-Length: 158 + +GET / HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: https://archive.org/ +WARC-Date: 2013-10-21T21:53:12Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:24OHCKJGVHH4GDPS65MSGZAS2FWN6U44 +WARC-Payload-Digest: sha1:7DW5UIXJ5NGLWNQ5WYE7AB4E5L74X275 +Content-Type: application/http;msgtype=response +Content-Length: 30679 + +HTTP/1.1 200 OK 
+Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:13 GMT +Content-Type: text/html; charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 + +76fb + + + + + Internet Archive: Digital Library of Free Books, Movies, Music & Wayback Machine + + + + + + + + + + + + + + + + + + +
+ Universal Access To All Knowledge
+ + + + + + + + + + + + + +
+ Home + + Forums | +FAQs | +Contributions | +Volunteer Positions | +Jobs | +donate +
+ + + +
+ + + + + + + + + + + + + + + + + +
+
+ Search: + + + + + + + + Advanced Search +
+
+ + Anonymous User + + (login + or + + join us) + + +
Upload
+
+ +
+ + + + +
+ + + +
+
+

+
+ 361 billion pages +
+ Web +

+
+ + + + + + + + +
+ (wayback logo) + + +
+ + + more info +
+
+
+
+ + +
+

+
+ See recent additions in RSS +
+ Welcome to the Archive +

+
+ The Internet Archive, a 501(c)(3) non-profit, is building a digital library of Internet sites and other cultural artifacts in digital form. Like a paper library, we provide free access to researchers, historians, scholars, the print disabled, and the general public.
+
+
+ + + +
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Video +
+ + + 1,411,240 movies + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (movies pick) +
+
+ Baby nursery (reel 5)
+ + Description: Amateur movie of the baby nursery at the Peoples Temple Agricultural Mission in... +
+
+ +

Recent Review

+
+
+ The Stars Look Down (1940)
Average rating: 4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars

+
+
+ +
+
+
+
+

+
+ Browse +
+ (by band) +
+
+ Live Music +
+ + + 121,538 concerts + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (etree pick) +
+
+ Grateful Dead Live at Uptown Theater on 1979-12-05
+ + Alabama Getaway-> Greatest Story Ever Told, Dire Wolf, Me & My Uncle-> Big River, Cold Rain & Snow,... +
+
+ +

Recent Review

+ + +
+
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Audio +
+ + + 1,744,979 recordings + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (audio pick) +
+
+ [Miga_v16] "Nice summer"
+ + extra video for audio-release [Miga32] Rominger "Music for camping" [Miga_v16] "Nice summer" video:... +
+
+ +

Recent Review

+
+
+ IAA Top 40 Countdown
Average rating: 5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars

+
+
+ +
+
+
+
+

+
+ Browse +
+ (by keyword) +
+
+ Texts +
+ + + 5,325,972 texts + + +

+ +
+

+ + Curator's Choice + + (more) + + +

+
+ (texts pick) +
+
+ Outlines of European history
+ + pt. 1. Earliest man...the Orient, Greece, and Rome; Europe from the break-up of the Roman Empire to... +
+
+ +

Recent Review

+ + +
+
+
+ + + + + + + +
+
+

Most recent posts (write a post by going to a forum) more...

+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SubjectPosterForumRepliesDate
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 0 + 19 minutes ago +
+ Re: What's in a name? 'The Grateful Dead' + + Diamondhead + + GratefulDead + + 0 + 23 minutes ago +
+ Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 2 + 23 minutes ago +
+ band called Last to Know from Taos, NM + + menudo505 + + etree + + 0 + 27 minutes ago +
+ Re: Woulda Coulda Shoulda + + micah6vs8 + + GratefulDead + + 0 + 34 minutes ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 1 + 41 minutes ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 0 + 59 minutes ago +
+ Re: Woulda Coulda Shoulda + + Diamondhead + + GratefulDead + + 1 + 1 hour ago +
+ Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 1 + 1 hour ago +
+ Re: What's in a name? 'The Grateful Dead' + + unclejohn52 + + GratefulDead + + 0 + 1 hour ago +
+
+
+
+ + + + + + + +

+

+ Terms of Use (10 Mar 2001) +

+ + + + + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/images/logoc.jpg +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:14Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:6PZOFZFFZRY7XJOJ2325DNXHG7LEP3G6 +Content-Length: 178 + +GET /images/logoc.jpg HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/images/logoc.jpg +WARC-Date: 2013-10-21T21:53:14Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:JN3EE5W7CY5PSNTEJ7A6ORMLNNMNWS3J +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 441 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:14 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/images/logoc.jpg +Expires: Tue, 22 Oct 2013 03:53:14 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/images/logoc.jpg +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:14Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:Q6EXPKA6ECDPIEX3MXCWAH2S4JEO4ZHI +Content-Length: 174 + +GET /images/logoc.jpg HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/images/logoc.jpg +WARC-Date: 2013-10-21T21:53:14Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:6ESWUQAIQPTXYPDSKA2NGLDTHEFS6FLK +WARC-Payload-Digest: sha1:UZY6ND6CCHXETFVJD2MSS7ZENMWF7KQ2 +Content-Type: application/http;msgtype=response +Content-Length: 1951 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:14 GMT +Content-Type: image/jpeg +Content-Length: 1662 +Last-Modified: Wed, 13 Feb 2013 16:33:25 GMT +Connection: keep-alive +Expires: Mon, 28 Oct 2013 21:53:14 GMT +Cache-Control: max-age=604800 +Accept-Ranges: bytes + +JFIFddAdobe ImageReadyDucky<Adobed   + + + +     8F !1AQqa"B2R#Sc$T%'!1qAa2"B3 ?P@a@€ Pj,($@ %i Q6;eH0Yz[,3TRhL0AR:(cq ?0SBrJҋ$3&9BAPH +ƱUOAv_O77\Q]Ɣ,) +R7ŠU4ٗшeB:%n'Eq y- )H[%TR{;4*26n.IQp7;|-F8N}|tInތ}RDwPΡ1&L`{Ԋި'w Jb$ I>b] +-z;%ԭKY<*sjJ=}.?]Qn*bg?Fǟ/fi__:V۪?'\xdP5GKu:㶱罠~jcas&AsZdX +Pؑ̚G-,VoC/#%>TwIDmr9%'F $O?w}OFӋ*#{%Zy +W rs]2Ƅ&.5)ISd)7J[G}IYGMͪި,*4oP ʱjzJP17 p%]ɁiK31kAiP$90kVD1wmĞ"F2rXmˤFxp_ƩO.=ίsm|j}K~`e)Ru ^ַVPC%J> ʽ4}<6K +xfv"_2[?ۺ@ *4p3Q{rJ5yk'>c 艂]pCSjyH*O:`<) +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:I7TKK5MVPSOGRVZYP6L37NTE35F25HJQ +Content-Length: 190 + +GET /images/go-button-gateway.gif HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 
+WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/images/go-button-gateway.gif +WARC-Date: 2013-10-21T21:53:14Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:H5UU46OLZY33AQRBCM7R4BKJBMMAPKHB +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 453 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:15 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/images/go-button-gateway.gif +Expires: Tue, 22 Oct 2013 03:53:15 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/images/go-button-gateway.gif +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:15Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:RDRO3REQIV4EDZDNVASSXC6W72SXUQSP +Content-Length: 186 + +GET /images/go-button-gateway.gif HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/images/go-button-gateway.gif +WARC-Date: 2013-10-21T21:53:15Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:OV6P7Y4LCKQ6R7B5EWRGUHFYUGNN2NV7 +WARC-Payload-Digest: sha1:72MRTMYOLSPFXGOTSETEJKAANDRDIE5O +Content-Type: application/http;msgtype=response +Content-Length: 1412 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:15 GMT +Content-Type: image/gif +Content-Length: 1124 +Last-Modified: Wed, 13 Feb 2013 16:33:26 GMT +Connection: keep-alive +Expires: Mon, 28 Oct 2013 21:53:15 GMT +Cache-Control: max-age=604800 +Accept-Ranges: bytes + +GIF89aXWSݡQ͗KʔJYWמOZ_ozăƄŊERNjFŒďӋE~?WvݑH͆Cj׆CۉEu;r9L컓Xߔ]}yw@ݖnߟzᦃ寑Ĭf3d2c1c1b0a0^/\.Z-W+e3c2a1_0[.Y-W,V+h5h7l9l;n=p?pArCtFvIyLzL{N}R}R~SԀUՁW׆]׈`َiےn۔pܖsޚxޜ{ߝ|ࢃ⦈⧉岘洛縟躣ȵf5h7n?䮔!,@Coz0>Wq& y'HAFp@Ǔ(=.YB$ q'M/0q@A¨&QR48In``iC +1܄礞=V04Sn\"mxTQPA a$ +pI$[$R C㕐IUHYc̖-_Ȍ):l(q>)t' ׉ Dh3{% ,&d6D3H`(GC4JP@C; + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/images/star.png +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:15Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:I5U6LAWZPGDDZOSTJEHZT2BWCOPFKDLV +Content-Length: 177 + +GET /images/star.png HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: 
PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/images/star.png +WARC-Date: 2013-10-21T21:53:15Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:5CUEYG4YEO3H5SKHN4UGZDLKCDXJTP2W +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 440 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:15 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/images/star.png +Expires: Tue, 22 Oct 2013 03:53:15 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/images/star.png +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:15Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:5OLRWKI5GCDS6JF4CCKLWJM23GJZBQOQ +Content-Length: 173 + +GET /images/star.png HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/images/star.png +WARC-Date: 2013-10-21T21:53:15Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:LUMN34VHUXETNH36JOWNHIBNR4DOO2I5 +WARC-Payload-Digest: sha1:CECJCMQ6SXDRBZX5COV7RTTQTHTY653H +Content-Type: application/http;msgtype=response +Content-Length: 1304 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:16 GMT +Content-Type: image/png +Content-Length: 1016 +Last-Modified: Wed, 13 Feb 2013 16:33:26 GMT +Connection: keep-alive +Expires: Mon, 28 Oct 2013 21:53:16 GMT +Cache-Control: max-age=604800 +Accept-Ranges: bytes + +PNG + + IHDRagAMA asRGB cHRMz&u0`:pQ<bKGDC pHYs   vpAg\ƭIDAT8˕yHAzo]4BJۥ\)[eJ̲RH5,(%K4R+s. 
="йNMWZ#3ٌQ}s"~GS.DN x.5B v~cQU2\(~_5s8jW)-a]|@['Ի[{^jOO%fCy $8f.?Z(&%мǮLT3Rv ;nlCy)E/ Һ`PTeru8|$>_@?J'"ڱQm~@&łǠOR 4Ųn]dBŶORPb%tEXtdate:create2012-03-28T02:07:14+00:005%tEXtdate:modify2012-03-25T15:54:33+00:00oIENDB` + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/services/collection-rss.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:16Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:CTALP42WLFIFYU44MXGJNNLYA45BUQVG +Content-Length: 189 + +GET /services/collection-rss.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/services/collection-rss.php +WARC-Date: 2013-10-21T21:53:16Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:A24EOC2MZA4SHKQTCFE5RWLN3EG3WSO2 +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 452 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:16 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/services/collection-rss.php +Expires: Tue, 22 Oct 2013 03:53:16 GMT +Cache-Control: max-age=21600 + + +302 Found + +

302 Found

+
nginx/1.1.19
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/services/collection-rss.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:16Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:VD75DH7UF5EHTROCENIJAVOE6HWRMYAL +Content-Length: 185 + +GET /services/collection-rss.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/services/collection-rss.php +WARC-Date: 2013-10-21T21:53:16Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:GVOLA26JHJVANCRZ545PTRNG6HMT7PWW +WARC-Payload-Digest: sha1:CBRMZGMT7IQRUCDW23ABAL6RN7H6MGIE +Content-Type: application/http;msgtype=response +Content-Length: 78007 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:16 GMT +Content-Type: text/xml;charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 + +7fa0 + + + + https://archive.org + Internet Archive + The most recent additions to the Internet Archive collections. This RSS feed is generated dynamically + info@archive.org (Info Box) + Mon, 21 Oct 2013 21:48:07 GMT + + https://archive.org/images/glogo.png + Internet Archive + https://archive.org + + + gov.uscourts.mnd.126519 + gov.uscourts.mnd.126519 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.mnd.126519&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. 
If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.mnd.126519 + https://archive.org/details/gov.uscourts.mnd.126519 + Mon, 21 Oct 2013 19:50:15 GMT + texts/usfederalcourts + + 03337F0F2C418DC4A098F37A8F17A528536B75A2 + + + gov.uscourts.ded.45655 + gov.uscourts.ded.45655 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.ded.45655&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Image Container PDF, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.ded.45655 + https://archive.org/details/gov.uscourts.ded.45655 + Mon, 21 Oct 2013 18:08:34 GMT + texts/usfederalcourts + + 3E074A11E37C54C6725043593A22AAFC4A71EAB8 + + + PC Longplay 319 Spec Ops The Line + PC Longplay 319 Spec Ops The Line + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=PC_Longplay_319_Spec_Ops_The_Line&mediatype=movies&collection=opensource_movies"/><p>This game was a fairly decent shooter. 
Can't say I would play it again but it had a worth while story to play through with the ability to choose your own destiny..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Matroska, Metadata, Ogg Video, Thumbnail, h.264</p> + https://archive.org/details/PC_Longplay_319_Spec_Ops_The_Line + https://archive.org/details/PC_Longplay_319_Spec_Ops_The_Line + Mon, 21 Oct 2013 15:19:16 GMT + movies/opensource_movies + + + + + 38F8748912CF1483DA9505B3D41C65D76990B4A4 + + + gov.uscourts.ohsd.166725 + gov.uscourts.ohsd.166725 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.ohsd.166725&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata</p> + https://archive.org/details/gov.uscourts.ohsd.166725 + https://archive.org/details/gov.uscourts.ohsd.166725 + Mon, 21 Oct 2013 14:52:49 GMT + texts/usfederalcourts + + 1CFB2CEA490A23F960F63FE4B2996FC5073A752B + + + George Griffin Pt 5 + George Griffin Pt 5 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=scm-315234-georgegriffinpt5&mediatype=movies&collection=SeattleCommunityMedia"/><p>More information about this show available at: Seattle Community Media.</p><p>This item belongs to: movies/SeattleCommunityMedia.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/scm-315234-georgegriffinpt5 + https://archive.org/details/scm-315234-georgegriffinpt5 + Mon, 21 Oct 2013 08:32:22 GMT + 
http://creativecommons.org/licenses/by-nc-nd/3.0/ + movies/SeattleCommunityMedia + + + + + + + History + + A4CAB132D3017E520D5D2BCFC40AE162C8FFEA5A + + + عذب النسيل في تفسير كلام الوكيل / تفسير سورة العصر 6/6 + عذب النسيل في تفسير كلام الوكيل / تفسير سورة العصر 6/6 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=3dbo_nnassil_el3asr&mediatype=audio&collection=opensource_audio"/><p>No description available.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Ogg Vorbis, VBR MP3</p> + https://archive.org/details/3dbo_nnassil_el3asr + https://archive.org/details/3dbo_nnassil_el3asr + Mon, 21 Oct 2013 07:55:35 GMT + audio/opensource_audio + + + + 245CEC3E026E543DB855113D5DA639411258594F + + + gov.uscourts.dcd.153973 + gov.uscourts.dcd.153973 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.dcd.153973&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. 
If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.dcd.153973 + https://archive.org/details/gov.uscourts.dcd.153973 + Mon, 21 Oct 2013 06:45:41 GMT + texts/usfederalcourts + + 5C897BB4B02ADE078AE7C399EA38897EEC76C265 + + + فلم 19 + فلم 19 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=MezaaGe_234F067D-&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG4, Metadata, Ogg Video, Thumbnail</p> + https://archive.org/details/MezaaGe_234F067D- + https://archive.org/details/MezaaGe_234F067D- + Mon, 21 Oct 2013 05:36:21 GMT + movies/opensource_movies + + + + + EE2E9BF9883DC1DECD99A039F9B2CCB2EB87C56F + + + Katsaus Journal + Katsaus Journal + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=Katsaus_Journal&mediatype=movies&collection=opensource_movies"/><p>Finnish continuation war newsreel.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/Katsaus_Journal + https://archive.org/details/Katsaus_Journal + Mon, 21 Oct 2013 05:19:10 GMT + http://creativecommons.org/publicdomain/zero/1.0/ + movies/opensource_movies + + + + + + + 1941-1945 + + 71801FCE2832C222C013CF73A06054D012F17025 + + + AwPT - SHADE + AwPT - SHADE + <img width="160" style="padding-right:3px;float:left;" 
src="https://archive.org/services/get-item-image.php?identifier=scm-368707-awpt-shade&mediatype=movies&collection=SeattleCommunityMedia"/><p>  The first 57 minutes of a great  new 1:33:30 minute film  - available here: http://12160.info/video/shade-the-motion-picture-full-video-documentary More information about this show available at: Seattle Community Media.</p><p>This item belongs to: movies/SeattleCommunityMedia.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/scm-368707-awpt-shade + https://archive.org/details/scm-368707-awpt-shade + Mon, 21 Oct 2013 04:46:41 GMT + http://creativecommons.org/licenses/by-sa/3.0/ + movies/SeattleCommunityMedia + + + + + + + Documentary + + 96BAB900271137D5B85E4CD48744C6BF43F2FAC5 + + + When Trouble Comes My Way - Part 1 + When Trouble Comes My Way - Part 1 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=WhenTroubleComesMyWay-Part1_289&mediatype=movies&collection=opensource_movies"/><p>Sermon delivered by Pastor David Vos at Lake Palms Community Church, 380 Fulton Drive SE, Largo, FL 33771 on October 20, 2013..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Cinepack, Metadata, Ogg Video, Thumbnail, h.264</p> + https://archive.org/details/WhenTroubleComesMyWay-Part1_289 + https://archive.org/details/WhenTroubleComesMyWay-Part1_289 + Mon, 21 Oct 2013 04:34:48 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/ + movies/opensource_movies + + + + Pastor David Vos, Sermon, Lake Palms Community Church + + 02D291F021ED97C7B8AAC171695C5488C6E8B740 + + + Appreciation + Appreciation + <img width="160" style="padding-right:3px;float:left;" 
src="https://archive.org/services/get-item-image.php?identifier=scm-368703-appreciation&mediatype=movies&collection=SeattleCommunityMedia"/><p>More information about this show available at: Seattle Community Media.</p><p>This item belongs to: movies/SeattleCommunityMedia.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/scm-368703-appreciation + https://archive.org/details/scm-368703-appreciation + Mon, 21 Oct 2013 04:17:17 GMT + http://creativecommons.org/licenses/by-sa/3.0/ + movies/SeattleCommunityMedia + + + + + + + Self improvement + + F9CA049C2858C148EE3A81011A48AC5248708D4B + + + دروس عامة للشيخ مصطفى العدوي + دروس عامة للشيخ مصطفى العدوي + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=4-islamic-1151&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Cinepack, Metadata, Ogg Video, Ogg Vorbis, Thumbnail, VBR MP3, Windows Media, h.264</p> + https://archive.org/details/4-islamic-1151 + https://archive.org/details/4-islamic-1151 + Mon, 21 Oct 2013 04:15:51 GMT + movies/opensource_movies + + + + + + + + C9F0489AC735484AE388B39D1E509B5FC0AE2E9F + + + Chris Whitley Live at Hanbury Ballroom on 2003-09-15 + Chris Whitley Live at Hanbury Ballroom on 2003-09-15 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=cw2003-09-15.flac16&mediatype=etree&collection=ChrisWhitley"/><p>Chris Whitley Hanbury Ballroom, Brighton 15 September 2003 Bandridge BMC530 stereo condenser mic > Sony MZ-R91 MD > Philips CDR-760 > EAC >FLAC disc one: 01. new lost world 02. to joy 03. 
crystal ship 04....</p><p>This item belongs to: etree/ChrisWhitley.</p><p>This item has files of the following types: Archive BitTorrent, Checksums, Flac, Flac FingerPrint, Metadata, Ogg Vorbis, Text, VBR MP3</p> + https://archive.org/details/cw2003-09-15.flac16 + https://archive.org/details/cw2003-09-15.flac16 + Mon, 21 Oct 2013 04:12:23 GMT + etree/ChrisWhitley + + + + 0232B264B2349F7D97B6FD4656D8A42342918DC3 + + + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE CONTRACT (13452213099445)_.pdf + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE CONTRACT (13452213099445)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418097-collect-files-25456-political-file-2012-non&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418097-collect-files-25456-political-file-2012-non.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418097-collect-files-25456-political-file-2012-non + https://archive.org/details/418097-collect-files-25456-political-file-2012-non + Mon, 21 Oct 2013 04:02:20 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 67A8F0DB3FDC8602264446066F98925EBB5DDC46 + + + WBZ RNC R PRESIDENT ORD58090 FEDNATL INVOICE (13461642108071)_.pdf + WBZ RNC R PRESIDENT ORD58090 FEDNATL INVOICE (13461642108071)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418098-collect-files-25456-political-file-2012-non&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418098-collect-files-25456-political-file-2012-non.html.</p><p>This item belongs to: 
texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418098-collect-files-25456-political-file-2012-non + https://archive.org/details/418098-collect-files-25456-political-file-2012-non + Mon, 21 Oct 2013 04:02:08 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 344D9552AE1A4B7655275C3556123B8471722045 + + + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE ORDER (13452213088682)_.pdf + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE ORDER (13452213088682)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418099-collect-files-25456-political-file-2012-non&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418099-collect-files-25456-political-file-2012-non.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418099-collect-files-25456-political-file-2012-non + https://archive.org/details/418099-collect-files-25456-political-file-2012-non + Mon, 21 Oct 2013 04:01:56 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 68CE81DE137E079A4566C4FCF3ED4C5A8EC82082 + + + Wikimedia incremental dump files for the Swedish Wikisource on October 19, 2013 + Wikimedia incremental dump files for the Swedish Wikisource on October 19, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-svwikisource-20131019&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Swedish 
Wikisource that is generated by Wikimedia on October 19, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, BZIP2, GZIP, Metadata, Text</p> + https://archive.org/details/incr-svwikisource-20131019 + https://archive.org/details/incr-svwikisource-20131019 + Mon, 21 Oct 2013 04:01:54 GMT + web/wikimediadownloads + wiki, incremental, dumps, svwikisource, Swedish, Wikisource + + D5AAE1198D3729DF6C3D4FC61130F966863BA389 + + + Wikimedia incremental dump files for the Spanish Wiktionary on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wiktionary on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswiktionary-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wiktionary that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> + https://archive.org/details/incr-eswiktionary-20131020 + https://archive.org/details/incr-eswiktionary-20131020 + Mon, 21 Oct 2013 04:01:49 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswiktionary, Spanish, Wiktionary + + 333665884E6DA297C78B0A10A7B9A729210D8DF4 + + + WBZ JOE KENNEDY III D HOUSEMACD4 FED PIQ (13448697161381)_.pdf + WBZ JOE KENNEDY III D HOUSEMACD4 FED PIQ (13448697161381)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418100-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418100-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, 
Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418100-collect-files-25456-political-file-2012-federal + https://archive.org/details/418100-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:45 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + D6D73BBC1BFD646AF32AF7EC187565304C7FCA89 + + + mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + <img width="160" style="padding-rig +8000 +ht:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516&mediatype=image&collection=coverartarchive"/><p>No description available.</p><p>This item belongs to: image/coverartarchive.</p><p>This item has files of the following types: Archive BitTorrent, JPEG, JPEG 250px Thumb, JPEG 500px Thumb, JPEG Thumb, JSON, Metadata, Metadata Log, MusicBrainz Metadata</p> + https://archive.org/details/mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + https://archive.org/details/mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + Mon, 21 Oct 2013 04:01:38 GMT + image/coverartarchive + + 37636FB9D3094A1F605BB78DB6B786F8ABB29BCC + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED CONTRACT (13449762506021)_.pdf + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED CONTRACT (13449762506021)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418101-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418101-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, 
Single Page Processed JP2 ZIP</p> + https://archive.org/details/418101-collect-files-25456-political-file-2012-federal + https://archive.org/details/418101-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:34 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 258C467633F6C486084321E1E9542D3D1478A539 + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED ORDER (13448697139947)_.pdf + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED ORDER (13448697139947)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418102-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418102-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418102-collect-files-25456-political-file-2012-federal + https://archive.org/details/418102-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:20 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 8D8869D528E2E6622287400A580DC388A58CAEF4 + + + alexa20131017-24 + alexa20131017-24 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=alexa20131017-24&mediatype=web&collection=alexacrawls"/><p>Alexa crawl.</p><p>This item belongs to: web/alexacrawls.</p><p>This item has files of the following types: Metadata</p> + https://archive.org/details/alexa20131017-24 + https://archive.org/details/alexa20131017-24 + Mon, 21 Oct 2013 04:01:09 GMT + web/alexacrawls + crawldata + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED CONTRACT (13449762527174)_.pdf + 
WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED CONTRACT (13449762527174)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418103-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418103-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418103-collect-files-25456-political-file-2012-federal + https://archive.org/details/418103-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:06 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + F9687CF40B8065FB8AF30042BFBB2ED3F62ED6F4 + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED ORDER (13448697150704)_.pdf + WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED ORDER (13448697150704)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418104-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418104-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418104-collect-files-25456-political-file-2012-federal + https://archive.org/details/418104-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:53 GMT + texts/opensource + documentcloud, propublica, 
5290-jeremy-merrill + + CDCB99998DB6C9E281F90B69C7315F3192C9D8D9 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED CONTRACT (13449771554387)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED CONTRACT (13449771554387)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418105-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418105-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418105-collect-files-25456-political-file-2012-federal + https://archive.org/details/418105-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:39 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + F85C5F1543BCB96FF4CE64F33C704095934452AC + + + Wikimedia incremental dump files for the Spanish Wikivoyage on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wikivoyage on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswikivoyage-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wikivoyage that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> + https://archive.org/details/incr-eswikivoyage-20131020 + https://archive.org/details/incr-eswikivoyage-20131020 + Mon, 21 Oct 2013 04:00:33 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswikivoyage, Spanish, 
Wikivoyage + + F2E6A9CCC64109E4A1F1548F23A5AC55AE317E84 + + + Eso No 16 10 13 Nota Gari + Eso No 16 10 13 Nota Gari + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=EsoNo161013NotaGari&mediatype=audio&collection=opensource_audio"/><p>Programa Eso no!! nota con Gary de Mr. White!!!.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Ogg Vorbis, VBR MP3</p> + https://archive.org/details/EsoNo161013NotaGari + https://archive.org/details/EsoNo161013NotaGari + Mon, 21 Oct 2013 04:00:32 GMT + audio/opensource_audio + + + "audios eso no" + + 97ACF3820CD8DEE4DBE7AD0F46AA00EAE8F735BB + + + Wikimedia incremental dump files for the Swedish Wikiquote on October 19, 2013 + Wikimedia incremental dump files for the Swedish Wikiquote on October 19, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-svwikiquote-20131019&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Swedish Wikiquote that is generated by Wikimedia on October 19, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, BZIP2, GZIP, Metadata, Text</p> + https://archive.org/details/incr-svwikiquote-20131019 + https://archive.org/details/incr-svwikiquote-20131019 + Mon, 21 Oct 2013 04:00:29 GMT + web/wikimediadownloads + wiki, incremental, dumps, svwikiquote, Swedish, Wikiquote + + 43439D621A3C1BB74D39948590A9193B2D165D4B + + + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED INVOICE (13455816090527)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED INVOICE (13455816090527)_.pdf + <img width="160" style="padding-right:3px;float:left;" 
src="https://archive.org/services/get-item-image.php?identifier=418106-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418106-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418106-collect-files-25456-political-file-2012-federal + https://archive.org/details/418106-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:25 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 4A0A6C8BAECB9A545B3BDC40465ABB74442319A0 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED ORDER (13449696816301)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED ORDER (13449696816301)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418107-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418107-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418107-collect-files-25456-political-file-2012-federal + https://archive.org/details/418107-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:13 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + F76FD396A322CC62A4197DE088CA6DCF833558A7 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FED CONTRACT (13449777167988)_.pdf + WBZ 
ELIZABETH WARREN D SENATEMA ORD58029 FED CONTRACT (13449777167988)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418108-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418108-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418108-collect-files-25456-political-file-2012-federal + https://archive.org/details/418108-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:00 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + E2E238B9DF57CC47B4982E375A0F89E75CA9EAC2 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FEDNATL INVOICE (13461639080781)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FEDNATL INVOICE (13461639080781)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418109-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418109-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418109-collect-files-25456-political-file-2012-federal + https://archive.org/details/418109-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:47 GMT + texts/opensource + documentcloud, propublica, 
5290-jeremy-merrill + + 78B49781C5AADB75726B576A00313D2C145074AE + + + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FED ORDER (13449696816990)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FED ORDER (13449696816990)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418110-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418110-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418110-collect-files-25456-political-file-2012-federal + https://archive.org/details/418110-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:35 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 221C3ED39EA1046DA1442C242B2EDF35114991FE + + + Silo 2.2 + Silo 2.2 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=Silo2.2_201310&mediatype=texts&collection=opensource_media"/><p>Unbiased reviews of the Arc'teryx Silo 50 winter pack by real people. 
Silo 2.2 is now available If you have not already updated, you can follow the link below to download the latest version, which is a free upgrade for all Silo 2 owners....</p><p>This item belongs to: texts/opensource_media.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Windows Executable</p> + https://archive.org/details/Silo2.2_201310 + https://archive.org/details/Silo2.2_201310 + Mon, 21 Oct 2013 03:59:34 GMT + texts/opensource_media + Silo 2.2 + + 2E63B46A804C6EDF3A3200D0202E1DE357B2FF12 + + + Rocket Power 3x 17 Losers Weepers ~ Reggie The Movie [ Unknown Encoder] + Rocket Power 3x 17 Losers Weepers ~ Reggie The Movie [ Unknown Encoder] + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=RocketPower3x17LosersWeepersReggieTheMovieUnknownEncoder&mediatype=movies&collection=opensource_movies"/><p>Season 3 Episode 17.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Cinepack, Metadata, Ogg Video, Thumbnail, h.264</p> + https://archive.org/details/RocketPower3x17LosersWeepersReggieTheMovieUnknownEncoder + https://archive.org/details/RocketPower3x17LosersWeepersReggieTheMovieUnknownEncoder + Mon, 21 Oct 2013 03:59:26 GMT + movies/opensource_movies + + + + animation + + 7C9CAF48A8554A9E68AB6F7B25FD2EC2A93C8632 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58120 FED CONTRACT (13460805069185)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58120 FED CONTRACT (13460805069185)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418111-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418111-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the 
following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418111-collect-files-25456-political-file-2012-federal + https://archive.org/details/418111-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:21 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 0E9E3AAD85082031C2525F7A9568048F8B1E8E7E + + + Wikimedia incremental dump files for the Spanish Wikiversity on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wikiversity on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswikiversity-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wikiversity that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> + https://archive.org/details/incr-eswikiversity-20131020 + https://archive.org/details/incr-eswikiversity-20131020 + Mon, 21 Oct 2013 03:59:17 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswikiversity, Spanish, Wikiversity + + AD372EA58CF3006D8471BA41026C62BD1861078A + + + WBZ WARREN FOR SENATE D SENATEMA ORD58120 FED ORDER (13457355056808)_.pdf + WBZ WARREN FOR SENATE D SENATEMA ORD58120 FED ORDER (13457355056808)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418112-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418112-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following 
types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418112-collect-files-25456-political-file-2012-federal + https://archive.org/details/418112-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:08 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 012A94D6BD2802A88F233C09E8C70437682D2131 + + + Wikimedia incremental dump files for the Swedish Wikinews on October 19, 2013 + Wikimedia incremental dump files for the Swedish Wikinews on October 19, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-svwikinews-20131019&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Swedish Wikinews that is generated by Wikimedia on October 19, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, BZIP2, GZIP, Metadata, Text</p> + https://archive.org/details/incr-svwikinews-20131019 + https://archive.org/details/incr-svwikinews-20131019 + Mon, 21 Oct 2013 03:59:05 GMT + web/wikimediadownloads + wiki, incremental, dumps, svwikinews, Swedish, Wikinews + + E501281C8B708DE164F23964725A87773F582478 + + + Webwide Crawldata 2013-10-20T22:03:54PDT to 2013-10-20T16:43:21PDT + Webwide Crawldata 2013-10-20T22:03:54PDT to 2013-10-20T16:43:21PDT + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=WIDE-20131020220354-crawl422&mediatype=web&collection=wide00009"/><p>Internet Archive crawldata from Webwide Crawl, captured by crawl422.us.archive.org:wide from Sun Oct 20 22:03:54 PDT 2013 to Sun Oct 20 16:43:21 PDT 2013..</p><p>This item belongs to: web/wide00009.</p><p>This item has files of the following types: Item CDX Index, Item CDX 
Meta-Index, Metadata, Text, WARC CDX Index, Web ARChive GZ</p> + https://archive.org/details/WIDE-20131020220354-crawl422 + https://archive.org/details/WIDE-20131020220354-crawl422 + Mon, 21 Oct 2013 03:59:01 GMT + web/wide00009 + crawldata + + + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED CONTRACT (13460805058183)_.pdf + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED CONTRACT (13460805058183)_.pdf + <img width="160" style="padding-right:3px;floa +302d +t:left;" src="https://archive.org/services/get-item-image.php?identifier=418113-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418113-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418113-collect-files-25456-political-file-2012-federal + https://archive.org/details/418113-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:54 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 96238A946E099625F0281BEEF4A336EF1A1C447F + + + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED ORDER (13457355068812)_.pdf + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED ORDER (13457355068812)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418114-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418114-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, 
DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418114-collect-files-25456-political-file-2012-federal + https://archive.org/details/418114-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:41 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 67E3C7FAF4AC13E3E8AD70AEBFDBDED581CE4BA0 + + + gov.uscourts.flmd.283032 + gov.uscourts.flmd.283032 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.flmd.283032&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.flmd.283032 + https://archive.org/details/gov.uscourts.flmd.283032 + Mon, 21 Oct 2013 03:58:35 GMT + texts/usfederalcourts + + 6252A903C01875100635D5011543196896010583 + + + WBZ SCOTT BROWN R SENATEMA ORD57975 FED CONTRACT (13448868050432)_.pdf + WBZ SCOTT BROWN R SENATEMA ORD57975 FED CONTRACT (13448868050432)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418115-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418115-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + 
https://archive.org/details/418115-collect-files-25456-political-file-2012-federal + https://archive.org/details/418115-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:26 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 98F1A74C04D215AC2175D27B24C73C24E1F77A43 + + + WBZ SCOTT BROWN FOR US SENATE R SENATEMA ORD57975 FED INVOICE (13457541352753)_.pdf + WBZ SCOTT BROWN FOR US SENATE R SENATEMA ORD57975 FED INVOICE (13457541352753)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418116-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418116-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418116-collect-files-25456-political-file-2012-federal + https://archive.org/details/418116-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:12 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 1A93DFBE2B84324B987F7C3EF91FDC3299E5BB2A + + + Wikimedia incremental dump files for the Spanish Wikisource on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wikisource on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswikisource-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wikisource that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> 
+ https://archive.org/details/incr-eswikisource-20131020 + https://archive.org/details/incr-eswikisource-20131020 + Mon, 21 Oct 2013 03:58:01 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswikisource, Spanish, Wikisource + + BF69AA565071874D71ABB5D3D4FECFA85448C261 + + + WBZ SCOTT BROWN R SENATEMA ORD58022 FED CONTRACT_.pdf (13449774486241)_.pdf + WBZ SCOTT BROWN R SENATEMA ORD58022 FED CONTRACT_.pdf (13449774486241)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418117-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418117-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418117-collect-files-25456-political-file-2012-federal + https://archive.org/details/418117-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:00 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 6B645B4FBAD190864C10A471F96FA25AA67B88EE + + + Alain Le Bussy - Fata Care Se Temea De Apa 0.9 07 + Alain Le Bussy - Fata Care Se Temea De Apa 0.9 07 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=Alain_Le_Bussy-Fata_Care_Se_Temea_De_Apa_0_9_07__&mediatype=texts&collection=opensource"/><p>Alain Le Bussy - Fata Care Se Temea De Apa 0.9 07 Romanian Book. Aceasta carte face parte din Colectia 10.000 de carti. 
Puteti downloada aici un fisier zip cu intreaga colectie 10.000 de carti (2 GB) sau puteti alege doar cartea dorita....</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF, Word Document</p> + https://archive.org/details/Alain_Le_Bussy-Fata_Care_Se_Temea_De_Apa_0_9_07__ + https://archive.org/details/Alain_Le_Bussy-Fata_Care_Se_Temea_De_Apa_0_9_07__ + Mon, 21 Oct 2013 03:57:59 GMT + http://creativecommons.org/publicdomain/zero/1.0/ + texts/opensource + 10000 carti, Alain Le Bussy, Fata Care Se Temea De Apa 0.9 07, carti, carte, online, pdf, download, romana, carti in limba romana, romania, romanian, carti pdf, Books in Romanian language, ro-books, kjb, ftp.kjb.ro, 10000, 10000 carti + + 6F89BFC6BE691DC13FE417E2B09D81BE90DA24EA + + + + +0 + + + +WARC/1.0 +WARC-Type: resource +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Target-URI: metadata://gnu.org/software/wget/warc/MANIFEST.txt +WARC-Date: 2013-10-21T21:53:18Z +WARC-Block-Digest: sha1:MDYPVAS3DVOGNNDRGXW6RHSIQJHDUL6H +Content-Type: text/plain +Content-Length: 48 + + + + +WARC/1.0 +WARC-Type: resource +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Target-URI: metadata://gnu.org/software/wget/warc/wget_arguments.txt +WARC-Date: 2013-10-21T21:53:18Z +WARC-Block-Digest: sha1:4RRGL67PFUKCW27T2JYSPL7U2WU3UVAP +Content-Type: text/plain +Content-Length: 54 + +"-i" "urls.txt" "-O" "-" "--warc-file=IAH-urls-wget" + + +WARC/1.0 +WARC-Type: resource +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: metadata://gnu.org/software/wget/warc/wget.log +WARC-Date: 2013-10-21T21:53:18Z +WARC-Block-Digest: sha1:WTDCWU2CDUXK2GWFEVX7NWJNCTC25MVC +Content-Type: text/plain +Content-Length: 5647 + +Opening WARC file ‘IAH-urls-wget.warc.gz’. 
+ +urls.txt: Invalid URL dns:www.archive.org: Unsupported scheme ‘dns’ +--2013-10-21 22:53:06-- http://www.archive.org/robots.txt +Resolving www.archive.org... 207.241.224.2 +Connecting to www.archive.org|207.241.224.2|:80... connected. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/robots.txt [following] + + 0K 100% 9.03M=0s + +--2013-10-21 22:53:06-- http://archive.org/robots.txt +Resolving archive.org... 207.241.224.2 +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: 727 [text/plain] +Saving to: ‘STDOUT’ + + 0K 100% 826K=0.001s + +2013-10-21 22:53:07 (826 KB/s) - written to stdout [727/727] + +--2013-10-21 22:53:07-- http://www.archive.org/ +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/index.php [following] + + 0K 100% 19.2M=0s + +--2013-10-21 22:53:07-- http://archive.org/index.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 301 Moved Permanently +Location: https://archive.org [following] + + 0K 0.00 =0s + +--2013-10-21 22:53:08-- https://archive.org/ +Connecting to archive.org|207.241.224.2|:443... connected. +HTTP request sent, awaiting response... 200 OK +Length: unspecified [text/html] +Saving to: ‘STDOUT’ + + 0K .......... .......... ......... 9.59M=0.003s + +2013-10-21 22:53:10 (9.59 MB/s) - written to stdout [30550] + +--2013-10-21 22:53:10-- http://www.archive.org/index.php +Connecting to www.archive.org|207.241.224.2|:80... connected. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/index.php [following] + + 0K 100% 10.2M=0s + +--2013-10-21 22:53:11-- http://archive.org/index.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 
301 Moved Permanently +Location: https://archive.org [following] + + 0K 0.00 =0s + +--2013-10-21 22:53:11-- https://archive.org/ +Connecting to archive.org|207.241.224.2|:443... connected. +HTTP request sent, awaiting response... 200 OK +Length: unspecified [text/html] +Saving to: ‘STDOUT’ + + 0K .......... .......... ......... 96.9K=0.3s + +2013-10-21 22:53:14 (96.9 KB/s) - written to stdout [30459] + +--2013-10-21 22:53:14-- http://www.archive.org/images/logoc.jpg +Connecting to www.archive.org|207.241.224.2|:80... connected. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/images/logoc.jpg [following] + + 0K 100% 15.4M=0s + +--2013-10-21 22:53:14-- http://archive.org/images/logoc.jpg +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: 1662 (1.6K) [image/jpeg] +Saving to: ‘STDOUT’ + + 0K . 100% 122M=0s + +2013-10-21 22:53:14 (122 MB/s) - written to stdout [1662/1662] + +--2013-10-21 22:53:14-- http://www.archive.org/images/go-button-gateway.gif +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/images/go-button-gateway.gif [following] + + 0K 100% 11.0M=0s + +--2013-10-21 22:53:15-- http://archive.org/images/go-button-gateway.gif +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: 1124 (1.1K) [image/gif] +Saving to: ‘STDOUT’ + + 0K . 100% 97.4M=0s + +2013-10-21 22:53:15 (97.4 MB/s) - written to stdout [1124/1124] + +--2013-10-21 22:53:15-- http://www.archive.org/images/star.png +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/images/star.png [following] + + 0K 100% 17.1M=0s + +--2013-10-21 22:53:15-- http://archive.org/images/star.png +Reusing existing connection to www.archive.org:80. 
+HTTP request sent, awaiting response... 200 OK +Length: 1016 [image/png] +Saving to: ‘STDOUT’ + + 0K 100% 74.5M=0s + +2013-10-21 22:53:16 (74.5 MB/s) - written to stdout [1016/1016] + +--2013-10-21 22:53:16-- http://www.archive.org/services/collection-rss.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/services/collection-rss.php [following] + + 0K 100% 17.1M=0s + +--2013-10-21 22:53:16-- http://archive.org/services/collection-rss.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: unspecified [text/xml] +Saving to: ‘STDOUT’ + + 0K .......... .......... .......... .......... .......... 54.2K + 50K .......... .......... ..... 92.7K=1.2s + +2013-10-21 22:53:18 (63.2 KB/s) - written to stdout [77773] + +FINISHED --2013-10-21 22:53:18-- +Total wall clock time: 12s +Downloaded: 7 files, 140K in 1.5s (92.5 KB/s) + + From 6713e36e5a5415d73d9d874d5d51e367eeab309f Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 7 Mar 2014 22:29:31 +0000 Subject: [PATCH 45/86] Added a cross-reference to the ARC implementation. --- src/main/java/org/archive/io/warc/WARCReaderFactory.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/archive/io/warc/WARCReaderFactory.java b/src/main/java/org/archive/io/warc/WARCReaderFactory.java index a02adf03..c3e5baa0 100644 --- a/src/main/java/org/archive/io/warc/WARCReaderFactory.java +++ b/src/main/java/org/archive/io/warc/WARCReaderFactory.java @@ -100,11 +100,15 @@ public static ArchiveReader get(final String s, final InputStream is, atFirstRecord); } + /* + * Note that the ARC companion does this differently, with quite a lot of duplication. 
+ * + * @see org.archive.io.arc.ARCReaderFactory.getArchiveReader(String, InputStream, boolean) + */ protected ArchiveReader getArchiveReader(final String f, final InputStream is, final boolean atFirstRecord) throws IOException { - // Check if it's compressed: - // TODO Currently relies on the file extension, but this should all really sniff the content properly. + // Check if it's compressed, based on file extension. if( f.endsWith(".gz") ) { return new CompressedWARCReader(f, is, atFirstRecord); } else { From e2635cfdce44b234895668b134fbecfe6ded13ac Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 7 Mar 2014 22:38:03 +0000 Subject: [PATCH 46/86] Added changes to log. --- CHANGES.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index d5c720ab..a3c8bbac 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,9 @@ -1.1.1 +1.1.2 ----- +* Fixed support for reading uncompressed WARCs, along with some unit testing. (https://github.com/iipc/webarchive-commons/pull/12) +1.1.1 +----- * Renamed from commons-webarchive to webarchive-commons (https://github.com/iipc/webarchive-commons/pull/8) * Cope with malformed GZip extra fields as produced by wget 1.14 (https://github.com/iipc/webarchive-commons/pull/10) * Switch to httpcomponents, and add IA deployment information. 
(https://github.com/iipc/webarchive-commons/pull/11) From af538006344de6e3e67a4e028d0341d8ea6987be Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 7 Mar 2014 22:51:17 +0000 Subject: [PATCH 47/86] [maven-release-plugin] prepare release webarchive-commons-1.1.2 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 52cef01e..8078499f 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.2-SNAPSHOT + 1.1.2 jar webarchive-commons From 5713b2be81f79fb20d8057ac55f5457fbc51e9d0 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 7 Mar 2014 22:51:22 +0000 Subject: [PATCH 48/86] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8078499f..a8493ffd 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.2 + 1.1.3-SNAPSHOT jar webarchive-commons From 399c58eb1a0b84f0670d6c9ffbc81fc455f78a7b Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Sat, 8 Mar 2014 08:25:23 +0000 Subject: [PATCH 49/86] Removing dist override again to try to fix Travis. --- pom.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pom.xml b/pom.xml index a8493ffd..e542d616 100644 --- a/pom.xml +++ b/pom.xml @@ -245,6 +245,7 @@ +
From 3efbffdd3628b165616707e7fa849380460618b2 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 13 Mar 2014 18:59:12 -0700 Subject: [PATCH 50/86] avoid pulling in logback, which is wreaking havoc on logging in apps using this library --- pom.xml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9c8698c7..4e635475 100644 --- a/pom.xml +++ b/pom.xml @@ -48,7 +48,6 @@ git@github.com:iipc/webarchive-commons.git - UTF-8 ${maven.build.timestamp} @@ -165,6 +164,12 @@ dsiutils 2.0.12 compile + + + ch.qos.logback + logback-classic + + org.apache.httpcomponents From b45ea54b82c363d0987ee32893cb33c96ea0f701 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 4 Apr 2014 12:02:49 +0100 Subject: [PATCH 51/86] Added potential test case and lots of debug logging. --- .../java/org/archive/io/ArchiveRecord.java | 5 + .../org/archive/io/arc/ARCReaderFactory.java | 2 +- .../java/org/archive/io/arc/ARCRecord.java | 4 + .../archive/io/arc/ARCReaderFactoryTest.java | 61 + ...080430204825-00000-blackbook-truncated.arc | 1006 +++++++++++++++++ 5 files changed, 1077 insertions(+), 1 deletion(-) create mode 100644 src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java create mode 100644 src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc diff --git a/src/main/java/org/archive/io/ArchiveRecord.java b/src/main/java/org/archive/io/ArchiveRecord.java index 63bfe628..a3cab4ba 100644 --- a/src/main/java/org/archive/io/ArchiveRecord.java +++ b/src/main/java/org/archive/io/ArchiveRecord.java @@ -292,10 +292,13 @@ public String getDigestStr() { } protected void incrementPosition() { + System.err.println("incrementPostion()"); this.position++; } protected void incrementPosition(final long incr) { + new Exception().printStackTrace(); + System.err.println("incrementPostion("+incr+")"); this.position += incr; } @@ -404,6 +407,8 @@ public boolean hasContentHeaders() { } protected void setBodyOffset(int 
bodyOffset) { + new Exception().printStackTrace(); + System.err.println("setBodyOffset("+bodyOffset+")"); this.position = bodyOffset; } } diff --git a/src/main/java/org/archive/io/arc/ARCReaderFactory.java b/src/main/java/org/archive/io/arc/ARCReaderFactory.java index e7dc1625..ce12c4bb 100644 --- a/src/main/java/org/archive/io/arc/ARCReaderFactory.java +++ b/src/main/java/org/archive/io/arc/ARCReaderFactory.java @@ -147,7 +147,7 @@ protected ArchiveReader getArchiveReader(final String arc, possiblyWrapped.mark(100); boolean compressed = testCompressedARCStream(possiblyWrapped); possiblyWrapped.reset(); - + if (compressed) { return new CompressedARCReader(arc, possiblyWrapped, atFirstRecord); } else { diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java index 21bea07c..7f3bf653 100644 --- a/src/main/java/org/archive/io/arc/ARCRecord.java +++ b/src/main/java/org/archive/io/arc/ARCRecord.java @@ -344,6 +344,8 @@ private int getTokenizedHeaderLine(final InputStream stream, // save verbatim header String this.headerString = StringUtils.join(list," "); + System.err.println("This "+this.headerString); + return read; } @@ -589,6 +591,7 @@ private InputStream readHttpHeader() throws IOException { statusLine = EncodingUtil.getString(statusBytes, 0, statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); + System.err.println("statusLine: "+statusLine); // If a null or DELETED break immediately if ((statusLine == null) || statusLine.startsWith("DELETED")) { @@ -602,6 +605,7 @@ private InputStream readHttpHeader() throws IOException { } // Add bytes read to error "offset" to add to position + System.err.println("BYTES: "+new String(statusBytes)); errOffset += statusBytes.length; } diff --git a/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java new file mode 100644 index 00000000..090ccef7 --- /dev/null +++ 
b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java @@ -0,0 +1,61 @@ +package org.archive.io.arc; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.io.RandomAccessFile; + +import org.archive.io.ArchiveReader; +import org.archive.io.ArchiveRecord; + +import junit.framework.TestCase; + +/** + * + * Based on https://github.com/iipc/openwayback/pull/104/files + * + * @author csr@statsbiblioteket.dk (Colin Rosenthal) + * + */ +public class ARCReaderFactoryTest extends TestCase { + + private File testfile1 = new File("src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc"); + + /** + * Test reading uncompressed arcfile for issue + * https://github.com/iipc/openwayback/issues/101 + * @throws Exception + */ + public void testGetResource() throws Exception { + this.offsetResourceTest(testfile1, 1515, "archive.org/robots.txt" ); + this.offsetResourceTest(testfile1, 36420, "archive.org/services/collection-rss.php" ); + } + + private void offsetResourceTest( File testfile, long offset, String uri ) throws Exception { + RandomAccessFile raf = new RandomAccessFile(testfile, "r"); + raf.seek(offset); + InputStream is = new FileInputStream(raf.getFD()); + String fPath = testfile.getAbsolutePath(); + ArchiveReader reader = ARCReaderFactory.get(fPath, is, false); + // This one works: + //ArchiveReader reader = ARCReaderFactory.get(testfile, offset); + ArchiveRecord record = reader.get(); + System.out.println("Position:"+record.getPosition()); + + final String url = record.getHeader().getUrl(); + System.out.println("Got URL: "+url); + assertEquals("URL of record is not as expected.", uri, url); + + final long position = record.getPosition(); + final long recordLength = record.getHeader().getLength(); + System.out.println("Position:"+position); + System.out.println("Length:"+recordLength); + assertTrue("Position " + position + " is after end of record " 
+ recordLength, position <= recordLength); + + // Clean up: + if( raf != null ) + raf.close(); + } + +} diff --git a/src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc b/src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc new file mode 100644 index 00000000..3cbffb81 --- /dev/null +++ b/src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc @@ -0,0 +1,1006 @@ +filedesc://IAH-20080430204825-00000-blackbook-truncated.arc 0.0.0.0 20080430204825 text/plain 1300 +1 1 InternetArchive +URL IP-address Archive-date Content-type Archive-length + + +Heritrix @VERSION@ http://crawler.archive.org +blackbook +192.168.1.13 +archive.org-shallow +archive.org shallow +Admin +2008-04-30T20:48:24+00:00 +Mozilla/5.0 (compatible; heritrix/1.14.0 +http://crawler.archive.org) +archive-crawler-agent@lists.sourceforge.net +classic +ARC file version 1.1 +http://www.archive.org/web/researcher/ArcFileFormat.php + +dns:www.archive.org 68.87.76.178 20080430204825 text/dns 56 +20080430204825 +www.archive.org. 589 IN A 207.241.229.39 +http://www.archive.org/robots.txt 207.241.229.39 20080430204825 text/plain 782 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:24 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Sat, 02 Feb 2008 19:40:44 GMT +ETag: "47c3-1d3-11134700" +Accept-Ranges: bytes +Content-Length: 467 +Connection: close +Content-Type: text/plain; charset=UTF-8 + +############################################## +# +# Welcome to the Archive! +# +############################################## +# Please crawl our files. +# We appreciate if you can crawl responsibly. +# Stay open! +############################################## +User-agent: * +Disallow: /nothing---please-crawl-us-- + +# slow down the ask jeeves crawler which was hitting our SE a little too fast +# via collection pages. 
--Feb2008 tracey-- +User-agent: Teoma +Crawl-Delay: 10 +http://www.archive.org/ 207.241.229.39 20080430204826 text/html 680 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:25 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Wed, 09 Jan 2008 23:18:29 GMT +ETag: "47ac-16e-4f9e5b40" +Accept-Ranges: bytes +Content-Length: 366 +Connection: close +Content-Type: text/html; charset=UTF-8 + + + + + + + +
+Please visit our website at: +http://www.archive.org + + +http://www.archive.org/index.php 207.241.229.39 20080430204826 text/html 29000 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:25 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +X-Powered-By: PHP/5.0.5-2ubuntu1.4 +Set-Cookie: PHPSESSID=657fa9749e9426f2ffa75f14b54ed4ac; path=/; domain=.archive.org +Connection: close +Content-Type: text/html; charset=UTF-8 + + + + + + + Internet Archive + + + + + + + + + + + + +
+ (logo) + + + + + + +
+ +Web | +Moving Images | +Texts | +Audio | +Software | +Education | +Patron Info | +About IA
Forums | FAQs | Contributions | Jobs | Donate
+ + + + + +
+

+ Search: + + + + + + + +

+
+
+
+ (navigation image)
+
+ + + + + + + + + + + + + + + + +
UploadAnonymous User (login or join us) 
   +
Announcements (more)
+
   +
Web85 billion pages
+
+
+
+ + + + + + + + +
+ (wayback logo) + + +
+ + + + Advanced Search + +
+
+
+
   +
Welcome to the ArchiveSee recent additions in RSS
+
+
+The Internet Archive is building a digital library of Internet + sites and other cultural artifacts in digital form. Like a paper + library, we provide free access to researchers, historians, + scholars, and the general public.
+
  
+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
   +
Moving Images
 115,646 movies
movies icon
+
+
+
Browse   + + (by keyword)
+
   +
Live Music Archive
 48,893 concerts
etree icon
+
+
+
Browse   + + (by band)
+
   +
Audio
 250,854 recordings
audio icon
+
+
+
Browse   + + (by keyword)
+
   +
Texts
 395,004 texts
texts icon
+
+
+
Browse   + + (by keyword)
+
  
   +
Curator's Choice (more)
+
(movies pick)
A Few Good G-Men
Randall Glass, the maker of "Warthog Jump," re-creates in "A Few Good G-Men" an entire scene from...
+
   +
Curator's Choice (more)
+
(etree pick)
Grateful Dead Live at Nashville Municipal...
Set 1 Sugaree Beat It On Down The Line Candyman Me And My Uncle -> Big River Stagger Lee Looks Like...
+
   +
Curator's Choice (more)
+
(audio pick)
Zanstones - Slaakhuis: Live in Rotterdam, Holland
Zanstones confuses the dutch masses with this live display of wacked rhythms, whacked vocals, and...
+
   +
Curator's Choice (more)
+
  
   +
Recent Reviews
+
   +
Recent Reviews
+
   +
Recent Reviews
+
   +
Recent Reviews
+
  
+ + +
+ + + +
   +
+ + + + + + + + + + + + + +
Most recent posts (write a post by going to a forum) more...
Subject Poster Forum RepliesViewsDate
Re: Making a mix for a chick I know... William Tell GratefulDead 0 6 20 minutes ago
Re: Bob's shorts not going into archives BobsShortShorts GratefulDead 0 9 26 minutes ago
Re: Thanks to All airgarcia416 GratefulDead 0 5 26 minutes ago
Re: Bob's shorts not going into archives sydthecat2 GratefulDead 0 8 36 minutes ago
Re: What is the worst-reviewed feature film on IA? RipJarvis feature_films 0 9 50 minutes ago
Re: Playin' In The Band...all day and all night sydthecat2 GratefulDead 0 11 58 minutes ago
Re: Playin' In The Band...all day and all night rastamon GratefulDead 0 16 1 hour ago
Re: Making a mix for a chick I know... caspersvapors GratefulDead 1 11 1 hour ago
Re: Bob's shorts not going into archives rastamon GratefulDead 0 11 1 hour ago
Re: Bob's shorts not going into archives bluedevil GratefulDead 1 13 1 hour ago
+
  
+ + +
  + 
+ + +
+

Skin: classic | columns | custom!
+ + Terms of Use (10 Mar 2001) +

+ +http://www.archive.org/images/logoc.jpg 207.241.229.39 20080430204829 image/jpeg 1963 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:28 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Mon, 16 Jun 2003 22:28:51 GMT +ETag: "34dc-67e-2ed02ec0" +Accept-Ranges: bytes +Content-Length: 1662 +Connection: close +Content-Type: image/jpeg + +JFIFddAdobe ImageReadyDucky<Adobed   + + + +     8F !1AQqa"B2R#Sc$T%'!1qAa2"B3 ?P@a@€ Pj,($@ %i Q6;eH0Yz[,3TRhL0AR:(cq ?0SBrJҋ$3&9BAPH +ƱUOAv_O77\Q]Ɣ,) +R7ŠU4ٗшeB:%n'Eq y- )H[%TR{;4*26n.IQp7;|-F8N}|tInތ}RDwPΡ1&L`{Ԋި'w Jb$ I>b] +-z;%ԭKY<*sjJ=}.?]Qn*bg?Fǟ/fi__:V۪?'\xdP5GKu:㶱罠~jcas&AsZdX +Pؑ̚G-,VoC/#%>TwIDmr9%'F $O?w}OFӋ*#{%Zy +W rs]2Ƅ&.5)ISd)7J[G}IYGMͪި,*4oP ʱjzJP17 p%]ɁiK31kAiP$90kVD1wmĞ"F2rXmˤFxp_ƩO.=ίsm|j}K~`e)Ru ^ַVPC%J> ʽ4}<6K +xfv"_2[?ۺ@ *4p3Q{rJ5yk'>c 艂]pCSjyH*O:`<)w@ݖnߟzᦃ寑Ĭf3d2c1c1b0a0^/\.Z-W+e3c2a1_0[.Y-W,V+h5h7l9l;n=p?pArCtFvIyLzL{N}R}R~SԀUՁW׆]׈`َiےn۔pܖsޚxޜ{ߝ|ࢃ⦈⧉岘洛縟躣ȵf5h7n?䮔!,@Coz0>Wq& y'HAFp@Ǔ(=.YB$ q'M/0q@A¨&QR48In``iC +1܄礞=V04Sn\"mxTQPA a$ +pI$[$R C㕐IUHYc̖-_Ȍ):l(q>)t' ׉ Dh3{% ,&d6D3H`(GC4JP@C;http://www.archive.org/images/star.png 207.241.229.39 20080430204830 image/png 564 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:29 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Sun, 27 Feb 2005 21:35:31 GMT +ETag: "358d-109-f15f4ec0" +Accept-Ranges: bytes +Content-Length: 265 +Connection: close +Content-Type: image/png + +PNG + + IHDR e^|3PLTEXtRNS@fbKGDH pHYs  tIME; BFoOIDATUQ0P霓?P1~@a 9?C31aѻ__ucTCԑJٕ4xW{2~aIENDB`http://www.archive.org/services/collection-rss.php 207.241.229.39 20080430204830 text/xml 50832 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:29 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +X-Powered-By: PHP/5.0.5-2ubuntu1.4 +Connection: close +Content-Type: text/xml + + + + + http://www.archive.org + Internet Archive + The most recent additions to the Internet Archive 
collections. This RSS feed is generated dynamically + tracey@archive.org + Wed, 30 Apr 2008 20:48:29 GMT + + http://www.archive.org/images/logo.jpg + Internet Archive + http://www.archive.org + + + ArtTECHtonic 5 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Arttechtonic5&mediatype=audio&collection=opensource_audio"/><p>An interview with Gretchen Wagner, General Counsel and Secretary of ARTstor on Fair Use.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/Arttechtonic5 + http://www.archive.org/details/Arttechtonic5 + Wed, 30 Apr 2008 20:44:20 GMT + http://creativecommons.org/licenses/publicdomain/ + audio/opensource_audio + + fair_use, ARTstor, libraries + + + ۩۞۩ جبريل يسأل والنبى يجيب - ترجمه الأمام مسلم (30-4-2008)۩۞۩ للشيخ محمد حسان ۩۞۩ + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=55322&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: 24Kbps MP3, Cinepack, Metadata</p> + http://www.archive.org/details/55322 + http://www.archive.org/details/55322 + Wed, 30 Apr 2008 20:43:16 GMT + movies/opensource_movies + + + + alsrdaab + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=alsrdaab_125&mediatype=Other&collection=ourmedia"/><p>alsrdaab.</p><p>This item belongs to: Other/ourmedia.</p><p>This item has files of the following types: Metadata, ZIP</p> + http://www.archive.org/details/alsrdaab_125 + http://www.archive.org/details/alsrdaab_125 + Wed, 30 Apr 2008 20:43:01 GMT + Other/ourmedia + + alsrdaab + + + DOC-DEBUT: Super Amigos + <img 
width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=linktv_superamigos20080430&mediatype=movies&collection=opensource_movies"/><p>This action filled documentary follows five real-life "social wrestlers" in Mexico City who have capitalized on the popularity of Mexico's larger than life Lucha Libre wrestlers to fight for social justice rather than trophies..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: FLV 400k, MPEG4 350Kb, MPEG4 60Kb, Metadata</p> + http://www.archive.org/details/linktv_superamigos20080430 + http://www.archive.org/details/linktv_superamigos20080430 + Wed, 30 Apr 2008 20:41:22 GMT + movies/opensource_movies + + + + erwews + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=reit987erfed&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Windows Media</p> + http://www.archive.org/details/reit987erfed + http://www.archive.org/details/reit987erfed + Wed, 30 Apr 2008 20:40:57 GMT + movies/opensource_movies + + + + quran + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=mohadart&mediatype=movies&collection=opensource_movies"/><p>walo had sa3a.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Unknown</p> + http://www.archive.org/details/mohadart + http://www.archive.org/details/mohadart + Wed, 30 Apr 2008 20:38:31 GMT + movies/opensource_movies + + + + asdas + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=asdas_725&mediatype=Image&collection=ourmedia"/><p>saa.</p><p>This item belongs to: Image/ourmedia.</p><p>This 
item has files of the following types: Metadata</p> + http://www.archive.org/details/asdas_725 + http://www.archive.org/details/asdas_725 + Wed, 30 Apr 2008 20:37:02 GMT + Image/ourmedia + + saas + + + nibrasukul + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=nibrasukul&mediatype=texts&collection=opensource"/><p>nibrasukul.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Metadata, PDF</p> + http://www.archive.org/details/nibrasukul + http://www.archive.org/details/nibrasukul + Wed, 30 Apr 2008 20:36:56 GMT + texts/opensource + + nibrasukul + + + rtyed + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=hyu121&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Windows Media Audio</p> + http://www.archive.org/details/hyu121 + http://www.archive.org/details/hyu121 + Wed, 30 Apr 2008 20:36:53 GMT + movies/opensource_movies + + + + remomberfiler58.info + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=uictfwt&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Unknown</p> + http://www.archive.org/details/uictfwt + http://www.archive.org/details/uictfwt + Wed, 30 Apr 2008 20:21:58 GMT + movies/opensource_movies + + + + The committing magistrate, a treatise on the arrest, examination, bailing, and commitment of offenders, including fugitives from justice, with the remedial features of the writs of habeas corpus, certiorari, mandamus, and prohibition + <img width="160" style="padding-right:3px;float:left;" 
src="http://www.archive.org/services/get-item-image.php?identifier=committingmagist00flam&mediatype=texts&collection=americana"/><p>No description available.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, Grayscale LuraTech PDF, Metadata, Single Page Library JP2 ZIP, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Single Page Watermark JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/committingmagist00flam + http://www.archive.org/details/committingmagist00flam + Wed, 30 Apr 2008 03:14:26 GMT + texts/americana + + Police magistrates -- New York (State), Habeas corpus, Mandamus, Prohibition (Law), Appellate procedure -- New York (State) + + + Papers and addresses + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=papersaddresses00bras&mediatype=texts&collection=americana"/><p>No description available.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, Grayscale LuraTech PDF, Metadata, Single Page Library JP2 ZIP, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Single Page Watermark JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/papersaddresses00bras + http://www.archive.org/details/papersaddresses00bras + Wed, 30 Apr 2008 01:11:05 GMT + texts/americana + + Imperial federation, Great Britain -- Colonies + + + European years; the letters of an idle man + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=europeanyearslet00warn&mediatype=texts&collection=americana"/><p>No description available.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, 
Grayscale LuraTech PDF, Metadata, Single Page Library JP2 ZIP, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Single Page Watermark JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/europeanyearslet00warn + http://www.archive.org/details/europeanyearslet00warn + Tue, 29 Apr 2008 16:43:31 GMT + texts/americana + + Europe -- Description and travel, United States -- Description and travel + + + Cable and satellite carrier compulsory licenses : hearing before the Subcommittee on Intellectual Property and Judicial Administration of the Committee on the Judiciary, House of Representatives, One Hundred Third Congress, first session, on H.R. 759 and H.R. 1103 ... March 17, 1993 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=cablesatelliteca00unit&mediatype=texts&collection=americana"/><p>Includes bibliographical references.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, Grayscale LuraTech PDF, Metadata, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/cablesatelliteca00unit + http://www.archive.org/details/cablesatelliteca00unit + Tue, 29 Apr 2008 16:28:47 GMT + texts/americana + + Cable television -- Licenses United States, Direct broadcast satellite television -- Licenses United States + + + Leinender + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Leinender&mediatype=Other&collection=ourmedia"/><p>TSOP Leinender.</p><p>This item belongs to: Other/ourmedia.</p><p>This item has files of the following types: Metadata, ZIP</p> + http://www.archive.org/details/Leinender + http://www.archive.org/details/Leinender + Tue, 29 Apr 2008 06:59:06 GMT + Other/ourmedia + + TSOP + + + tribute + <img width="160" 
style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=tribute_488&mediatype=audio&collection=opensource_audio"/><p>Tribute Yanni music Vocal: Nathan-Pacheco.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/tribute_488 + http://www.archive.org/details/tribute_488 + Tue, 29 Apr 2008 06:53:43 GMT + audio/opensource_audio + + Yanni, Tribute + + + La Voz de Brasil #012: Efemerides 2008 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=RodrigoDubLaVozdeBrasil_012_Efemerides2008&mediatype=Audio&collection=ourmedia"/><p>¡Ogum yê! En el año en que el Sobrevivendo no Inferno de los Racionais MC's completa su decimo cumpleaño, la Voz de Brasil presenta algunas otras efemérides - empezando por João Gilberto y la primera invención de Brasil (la segunda fué del maestro Jorge Ben, pero eso queda para otro programa....</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 160Kbps MP3, 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/RodrigoDubLaVozdeBrasil_012_Efemerides2008 + http://www.archive.org/details/RodrigoDubLaVozdeBrasil_012_Efemerides2008 + Tue, 29 Apr 2008 06:53:09 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + musica, brasil, brasileña, brazilian, music, psicodelia, psychodelic + + + Doubleknit Podcast #1 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=DoubleknitPodcast1&mediatype=audio&collection=opensource_audio"/><p>Debut podcast of the Doubleknit Twins..</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 
64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/DoubleknitPodcast1 + http://www.archive.org/details/DoubleknitPodcast1 + Tue, 29 Apr 2008 06:51:57 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/ + audio/opensource_audio + + knit, knitting, yarn, seattle + + + etceterapodcast37 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=etceterapodcast37_831&mediatype=audio&collection=opensource_audio"/><p>Restaurantes repetidos, los lenguages que se pierden, trencito de gendarmería, trailer de la hamburgesa perfecta, la película fué Shaun of The Dead, guerra en la convención de Taekwondo, pez globo, hara-kiris, océanos del mundo, el efecto Rocky y un Adam Sandler, el punto en que las a....</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/etceterapodcast37_831 + http://www.archive.org/details/etceterapodcast37_831 + Tue, 29 Apr 2008 06:49:31 GMT + http://creativecommons.org/licenses/by-nc-nd/2.5/ar/ + audio/opensource_audio + + Anhdres, Andres, Nahuel, Etcetera, Etc, Podcast, Español, Spanish, Buenos Aires, Argentina + + + midoz + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=22008-04-02.wwe0002&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Real Media</p> + http://www.archive.org/details/22008-04-02.wwe0002 + http://www.archive.org/details/22008-04-02.wwe0002 + Tue, 29 Apr 2008 06:47:41 GMT + movies/opensource_movies + + + + music9 + <img width="160" style="padding-right:3px;float:left;" 
src="http://www.archive.org/services/get-item-image.php?identifier=music9&mediatype=audio&collection=opensource_audio"/><p>music9.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/music9 + http://www.archive.org/details/music9 + Tue, 29 Apr 2008 06:45:18 GMT + audio/opensource_audio + + music9 + + + desire + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=desire_741&mediatype=audio&collection=opensource_audio"/><p>Desire YANNI music Vocal: Ender-Thomas.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/desire_741 + http://www.archive.org/details/desire_741 + Tue, 29 Apr 2008 06:44:53 GMT + audio/opensource_audio + + Desire * Yanni * Ender-Thomas + + + Live at Nelson Ledges Quarry Park on 2008-04-25 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=eh2008-04-25.dubsbd.16441&mediatype=etree&collection=EkoostikHookah"/><p>Set 1 (Disc 1): 01. Chicago-> 02. Hookahville-> 03. Chicago 04. Mississippi Steamboat 05. Sure Cure For the Blues* 06. Mexican Opera* 07. Sail Away 08. Washboard Annie 09. Serpentine 10. 
Tumblin' Set 2 (Disc 2): 01....</p><p>This item belongs to: etree/EkoostikHookah.</p><p>This item has files of the following types: Flac, Flac FingerPrint, Metadata, Text</p> + http://www.archive.org/details/eh2008-04-25.dubsbd.16441 + http://www.archive.org/details/eh2008-04-25.dubsbd.16441 + Tue, 29 Apr 2008 06:44:36 GMT + etree/EkoostikHookah + + + + Live at Nelson Ledges Quarry Park on 2008-04-26 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=eh2008-04-26.dubsbd.16441&mediatype=etree&collection=EkoostikHookah"/><p>Set 1 (Disc 1): 01. Right Back Out in the Streets 02. Utopia 03. The Devil & Me 04. When the Sun Goes Down 05. Rocketman 06. Stuck In the Snow 07. John Henry 08. Green 09. Shadane Set 2 (Disc 2): 01. Ecstasy 02....</p><p>This item belongs to: etree/EkoostikHookah.</p><p>This item has files of the following types: Flac, Flac FingerPrint, Metadata, Text</p> + http://www.archive.org/details/eh2008-04-26.dubsbd.16441 + http://www.archive.org/details/eh2008-04-26.dubsbd.16441 + Tue, 29 Apr 2008 06:44:23 GMT + etree/EkoostikHookah + + + + Burn C.C. + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=BurnC.c&mediatype=movies&collection=opensource_movies"/><p>Burn C.C..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash Video, Metadata, QuickTime, Thumbnail</p> + http://www.archive.org/details/BurnC.c + http://www.archive.org/details/BurnC.c + Tue, 29 Apr 2008 06:43:49 GMT + movies/opensource_movies + + + + + + + Burn C.C. 
+ + + TOTD 29 APRIL JAM 17 - DEWI SHINTAW ATY - ICHSANUDIN NOORSY - MANTAN ANGGOTA DPR - PEMERASAN OLEH JAKSA + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=visioning_641&mediatype=audio&collection=opensource_audio"/><p>visioning indonesia.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/visioning_641 + http://www.archive.org/details/visioning_641 + Tue, 29 Apr 2008 06:43:36 GMT + audio/opensource_audio + + pasfm + + + IrationVibrationShow-4-27-08-p3 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=IrationVibrationShow-4-27-08-p3&mediatype=audio&collection=opensource_audio"/><p>pt3 of.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/IrationVibrationShow-4-27-08-p3 + http://www.archive.org/details/IrationVibrationShow-4-27-08-p3 + Tue, 29 Apr 2008 06:43:04 GMT + http://creativecommons.org/licenses/publicdomain/ + audio/opensource_audio + + iration, vibration, luciano, toots, promo + + + Birthday Party + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=BirthdayParty&mediatype=movies&collection=opensource_movies"/><p>Home movie of a birthday party and travels through India from the late 1960s. 
From Lostinlight.org.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Video, 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash Video, MPEG1, MPEG2, Metadata, Thumbnail</p> + http://www.archive.org/details/BirthdayParty + http://www.archive.org/details/BirthdayParty + Tue, 29 Apr 2008 06:41:45 GMT + http://creativecommons.org/licenses/by-nc/3.0/ + movies/opensource_movies + + + + + + + + + + + rADIO + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=rADIO_538&mediatype=audio&collection=opensource_audio"/><p>rADIO rADIO rADIO rADIO rADIO.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/rADIO_538 + http://www.archive.org/details/rADIO_538 + Tue, 29 Apr 2008 06:40:23 GMT + audio/opensource_audio + + rADIO rADIO V + + + desire + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=desire_583&mediatype=audio&collection=opensource_audio"/><p>Desire Yanni music Vocal: Ender-Thomas.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/desire_583 + http://www.archive.org/details/desire_583 + Tue, 29 Apr 2008 06:39:11 GMT + audio/opensource_audio + + Yanni * Desire + + + TOTD 29 APRIL JAM 12 - DEWI SHINTAW ATY - HAMDAN ZULVA - POLITISI - PEMERASAN OLEH JAKSA + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=topic_253&mediatype=audio&collection=opensource_audio"/><p>visioning indonesia.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of 
the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/topic_253 + http://www.archive.org/details/topic_253 + Tue, 29 Apr 2008 06:39:11 GMT + audio/opensource_audio + + pasfm + + + The Not Doctor Laura Show_Mon Apr 28 2008 - how to handle stressful events + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=TheNotDoctorLauraShow_monApr282008-HowToHandleStressfulEvents&mediatype=audio&collection=opensource_audio"/><p>The Not Doctor Laura Show_Mon Apr 28 2008 - how to handle stressful events.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/TheNotDoctorLauraShow_monApr282008-HowToHandleStressfulEvents + http://www.archive.org/details/TheNotDoctorLauraShow_monApr282008-HowToHandleStressfulEvents + Tue, 29 Apr 2008 06:38:38 GMT + audio/opensource_audio + + a + + + beethoven 9th + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Beethovenbeethoven9th&mediatype=Audio&collection=ourmedia"/><p>sample symphony music, test file.</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata</p> + http://www.archive.org/details/Beethovenbeethoven9th + http://www.archive.org/details/Beethovenbeethoven9th + Tue, 29 Apr 2008 06:38:01 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + beethoven, 9th, symphony + + + KASDAMAM + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=KASDAMAM_444&mediatype=texts&collection=opensource"/><p>KASDAMAM.</p><p>This item belongs to: texts/opensource.</p><p>This item has 
files of the following types: Metadata, PDF</p> + http://www.archive.org/details/KASDAMAM_444 + http://www.archive.org/details/KASDAMAM_444 + Tue, 29 Apr 2008 06:37:53 GMT + texts/opensource + + KASDAMAM + + + James + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=James_278&mediatype=Other&collection=ourmedia"/><p>d.</p><p>This item belongs to: Other/ourmedia.</p><p>This item has files of the following types: Metadata, ZIP</p> + http://www.archive.org/details/James_278 + http://www.archive.org/details/James_278 + Tue, 29 Apr 2008 06:37:47 GMT + Other/ourmedia + + s + + + Mosaic News - 04/28/08: World News From The Middle East + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=linktv_mosaic20080428&mediatype=movies&collection=opensource_movies"/><p>The Peabody Award-winning daily compilation of television news reports from the Middle East, including Egypt, Lebanon, Israel, Syria, the Palestinian Authority, Iraq and Iran..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: FLV 400k, MPEG4 1.5Mbps, MPEG4 350Kb, MPEG4 60Kb, Metadata, iPod Video (MP4)</p> + http://www.archive.org/details/linktv_mosaic20080428 + http://www.archive.org/details/linktv_mosaic20080428 + Tue, 29 Apr 2008 06:37:38 GMT + movies/opensource_movies + + + + Fouth Wall Weekly #2 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Jesster_StateoftheArtandFredSoloFouthWallWeekly_2&mediatype=Audio&collection=ourmedia"/><p>In this weeks edition we go over : The end of countdown and Batman R.I.P preview Cloverfiled DVD Mortal Kombat aka the death of a franchise and GTA preview And other film news.</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 128Kbps MP3, 128kbps M3U, 64Kbps M3U, 64Kbps 
MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis</p> + http://www.archive.org/details/Jesster_StateoftheArtandFredSoloFouthWallWeekly_2 + http://www.archive.org/details/Jesster_StateoftheArtandFredSoloFouthWallWeekly_2 + Tue, 29 Apr 2008 06:36:06 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + Comics, film, videogames + + + John 11:20-44 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=AlanDisbrowJohn11_20-44_0&mediatype=Audio&collection=ourmedia"/><p>Bible Study of John 11:20-44, Arise to a New Life, by Alan Disbrow..</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 128Kbps MP3, 128kbps M3U, 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis</p> + http://www.archive.org/details/AlanDisbrowJohn11_20-44_0 + http://www.archive.org/details/AlanDisbrowJohn11_20-44_0 + Tue, 29 Apr 2008 06:34:13 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + Christianity, Jesus, Bible Study, Calvary Chapel, John + + + day 6 potok + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Joshleo-day6Potok652-3&mediatype=movies&collection=bliptv"/><p>me and this cat don't get along.</p><p>This item belongs to: movies/bliptv.</p><p>This item has files of the following types: 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash, Metadata, Quicktime, Thumbnail</p> + http://www.archive.org/details/Joshleo-day6Potok652-3 + http://www.archive.org/details/Joshleo-day6Potok652-3 + Tue, 29 Apr 2008 06:33:24 GMT + http://creativecommons.org/licenses/by-nc-sa/2.0/ + movies/bliptv + + + + + + + + + 4jkfhwjkl + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=4jkfhwjkl&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: 
movies/opensource_movies.</p><p>This item has files of the following types: Metadata, RAR</p> + http://www.archive.org/details/4jkfhwjkl + http://www.archive.org/details/4jkfhwjkl + Tue, 29 Apr 2008 06:32:22 GMT + movies/opensource_movies + + + + 5knfwk + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=5knfwk&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, RAR</p> + http://www.archive.org/details/5knfwk + http://www.archive.org/details/5knfwk + Tue, 29 Apr 2008 06:31:57 GMT + movies/opensource_movies + + + + SA11 - Upheaval - Descending in Motion + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Sa11-Upheaval-DescendingInMotion&mediatype=audio&collection=opensource_audio"/><p>SA11 - Upheaval - Descending in Motion -------------------------------------- Descending in Motion -------------------------------------- Tom Maggio (also of Turmoil and Domestic Turmoil) brings us an EP of subtle sinking sounds and vaguely disturbing industrial echos....</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, JPEG, Metadata, Ogg Vorbis, Text, VBR M3U, VBR MP3, VBR ZIP, ZIP</p> + http://www.archive.org/details/Sa11-Upheaval-DescendingInMotion + http://www.archive.org/details/Sa11-Upheaval-DescendingInMotion + Tue, 29 Apr 2008 06:31:51 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/us/ + audio/opensource_audio + + dark, ambient, industrial + + + Vespa ride to Ufomammut - Smoke (3) + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=VespaRideToUfomammut-Smoke3&mediatype=movies&collection=opensource_movies"/><p>riding through 
London on my vespa, listening to Ufomammut, Smoke..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Flash Video, Metadata, QuickTime, Thumbnail</p> + http://www.archive.org/details/VespaRideToUfomammut-Smoke3 + http://www.archive.org/details/VespaRideToUfomammut-Smoke3 + Tue, 29 Apr 2008 06:31:05 GMT + http://creativecommons.org/licenses/by-nc-nd/2.0/uk/ + movies/opensource_movies + + + + vespa, london, ufomammut + + + NaturesLead_OV_04__LockedInAGraveyard + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=NaturesLead_OV_04__LockedInAGraveyard&mediatype=audio&collection=opensource_audio"/><p>In this Open Valley, I share my experience of getting locked in Rome's Protestant Cemetery where Keats and Shelley are buried..</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/NaturesLead_OV_04__LockedInAGraveyard + http://www.archive.org/details/NaturesLead_OV_04__LockedInAGraveyard + Tue, 29 Apr 2008 06:31:03 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/us/ + audio/opensource_audio + + Keats, Shelley, cemetery + + + 20/20 Podcast #12A + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=2020Podcast12a&mediatype=audio&collection=opensource_audio"/><p>Discussion between Jamie and Scott about Google Android and the Open Handset Alliance. 
All things cellular are fair game..</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/2020Podcast12a + http://www.archive.org/details/2020Podcast12a + Tue, 29 Apr 2008 06:30:26 GMT + audio/opensource_audio + + google, android, cell phone, gadget, samsung, htc, t-mobile + + + Urdu Poetry podcast + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=ZAhmedUrduPoetrypodcast_2&mediatype=Audio&collection=ourmedia"/><p>An Urdu poem by N M Rashid with english translation.</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 256Kbps MP3, 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/ZAhmedUrduPoetrypodcast_2 + http://www.archive.org/details/ZAhmedUrduPoetrypodcast_2 + Tue, 29 Apr 2008 06:30:23 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + Urdu, Rashid, poetry + + + Beth Ann Turkey 2008 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=SteveEasomBethAnnTurkey2008&mediatype=MovingImage&collection=ourmedia"/><p>2008 Rio Turkey taken By Beth Ann in Oklahoma.</p><p>This item belongs to: MovingImage/ourmedia.</p><p>This item has files of the following types: Metadata, QuickTime</p> + http://www.archive.org/details/SteveEasomBethAnnTurkey2008 + http://www.archive.org/details/SteveEasomBethAnnTurkey2008 + Tue, 29 Apr 2008 06:30:21 GMT + http://creativecommons.org/licenses/by/2.5/ + MovingImage/ourmedia + + Beth Ann, Turkey + + + TOTD 29 APRIL JAM 06 - DEWI SHINTAW ATY - AS HIKAM - POLITISI - PEMERASAN OLEH JAKSA + <img width="160" style="padding-right:3px;float:left;" 
src="http://www.archive.org/services/get-item-image.php?identifier=topic_462&mediatype=audio&collection=opensource_audio"/><p>visioning indonesia.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/topic_462 + http://www.archive.org/details/topic_462 + Tue, 29 Apr 2008 06:30:12 GMT + audio/opensource_audio + + pasfm + + + hosam + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=hosam_244&mediatype=movies&collection=opensource_movies"/><p>6w7u45r7.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash Video, Metadata, Thumbnail, Windows Media</p> + http://www.archive.org/details/hosam_244 + http://www.archive.org/details/hosam_244 + Tue, 29 Apr 2008 06:30:05 GMT + movies/opensource_movies + + + + + + + 57uy436 + + + free - destiny's child + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Free-DestinysChild&mediatype=audio&collection=opensource_audio"/><p>from destiny fufilled.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/Free-DestinysChild + http://www.archive.org/details/Free-DestinysChild + Tue, 29 Apr 2008 06:30:01 GMT + audio/opensource_audio + + free, destiny's child + + + From c565d2e36ed4bcbd3ecbc8e184daf754910ea460 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 4 Apr 2014 12:14:02 +0100 Subject: [PATCH 52/86] Passing atFirstRecord down to UncompressedARCReader seems to work. 
--- src/main/java/org/archive/io/arc/ARCReaderFactory.java | 5 +++-- src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/archive/io/arc/ARCReaderFactory.java b/src/main/java/org/archive/io/arc/ARCReaderFactory.java index ce12c4bb..44437ed7 100644 --- a/src/main/java/org/archive/io/arc/ARCReaderFactory.java +++ b/src/main/java/org/archive/io/arc/ARCReaderFactory.java @@ -151,7 +151,7 @@ protected ArchiveReader getArchiveReader(final String arc, if (compressed) { return new CompressedARCReader(arc, possiblyWrapped, atFirstRecord); } else { - return new UncompressedARCReader(arc, possiblyWrapped); + return new UncompressedARCReader(arc, possiblyWrapped, atFirstRecord); } } @@ -330,10 +330,11 @@ public UncompressedARCReader(final File f, final long offset) * @param f Uncompressed arc to read. * @param is InputStream. */ - public UncompressedARCReader(final String f, final InputStream is) { + public UncompressedARCReader(final String f, final InputStream is, boolean atFirstRecord) { // Arc file has been tested for existence by time it has come // to here. 
setIn(new CountingInputStream(is)); + setAlignedOnFirstRecord(atFirstRecord); initialize(f); } } diff --git a/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java index 090ccef7..83c08252 100644 --- a/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java +++ b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java @@ -28,8 +28,8 @@ public class ARCReaderFactoryTest extends TestCase { * @throws Exception */ public void testGetResource() throws Exception { - this.offsetResourceTest(testfile1, 1515, "archive.org/robots.txt" ); - this.offsetResourceTest(testfile1, 36420, "archive.org/services/collection-rss.php" ); + this.offsetResourceTest(testfile1, 1515, "http://www.archive.org/robots.txt" ); + this.offsetResourceTest(testfile1, 36420, "http://www.archive.org/services/collection-rss.php" ); } private void offsetResourceTest( File testfile, long offset, String uri ) throws Exception { From b38aae442f1bb8b77903cef7bfc421b233040bb9 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 4 Apr 2014 12:17:36 +0100 Subject: [PATCH 53/86] Removed debug logging and stacktraces. 
--- src/main/java/org/archive/io/ArchiveRecord.java | 5 ----- src/main/java/org/archive/io/arc/ARCRecord.java | 4 ---- src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java | 4 ---- 3 files changed, 13 deletions(-) diff --git a/src/main/java/org/archive/io/ArchiveRecord.java b/src/main/java/org/archive/io/ArchiveRecord.java index a3cab4ba..63bfe628 100644 --- a/src/main/java/org/archive/io/ArchiveRecord.java +++ b/src/main/java/org/archive/io/ArchiveRecord.java @@ -292,13 +292,10 @@ public String getDigestStr() { } protected void incrementPosition() { - System.err.println("incrementPostion()"); this.position++; } protected void incrementPosition(final long incr) { - new Exception().printStackTrace(); - System.err.println("incrementPostion("+incr+")"); this.position += incr; } @@ -407,8 +404,6 @@ public boolean hasContentHeaders() { } protected void setBodyOffset(int bodyOffset) { - new Exception().printStackTrace(); - System.err.println("setBodyOffset("+bodyOffset+")"); this.position = bodyOffset; } } diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java index 7f3bf653..21bea07c 100644 --- a/src/main/java/org/archive/io/arc/ARCRecord.java +++ b/src/main/java/org/archive/io/arc/ARCRecord.java @@ -344,8 +344,6 @@ private int getTokenizedHeaderLine(final InputStream stream, // save verbatim header String this.headerString = StringUtils.join(list," "); - System.err.println("This "+this.headerString); - return read; } @@ -591,7 +589,6 @@ private InputStream readHttpHeader() throws IOException { statusLine = EncodingUtil.getString(statusBytes, 0, statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); - System.err.println("statusLine: "+statusLine); // If a null or DELETED break immediately if ((statusLine == null) || statusLine.startsWith("DELETED")) { @@ -605,7 +602,6 @@ private InputStream readHttpHeader() throws IOException { } // Add bytes read to error "offset" to add to position - 
System.err.println("BYTES: "+new String(statusBytes)); errOffset += statusBytes.length; } diff --git a/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java index 83c08252..0721f795 100644 --- a/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java +++ b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java @@ -41,16 +41,12 @@ private void offsetResourceTest( File testfile, long offset, String uri ) throws // This one works: //ArchiveReader reader = ARCReaderFactory.get(testfile, offset); ArchiveRecord record = reader.get(); - System.out.println("Position:"+record.getPosition()); final String url = record.getHeader().getUrl(); - System.out.println("Got URL: "+url); assertEquals("URL of record is not as expected.", uri, url); final long position = record.getPosition(); final long recordLength = record.getHeader().getLength(); - System.out.println("Position:"+position); - System.out.println("Length:"+recordLength); assertTrue("Position " + position + " is after end of record " + recordLength, position <= recordLength); // Clean up: From b3cb37682d45486afdf40613be8b9adb5c1d71cf Mon Sep 17 00:00:00 2001 From: Gerhard Gossen Date: Wed, 4 Jun 2014 11:56:24 +0200 Subject: [PATCH 54/86] Update to current Guava version --- pom.xml | 2 +- .../extract/DumpingExtractorOutput.java | 4 +-- .../extract/RealCDXExtractorOutput.java | 5 +-- .../WARCMetadataRecordExtractorOutput.java | 5 +-- .../archive/resource/AbstractResource.java | 4 +-- .../org/archive/resource/arc/ARCResource.java | 4 +-- .../resource/http/HTTPResponseResource.java | 5 ++- .../archive/resource/warc/WARCResource.java | 4 +-- .../org/archive/url/URLRegexTransformer.java | 4 +-- src/main/java/org/archive/util/TextUtils.java | 13 +++---- .../impl/HDFSSeekableLineReader.java | 4 +-- .../RandomAccessFileSeekableLineReader.java | 4 +-- .../util/zip/GZIPMembersInputStreamTest.java | 36 +++++++++---------- 13 files changed, 48 insertions(+), 46 
deletions(-) diff --git a/pom.xml b/pom.xml index e542d616..add79749 100644 --- a/pom.xml +++ b/pom.xml @@ -72,7 +72,7 @@ com.google.guava guava - 14.0.1 + 17.0 diff --git a/src/main/java/org/archive/extract/DumpingExtractorOutput.java b/src/main/java/org/archive/extract/DumpingExtractorOutput.java index a4151076..69591931 100644 --- a/src/main/java/org/archive/extract/DumpingExtractorOutput.java +++ b/src/main/java/org/archive/extract/DumpingExtractorOutput.java @@ -9,8 +9,8 @@ import org.archive.util.StreamCopy; import org.json.JSONException; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class DumpingExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -22,7 +22,7 @@ public DumpingExtractorOutput(OutputStream out) { } public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); StreamCopy.copy(resource.getInputStream(), co); long bytes = co.getCount(); diff --git a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java index 306f67a3..62a423c5 100644 --- a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java +++ b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java @@ -1,6 +1,7 @@ package org.archive.extract; import java.io.IOException; +import java.io.OutputStream; import java.io.PrintWriter; import java.net.MalformedURLException; import java.net.URISyntaxException; @@ -23,8 +24,8 @@ import org.json.JSONException; import org.json.JSONObject; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class RealCDXExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -72,7 
+73,7 @@ public RealCDXExtractorOutput(PrintWriter out) { // SimpleJSONPathSpec gzFooterLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Footer-Length"); // SimpleJSONPathSpec gzHeaderLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Header-Length"); public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); try { StreamCopy.copy(resource.getInputStream(), co); diff --git a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java index 0d564a6f..ff46a914 100644 --- a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java +++ b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java @@ -1,6 +1,7 @@ package org.archive.extract; import java.io.IOException; +import java.io.OutputStream; import java.io.PrintWriter; import java.net.MalformedURLException; import java.net.URISyntaxException; @@ -21,8 +22,8 @@ import org.json.JSONException; import org.json.JSONObject; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class WARCMetadataRecordExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -47,7 +48,7 @@ public WARCMetadataRecordExtractorOutput(PrintWriter out) { } public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); try { StreamCopy.copy(resource.getInputStream(), co); diff --git a/src/main/java/org/archive/resource/AbstractResource.java b/src/main/java/org/archive/resource/AbstractResource.java index 409e7408..301c53d4 100755 --- 
a/src/main/java/org/archive/resource/AbstractResource.java +++ b/src/main/java/org/archive/resource/AbstractResource.java @@ -5,7 +5,7 @@ import org.archive.util.StreamCopy; -import com.google.common.io.NullOutputStream; +import com.google.common.io.ByteStreams; public abstract class AbstractResource implements Resource { protected ResourceContainer container; @@ -44,7 +44,7 @@ public static void dumpShort(PrintStream out, Resource resource) throws IOExcept // out.println("Headers Before"); // out.print(m.toString()); - long bytes = StreamCopy.copy(resource.getInputStream(), new NullOutputStream()); + long bytes = StreamCopy.copy(resource.getInputStream(), ByteStreams.nullOutputStream()); out.println("Resource Was:"+bytes+" Long"); out.println("[\n]Headers After"); diff --git a/src/main/java/org/archive/resource/arc/ARCResource.java b/src/main/java/org/archive/resource/arc/ARCResource.java index 5d63fd4d..b6e0a1c1 100644 --- a/src/main/java/org/archive/resource/arc/ARCResource.java +++ b/src/main/java/org/archive/resource/arc/ARCResource.java @@ -18,8 +18,8 @@ import org.archive.util.io.EOFObserver; import org.archive.util.io.PushBackOneByteInputStream; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; public class ARCResource extends AbstractResource @@ -54,7 +54,7 @@ public ARCResource(MetaData metaData, ResourceContainer container, fields.putLong(DECLARED_LENGTH_KEY, arcMetaData.getLength()); countingIS = new CountingInputStream( - new LimitInputStream(raw, arcMetaData.getLength())); + ByteStreams.limit(raw, arcMetaData.getLength())); try { digIS = new DigestInputStream(countingIS, diff --git a/src/main/java/org/archive/resource/http/HTTPResponseResource.java b/src/main/java/org/archive/resource/http/HTTPResponseResource.java index b5d189bc..cc325427 100644 --- a/src/main/java/org/archive/resource/http/HTTPResponseResource.java +++ 
b/src/main/java/org/archive/resource/http/HTTPResponseResource.java @@ -7,7 +7,6 @@ import java.security.NoSuchAlgorithmException; import java.util.logging.Logger; - import org.archive.format.http.HttpHeader; import org.archive.format.http.HttpResponse; import org.archive.format.http.HttpResponseMessage; @@ -20,8 +19,8 @@ import org.archive.util.io.EOFNotifyingInputStream; import org.archive.util.io.EOFObserver; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; @@ -65,7 +64,7 @@ public HTTPResponseResource(MetaData metaData, headers.putString(h.getName(),h.getValue()); } if(forceCheck && (length >= 0)) { - LimitInputStream lis = new LimitInputStream(response, length); + InputStream lis = ByteStreams.limit(response, length); countingIS = new CountingInputStream(lis); } else { countingIS = new CountingInputStream(response); diff --git a/src/main/java/org/archive/resource/warc/WARCResource.java b/src/main/java/org/archive/resource/warc/WARCResource.java index ab9b6900..80929206 100644 --- a/src/main/java/org/archive/resource/warc/WARCResource.java +++ b/src/main/java/org/archive/resource/warc/WARCResource.java @@ -19,8 +19,8 @@ import org.archive.util.io.EOFObserver; import org.archive.util.io.PushBackOneByteInputStream; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; public class WARCResource extends AbstractResource implements EOFObserver, ResourceConstants { CountingInputStream countingIS; @@ -51,7 +51,7 @@ public WARCResource(MetaData metaData, ResourceContainer container, if(length >= 0) { countingIS = new CountingInputStream( - new LimitInputStream(response, length)); + ByteStreams.limit(response, length)); } else { throw new ResourceParseException(null); } diff --git a/src/main/java/org/archive/url/URLRegexTransformer.java b/src/main/java/org/archive/url/URLRegexTransformer.java index 
930f5b34..c5505a74 100644 --- a/src/main/java/org/archive/url/URLRegexTransformer.java +++ b/src/main/java/org/archive/url/URLRegexTransformer.java @@ -101,7 +101,7 @@ public static String hostToPublicSuffix(String host) { InternetDomainName idn; try { - idn = InternetDomainName.fromLenient(host); + idn = InternetDomainName.from(host); } catch(IllegalArgumentException e) { return host; } @@ -109,7 +109,7 @@ public static String hostToPublicSuffix(String host) { if(tmp == null) { return host; } - String pubSuff = tmp.name(); + String pubSuff = tmp.toString(); int idx = host.lastIndexOf(".", host.length() - (pubSuff.length()+2)); if(idx == -1) { return host; diff --git a/src/main/java/org/archive/util/TextUtils.java b/src/main/java/org/archive/util/TextUtils.java index 707f93c7..9061a161 100644 --- a/src/main/java/org/archive/util/TextUtils.java +++ b/src/main/java/org/archive/util/TextUtils.java @@ -36,8 +36,9 @@ import org.apache.commons.lang.StringEscapeUtils; -import com.google.common.base.Function; -import com.google.common.collect.MapMaker; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; public class TextUtils { private static final String FIRSTWORD = "^([^\\s]*).*$"; @@ -51,11 +52,11 @@ protected Map initialValue() { }; /** global soft-cache of Patterns, by string key */ - private static final ConcurrentMap PATTERNS = new MapMaker() + private static final LoadingCache PATTERNS = CacheBuilder.newBuilder() .concurrencyLevel(16) .softValues() - .makeComputingMap(new Function() { - public Pattern apply(String regex) { + .build(new CacheLoader() { + public Pattern load(String regex) { return Pattern.compile(regex); } }); @@ -84,7 +85,7 @@ public static Matcher getMatcher(String pattern, CharSequence input) { final Map matchers = TL_MATCHER_MAP.get(); Matcher m = (Matcher)matchers.get(pattern); if(m == null) { - m = PATTERNS.get(pattern).matcher(input); + m = 
PATTERNS.getUnchecked(pattern).matcher(input); } else { matchers.put(pattern,null); m.reset(input); diff --git a/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java index 621c6bce..93757a45 100644 --- a/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java +++ b/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java @@ -6,7 +6,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.archive.util.binsearch.AbstractSeekableLineReader; -import com.google.common.io.LimitInputStream; +import com.google.common.io.ByteStreams; public class HDFSSeekableLineReader extends AbstractSeekableLineReader { private FSDataInputStream fsdis; @@ -23,7 +23,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException { fsdis.seek(offset); if (maxLength >= 0) { - return new LimitInputStream(fsdis, maxLength); + return ByteStreams.limit(fsdis, maxLength); } else { return fsdis; } diff --git a/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java index b211db16..5131dd06 100644 --- a/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java +++ b/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java @@ -7,7 +7,7 @@ import org.archive.util.binsearch.AbstractSeekableLineReader; -import com.google.common.io.LimitInputStream; +import com.google.common.io.ByteStreams; public class RandomAccessFileSeekableLineReader extends AbstractSeekableLineReader { @@ -24,7 +24,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException { FileInputStream fis = new FileInputStream(raf.getFD()); if (maxLength > 0) { - return new LimitInputStream(fis, maxLength); + return ByteStreams.limit(fis, maxLength); } else { return fis; } diff --git 
a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java index d3dc1ff6..710ff069 100644 --- a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java +++ b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java @@ -30,7 +30,7 @@ import org.archive.util.ArchiveUtils; import org.archive.util.zip.GZIPMembersInputStream; -import com.google.common.io.NullOutputStream; +import com.google.common.io.ByteStreams; import com.google.common.primitives.Bytes; /** @@ -70,14 +70,14 @@ public static void main(String [] args) { public void testFullReadAllFour() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); - int count = IOUtils.copy(gzin, new NullOutputStream()); + int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong length uncompressed data", 1024+(32*1024)+1+5, count); } public void testFullReadSixSmall() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz)); - int count = IOUtils.copy(gzin, new NullOutputStream()); + int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong length uncompressed data", 1+5+1+5+1+5, count); } @@ -85,31 +85,31 @@ public void testReadPerMemberAllFour() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); - int count0 = IOUtils.copy(gzin, new NullOutputStream()); + int count0 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1k member count", 1024, count0); assertEquals("wrong member number", 0, gzin.getMemberNumber()); assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count1 = IOUtils.copy(gzin, new NullOutputStream()); 
+ int count1 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 32k member count", (32*1024), count1); assertEquals("wrong member number", 1, gzin.getMemberNumber()); assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count2 = IOUtils.copy(gzin, new NullOutputStream()); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); assertEquals("wrong member number", 2, gzin.getMemberNumber()); assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); assertEquals("wrong member number", 3, gzin.getMemberNumber()); assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong eof count", 0, countEnd); } @@ -118,14 +118,14 @@ public void testReadPerMemberSixSmall() throws IOException { new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz)); gzin.setEofEachMember(true); for(int i = 0; i < 3; i++) { - int count2 = IOUtils.copy(gzin, new NullOutputStream()); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); gzin.nextMember(); - int count3 = 
IOUtils.copy(gzin, new NullOutputStream()); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); gzin.nextMember(); } - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong eof count", 0, countEnd); } @@ -172,19 +172,19 @@ public void testMemberSeek() throws IOException { new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); gzin.compressedSeek(noise1k_gz.length+noise32k_gz.length); - int count2 = IOUtils.copy(gzin, new NullOutputStream()); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); // assertEquals("wrong Member number", 2, gzin.getMemberNumber()); assertEquals("wrong Member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong Member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); // assertEquals("wrong Member number", 3, gzin.getMemberNumber()); assertEquals("wrong Member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); assertEquals("wrong Member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); gzin.nextMember(); - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); assertEquals("wrong eof count", 0, countEnd); } @@ -195,7 +195,7 @@ public void testMemberIterator() throws IOException { Iterator iter = gzin.memberIterator(); assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember0 = iter.next(); - int count0 = IOUtils.copy(gzMember0, new 
NullOutputStream()); + int count0 = IOUtils.copy(gzMember0, ByteStreams.nullOutputStream()); assertEquals("wrong 1k member count", 1024, count0); assertEquals("wrong member number", 0, gzin.getMemberNumber()); assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); @@ -203,7 +203,7 @@ public void testMemberIterator() throws IOException { assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember1 = iter.next(); - int count1 = IOUtils.copy(gzMember1, new NullOutputStream()); + int count1 = IOUtils.copy(gzMember1, ByteStreams.nullOutputStream()); assertEquals("wrong 32k member count", (32*1024), count1); assertEquals("wrong member number", 1, gzin.getMemberNumber()); assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); @@ -211,7 +211,7 @@ public void testMemberIterator() throws IOException { assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember2 = iter.next(); - int count2 = IOUtils.copy(gzMember2, new NullOutputStream()); + int count2 = IOUtils.copy(gzMember2, ByteStreams.nullOutputStream()); assertEquals("wrong 1-byte member count", 1, count2); assertEquals("wrong member number", 2, gzin.getMemberNumber()); assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); @@ -219,7 +219,7 @@ public void testMemberIterator() throws IOException { assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember3 = iter.next(); - int count3 = IOUtils.copy(gzMember3, new NullOutputStream()); + int count3 = IOUtils.copy(gzMember3, ByteStreams.nullOutputStream()); assertEquals("wrong 5-byte member count", 5, count3); assertEquals("wrong member number", 3, gzin.getMemberNumber()); assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); From 3b275708ad15a136ac9d833a268c13342f7bdd78 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Mon, 7 Jul 2014 12:52:24 +0100 Subject: [PATCH 55/86] Added update info for recent bugfixes. 
--- CHANGES.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index a3c8bbac..0d03e277 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,10 @@ +1.1.3 +----- +* [Synchronised with IA fork](https://github.com/iipc/webarchive-commons/pull/18) +* [Updated to more recent Guava APIs](https://github.com/iipc/webarchive-commons/pull/17) +* [Fixed handling of uncompressed ARC files #13 and #14](https://github.com/iipc/webarchive-commons/pull/14) +* [Avoid pulling in the logback dependency IA#13](https://github.com/internetarchive/webarchive-commons/pull/13) + 1.1.2 ----- * Fixed support for reading uncompressed WARCs, along with some unit testing. (https://github.com/iipc/webarchive-commons/pull/12) From 50372c06a59d09db3374d7dab8c776860800448b Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Mon, 7 Jul 2014 13:13:16 +0100 Subject: [PATCH 56/86] [maven-release-plugin] prepare release webarchive-commons-1.1.3 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 72ad4446..648e65c2 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.3-SNAPSHOT + 1.1.3 jar webarchive-commons From be7c5879ccbd414a4d6cf78229cad219b47c1477 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Mon, 7 Jul 2014 13:13:20 +0100 Subject: [PATCH 57/86] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 648e65c2..58220c6d 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.3 + 1.1.4-SNAPSHOT jar webarchive-commons From e0ca831cf202945bd9b60576af6907a73b7d7842 Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Thu, 7 Aug 2014 10:19:56 +0200 Subject: [PATCH 58/86] Explicitly setting locale for dateformats --- .../org/archive/format/gzip/zipnum/ZipNumCluster.java | 3 ++- 
src/main/java/org/archive/util/ArchiveUtils.java | 8 ++++---- src/main/java/org/archive/util/DateUtils.java | 2 +- .../util/binsearch/impl/http/ApacheHttp31SLRFactory.java | 3 ++- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java index bc773a58..a3d34a4b 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java +++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java @@ -21,6 +21,7 @@ import java.util.Date; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; import java.util.logging.Level; @@ -102,7 +103,7 @@ public void run() { public final static String LATEST_TIMESTAMP = "_LATEST"; public final static String OFF = "OFF"; - protected SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + protected SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH); protected Date startDate, endDate; class BlockSize diff --git a/src/main/java/org/archive/util/ArchiveUtils.java b/src/main/java/org/archive/util/ArchiveUtils.java index c41c0bc0..e4224384 100644 --- a/src/main/java/org/archive/util/ArchiveUtils.java +++ b/src/main/java/org/archive/util/ArchiveUtils.java @@ -104,7 +104,7 @@ public class ArchiveUtils { private static ThreadLocal threadLocalDateFormat(final String pattern) { ThreadLocal tl = new ThreadLocal() { protected SimpleDateFormat initialValue() { - SimpleDateFormat df = new SimpleDateFormat(pattern); + SimpleDateFormat df = new SimpleDateFormat(pattern, Locale.ENGLISH); df.setTimeZone(TimeZone.getTimeZone("GMT")); return df; } @@ -393,9 +393,9 @@ public static Date getDate(String d) throws ParseException { } final static SimpleDateFormat dateToTimestampFormats[] = - {new SimpleDateFormat("MM/dd/yyyy"), - new SimpleDateFormat("MM/yyyy"), 
- new SimpleDateFormat("yyyy")}; + {new SimpleDateFormat("MM/dd/yyyy", Locale.ENGLISH), + new SimpleDateFormat("MM/yyyy", Locale.ENGLISH), + new SimpleDateFormat("yyyy", Locale.ENGLISH)}; /** * Convert a user-entered date into a timestamp diff --git a/src/main/java/org/archive/util/DateUtils.java b/src/main/java/org/archive/util/DateUtils.java index e7fe78b7..d01b63ce 100755 --- a/src/main/java/org/archive/util/DateUtils.java +++ b/src/main/java/org/archive/util/DateUtils.java @@ -65,7 +65,7 @@ public class DateUtils { private static ThreadLocal threadLocalDateFormat(final String pattern) { ThreadLocal tl = new ThreadLocal() { protected SimpleDateFormat initialValue() { - SimpleDateFormat df = new SimpleDateFormat(pattern); + SimpleDateFormat df = new SimpleDateFormat(pattern, Locale.ENGLISH); df.setTimeZone(TimeZone.getTimeZone("GMT")); return df; } diff --git a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java index 9bd7542b..bc5b83f4 100644 --- a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java +++ b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Date; +import java.util.Locale; import java.util.logging.Logger; import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; @@ -156,7 +157,7 @@ public boolean isStaleChecking() public long getModTime() { HTTPSeekableLineReader reader = null; - SimpleDateFormat lastModFormat = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz"); + SimpleDateFormat lastModFormat = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.ENGLISH); try { reader = get(); From b04e7106f8e3640cbb4ffde7e6d2681b164840b5 Mon Sep 17 00:00:00 2001 From: lintool Date: Sat, 16 Aug 2014 10:05:11 -0400 Subject: [PATCH 59/86] Proposed fix to issue #23: fastutil conflicts in 
dependencies --- pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pom.xml b/pom.xml index 58220c6d..b7f82619 100644 --- a/pom.xml +++ b/pom.xml @@ -183,12 +183,6 @@ joda-time 1.6 - - fastutil - fastutil - 5.0.7 - compile - From 31251dea86ad8b66fd64415d9473de6a7ed92bce Mon Sep 17 00:00:00 2001 From: Andy Jackson Date: Tue, 2 Sep 2014 12:02:51 +0100 Subject: [PATCH 60/86] Added notes on recent fixes. --- CHANGES.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 0d03e277..65d24814 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,8 @@ +1.1.4 +----- +* [All dates should be independent of locale settings](https://github.com/iipc/webarchive-commons/pull/22) +* [Resolved fastutil conflict in dependencies](https://github.com/iipc/webarchive-commons/pull/24) + 1.1.3 ----- * [Synchronised with IA fork](https://github.com/iipc/webarchive-commons/pull/18) From c917e7e91b6a0bb3c75d10c557fa151da0ee165b Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Wed, 10 Sep 2014 09:59:27 +0200 Subject: [PATCH 61/86] [maven-release-plugin] prepare release webarchive-commons-1.1.4 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b7f82619..d6fe68a8 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.4-SNAPSHOT + 1.1.4 jar webarchive-commons From 249131174b60e3c7cf17f44e8d08a19cb4001e5a Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Wed, 10 Sep 2014 09:59:32 +0200 Subject: [PATCH 62/86] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d6fe68a8..6664efd8 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.4 + 1.1.5-SNAPSHOT jar webarchive-commons From f7cd67d9f4ac252385517770fce63dea767f9203 Mon Sep 17 00:00:00 2001 From: RogerMathisen Date: Tue, 23 Sep 2014 13:06:44 +0200 
Subject: [PATCH 63/86] - Replaced direct references to "/tmp" with generic temporary directory reference using File.createTempFile(). Fixes bug reported in iipc/webarchive-commons Issue #2. --- .../archive/format/gzip/GZIPMemberWriterTest.java | 4 ++-- .../util/binsearch/SortedTextFileTest.java | 2 +- .../iterator/SortedCompositeIteratorTest.java | 15 ++++++--------- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java b/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java index 5cd75ccf..483d2baf 100644 --- a/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java +++ b/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java @@ -12,8 +12,8 @@ public class GZIPMemberWriterTest extends TestCase { public void testWrite() throws IOException { - String outPath = "/tmp/tmp.gz"; - GZIPMemberWriter gzw = new GZIPMemberWriter(new FileOutputStream(new File(outPath))); + File outFile = File.createTempFile("tmp", ".gz"); + GZIPMemberWriter gzw = new GZIPMemberWriter(new FileOutputStream(outFile)); gzw.write(new ByteArrayInputStream("Here is record 1".getBytes(IAUtils.UTF8))); gzw.write(new ByteArrayInputStream("Here is record 2".getBytes(IAUtils.UTF8))); } diff --git a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java index 2c9d19e8..8f812b75 100644 --- a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java +++ b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java @@ -25,7 +25,7 @@ private void createFile(File target, int max) throws FileNotFoundException { public void testGetRecordIteratorStringBoolean() throws IOException { - File test = new File("/tmp/test.tmp"); + File test = File.createTempFile("test", null); int max = 1000000; createFile(test,max); RandomAccessFileSeekableLineReaderFactory factory = diff --git 
a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java index f1c2a0ec..0f4dc68a 100644 --- a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java +++ b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java @@ -4,6 +4,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; +import java.io.IOException; import java.io.PrintWriter; import java.util.Comparator; @@ -11,21 +12,16 @@ public class SortedCompositeIteratorTest extends TestCase { - public void testHasNext() throws FileNotFoundException { + public void testHasNext() throws FileNotFoundException, IOException { long t = 210000; long c = 134; float f = (float)c / (float)t; System.err.format("F(%f)\n",f); - File a = new File("/tmp/a"); - File b = new File("/tmp/b"); - if(a.isFile()) { - a.delete(); - } - if(b.isFile()) { - b.delete(); - } + File a = File.createTempFile("filea", null); + File b = File.createTempFile("fileb", null); + PrintWriter apw = new PrintWriter(a); PrintWriter bpw = new PrintWriter(b); apw.println("1"); @@ -38,6 +34,7 @@ public void testHasNext() throws FileNotFoundException { BufferedReader bbr = new BufferedReader(new FileReader(b)); SortedCompositeIterator sci = new SortedCompositeIterator(new Comparator() { + @Override public int compare(String o1, String o2) { return o1.compareTo(o2); } From 077abb783d77b8a556112a6617911d0ee7006595 Mon Sep 17 00:00:00 2001 From: thomase Date: Tue, 23 Sep 2014 14:48:48 +0200 Subject: [PATCH 64/86] * changed newline to System.lineSeparator --- .../org/archive/net/PublicSuffixesTest.java | 386 +++++++++--------- 1 file changed, 193 insertions(+), 193 deletions(-) diff --git a/src/test/java/org/archive/net/PublicSuffixesTest.java b/src/test/java/org/archive/net/PublicSuffixesTest.java index b88acb6d..a82bab22 100644 --- a/src/test/java/org/archive/net/PublicSuffixesTest.java +++ 
b/src/test/java/org/archive/net/PublicSuffixesTest.java @@ -1,193 +1,193 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.net; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.ArrayList; -import java.util.regex.Matcher; - -import junit.framework.TestCase; - -import org.archive.net.PublicSuffixes.Node; - -/** - * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches - * from constructed regex. 
- * - * @author gojomo - */ -public class PublicSuffixesTest extends TestCase { - // test of low level implementation - - public void testCompare() { - Node n = new Node("hoge"); - assertTrue(n.compareTo('a') > 0); - assertEquals(-1, n.compareTo('*')); - assertEquals(-1, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(-1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("*,"); - assertEquals(1, n.compareTo('a')); - assertEquals(0, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node("*,"))); - assertEquals(1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("!hoge"); - assertEquals(1, n.compareTo('a')); - assertEquals(-1, n.compareTo('*')); - assertEquals(0, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(0, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node(""); - assertEquals(1, n.compareTo('a')); - assertEquals(1, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node(""))); - } - - protected String dump(Node alt) { - StringWriter w = new StringWriter(); - PublicSuffixes.dump(alt, 0, new PrintWriter(w)); - return w.toString(); - } - public void testTrie1() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - // specifically, should not have empty string as match. 
- assertEquals("(null)\n" + - " \"ac,\"\n", dump(alt)); - alt.addBranch("ac,com,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"\"\n", dump(alt)); - alt.addBranch("ac,edu,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"edu,\"\n" + - " \"\"\n", dump(alt)); - } - public void testTrie2() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("*,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"*,\"\n", dump(alt)); - } - - public void testTrie3() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("ac,!hoge,"); - alt.addBranch("ac,*,"); - // exception goes first. - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"!hoge,\"\n" + - " \"*,\"\n" + - " \"\"\n", dump(alt)); - } - - // test of higher-level functionality - - Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() - .matcher(""); - - public void testBasics() { - matchPrefix("com,example,www,", "com,example,"); - matchPrefix("com,example,", "com,example,"); - matchPrefix("org,archive,www,", "org,archive,"); - matchPrefix("org,archive,", "org,archive,"); - matchPrefix("fr,yahoo,www,", "fr,yahoo,"); - matchPrefix("fr,yahoo,", "fr,yahoo,"); - matchPrefix("au,com,foobar,www,", "au,com,foobar,"); - matchPrefix("au,com,foobar,", "au,com,foobar,"); - matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); - matchPrefix("uk,co,virgin,", "uk,co,virgin,"); - matchPrefix("au,com,example,www,", "au,com,example,"); - matchPrefix("au,com,example,", "au,com,example,"); - matchPrefix("jp,yokohama,public,assigned,www,", - "jp,yokohama,public,assigned,"); - matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); - } - - public void testDomainWithDash() { - matchPrefix("de,bad-site,www", "de,bad-site,"); - } - - public void testDomainWithNumbers() { - matchPrefix("de,archive4u,www", "de,archive4u,"); - } - - public void testIPV4() { - assertEquals("unexpected reduction", - "1.2.3.4", - 
PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); - } - - public void testIPV6() { - assertEquals("unexpected reduction", - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", - PublicSuffixes.reduceSurtToAssignmentLevel( - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); - } - - public void testExceptions() { - matchPrefix("uk,bl,www,", "uk,bl,"); - matchPrefix("uk,bl,", "uk,bl,"); - matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); - matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); - } - - public void testFakeTLD() { - // we assume any new/unknonwn TLD should be assumed as 2-level; - // this is preferable for our grouping purpose but might not be - // for a cookie-assigning browser (original purpose of publicsuffixlist) - matchPrefix("zzz,example,www,", "zzz,example,"); - } - - public void testUnsegmentedHostname() { - m.reset("example"); - assertFalse("unexpected match found in 'example'", m.find()); - } - - public void testTopmostAssignedCaching() { - assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); - assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); - } - - // TODO: test UTF domains? - - protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { - m.reset(surtDomain); - assertTrue("expected match not found in '" + surtDomain, m.find()); - assertEquals("expected match not found", expectedAssignedPrefix, m - .group()); - } -} +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.net; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.regex.Matcher; + +import junit.framework.TestCase; + +import org.archive.net.PublicSuffixes.Node; + +/** + * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches + * from constructed regex. + * + * @author gojomo + */ +public class PublicSuffixesTest extends TestCase { + // test of low level implementation + private final String NL = System.lineSeparator(); + + public void testCompare() { + Node n = new Node("hoge"); + assertTrue(n.compareTo('a') > 0); + assertEquals(-1, n.compareTo('*')); + assertEquals(-1, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(-1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("*,"); + assertEquals(1, n.compareTo('a')); + assertEquals(0, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, n.compareTo(new Node("*,"))); + assertEquals(1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("!hoge"); + assertEquals(1, n.compareTo('a')); + assertEquals(-1, n.compareTo('*')); + assertEquals(0, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(0, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node(""); + assertEquals(1, n.compareTo('a')); + assertEquals(1, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, 
n.compareTo(new Node(""))); + } + + protected String dump(Node alt) { + StringWriter w = new StringWriter(); + PublicSuffixes.dump(alt, 0, new PrintWriter(w)); + return w.toString(); + } + public void testTrie1() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + // specifically, should not have empty string as match. + assertEquals("(null)" + NL + " \"ac,\"" + NL, dump(alt)); + alt.addBranch("ac,com,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"\"" + NL, dump(alt)); + alt.addBranch("ac,edu,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"edu,\"" + NL + + " \"\"" + NL, dump(alt)); + } + public void testTrie2() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("*,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"*,\"" + NL, dump(alt)); + } + + public void testTrie3() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("ac,!hoge,"); + alt.addBranch("ac,*,"); + // exception goes first. 
+ assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"!hoge,\"" + NL + + " \"*,\"" + NL + + " \"\"" + NL, dump(alt)); + } + + // test of higher-level functionality + + Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() + .matcher(""); + + public void testBasics() { + matchPrefix("com,example,www,", "com,example,"); + matchPrefix("com,example,", "com,example,"); + matchPrefix("org,archive,www,", "org,archive,"); + matchPrefix("org,archive,", "org,archive,"); + matchPrefix("fr,yahoo,www,", "fr,yahoo,"); + matchPrefix("fr,yahoo,", "fr,yahoo,"); + matchPrefix("au,com,foobar,www,", "au,com,foobar,"); + matchPrefix("au,com,foobar,", "au,com,foobar,"); + matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); + matchPrefix("uk,co,virgin,", "uk,co,virgin,"); + matchPrefix("au,com,example,www,", "au,com,example,"); + matchPrefix("au,com,example,", "au,com,example,"); + matchPrefix("jp,yokohama,public,assigned,www,", + "jp,yokohama,public,assigned,"); + matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); + } + + public void testDomainWithDash() { + matchPrefix("de,bad-site,www", "de,bad-site,"); + } + + public void testDomainWithNumbers() { + matchPrefix("de,archive4u,www", "de,archive4u,"); + } + + public void testIPV4() { + assertEquals("unexpected reduction", + "1.2.3.4", + PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); + } + + public void testIPV6() { + assertEquals("unexpected reduction", + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + PublicSuffixes.reduceSurtToAssignmentLevel( + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); + } + + public void testExceptions() { + matchPrefix("uk,bl,www,", "uk,bl,"); + matchPrefix("uk,bl,", "uk,bl,"); + matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); + matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); + } + + public void testFakeTLD() { + // we assume any new/unknonwn TLD should be assumed as 2-level; + // this is preferable for our grouping purpose but might not be + // for a 
cookie-assigning browser (original purpose of publicsuffixlist) + matchPrefix("zzz,example,www,", "zzz,example,"); + } + + public void testUnsegmentedHostname() { + m.reset("example"); + assertFalse("unexpected match found in 'example'", m.find()); + } + + public void testTopmostAssignedCaching() { + assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); + assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); + } + + // TODO: test UTF domains? + + protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { + m.reset(surtDomain); + assertTrue("expected match not found in '" + surtDomain, m.find()); + assertEquals("expected match not found", expectedAssignedPrefix, m + .group()); + } +} From 5054060e27da6fef0816efc8b90af06e4e998d9a Mon Sep 17 00:00:00 2001 From: RogerMathisen Date: Wed, 24 Sep 2014 10:04:32 +0200 Subject: [PATCH 65/86] Updated release notes. 
--- CHANGES.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 65d24814..db09a463 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +1.1.5 +----- +* [Removed direct reference to Unix TMP-path](https://github.com/iipc/webarchive-commons/issues/2) + 1.1.4 ----- * [All dates should be independent of locale settings](https://github.com/iipc/webarchive-commons/pull/22) From f3e12da0bb53cb4ffb0d21b2d13cda1b6918b1d1 Mon Sep 17 00:00:00 2001 From: Thomas Edvardsen Date: Wed, 24 Sep 2014 10:26:38 +0200 Subject: [PATCH 66/86] * changed newline from 0d0a to 0a in sourcfile --- .../org/archive/net/PublicSuffixesTest.java | 386 +++++++++--------- 1 file changed, 193 insertions(+), 193 deletions(-) diff --git a/src/test/java/org/archive/net/PublicSuffixesTest.java b/src/test/java/org/archive/net/PublicSuffixesTest.java index a82bab22..7528bbe1 100644 --- a/src/test/java/org/archive/net/PublicSuffixesTest.java +++ b/src/test/java/org/archive/net/PublicSuffixesTest.java @@ -1,193 +1,193 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.archive.net; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.ArrayList; -import java.util.regex.Matcher; - -import junit.framework.TestCase; - -import org.archive.net.PublicSuffixes.Node; - -/** - * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches - * from constructed regex. - * - * @author gojomo - */ -public class PublicSuffixesTest extends TestCase { - // test of low level implementation - private final String NL = System.lineSeparator(); - - public void testCompare() { - Node n = new Node("hoge"); - assertTrue(n.compareTo('a') > 0); - assertEquals(-1, n.compareTo('*')); - assertEquals(-1, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(-1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("*,"); - assertEquals(1, n.compareTo('a')); - assertEquals(0, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node("*,"))); - assertEquals(1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("!hoge"); - assertEquals(1, n.compareTo('a')); - assertEquals(-1, n.compareTo('*')); - assertEquals(0, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(0, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node(""); - assertEquals(1, n.compareTo('a')); - assertEquals(1, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node(""))); - } - - protected String dump(Node alt) { - StringWriter w = new StringWriter(); - PublicSuffixes.dump(alt, 0, new PrintWriter(w)); - return w.toString(); - } - public void testTrie1() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - // specifically, should not have empty string as match. 
- assertEquals("(null)" + NL + " \"ac,\"" + NL, dump(alt)); - alt.addBranch("ac,com,"); - assertEquals("(null)" + NL + - " \"ac,\"" + NL + - " \"com,\"" + NL + - " \"\"" + NL, dump(alt)); - alt.addBranch("ac,edu,"); - assertEquals("(null)" + NL + - " \"ac,\"" + NL + - " \"com,\"" + NL + - " \"edu,\"" + NL + - " \"\"" + NL, dump(alt)); - } - public void testTrie2() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("*,"); - assertEquals("(null)" + NL + - " \"ac,\"" + NL + - " \"*,\"" + NL, dump(alt)); - } - - public void testTrie3() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("ac,!hoge,"); - alt.addBranch("ac,*,"); - // exception goes first. - assertEquals("(null)" + NL + - " \"ac,\"" + NL + - " \"!hoge,\"" + NL + - " \"*,\"" + NL + - " \"\"" + NL, dump(alt)); - } - - // test of higher-level functionality - - Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() - .matcher(""); - - public void testBasics() { - matchPrefix("com,example,www,", "com,example,"); - matchPrefix("com,example,", "com,example,"); - matchPrefix("org,archive,www,", "org,archive,"); - matchPrefix("org,archive,", "org,archive,"); - matchPrefix("fr,yahoo,www,", "fr,yahoo,"); - matchPrefix("fr,yahoo,", "fr,yahoo,"); - matchPrefix("au,com,foobar,www,", "au,com,foobar,"); - matchPrefix("au,com,foobar,", "au,com,foobar,"); - matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); - matchPrefix("uk,co,virgin,", "uk,co,virgin,"); - matchPrefix("au,com,example,www,", "au,com,example,"); - matchPrefix("au,com,example,", "au,com,example,"); - matchPrefix("jp,yokohama,public,assigned,www,", - "jp,yokohama,public,assigned,"); - matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); - } - - public void testDomainWithDash() { - matchPrefix("de,bad-site,www", "de,bad-site,"); - } - - public void testDomainWithNumbers() { - matchPrefix("de,archive4u,www", "de,archive4u,"); - } - - public void testIPV4() 
{ - assertEquals("unexpected reduction", - "1.2.3.4", - PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); - } - - public void testIPV6() { - assertEquals("unexpected reduction", - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", - PublicSuffixes.reduceSurtToAssignmentLevel( - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); - } - - public void testExceptions() { - matchPrefix("uk,bl,www,", "uk,bl,"); - matchPrefix("uk,bl,", "uk,bl,"); - matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); - matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); - } - - public void testFakeTLD() { - // we assume any new/unknonwn TLD should be assumed as 2-level; - // this is preferable for our grouping purpose but might not be - // for a cookie-assigning browser (original purpose of publicsuffixlist) - matchPrefix("zzz,example,www,", "zzz,example,"); - } - - public void testUnsegmentedHostname() { - m.reset("example"); - assertFalse("unexpected match found in 'example'", m.find()); - } - - public void testTopmostAssignedCaching() { - assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); - assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); - } - - // TODO: test UTF domains? - - protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { - m.reset(surtDomain); - assertTrue("expected match not found in '" + surtDomain, m.find()); - assertEquals("expected match not found", expectedAssignedPrefix, m - .group()); - } -} +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.net; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.regex.Matcher; + +import junit.framework.TestCase; + +import org.archive.net.PublicSuffixes.Node; + +/** + * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches + * from constructed regex. + * + * @author gojomo + */ +public class PublicSuffixesTest extends TestCase { + // test of low level implementation + private final String NL = System.lineSeparator(); + + public void testCompare() { + Node n = new Node("hoge"); + assertTrue(n.compareTo('a') > 0); + assertEquals(-1, n.compareTo('*')); + assertEquals(-1, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(-1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("*,"); + assertEquals(1, n.compareTo('a')); + assertEquals(0, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, n.compareTo(new Node("*,"))); + assertEquals(1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("!hoge"); + assertEquals(1, n.compareTo('a')); + assertEquals(-1, n.compareTo('*')); + assertEquals(0, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(0, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node(""); + assertEquals(1, n.compareTo('a')); + assertEquals(1, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, 
n.compareTo(new Node(""))); + } + + protected String dump(Node alt) { + StringWriter w = new StringWriter(); + PublicSuffixes.dump(alt, 0, new PrintWriter(w)); + return w.toString(); + } + public void testTrie1() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + // specifically, should not have empty string as match. + assertEquals("(null)" + NL + " \"ac,\"" + NL, dump(alt)); + alt.addBranch("ac,com,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"\"" + NL, dump(alt)); + alt.addBranch("ac,edu,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"edu,\"" + NL + + " \"\"" + NL, dump(alt)); + } + public void testTrie2() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("*,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"*,\"" + NL, dump(alt)); + } + + public void testTrie3() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("ac,!hoge,"); + alt.addBranch("ac,*,"); + // exception goes first. 
+ assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"!hoge,\"" + NL + + " \"*,\"" + NL + + " \"\"" + NL, dump(alt)); + } + + // test of higher-level functionality + + Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() + .matcher(""); + + public void testBasics() { + matchPrefix("com,example,www,", "com,example,"); + matchPrefix("com,example,", "com,example,"); + matchPrefix("org,archive,www,", "org,archive,"); + matchPrefix("org,archive,", "org,archive,"); + matchPrefix("fr,yahoo,www,", "fr,yahoo,"); + matchPrefix("fr,yahoo,", "fr,yahoo,"); + matchPrefix("au,com,foobar,www,", "au,com,foobar,"); + matchPrefix("au,com,foobar,", "au,com,foobar,"); + matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); + matchPrefix("uk,co,virgin,", "uk,co,virgin,"); + matchPrefix("au,com,example,www,", "au,com,example,"); + matchPrefix("au,com,example,", "au,com,example,"); + matchPrefix("jp,yokohama,public,assigned,www,", + "jp,yokohama,public,assigned,"); + matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); + } + + public void testDomainWithDash() { + matchPrefix("de,bad-site,www", "de,bad-site,"); + } + + public void testDomainWithNumbers() { + matchPrefix("de,archive4u,www", "de,archive4u,"); + } + + public void testIPV4() { + assertEquals("unexpected reduction", + "1.2.3.4", + PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); + } + + public void testIPV6() { + assertEquals("unexpected reduction", + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + PublicSuffixes.reduceSurtToAssignmentLevel( + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); + } + + public void testExceptions() { + matchPrefix("uk,bl,www,", "uk,bl,"); + matchPrefix("uk,bl,", "uk,bl,"); + matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); + matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); + } + + public void testFakeTLD() { + // we assume any new/unknonwn TLD should be assumed as 2-level; + // this is preferable for our grouping purpose but might not be + // for a 
cookie-assigning browser (original purpose of publicsuffixlist) + matchPrefix("zzz,example,www,", "zzz,example,"); + } + + public void testUnsegmentedHostname() { + m.reset("example"); + assertFalse("unexpected match found in 'example'", m.find()); + } + + public void testTopmostAssignedCaching() { + assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); + assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); + } + + // TODO: test UTF domains? + + protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { + m.reset(surtDomain); + assertTrue("expected match not found in '" + surtDomain, m.find()); + assertEquals("expected match not found", expectedAssignedPrefix, m + .group()); + } +} From faec599fc4a1cc8f09523e78cab073ed570b8adc Mon Sep 17 00:00:00 2001 From: RogerMathisen Date: Wed, 24 Sep 2014 11:03:55 +0200 Subject: [PATCH 67/86] - Removed pointless code. 
--- .../archive/util/iterator/SortedCompositeIteratorTest.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java index 0f4dc68a..11ea1229 100644 --- a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java +++ b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java @@ -14,11 +14,6 @@ public class SortedCompositeIteratorTest extends TestCase { public void testHasNext() throws FileNotFoundException, IOException { - long t = 210000; - long c = 134; - float f = (float)c / (float)t; - System.err.format("F(%f)\n",f); - File a = File.createTempFile("filea", null); File b = File.createTempFile("fileb", null); From 0dd6db16c61abe91924a60a123b1d2894f40bed8 Mon Sep 17 00:00:00 2001 From: Nicholas Clarke Date: Wed, 24 Sep 2014 14:10:13 +0200 Subject: [PATCH 68/86] Correct WARC writer concerning missing CRLF in records with zero payload. --- src/main/java/org/archive/io/warc/WARCWriter.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java index b9558263..e2d28ee9 100644 --- a/src/main/java/org/archive/io/warc/WARCWriter.java +++ b/src/main/java/org/archive/io/warc/WARCWriter.java @@ -245,10 +245,11 @@ public void writeRecord(WARCRecordInfo recordInfo) write(bytes); totalBytes += bytes.length; + // Write out the header/body separator. + write(CRLF_BYTES); + totalBytes += CRLF_BYTES.length; + if (recordInfo.getContentStream() != null && recordInfo.getContentLength() > 0) { - // Write out the header/body separator. - write(CRLF_BYTES); // TODO: should this be written even for zero-length? 
- totalBytes += CRLF_BYTES.length; contentBytes += copyFrom(recordInfo.getContentStream(), recordInfo.getContentLength(), recordInfo.getEnforceLength()); From 46d0f6ffbad1b02fd7917c0e218eeed6557f3d9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristinn=20Sigur=C3=B0sson?= Date: Tue, 30 Sep 2014 15:10:48 +0000 Subject: [PATCH 69/86] Java 6 compatibility System.lineSeparator() was introduced in Java 7 --- src/test/java/org/archive/net/PublicSuffixesTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/archive/net/PublicSuffixesTest.java b/src/test/java/org/archive/net/PublicSuffixesTest.java index 7528bbe1..ca6e6408 100644 --- a/src/test/java/org/archive/net/PublicSuffixesTest.java +++ b/src/test/java/org/archive/net/PublicSuffixesTest.java @@ -36,7 +36,7 @@ */ public class PublicSuffixesTest extends TestCase { // test of low level implementation - private final String NL = System.lineSeparator(); + private final String NL = System.getProperty("line.separator"); public void testCompare() { Node n = new Node("hoge"); From 6556c7f14e54d07f13fe49c4c1bc6ee88c18f134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristinn=20Sigur=C3=B0sson?= Date: Tue, 30 Sep 2014 16:09:54 +0000 Subject: [PATCH 70/86] Change test value to get around Java 8 bug Fixes issue #31 which relates to changes in how Java rounds doubles in some edge cases. 
--- .../java/org/archive/util/ArchiveUtilsTest.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/test/java/org/archive/util/ArchiveUtilsTest.java b/src/test/java/org/archive/util/ArchiveUtilsTest.java index 8251615a..586a1821 100644 --- a/src/test/java/org/archive/util/ArchiveUtilsTest.java +++ b/src/test/java/org/archive/util/ArchiveUtilsTest.java @@ -229,16 +229,19 @@ public void testByteArrayEquals() { /** test doubleToString() */ public void testDoubleToString(){ - double test = 12.345; - assertTrue( + double test = 12.121d; + assertEquals( "cecking zero precision", - ArchiveUtils.doubleToString(test, 0).equals("12")); - assertTrue( + "12", + ArchiveUtils.doubleToString(test, 0)); + assertEquals( "cecking 2 character precision", - ArchiveUtils.doubleToString(test, 2).equals("12.34")); - assertTrue( + "12.12", + ArchiveUtils.doubleToString(test, 2)); + assertEquals( "cecking precision higher then the double has", - ArchiveUtils.doubleToString(test, 65).equals("12.345")); + "12.121", + ArchiveUtils.doubleToString(test, 65)); } From fbbaab079b06260aa84b8b2d896a34db3a6872e3 Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Wed, 1 Oct 2014 12:54:59 +0200 Subject: [PATCH 71/86] Update CHANGES.md --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index db09a463..a84f579e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,6 @@ 1.1.5 ----- -* [Removed direct reference to Unix TMP-path](https://github.com/iipc/webarchive-commons/issues/2) +* [Tests fail on Windows](https://github.com/iipc/webarchive-commons/issues/2) 1.1.4 ----- From 7914bdf04dbf5d0b431065b650a91773684ae757 Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Wed, 1 Oct 2014 12:58:31 +0200 Subject: [PATCH 72/86] Update CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index a84f579e..8e787634 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ 1.1.5 
----- * [Tests fail on Windows](https://github.com/iipc/webarchive-commons/issues/2) +* [Test fails on Java 8](https://github.com/iipc/webarchive-commons/issues/31) 1.1.4 ----- From 166656eb4b0dbfb16611a8b74e79c35b8954e72a Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Mon, 6 Oct 2014 14:03:17 +0200 Subject: [PATCH 73/86] Added method to UsableUri to get the IDN in non-puny form --- src/main/java/org/archive/url/UsableURI.java | 48 +++++++++++++++++++ .../java/org/archive/url/UsableURITest.java | 12 +++++ 2 files changed, 60 insertions(+) diff --git a/src/main/java/org/archive/url/UsableURI.java b/src/main/java/org/archive/url/UsableURI.java index b9c4ff9d..fa1de57a 100644 --- a/src/main/java/org/archive/url/UsableURI.java +++ b/src/main/java/org/archive/url/UsableURI.java @@ -18,6 +18,7 @@ */ package org.archive.url; +import gnu.inet.encoding.IDNA; import java.io.File; import java.io.IOException; import java.io.ObjectOutputStream; @@ -271,6 +272,53 @@ public String toString() { return toCustomString(); } + /** + * In the case of a puny encoded IDN, this method returns the decoded Unicode version. 
+ * @return decoded IDN version of URI + */ + public String toUnicodeHostString() { + if (!_is_hostname) { + return toString(); + } + + try { + StringBuilder buf = new StringBuilder(); + + if (_scheme != null) { + buf.append(_scheme); + buf.append(':'); + } + if (_is_net_path) { + buf.append("//"); + if (_authority != null) { // has_authority + if (_userinfo != null) { + buf.append(_userinfo).append('@'); + } + buf.append(IDNA.toUnicode(getHost())); + if (_port >= 0) { + buf.append(':').append(_port); + } + this._authority = buf.toString().toCharArray(); + } + } + if (_opaque != null && _is_opaque_part) { + buf.append(_opaque); + } else if (_path != null) { + // _is_hier_part or _is_relativeURI + if (_path.length != 0) { + buf.append(_path); + } + } + if (_query != null) { // has_query + buf.append('?'); + buf.append(_query); + } + return buf.toString(); + } catch (URIException ex) { + throw new RuntimeException(ex); + } + } + public synchronized String getEscapedURI() { if (this.cachedEscapedURI == null) { this.cachedEscapedURI = super.getEscapedURI(); diff --git a/src/test/java/org/archive/url/UsableURITest.java b/src/test/java/org/archive/url/UsableURITest.java index 2aec0e96..7588f03c 100644 --- a/src/test/java/org/archive/url/UsableURITest.java +++ b/src/test/java/org/archive/url/UsableURITest.java @@ -53,4 +53,16 @@ public void testSchemalessRelative() throws URIException { UsableURI test = new UsableURI(base, relative); assertEquals("http://www.facebook.com/?href=http://www.archive.org/a", test.toString()); } + + /** + * Test of toUnicodeHostString method, of class UsableURI. 
+ */ + public void testToUnicodeHostString() throws URIException { + assertEquals("http://øx.dk", new UsableURI("http://xn--x-4ga.dk", true, "UTF-8").toUnicodeHostString()); + assertEquals("xn--x-4ga.dk", new UsableURI("xn--x-4ga.dk", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://user:pass@øx.dk:8080", new UsableURI("http://user:pass@xn--x-4ga.dk:8080", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://user@øx.dk:8080", new UsableURI("http://user@xn--x-4ga.dk:8080", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://øx.dk/foo/bar?query=q", new UsableURI("http://xn--x-4ga.dk/foo/bar?query=q", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://127.0.0.1/foo/bar?query=q", new UsableURI("http://127.0.0.1/foo/bar?query=q", true, "UTF-8").toUnicodeHostString()); + } } From 619412c284baf78e8fbb3e2391687e226c4ea0f1 Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Mon, 10 Nov 2014 12:12:07 +0100 Subject: [PATCH 74/86] Fixed bug which changed the URI after calling toUnicodeHostString. --- src/main/java/org/archive/url/UsableURI.java | 4 +++- src/test/java/org/archive/url/UsableURITest.java | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/archive/url/UsableURI.java b/src/main/java/org/archive/url/UsableURI.java index fa1de57a..ed40f41a 100644 --- a/src/main/java/org/archive/url/UsableURI.java +++ b/src/main/java/org/archive/url/UsableURI.java @@ -274,6 +274,9 @@ public String toString() { /** * In the case of a puny encoded IDN, this method returns the decoded Unicode version. + *

+ * Most of this implementation is copied from {@link org.apache.commons.httpclient.URI#setURI()}. + * * @return decoded IDN version of URI */ public String toUnicodeHostString() { @@ -298,7 +301,6 @@ public String toUnicodeHostString() { if (_port >= 0) { buf.append(':').append(_port); } - this._authority = buf.toString().toCharArray(); } } if (_opaque != null && _is_opaque_part) { diff --git a/src/test/java/org/archive/url/UsableURITest.java b/src/test/java/org/archive/url/UsableURITest.java index 7588f03c..73694f79 100644 --- a/src/test/java/org/archive/url/UsableURITest.java +++ b/src/test/java/org/archive/url/UsableURITest.java @@ -64,5 +64,20 @@ public void testToUnicodeHostString() throws URIException { assertEquals("http://user@øx.dk:8080", new UsableURI("http://user@xn--x-4ga.dk:8080", true, "UTF-8").toUnicodeHostString()); assertEquals("http://øx.dk/foo/bar?query=q", new UsableURI("http://xn--x-4ga.dk/foo/bar?query=q", true, "UTF-8").toUnicodeHostString()); assertEquals("http://127.0.0.1/foo/bar?query=q", new UsableURI("http://127.0.0.1/foo/bar?query=q", true, "UTF-8").toUnicodeHostString()); + + // test idn round trip + // XXX fails because idn is not handled here (it is converted to punycode in UsableURIFactory.fixupDomainlabel()) + // assertEquals("http://øx.dk", new UsableURI("http://øx.dk", false, "UTF-8").toUnicodeHostString()); + // To check the round trip it is then necessary to use the factory method in UsableURIFactory. 
+ assertEquals("http://øx.dk/", UsableURIFactory.getInstance("http://øx.dk/", "UTF-8").toUnicodeHostString()); + + // non-idn domain name + assertEquals("http://example.org", new UsableURI("http://example.org", true, "UTF-8").toUnicodeHostString()); + + // ensure a call to toUnicodeHostString() has no effect on toString() + UsableURI uri = new UsableURI("http://xn--x-4ga.dk", true, "UTF-8"); + assertEquals("http://øx.dk", uri.toUnicodeHostString()); + uri.setPath(uri.getPath()); // force toString() cached value to be recomputed + assertEquals("http://xn--x-4ga.dk", uri.toString()); } } From 61f5a8cb7233f48196ea8fa305492d6b9f637b7f Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Mon, 10 Nov 2014 12:14:08 +0100 Subject: [PATCH 75/86] Fixed bug that prevented the https scheme from using static string. --- src/main/java/org/archive/url/LaxURI.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/archive/url/LaxURI.java b/src/main/java/org/archive/url/LaxURI.java index 807333d3..e1cea9b7 100644 --- a/src/main/java/org/archive/url/LaxURI.java +++ b/src/main/java/org/archive/url/LaxURI.java @@ -211,7 +211,7 @@ protected void setURI() { if (_scheme.length == 4 && Arrays.equals(_scheme, HTTP_SCHEME)) { _scheme = HTTP_SCHEME; } else if (_scheme.length == 5 - && Arrays.equals(_scheme, HTTP_SCHEME)) { + && Arrays.equals(_scheme, HTTPS_SCHEME)) { _scheme = HTTPS_SCHEME; } } From 6b7971f86eda7255c1d5ab05f7883da30db7fced Mon Sep 17 00:00:00 2001 From: John Erik Halse Date: Mon, 10 Nov 2014 15:39:15 +0100 Subject: [PATCH 76/86] Removed unnecessary import --- src/test/java/org/archive/url/UsableURITest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/org/archive/url/UsableURITest.java b/src/test/java/org/archive/url/UsableURITest.java index 73694f79..2a2f41f5 100644 --- a/src/test/java/org/archive/url/UsableURITest.java +++ b/src/test/java/org/archive/url/UsableURITest.java @@ -21,7 +21,6 @@ import 
java.net.URISyntaxException; import org.apache.commons.httpclient.URIException; -import org.archive.url.UsableURI; import junit.framework.TestCase; From 363a3c51b40a5d559bfa6eb7d2f038b9258f577a Mon Sep 17 00:00:00 2001 From: Gerhard Gossen Date: Wed, 17 Dec 2014 16:39:24 +0100 Subject: [PATCH 77/86] Improve URL escaping in CDX writer --- .../extract/RealCDXExtractorOutput.java | 9 ++++-- .../extract/RealCDXExtractorOutputTest.java | 28 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java diff --git a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java index 62a423c5..8ca3ff82 100644 --- a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java +++ b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java @@ -4,6 +4,7 @@ import java.io.OutputStream; import java.io.PrintWriter; import java.net.MalformedURLException; +import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.util.List; @@ -307,12 +308,14 @@ private String extractHTMLMetaRefresh(String origUrl, MetaData m) { return "-"; } - private String resolve(String context, String spec) { + static String resolve(String context, String spec) { // TODO: test! 
try { URL cUrl = new URL(context); - URL resolved = new URL(cUrl,spec); - return resolved.toURI().toASCIIString(); + URL url = new URL(cUrl, spec); + // this constructor escapes its arguments, if necessary + URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), url.getRef()); + return uri.toASCIIString(); } catch (URISyntaxException e) { } catch (MalformedURLException e) { diff --git a/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java b/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java new file mode 100644 index 00000000..14f8489d --- /dev/null +++ b/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java @@ -0,0 +1,28 @@ +package org.archive.extract; + +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLEncoder; + +import junit.framework.TestCase; + + +public class RealCDXExtractorOutputTest extends TestCase { + + public void testEscapeResolvedUrl() throws Exception { + String context ="http://www.uni-giessen.de/cms/studium/dateien/informationberatung/merkblattpdf"; + String spec = "http://fss.plone.uni-giessen.de/fß/studium/dateien/informationberatung/merkblattpdf/file/Mérkblatt zur Gestaltung von Nachteilsausgleichen.pdf?föo=bar#änchor"; + String escaped = RealCDXExtractorOutput.resolve(context, spec); + assertTrue(escaped.indexOf(" ") < 0); + URI parsed = new URI(escaped); + assertEquals("änchor", parsed.getFragment()); + } + + public void testNoDoubleEscaping() throws Exception { + String spec = "https://www.google.com/search?q=java+escape+url+spaces&ie=utf-8&oe=utf-8"; + String resolved = RealCDXExtractorOutput.resolve(spec, spec); + assertTrue(spec.equals(resolved)); + } +} From 1ee18d8a426a0b18aa502f71896d9962416262a0 Mon Sep 17 00:00:00 2001 From: Gerhard Gossen Date: Wed, 17 Dec 2014 17:12:42 +0100 Subject: [PATCH 78/86] Update CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff 
--git a/CHANGES.md b/CHANGES.md index 8e787634..7fb2f7c4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,6 @@ 1.1.5 ----- +* [Escape redirect URLs in RealCDXExtractorOutput](https://github.com/iipc/webarchive-commons/pull/36) * [Tests fail on Windows](https://github.com/iipc/webarchive-commons/issues/2) * [Test fails on Java 8](https://github.com/iipc/webarchive-commons/issues/31) From f130aad04b255e7d8cd4eee4bac86c25b0cbbf36 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 6 Jan 2015 15:54:59 -0800 Subject: [PATCH 79/86] move RecordingOutputStreamTest.java from heritrix to webarchive-commons --- .../archive/io/RecordingOutputStreamTest.java | 260 ++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 src/test/java/org/archive/io/RecordingOutputStreamTest.java diff --git a/src/test/java/org/archive/io/RecordingOutputStreamTest.java b/src/test/java/org/archive/io/RecordingOutputStreamTest.java new file mode 100644 index 00000000..1c53549b --- /dev/null +++ b/src/test/java/org/archive/io/RecordingOutputStreamTest.java @@ -0,0 +1,260 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.archive.io; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; + +import org.archive.util.TmpDirTestCase; + + +/** + * Test cases for RecordingOutputStream. + * + * @author stack + */ +public class RecordingOutputStreamTest extends TmpDirTestCase +{ + /** + * Size of buffer used in tests. + */ + private static final int BUFFER_SIZE = 5; + + /** + * How much to write in total when testing RecordingOutputStream. + */ + private static final int WRITE_TOTAL = 10; + + + /* + * @see TmpDirTestCase#setUp() + */ + protected void setUp() throws Exception + { + super.setUp(); + } + + /** + * Test reusing instance of RecordingOutputStream. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording. + */ + public void testReuse() + throws IOException + { + final String BASENAME = "testReuse"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Bkg.txt")).getAbsolutePath()); + for (int i = 0; i < 3; i++) + { + reuse(BASENAME, ros, i); + } + } + + private void reuse(String baseName, RecordingOutputStream ros, int index) + throws IOException + { + final String BASENAME = baseName + Integer.toString(index); + File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Do again to test that I can get a new ReplayInputStream on same + // RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Method to test for void write(int). + * + * Uses small buffer size and small write size. Test mark and reset too. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording. 
+ */ + public void testWriteint() + throws IOException + { + final String BASENAME = "testWriteint"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Do again to test that I can get a new ReplayInputStream on same + // RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Method to test for void write(byte []). + * + * Uses small buffer size and small write size. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording. + */ + public void testWritebytearray() + throws IOException + { + final String BASENAME = "testWritebytearray"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Do again to test that I can get a new ReplayInputStream on same + // RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Test mark and reset. + * @throws IOException + */ + public void testMarkReset() throws IOException + { + final String BASENAME = "testMarkReset"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + ReplayInputStream ris = ros.getReplayInputStream(); + ris.mark(10 /*Arbitrary value*/); + // Read from the stream. + ris.read(); + ris.read(); + ris.read(); + // Reset it. It should be back at zero. 
+ ris.reset(); + assertEquals("Reset to zero", ris.read(), 0); + assertEquals("Reset to zero char 1", ris.read(), 1); + assertEquals("Reset to zero char 2", ris.read(), 2); + // Mark stream. Here. Next character should be '3'. + ris.mark(10 /* Arbitrary value*/); + ris.read(); + ris.read(); + ris.reset(); + assertEquals("Reset to zero char 3", ris.read(), 3); + } + + /** + * Record a file write. + * + * Write a file w/ characters that start at null and ascend to + * filesize. Record the writing w/ passed ros + * recordingoutputstream. Return the file recorded as result of method. + * The file output stream that is recorded is named + * basename + ".txt". + * + *

This method writes a character at a time. + * + * @param ros RecordingOutputStream to record with. + * @param basename Basename of file. + * @param size How many characters to write. + * @return The recorded file (basename + ".txt" in the tmp dir). + */ + private File writeIntRecordedFile(RecordingOutputStream ros, + String basename, int size) + throws IOException + { + File f = new File(getTmpDir(), basename + ".txt"); + FileOutputStream fos = new FileOutputStream(f); + ros.open(fos); + for (int i = 0; i < size; i++) + { + ros.write(i); + } + ros.close(); + fos.close(); + assertEquals("Content-Length test", size, + ros.getResponseContentLength()); + return f; + } + + /** + * Record a file byte array write. + * + * Write a file w/ characters that start at null and ascend to + * filesize. Record the writing w/ passed ros + * recordingoutputstream. Return the file recorded as result of method. + * The file output stream that is recorded is named + * basename + ".txt". + * + *

This method writes using a byte array. + * + * @param ros RecordingOutputStream to record with. + * @param basename Basename of file. + * @param size How many characters to write. + * @return The recorded file (basename + ".txt" in the tmp dir). + */ + private File writeByteRecordedFile(RecordingOutputStream ros, + String basename, int size) + throws IOException + { + File f = new File(getTmpDir(), basename + ".txt"); + FileOutputStream fos = new FileOutputStream(f); + ros.open(fos); + byte [] b = new byte[size]; + for (int i = 0; i < size; i++) + { + b[i] = (byte)i; + } + ros.write(b); + ros.close(); + fos.close(); + assertEquals("Content-Length test", size, + ros.getResponseContentLength()); + return f; + } + + /** + * Verify what was written is both in the file written to and in the + * recording stream. + * + * @param ros Stream to check. + * @param f File that was recorded. Stream should have its content + * exactly. + * @param size Amount of bytes written. + * + * @exception IOException Failure reading streams. 
+ */ + private void verifyRecording(RecordingOutputStream ros, File f, + int size) throws IOException + { + assertEquals("Recorded file size.", size, f.length()); + FileInputStream fis = new FileInputStream(f); + assertNotNull("FileInputStream not null", fis); + ReplayInputStream ris = ros.getReplayInputStream(); + assertNotNull("ReplayInputStream not null", ris); + for (int i = 0; i < size; i++) + { + assertEquals("ReplayInputStream content verification", i, + ris.read()); + assertEquals("Recorded file content verification", i, + fis.read()); + } + assertEquals("ReplayInputStream at EOF", -1, ris.read()); + fis.close(); + ris.close(); + } +} From da5d63d41d83fe4d5ea6d14165830e75c568c9a2 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 6 Jan 2015 15:58:31 -0800 Subject: [PATCH 80/86] fix for https://github.com/iipc/webarchive-commons/issues/38 - detect end of http protocol headers in a smarter way, to avoid calling write(byte) repeatedly; add unit tests --- .../org/archive/io/RecordingOutputStream.java | 49 +++++++-- .../archive/io/RecordingOutputStreamTest.java | 100 ++++++++++++++++++ 2 files changed, 142 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/archive/io/RecordingOutputStream.java b/src/main/java/org/archive/io/RecordingOutputStream.java index fe05701c..7d2ff212 100644 --- a/src/main/java/org/archive/io/RecordingOutputStream.java +++ b/src/main/java/org/archive/io/RecordingOutputStream.java @@ -242,6 +242,26 @@ public void write(int b) throws IOException { checkLimits(); } + private int findMessageBodyBeginMark(byte[] b, int off, int len) { + if ((lastTwoBytes[1] == '\n' || lastTwoBytes[0] == '\n' && lastTwoBytes[1] == '\r') + && len >= 1 && b[off] == '\n') { + return 1; + } else if (lastTwoBytes[1] == '\n' && len >= 2 && b[off] == '\r' && b[off+1] == '\n') { + return 2; + } + + for (int i = off; i < off + len - 1; i++) { + if (b[i] == '\n' && b[i+1] == '\n') { + return i + 2; + } else if (b[i] == '\n' && b[i+1] == '\r' + && i + 2 < off 
+ len && b[i+2] == '\n') { + return i + 3; + } + } + + return -1; + } + public void write(byte[] b, int off, int len) throws IOException { if(position < maxPosition) { if(position+len<=maxPosition) { @@ -255,20 +275,35 @@ public void write(byte[] b, int off, int len) throws IOException { off += consumeRange; len -= consumeRange; } - - // see comment on int[] lastTwoBytes - while (messageBodyBeginMark < 0 && len > 0) { - write(b[off]); - off++; - len--; + + if (messageBodyBeginMark < 0) { + // see comment on int[] lastTwoBytes + int mark = findMessageBodyBeginMark(b, off, len); + if (mark > 0) { + if(recording) { + record(b, off, mark - off); + } + if (this.out != null) { + this.out.write(b, off, mark - off); + } + markMessageBodyBegin(); + len = len - (mark - off); + off = mark; + } } - + if(recording) { record(b, off, len); } if (this.out != null) { this.out.write(b, off, len); } + if (len >= 1) { + lastTwoBytes[1] = b[off + len - 1]; + if (len >= 2) { + lastTwoBytes[0] = b[off + len - 2]; + } + } checkLimits(); } diff --git a/src/test/java/org/archive/io/RecordingOutputStreamTest.java b/src/test/java/org/archive/io/RecordingOutputStreamTest.java index 1c53549b..f697ff31 100644 --- a/src/test/java/org/archive/io/RecordingOutputStreamTest.java +++ b/src/test/java/org/archive/io/RecordingOutputStreamTest.java @@ -18,11 +18,13 @@ */ package org.archive.io; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import org.archive.util.Base32; import org.archive.util.TmpDirTestCase; @@ -257,4 +259,102 @@ private void verifyRecording(RecordingOutputStream ros, File f, fis.close(); ris.close(); } + + public void testMessageBodyBegin() throws IOException { + final String BASENAME = "testMessageBodyBegin"; + cleanUpOldFiles(BASENAME); + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + 
ros.setSha1Digest(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\nabcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\r\n\r\nabcdefghij".getBytes()); + assertEquals(14, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r\nabcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes()); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes()); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\r\nabcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r".getBytes()); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes()); + ros.write('\n'); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + 
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes()); + ros.write('\n'); + ros.write('\n'); + for (int b: "abcdefghij".getBytes()) { + ros.write(b); + } + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes()); + ros.write('\n'); + ros.write('\r'); + ros.write('\n'); + for (int b: "abcdefghij".getBytes()) { + ros.write(b); + } + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes()); + ros.write('\n'); + ros.write("abcdefghij".getBytes()); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r".getBytes()); + ros.write('\n'); + ros.write("abcdefghij".getBytes()); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + } } From 808dcfe76002ebc126c168abb5b6f00b5d3b7e07 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 6 Jan 2015 16:08:48 -0800 Subject: [PATCH 81/86] move TmpDirTestCase.java from heritrix to webarchive-commons --- .../java/org/archive/util/TmpDirTestCase.java | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 src/main/java/org/archive/util/TmpDirTestCase.java diff --git a/src/main/java/org/archive/util/TmpDirTestCase.java b/src/main/java/org/archive/util/TmpDirTestCase.java new file mode 100644 index 00000000..09ec345b --- /dev/null +++ b/src/main/java/org/archive/util/TmpDirTestCase.java @@ 
-0,0 +1,119 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.util; + +import java.io.File; +import java.io.IOException; + +import junit.framework.TestCase; + + +/** + * Base class for TestCases that want access to a tmp dir for the writing + * of files. + * + * @author stack + */ +public abstract class TmpDirTestCase extends TestCase +{ + /** + * Name of the system property that holds pointer to tmp directory into + * which we can safely write files. + */ + public static final String TEST_TMP_SYSTEM_PROPERTY_NAME = "testtmpdir"; + + /** + * Default test tmp. + */ + public static final String DEFAULT_TEST_TMP_DIR = File.separator + "tmp" + + File.separator + "heritrix-junit-tests"; + + /** + * Directory to write temporary files to. + */ + private File tmpDir = null; + + + public TmpDirTestCase() + { + super(); + } + + public TmpDirTestCase(String testName) + { + super(testName); + } + + /* + * @see TestCase#setUp() + */ + protected void setUp() throws Exception { + super.setUp(); + this.tmpDir = tmpDir(); + } + + /** + * @return Returns the tmpDir. + */ + public File getTmpDir() + { + return this.tmpDir; + } + + /** + * Delete any files left over from previous run. + * + * @param basename Base name of files we're to clean up. 
+ */ + public void cleanUpOldFiles(String basename) { + cleanUpOldFiles(getTmpDir(), basename); + } + + /** + * Delete any files left over from previous run. + * + * @param prefix Base name of files we're to clean up. + * @param basedir Directory to start cleaning in. + */ + public void cleanUpOldFiles(File basedir, String prefix) { + File [] files = FileUtils.getFilesWithPrefix(basedir, prefix); + if (files != null) { + for (int i = 0; i < files.length; i++) { + org.apache.commons.io.FileUtils.deleteQuietly(files[i]); + } + } + } + + + public static File tmpDir() throws IOException { + String tmpDirStr = System.getProperty(TEST_TMP_SYSTEM_PROPERTY_NAME); + tmpDirStr = (tmpDirStr == null)? DEFAULT_TEST_TMP_DIR: tmpDirStr; + File tmpDir = new File(tmpDirStr); + FileUtils.ensureWriteableDirectory(tmpDir); + + if (!tmpDir.canWrite()) + { + throw new IOException(tmpDir.getAbsolutePath() + + " is unwriteable."); + } + + return tmpDir; + } +} From eda46e2554f52d0514de04b6624f81964e67289d Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 6 Jan 2015 16:24:39 -0800 Subject: [PATCH 82/86] update junit dependency since TmpDirTestCase.java is not in the "test" area --- pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6664efd8..df8d0928 100644 --- a/pom.xml +++ b/pom.xml @@ -65,7 +65,6 @@ junit junit 3.8.1 - test From c77d6f5b0dcd899f5adff3db8eab87319cc162ed Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Tue, 27 Jan 2015 14:55:45 -0800 Subject: [PATCH 83/86] update CHANGES.md --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 7fb2f7c4..b872846d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ * [Escape redirect URLs in RealCDXExtractorOutput](https://github.com/iipc/webarchive-commons/pull/36) * [Tests fail on Windows](https://github.com/iipc/webarchive-commons/issues/2) * [Test fails on Java 8](https://github.com/iipc/webarchive-commons/issues/31) +* [RecordingOutputStream can affect tcp 
packets sent in an undesirable way](https://github.com/iipc/webarchive-commons/issues/38) 1.1.4 ----- From 5df4d91d8cb7c4c2943c318eb44cb9579ac55597 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 4 Feb 2015 10:10:11 +0000 Subject: [PATCH 84/86] [maven-release-plugin] prepare release webarchive-commons-1.1.5 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index df8d0928..0ed119b8 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.5-SNAPSHOT + 1.1.5 jar webarchive-commons From 98c6d1a7e00479fc30715bdb848575018b28abcc Mon Sep 17 00:00:00 2001 From: Hunter Stern Date: Fri, 20 Mar 2015 15:39:53 -0700 Subject: [PATCH 85/86] Allow backslash path separator to be used for canonicalizing url. For https://webarchive.jira.com/browse/ARI-4246 --- src/main/java/org/archive/url/BasicURLCanonicalizer.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/archive/url/BasicURLCanonicalizer.java b/src/main/java/org/archive/url/BasicURLCanonicalizer.java index c09ad6e6..5f39ce76 100644 --- a/src/main/java/org/archive/url/BasicURLCanonicalizer.java +++ b/src/main/java/org/archive/url/BasicURLCanonicalizer.java @@ -74,15 +74,15 @@ public void canonicalize(HandyURL url) { url.setPath(escapeOnce(normalizePath(path))); } - private static final Pattern SINGLE_FORWARDSLASH_PATTERN = Pattern - .compile("/"); + private static final Pattern SINGLE_FORWARDANDBACKSLASH_PATTERN = Pattern + .compile("[/\\\\]"); public String normalizePath(String path) { if (path == null) { path = "/"; } else { // -1 gives an empty trailing element if path ends with '/': - String[] paths = SINGLE_FORWARDSLASH_PATTERN.split(path, -1); + String[] paths = SINGLE_FORWARDANDBACKSLASH_PATTERN.split(path, -1); ArrayList keptPaths = new ArrayList(); boolean first = true; for (String p : paths) { From 92f434e24e038e4f7c81087023a7463b4a009998 Mon Sep 17 00:00:00 2001 From: Kenji 
Nagahashi Date: Wed, 25 Mar 2015 22:31:54 -0700 Subject: [PATCH 86/86] Changes for local build: - add "-IA" version suffix - restore distributionManagement --- pom.xml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 0ed119b8..222a4c78 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.5 + 1.1.5-IA jar webarchive-commons @@ -243,7 +243,6 @@ -