From 7a1a39b39b7e2dd886a5bfa15d5b65f2c05f5e18 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Tue, 23 Apr 2013 11:44:04 +0200 Subject: [PATCH 01/28] Added a suitable travis-ci config file. --- .travis.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..52ea3bf1 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,10 @@ +language: java +jdk: + - oraclejdk7 +before_script: + - "echo $JAVA_OPTS" + - "export JAVA_OPTS=-Xmx1024m" + - "echo $JAVA_OPTS" + - "ulimit -a" + - "ulimit -u 2048" + - "ulimit -a" From 299281f5ec54b007ea61b74edd690bd05db6399d Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 10 Oct 2013 09:18:00 +0100 Subject: [PATCH 02/28] Initial build under org.netpreserve Renamed the project, resolved some dependencies which were only held at IA. This did involve updating the code very slightly, as one of the dependant classes had moved package. --- LICENSE | 191 ++++++++++++++++++ README.md | 4 + pom.xml | 43 ++-- .../org/archive/url/UsableURIFactory.java | 2 +- 4 files changed, 212 insertions(+), 28 deletions(-) create mode 100644 LICENSE create mode 100644 README.md diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..37ec93a1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. 
+For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 00000000..b70f8318 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +OpenWayback Web Commons +======================= + +This repository contains common utility code for the OpenWayback project. diff --git a/pom.xml b/pom.xml index cbdb154b..c3a7a358 100644 --- a/pom.xml +++ b/pom.xml @@ -2,13 +2,20 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - org.archive - ia-web-commons - 1.0-SNAPSHOT + + org.sonatype.oss + oss-parent + 7 + + + + org.netpreserve.commons + web-commons + 1.0.0-SNAPSHOT jar - ia-web-commons - http://maven.apache.org + iipc-web-commons + https://github.com/iipc/iipc-web-commons UTF-8 @@ -42,7 +49,7 @@ - org.mozilla + com.googlecode.juniversalchardet juniversalchardet 1.0.3 @@ -115,8 +122,8 @@ it.unimi.dsi - mg4j - 1.0.1 + dsiutils + 2.0.12 compile @@ -139,7 +146,7 @@ jar-with-dependencies - ia-web-commons + iipc-web-commons @@ -160,24 +167,6 @@ - - internetarchive - Internet Archive Maven Repository - http://builds.archive.org:8080/maven2 - default - - - true - daily - warn - - - true - daily - warn - - - cloudera Cloudera Hadoop diff --git a/src/main/java/org/archive/url/UsableURIFactory.java b/src/main/java/org/archive/url/UsableURIFactory.java index 46b8e119..9118b850 100644 --- a/src/main/java/org/archive/url/UsableURIFactory.java +++ 
b/src/main/java/org/archive/url/UsableURIFactory.java @@ -20,7 +20,7 @@ import gnu.inet.encoding.IDNA; import gnu.inet.encoding.IDNAException; -import it.unimi.dsi.mg4j.util.MutableString; +import it.unimi.dsi.lang.MutableString; import java.io.UnsupportedEncodingException; import java.util.BitSet; From 7f7054287b86650adb6955f43a38311e913152f9 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 10 Oct 2013 13:08:36 +0100 Subject: [PATCH 03/28] Copied PublicSuffixes code into web-commons from heritrix-commons. --- .../java/org/archive/url/PublicSuffixes.java | 363 ++ src/main/resources/effective_tld_names.dat | 5229 +++++++++++++++++ .../org/archive/url/PublicSuffixesTest.java | 193 + 3 files changed, 5785 insertions(+) create mode 100644 src/main/java/org/archive/url/PublicSuffixes.java create mode 100644 src/main/resources/effective_tld_names.dat create mode 100644 src/test/java/org/archive/url/PublicSuffixesTest.java diff --git a/src/main/java/org/archive/url/PublicSuffixes.java b/src/main/java/org/archive/url/PublicSuffixes.java new file mode 100644 index 00000000..7c3df6b8 --- /dev/null +++ b/src/main/java/org/archive/url/PublicSuffixes.java @@ -0,0 +1,363 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.url; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.io.IOUtils; +import org.archive.util.TextUtils; + +/** + * Utility class for making use of the information about 'public suffixes' at + * http://publicsuffix.org. + * + * The public suffix list (once known as 'effective TLDs') was motivated by the + * need to decide on which broader domains a subdomain was allowed to set + * cookies. For example, a server at 'www.example.com' can set cookies for + * 'www.example.com' or 'example.com' but not 'com'. 'www.example.co.uk' can set + * cookies for 'www.example.co.uk' or 'example.co.uk' but not 'co.uk' or 'uk'. + * The number of rules for all top-level-domains and 2nd- or 3rd- level domains + * has become quite long; essentially the broadest domain a subdomain may assign + * to is the one that was sold/registered to a specific name registrant. + * + * This concept should be useful in other contexts, too. Grouping URIs (or + * queues of URIs to crawl) together with others sharing the same registered + * suffix may be useful for applying the same rules to all, such as assigning + * them to the same queue or crawler in a multi- machine setup. + * + * As of Heritrix3, we prefer the term 'Assignment Level Domain' (ALD) + * for such domains, by analogy to 'Top Level Domain' (TLD) or '2nd Level + * Domain' (2LD), etc. + * + * @author Gojomo + * + * this version of PublicSuffixes uses suffix-tree data structure for generating less + * redundant regular expression. 
It may be even possible to write a light-weight, + * thread-safe matcher based on this class. + * @author Kenji Nagahashi + */ +public class PublicSuffixes { + protected static Pattern topmostAssignedSurtPrefixPattern; + protected static String topmostAssignedSurtPrefixRegex; + + /** + * prefix tree node. each Node represents sequence of letters (prefix) + * and alternative sequences following it (list of Node's). Nodes in + * {@code branches} are sorted for skip list like lookup and for generating + * effective regular expression (see {@link #compareTo(Node)} and {@link #compareTo(char)}). + * + * as this is intended for internal use only, there are no access methods. procedures for updating + * prefix tree with new input are defined within this class ({@link #addBranch(CharSequence)}). + * + * terminal node could be represented in two different forms: 1) Node with zero branches, + * or 2) Node with zero-length {@code cs}. So, root node must be initialized with empty (not null) + * {@code branches} unless empty string matches the overall pattern. + * {@code cs} must not be null except for root node. 
+ */ + public static class Node implements Comparable { + protected CharSequence cs; + protected List branches; + public Node() { + this("", null); + } + protected Node(CharSequence cs) { + this(cs, null); + } + protected Node(CharSequence cs, List branches) { + this.cs = cs; + this.branches = branches; + } + public void addBranch(CharSequence s) { + if (branches == null) { + branches = new ArrayList(); + branches.add(new Node("", null)); + } + for (int i = 0; i < branches.size(); i++) { + Node alt = branches.get(i); + if (alt.add(s)) return; + if (alt.compareTo(s.charAt(0)) > 0) { + Node alt1 = new Node(s, null); + branches.add(i, alt1); + return; + } + } + Node alt2 = new Node(s, null); + branches.add(alt2); + } + public boolean add(CharSequence s) { + int l = Math.min(s.length(), cs.length()); + int i = 0; + while (i < l && s.charAt(i) == cs.charAt(i)) + i++; + // zero-length match holds only when both cs and s are empty. + if (i == 0) return cs.length() == 0 && s.length() == 0; + if (i < cs.length()) { + CharSequence cs0 = cs.subSequence(0, i); + CharSequence cs1 = cs.subSequence(i, cs.length()); + CharSequence cs2 = s.subSequence(i, s.length()); + cs = cs0; + Node alt1 = new Node(cs1, branches); + (branches = new ArrayList()).add(alt1); + addBranch(cs2); + } else { + assert i == cs.length(); + addBranch(s.subSequence(i, s.length())); + } + return true; + } + public int compareTo(Node other) { + if (other.cs == null || other.cs.length() == 0) + return (cs == null || cs.length() == 0) ? 0 : -1; + return compareTo(other.cs.charAt(0)); + } + public int compareTo(char oc) { + if (cs == null || cs.length() == 0) return 1; + // '!' and '*' must come after ordinary letters, in this order, for regexp + // to work as intended. + char c = cs.charAt(0); + if (c == oc) return 0; + if (c == '!') return oc == '*' ? 
-1 : 1; + if (c == '*') return 1; + if (oc == '*' || oc == '!') return -1; + return Character.valueOf(c).compareTo(oc); + // for generating the same regexp as previous version. + //return Character.valueOf(oc).compareTo(c); + } + } + + /** + * Utility method for dumping a regex String, based on a published public + * suffix list, which matches any SURT-form hostname up through the broadest + * 'private' (assigned/sold) domain-segment. That is, for any of the + * SURT-form hostnames... + * + * com,example, com,example,www, com,example,california,www + * + * ...the regex will match 'com,example,'. + * + * @param args optional: args[0] is the list file to read ('=' or absent for the bundled list); args[1] is the output file (absent for stdout) + * @throws IOException if the input file cannot be opened or the output cannot be written + */ + public static void main(String args[]) throws IOException { + InputStream is; + if (args.length == 0 || "=".equals(args[0])) { + // use bundled list + is = PublicSuffixes.class.getClassLoader().getResourceAsStream( + "effective_tld_names.dat"); + } else { + is = new FileInputStream(args[0]); + } + BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); + String regex = getTopmostAssignedSurtPrefixRegex(reader); + IOUtils.closeQuietly(is); + + boolean needsClose = false; + BufferedWriter writer; + if (args.length >= 2) { + // write to specified file + writer = new BufferedWriter(new FileWriter(args[1])); + needsClose = true; + } else { + // write to stdout + writer = new BufferedWriter(new OutputStreamWriter(System.out)); + } + writer.append(regex); + writer.flush(); + if (needsClose) { + writer.close(); + } + } + /** + * Reads a file of the format promulgated by publicsuffix.org, ignoring + * comments and blank lines, converting domain segments to + * SURT-ordering. Leaves '!' exception markers and glob-style '*' wildcarding in place. Returns root + * node of SURT-ordered prefix tree. 
+ * + * @param reader source of the public suffix list text + * @return root of prefix tree node. 
+ * @throws IOException if reading from {@code reader} fails + */ + protected static Node readPublishedFileToSurtTrie(BufferedReader reader) throws IOException { + // initializing with empty Alt list prevents empty pattern from being + // created for the first addBranch() + Node alt = new Node(null, new ArrayList()); + String line; + while ((line = reader.readLine()) != null) { + // discard surrounding whitespace, empty lines and comments ('!' exception rules are kept) + line = line.trim(); + if (line.length() == 0 || line.startsWith("//")) continue; + // discard utf8 notation after entry + line = line.split("\\s+")[0]; + // TODO: maybe we don't need to create lower-cased String + line = line.toLowerCase(); + // SURT-order domain segments + String[] segs = line.split("\\."); + StringBuilder sb = new StringBuilder(); + for (int i = segs.length - 1; i >= 0; i--) { + if (segs[i].length() == 0) continue; + sb.append(segs[i]).append(','); + } + alt.addBranch(sb.toString()); + } + return alt; + } + /** + * utility function for dumping prefix tree structure. intended for debug use. + * @param alt root of prefix tree. + * @param lv indent level. 0 for root (no indent). + * @param out writer to send output to. + */ + public static void dump(Node alt, int lv, PrintWriter out) { + for (int i = 0; i < lv; i++) + out.print(" "); + out.println(alt.cs != null ? ('"'+alt.cs.toString()+'"') : "(null)"); + if (alt.branches != null) { + for (Node br : alt.branches) { + dump(br, lv + 1, out); + } + } + } + /** + * builds regular expression from prefix-tree {@code alt} into buffer {@code sb}. + * @param alt prefix tree root. + * @param sb StringBuilder to append the regular expression to. + */ + protected static void buildRegex(Node alt, StringBuilder sb) { + String close = null; + if (alt.cs != null) { + // actually '!' will always be the first character, because it is + // always used along with '*'. 
+ for (int i = 0; i < alt.cs.length(); i++) { + char c = alt.cs.charAt(i); + if (c == '!') { + if (close != null) + throw new RuntimeException("more than one '!'"); + sb.append("(?="); + close = ")"; + } else if (c == '*') { + sb.append("[-\\w]+"); + } else { + sb.append(c); + } + } + } + if (alt.branches != null) { + // alt.branches.size() should always be > 1 + if (alt.branches.size() > 1) { + sb.append("(?:"); + } + String sep = ""; + for (Node alt1 : alt.branches) { + sb.append(sep); sep = "|"; + buildRegex(alt1, sb); + } + if (alt.branches.size() > 1) { + sb.append(")"); + } + } + if (close != null) + sb.append(close); + } + + /** + * Converts SURT-ordered prefix tree of public suffixes into a Java regex which + * matches the public-portion "plus one" segment, giving the domain on which + * cookies can be set or other policy grouping should occur. Also adds to + * regex a fallback matcher that for any new/unknown TLDs assumes the + * second-level domain is assignable. (Eg: 'zzz,example,'). 
+ * + * @param trie root of the SURT-ordered prefix tree; a "*," fallback branch is added to it + * @return the regular expression as a String + */ + private static String surtPrefixRegexFromTrie(Node trie) { + StringBuilder regex = new StringBuilder(); + regex.append("(?ix)^\n"); + trie.addBranch("*,"); // for new/unknown TLDs + buildRegex(trie, regex); + regex.append("\n([-\\w]+,)"); + return regex.toString(); + } + + public static synchronized Pattern getTopmostAssignedSurtPrefixPattern() { + if (topmostAssignedSurtPrefixPattern == null) { + topmostAssignedSurtPrefixPattern = Pattern + .compile(getTopmostAssignedSurtPrefixRegex()); + } + return topmostAssignedSurtPrefixPattern; + } + + public static synchronized String getTopmostAssignedSurtPrefixRegex() { + if (topmostAssignedSurtPrefixRegex == null) { + // use bundled list + try { + BufferedReader reader = new BufferedReader(new InputStreamReader( + PublicSuffixes.class.getClassLoader().getResourceAsStream( + "effective_tld_names.dat"), "UTF-8")); + topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader); + IOUtils.closeQuietly(reader); + } catch (UnsupportedEncodingException ex) { + // should never happen + throw new RuntimeException(ex); + } + } + return topmostAssignedSurtPrefixRegex; + } + + public static String getTopmostAssignedSurtPrefixRegex(BufferedReader reader) { + try { + Node trie = readPublishedFileToSurtTrie(reader); + return surtPrefixRegexFromTrie(trie); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Truncate SURT to its topmost assigned domain segment; that is, + * the public suffix plus one segment, but as a SURT-ordered prefix. + * + * If the pattern doesn't match, the passed-in SURT is returned. 
+ * + * @param surt SURT to truncate + * @return truncated-to-topmost-assigned SURT prefix + */ + public static String reduceSurtToAssignmentLevel(String surt) { + Matcher matcher = TextUtils.getMatcher( + getTopmostAssignedSurtPrefixRegex(), surt); + if (matcher.find()) { + surt = matcher.group(); + } + TextUtils.recycleMatcher(matcher); + return surt; + } +} diff --git a/src/main/resources/effective_tld_names.dat b/src/main/resources/effective_tld_names.dat new file mode 100644 index 00000000..2c201312 --- /dev/null +++ b/src/main/resources/effective_tld_names.dat @@ -0,0 +1,5229 @@ +// ***** BEGIN LICENSE BLOCK ***** +// Version: MPL 1.1/GPL 2.0/LGPL 2.1 +// +// The contents of this file are subject to the Mozilla Public License Version +// 1.1 (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// http://www.mozilla.org/MPL/ +// +// Software distributed under the License is distributed on an "AS IS" basis, +// WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +// for the specific language governing rights and limitations under the +// License. +// +// The Original Code is the Public Suffix List. +// +// The Initial Developer of the Original Code is +// Jo Hermans . +// Portions created by the Initial Developer are Copyright (C) 2007 +// the Initial Developer. All Rights Reserved. +// +// Contributor(s): +// Ruben Arakelyan +// Gervase Markham +// Pamela Greene +// David Triendl +// Jothan Frakes +// The kind representatives of many TLD registries +// +// Alternatively, the contents of this file may be used under the terms of +// either the GNU General Public License Version 2 or later (the "GPL"), or +// the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +// in which case the provisions of the GPL or the LGPL are applicable instead +// of those above. 
If you wish to allow use of your version of this file only +// under the terms of either the GPL or the LGPL, and not to allow others to +// use your version of this file under the terms of the MPL, indicate your +// decision by deleting the provisions above and replace them with the notice +// and other provisions required by the GPL or the LGPL. If you do not delete +// the provisions above, a recipient may use your version of this file under +// the terms of any one of the MPL, the GPL or the LGPL. +// +// ***** END LICENSE BLOCK ***** + +// ===BEGIN ICANN DOMAINS=== + +// ac : http://en.wikipedia.org/wiki/.ac +ac +com.ac +edu.ac +gov.ac +net.ac +mil.ac +org.ac + +// ad : http://en.wikipedia.org/wiki/.ad +ad +nom.ad + +// ae : http://en.wikipedia.org/wiki/.ae +// see also: "Domain Name Eligibility Policy" at http://www.aeda.ae/eng/aepolicy.php +ae +co.ae +net.ae +org.ae +sch.ae +ac.ae +gov.ae +mil.ae + +// aero : see http://www.information.aero/index.php?id=66 +aero +accident-investigation.aero +accident-prevention.aero +aerobatic.aero +aeroclub.aero +aerodrome.aero +agents.aero +aircraft.aero +airline.aero +airport.aero +air-surveillance.aero +airtraffic.aero +air-traffic-control.aero +ambulance.aero +amusement.aero +association.aero +author.aero +ballooning.aero +broker.aero +caa.aero +cargo.aero +catering.aero +certification.aero +championship.aero +charter.aero +civilaviation.aero +club.aero +conference.aero +consultant.aero +consulting.aero +control.aero +council.aero +crew.aero +design.aero +dgca.aero +educator.aero +emergency.aero +engine.aero +engineer.aero +entertainment.aero +equipment.aero +exchange.aero +express.aero +federation.aero +flight.aero +freight.aero +fuel.aero +gliding.aero +government.aero +groundhandling.aero +group.aero +hanggliding.aero +homebuilt.aero +insurance.aero +journal.aero +journalist.aero +leasing.aero +logistics.aero +magazine.aero +maintenance.aero +marketplace.aero +media.aero +microlight.aero +modelling.aero 
+navigation.aero +parachuting.aero +paragliding.aero +passenger-association.aero +pilot.aero +press.aero +production.aero +recreation.aero +repbody.aero +res.aero +research.aero +rotorcraft.aero +safety.aero +scientist.aero +services.aero +show.aero +skydiving.aero +software.aero +student.aero +taxi.aero +trader.aero +trading.aero +trainer.aero +union.aero +workinggroup.aero +works.aero + +// af : http://www.nic.af/help.jsp +af +gov.af +com.af +org.af +net.af +edu.af + +// ag : http://www.nic.ag/prices.htm +ag +com.ag +org.ag +net.ag +co.ag +nom.ag + +// ai : http://nic.com.ai/ +ai +off.ai +com.ai +net.ai +org.ai + +// al : http://www.ert.gov.al/ert_alb/faq_det.html?Id=31 +al +com.al +edu.al +gov.al +mil.al +net.al +org.al + +// am : http://en.wikipedia.org/wiki/.am +am + +// an : http://www.una.an/an_domreg/default.asp +an +com.an +net.an +org.an +edu.an + +// ao : http://en.wikipedia.org/wiki/.ao +// http://www.dns.ao/REGISTR.DOC +ao +ed.ao +gv.ao +og.ao +co.ao +pb.ao +it.ao + +// aq : http://en.wikipedia.org/wiki/.aq +aq + +// ar : http://en.wikipedia.org/wiki/.ar +*.ar +!congresodelalengua3.ar +!educ.ar +!gobiernoelectronico.ar +!mecon.ar +!nacion.ar +!nic.ar +!promocion.ar +!retina.ar +!uba.ar + +// arpa : http://en.wikipedia.org/wiki/.arpa +// Confirmed by registry 2008-06-18 +e164.arpa +in-addr.arpa +ip6.arpa +iris.arpa +uri.arpa +urn.arpa + +// as : http://en.wikipedia.org/wiki/.as +as +gov.as + +// asia : http://en.wikipedia.org/wiki/.asia +asia + +// at : http://en.wikipedia.org/wiki/.at +// Confirmed by registry 2008-06-17 +at +ac.at +co.at +gv.at +or.at + +// au : http://en.wikipedia.org/wiki/.au +// http://www.auda.org.au/ +// 2LDs +com.au +net.au +org.au +edu.au +gov.au +csiro.au +asn.au +id.au +// Historic 2LDs (closed to new registration, but sites still exist) +info.au +conf.au +oz.au +// CGDNs - http://www.cgdn.org.au/ +act.au +nsw.au +nt.au +qld.au +sa.au +tas.au +vic.au +wa.au +// 3LDs +act.edu.au +nsw.edu.au +nt.edu.au +qld.edu.au +sa.edu.au 
+tas.edu.au +vic.edu.au +wa.edu.au +act.gov.au +// Removed at request of Shae.Donelan@services.nsw.gov.au, 2010-03-04 +// nsw.gov.au +nt.gov.au +qld.gov.au +sa.gov.au +tas.gov.au +vic.gov.au +wa.gov.au + +// aw : http://en.wikipedia.org/wiki/.aw +aw +com.aw + +// ax : http://en.wikipedia.org/wiki/.ax +ax + +// az : http://en.wikipedia.org/wiki/.az +az +com.az +net.az +int.az +gov.az +org.az +edu.az +info.az +pp.az +mil.az +name.az +pro.az +biz.az + +// ba : http://en.wikipedia.org/wiki/.ba +ba +org.ba +net.ba +edu.ba +gov.ba +mil.ba +unsa.ba +unbi.ba +co.ba +com.ba +rs.ba + +// bb : http://en.wikipedia.org/wiki/.bb +bb +biz.bb +com.bb +edu.bb +gov.bb +info.bb +net.bb +org.bb +store.bb + +// bd : http://en.wikipedia.org/wiki/.bd +*.bd + +// be : http://en.wikipedia.org/wiki/.be +// Confirmed by registry 2008-06-08 +be +ac.be + +// bf : http://en.wikipedia.org/wiki/.bf +bf +gov.bf + +// bg : http://en.wikipedia.org/wiki/.bg +// https://www.register.bg/user/static/rules/en/index.html +bg +a.bg +b.bg +c.bg +d.bg +e.bg +f.bg +g.bg +h.bg +i.bg +j.bg +k.bg +l.bg +m.bg +n.bg +o.bg +p.bg +q.bg +r.bg +s.bg +t.bg +u.bg +v.bg +w.bg +x.bg +y.bg +z.bg +0.bg +1.bg +2.bg +3.bg +4.bg +5.bg +6.bg +7.bg +8.bg +9.bg + +// bh : http://en.wikipedia.org/wiki/.bh +bh +com.bh +edu.bh +net.bh +org.bh +gov.bh + +// bi : http://en.wikipedia.org/wiki/.bi +// http://whois.nic.bi/ +bi +co.bi +com.bi +edu.bi +or.bi +org.bi + +// biz : http://en.wikipedia.org/wiki/.biz +biz + +// bj : http://en.wikipedia.org/wiki/.bj +bj +asso.bj +barreau.bj +gouv.bj + +// bm : http://www.bermudanic.bm/dnr-text.txt +bm +com.bm +edu.bm +gov.bm +net.bm +org.bm + +// bn : http://en.wikipedia.org/wiki/.bn +*.bn + +// bo : http://www.nic.bo/ +bo +com.bo +edu.bo +gov.bo +gob.bo +int.bo +org.bo +net.bo +mil.bo +tv.bo + +// br : http://registro.br/dominio/dpn.html +// Updated by registry 2011-03-01 +br +adm.br +adv.br +agr.br +am.br +arq.br +art.br +ato.br +b.br +bio.br +blog.br +bmd.br +can.br +cim.br +cng.br +cnt.br 
+com.br +coop.br +ecn.br +edu.br +emp.br +eng.br +esp.br +etc.br +eti.br +far.br +flog.br +fm.br +fnd.br +fot.br +fst.br +g12.br +ggf.br +gov.br +imb.br +ind.br +inf.br +jor.br +jus.br +lel.br +mat.br +med.br +mil.br +mus.br +net.br +nom.br +not.br +ntr.br +odo.br +org.br +ppg.br +pro.br +psc.br +psi.br +qsl.br +radio.br +rec.br +slg.br +srv.br +taxi.br +teo.br +tmp.br +trd.br +tur.br +tv.br +vet.br +vlog.br +wiki.br +zlg.br + +// bs : http://www.nic.bs/rules.html +bs +com.bs +net.bs +org.bs +edu.bs +gov.bs + +// bt : http://en.wikipedia.org/wiki/.bt +bt +com.bt +edu.bt +gov.bt +net.bt +org.bt + +// bv : No registrations at this time. +// Submitted by registry 2006-06-16 + +// bw : http://en.wikipedia.org/wiki/.bw +// http://www.gobin.info/domainname/bw.doc +// list of other 2nd level tlds ? +bw +co.bw +org.bw + +// by : http://en.wikipedia.org/wiki/.by +// http://tld.by/rules_2006_en.html +// list of other 2nd level tlds ? +by +gov.by +mil.by +// Official information does not indicate that com.by is a reserved +// second-level domain, but it's being used as one (see www.google.com.by and +// www.yahoo.com.by, for example), so we list it here for safety's sake. 
+com.by + +// http://hoster.by/ +of.by + +// bz : http://en.wikipedia.org/wiki/.bz +// http://www.belizenic.bz/ +bz +com.bz +net.bz +org.bz +edu.bz +gov.bz + +// ca : http://en.wikipedia.org/wiki/.ca +ca +// ca geographical names +ab.ca +bc.ca +mb.ca +nb.ca +nf.ca +nl.ca +ns.ca +nt.ca +nu.ca +on.ca +pe.ca +qc.ca +sk.ca +yk.ca +// gc.ca: http://en.wikipedia.org/wiki/.gc.ca +// see also: http://registry.gc.ca/en/SubdomainFAQ +gc.ca + +// cat : http://en.wikipedia.org/wiki/.cat +cat + +// cc : http://en.wikipedia.org/wiki/.cc +cc + +// cd : http://en.wikipedia.org/wiki/.cd +// see also: https://www.nic.cd/domain/insertDomain_2.jsp?act=1 +cd +gov.cd + +// cf : http://en.wikipedia.org/wiki/.cf +cf + +// cg : http://en.wikipedia.org/wiki/.cg +cg + +// ch : http://en.wikipedia.org/wiki/.ch +ch + +// ci : http://en.wikipedia.org/wiki/.ci +// http://www.nic.ci/index.php?page=charte +ci +org.ci +or.ci +com.ci +co.ci +edu.ci +ed.ci +ac.ci +net.ci +go.ci +asso.ci +aéroport.ci +int.ci +presse.ci +md.ci +gouv.ci + +// ck : http://en.wikipedia.org/wiki/.ck +*.ck +!www.ck + +// cl : http://en.wikipedia.org/wiki/.cl +cl +gov.cl +gob.cl +co.cl +mil.cl + +// cm : http://en.wikipedia.org/wiki/.cm +cm +gov.cm + +// cn : http://en.wikipedia.org/wiki/.cn +// Submitted by registry 2008-06-11 +cn +ac.cn +com.cn +edu.cn +gov.cn +net.cn +org.cn +mil.cn +公司.cn +网络.cn +網絡.cn +// cn geographic names +ah.cn +bj.cn +cq.cn +fj.cn +gd.cn +gs.cn +gz.cn +gx.cn +ha.cn +hb.cn +he.cn +hi.cn +hl.cn +hn.cn +jl.cn +js.cn +jx.cn +ln.cn +nm.cn +nx.cn +qh.cn +sc.cn +sd.cn +sh.cn +sn.cn +sx.cn +tj.cn +xj.cn +xz.cn +yn.cn +zj.cn +hk.cn +mo.cn +tw.cn + +// co : http://en.wikipedia.org/wiki/.co +// Submitted by registry 2008-06-11 +co +arts.co +com.co +edu.co +firm.co +gov.co +info.co +int.co +mil.co +net.co +nom.co +org.co +rec.co +web.co + +// com : http://en.wikipedia.org/wiki/.com +com + +// coop : http://en.wikipedia.org/wiki/.coop +coop + +// cr : 
http://www.nic.cr/niccr_publico/showRegistroDominiosScreen.do +cr +ac.cr +co.cr +ed.cr +fi.cr +go.cr +or.cr +sa.cr + +// cu : http://en.wikipedia.org/wiki/.cu +cu +com.cu +edu.cu +org.cu +net.cu +gov.cu +inf.cu + +// cv : http://en.wikipedia.org/wiki/.cv +cv + +// cx : http://en.wikipedia.org/wiki/.cx +// list of other 2nd level tlds ? +cx +gov.cx + +// cy : http://en.wikipedia.org/wiki/.cy +*.cy + +// cz : http://en.wikipedia.org/wiki/.cz +cz + +// de : http://en.wikipedia.org/wiki/.de +// Confirmed by registry (with technical +// reservations) 2008-07-01 +de + +// dj : http://en.wikipedia.org/wiki/.dj +dj + +// dk : http://en.wikipedia.org/wiki/.dk +// Confirmed by registry 2008-06-17 +dk + +// dm : http://en.wikipedia.org/wiki/.dm +dm +com.dm +net.dm +org.dm +edu.dm +gov.dm + +// do : http://en.wikipedia.org/wiki/.do +do +art.do +com.do +edu.do +gob.do +gov.do +mil.do +net.do +org.do +sld.do +web.do + +// dz : http://en.wikipedia.org/wiki/.dz +dz +com.dz +org.dz +net.dz +gov.dz +edu.dz +asso.dz +pol.dz +art.dz + +// ec : http://www.nic.ec/reg/paso1.asp +// Submitted by registry 2008-07-04 +ec +com.ec +info.ec +net.ec +fin.ec +k12.ec +med.ec +pro.ec +org.ec +edu.ec +gov.ec +gob.ec +mil.ec + +// edu : http://en.wikipedia.org/wiki/.edu +edu + +// ee : http://www.eenet.ee/EENet/dom_reeglid.html#lisa_B +ee +edu.ee +gov.ee +riik.ee +lib.ee +med.ee +com.ee +pri.ee +aip.ee +org.ee +fie.ee + +// eg : http://en.wikipedia.org/wiki/.eg +eg +com.eg +edu.eg +eun.eg +gov.eg +mil.eg +name.eg +net.eg +org.eg +sci.eg + +// er : http://en.wikipedia.org/wiki/.er +*.er + +// es : https://www.nic.es/site_ingles/ingles/dominios/index.html +es +com.es +nom.es +org.es +gob.es +edu.es + +// et : http://en.wikipedia.org/wiki/.et +*.et + +// eu : http://en.wikipedia.org/wiki/.eu +eu + +// fi : http://en.wikipedia.org/wiki/.fi +fi +// aland.fi : http://en.wikipedia.org/wiki/.ax +// This domain is being phased out in favor of .ax. 
As there are still many +// domains under aland.fi, we still keep it on the list until aland.fi is +// completely removed. +// TODO: Check for updates (expected to be phased out around Q1/2009) +aland.fi + +// fj : http://en.wikipedia.org/wiki/.fj +*.fj + +// fk : http://en.wikipedia.org/wiki/.fk +*.fk + +// fm : http://en.wikipedia.org/wiki/.fm +fm + +// fo : http://en.wikipedia.org/wiki/.fo +fo + +// fr : http://www.afnic.fr/ +// domaines descriptifs : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-descriptifs +fr +com.fr +asso.fr +nom.fr +prd.fr +presse.fr +tm.fr +// domaines sectoriels : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-sectoriels +aeroport.fr +assedic.fr +avocat.fr +avoues.fr +cci.fr +chambagri.fr +chirurgiens-dentistes.fr +experts-comptables.fr +geometre-expert.fr +gouv.fr +greta.fr +huissier-justice.fr +medecin.fr +notaires.fr +pharmacien.fr +port.fr +veterinaire.fr + +// ga : http://en.wikipedia.org/wiki/.ga +ga + +// gb : This registry is effectively dormant +// Submitted by registry 2008-06-12 + +// gd : http://en.wikipedia.org/wiki/.gd +gd + +// ge : http://www.nic.net.ge/policy_en.pdf +ge +com.ge +edu.ge +gov.ge +org.ge +mil.ge +net.ge +pvt.ge + +// gf : http://en.wikipedia.org/wiki/.gf +gf + +// gg : http://www.channelisles.net/applic/avextn.shtml +gg +co.gg +org.gg +net.gg +sch.gg +gov.gg + +// gh : http://en.wikipedia.org/wiki/.gh +// see also: http://www.nic.gh/reg_now.php +// Although domains directly at second level are not possible at the moment, +// they have been possible for some time and may come back. 
+gh +com.gh +edu.gh +gov.gh +org.gh +mil.gh + +// gi : http://www.nic.gi/rules.html +gi +com.gi +ltd.gi +gov.gi +mod.gi +edu.gi +org.gi + +// gl : http://en.wikipedia.org/wiki/.gl +// http://nic.gl +gl + +// gm : http://www.nic.gm/htmlpages%5Cgm-policy.htm +gm + +// gn : http://psg.com/dns/gn/gn.txt +// Submitted by registry 2008-06-17 +ac.gn +com.gn +edu.gn +gov.gn +org.gn +net.gn + +// gov : http://en.wikipedia.org/wiki/.gov +gov + +// gp : http://www.nic.gp/index.php?lang=en +gp +com.gp +net.gp +mobi.gp +edu.gp +org.gp +asso.gp + +// gq : http://en.wikipedia.org/wiki/.gq +gq + +// gr : https://grweb.ics.forth.gr/english/1617-B-2005.html +// Submitted by registry 2008-06-09 +gr +com.gr +edu.gr +net.gr +org.gr +gov.gr + +// gs : http://en.wikipedia.org/wiki/.gs +gs + +// gt : http://www.gt/politicas.html +*.gt +!www.gt + +// gu : http://gadao.gov.gu/registration.txt +*.gu + +// gw : http://en.wikipedia.org/wiki/.gw +gw + +// gy : http://en.wikipedia.org/wiki/.gy +// http://registry.gy/ +gy +co.gy +com.gy +net.gy + +// hk : https://www.hkdnr.hk +// Submitted by registry 2008-06-11 +hk +com.hk +edu.hk +gov.hk +idv.hk +net.hk +org.hk +公司.hk +教育.hk +敎育.hk +政府.hk +個人.hk +个人.hk +箇人.hk +網络.hk +网络.hk +组織.hk +網絡.hk +网絡.hk +组织.hk +組織.hk +組织.hk + +// hm : http://en.wikipedia.org/wiki/.hm +hm + +// hn : http://www.nic.hn/politicas/ps02,,05.html +hn +com.hn +edu.hn +org.hn +net.hn +mil.hn +gob.hn + +// hr : http://www.dns.hr/documents/pdf/HRTLD-regulations.pdf +hr +iz.hr +from.hr +name.hr +com.hr + +// ht : http://www.nic.ht/info/charte.cfm +ht +com.ht +shop.ht +firm.ht +info.ht +adult.ht +net.ht +pro.ht +org.ht +med.ht +art.ht +coop.ht +pol.ht +asso.ht +edu.ht +rel.ht +gouv.ht +perso.ht + +// hu : http://www.domain.hu/domain/English/sld.html +// Confirmed by registry 2008-06-12 +hu +co.hu +info.hu +org.hu +priv.hu +sport.hu +tm.hu +2000.hu +agrar.hu +bolt.hu +casino.hu +city.hu +erotica.hu +erotika.hu +film.hu +forum.hu +games.hu +hotel.hu +ingatlan.hu +jogasz.hu +konyvelo.hu 
+lakas.hu +media.hu +news.hu +reklam.hu +sex.hu +shop.hu +suli.hu +szex.hu +tozsde.hu +utazas.hu +video.hu + +// id : http://en.wikipedia.org/wiki/.id +// see also: https://register.pandi.or.id/ +id +ac.id +co.id +go.id +mil.id +net.id +or.id +sch.id +web.id + +// ie : http://en.wikipedia.org/wiki/.ie +ie +gov.ie + +// il : http://en.wikipedia.org/wiki/.il +*.il + +// im : https://www.nic.im/pdfs/imfaqs.pdf +im +co.im +ltd.co.im +plc.co.im +net.im +gov.im +org.im +nic.im +ac.im + +// in : http://en.wikipedia.org/wiki/.in +// see also: http://www.inregistry.in/policies/ +// Please note, that nic.in is not an offical eTLD, but used by most +// government institutions. +in +co.in +firm.in +net.in +org.in +gen.in +ind.in +nic.in +ac.in +edu.in +res.in +gov.in +mil.in + +// info : http://en.wikipedia.org/wiki/.info +info + +// int : http://en.wikipedia.org/wiki/.int +// Confirmed by registry 2008-06-18 +int +eu.int + +// io : http://www.nic.io/rules.html +// list of other 2nd level tlds ? +io +com.io + +// iq : http://www.cmc.iq/english/iq/iqregister1.htm +iq +gov.iq +edu.iq +mil.iq +com.iq +org.iq +net.iq + +// ir : http://www.nic.ir/Terms_and_Conditions_ir,_Appendix_1_Domain_Rules +// Also see http://www.nic.ir/Internationalized_Domain_Names +// Two .ir entries added at request of , 2010-04-16 +ir +ac.ir +co.ir +gov.ir +id.ir +net.ir +org.ir +sch.ir +// xn--mgba3a4f16a.ir (.ir, Persian YEH) +ایران.ir +// xn--mgba3a4fra.ir (.ir, Arabic YEH) +ايران.ir + +// is : http://www.isnic.is/domain/rules.php +// Confirmed by registry 2008-12-06 +is +net.is +com.is +edu.is +gov.is +org.is +int.is + +// it : http://en.wikipedia.org/wiki/.it +it +gov.it +edu.it +// list of reserved geo-names : +// http://www.nic.it/documenti/regolamenti-e-linee-guida/regolamento-assegnazione-versione-6.0.pdf +// (There is also a list of reserved geo-names corresponding to Italian +// municipalities : http://www.nic.it/documenti/appendice-c.pdf , but it is +// not included here.) 
+agrigento.it +ag.it +alessandria.it +al.it +ancona.it +an.it +aosta.it +aoste.it +ao.it +arezzo.it +ar.it +ascoli-piceno.it +ascolipiceno.it +ap.it +asti.it +at.it +avellino.it +av.it +bari.it +ba.it +andria-barletta-trani.it +andriabarlettatrani.it +trani-barletta-andria.it +tranibarlettaandria.it +barletta-trani-andria.it +barlettatraniandria.it +andria-trani-barletta.it +andriatranibarletta.it +trani-andria-barletta.it +traniandriabarletta.it +bt.it +belluno.it +bl.it +benevento.it +bn.it +bergamo.it +bg.it +biella.it +bi.it +bologna.it +bo.it +bolzano.it +bozen.it +balsan.it +alto-adige.it +altoadige.it +suedtirol.it +bz.it +brescia.it +bs.it +brindisi.it +br.it +cagliari.it +ca.it +caltanissetta.it +cl.it +campobasso.it +cb.it +carboniaiglesias.it +carbonia-iglesias.it +iglesias-carbonia.it +iglesiascarbonia.it +ci.it +caserta.it +ce.it +catania.it +ct.it +catanzaro.it +cz.it +chieti.it +ch.it +como.it +co.it +cosenza.it +cs.it +cremona.it +cr.it +crotone.it +kr.it +cuneo.it +cn.it +dell-ogliastra.it +dellogliastra.it +ogliastra.it +og.it +enna.it +en.it +ferrara.it +fe.it +fermo.it +fm.it +firenze.it +florence.it +fi.it +foggia.it +fg.it +forli-cesena.it +forlicesena.it +cesena-forli.it +cesenaforli.it +fc.it +frosinone.it +fr.it +genova.it +genoa.it +ge.it +gorizia.it +go.it +grosseto.it +gr.it +imperia.it +im.it +isernia.it +is.it +laquila.it +aquila.it +aq.it +la-spezia.it +laspezia.it +sp.it +latina.it +lt.it +lecce.it +le.it +lecco.it +lc.it +livorno.it +li.it +lodi.it +lo.it +lucca.it +lu.it +macerata.it +mc.it +mantova.it +mn.it +massa-carrara.it +massacarrara.it +carrara-massa.it +carraramassa.it +ms.it +matera.it +mt.it +medio-campidano.it +mediocampidano.it +campidano-medio.it +campidanomedio.it +vs.it +messina.it +me.it +milano.it +milan.it +mi.it +modena.it +mo.it +monza.it +monza-brianza.it +monzabrianza.it +monzaebrianza.it +monzaedellabrianza.it +monza-e-della-brianza.it +mb.it +napoli.it +naples.it +na.it +novara.it +no.it +nuoro.it +nu.it 
+oristano.it +or.it +padova.it +padua.it +pd.it +palermo.it +pa.it +parma.it +pr.it +pavia.it +pv.it +perugia.it +pg.it +pescara.it +pe.it +pesaro-urbino.it +pesarourbino.it +urbino-pesaro.it +urbinopesaro.it +pu.it +piacenza.it +pc.it +pisa.it +pi.it +pistoia.it +pt.it +pordenone.it +pn.it +potenza.it +pz.it +prato.it +po.it +ragusa.it +rg.it +ravenna.it +ra.it +reggio-calabria.it +reggiocalabria.it +rc.it +reggio-emilia.it +reggioemilia.it +re.it +rieti.it +ri.it +rimini.it +rn.it +roma.it +rome.it +rm.it +rovigo.it +ro.it +salerno.it +sa.it +sassari.it +ss.it +savona.it +sv.it +siena.it +si.it +siracusa.it +sr.it +sondrio.it +so.it +taranto.it +ta.it +tempio-olbia.it +tempioolbia.it +olbia-tempio.it +olbiatempio.it +ot.it +teramo.it +te.it +terni.it +tr.it +torino.it +turin.it +to.it +trapani.it +tp.it +trento.it +trentino.it +tn.it +treviso.it +tv.it +trieste.it +ts.it +udine.it +ud.it +varese.it +va.it +venezia.it +venice.it +ve.it +verbania.it +vb.it +vercelli.it +vc.it +verona.it +vr.it +vibo-valentia.it +vibovalentia.it +vv.it +vicenza.it +vi.it +viterbo.it +vt.it + +// je : http://www.channelisles.net/applic/avextn.shtml +je +co.je +org.je +net.je +sch.je +gov.je + +// jm : http://www.com.jm/register.html +*.jm + +// jo : http://www.dns.jo/Registration_policy.aspx +jo +com.jo +org.jo +net.jo +edu.jo +sch.jo +gov.jo +mil.jo +name.jo + +// jobs : http://en.wikipedia.org/wiki/.jobs +jobs + +// jp : http://en.wikipedia.org/wiki/.jp +// http://jprs.co.jp/en/jpdomain.html +// Submitted by registry 2008-06-11 +// Updated by registry 2008-12-04 +jp +// jp organizational type names +ac.jp +ad.jp +co.jp +ed.jp +go.jp +gr.jp +lg.jp +ne.jp +or.jp +// jp geographic type names +// http://jprs.jp/doc/rule/saisoku-1.html +*.aichi.jp +*.akita.jp +*.aomori.jp +*.chiba.jp +*.ehime.jp +*.fukui.jp +*.fukuoka.jp +*.fukushima.jp +*.gifu.jp +*.gunma.jp +*.hiroshima.jp +*.hokkaido.jp +*.hyogo.jp +*.ibaraki.jp +*.ishikawa.jp +*.iwate.jp +*.kagawa.jp +*.kagoshima.jp +*.kanagawa.jp 
+*.kawasaki.jp +*.kitakyushu.jp +*.kobe.jp +*.kochi.jp +*.kumamoto.jp +*.kyoto.jp +*.mie.jp +*.miyagi.jp +*.miyazaki.jp +*.nagano.jp +*.nagasaki.jp +*.nagoya.jp +*.nara.jp +*.niigata.jp +*.oita.jp +*.okayama.jp +*.okinawa.jp +*.osaka.jp +*.saga.jp +*.saitama.jp +*.sapporo.jp +*.sendai.jp +*.shiga.jp +*.shimane.jp +*.shizuoka.jp +*.tochigi.jp +*.tokushima.jp +*.tokyo.jp +*.tottori.jp +*.toyama.jp +*.wakayama.jp +*.yamagata.jp +*.yamaguchi.jp +*.yamanashi.jp +*.yokohama.jp +!metro.tokyo.jp +!pref.aichi.jp +!pref.akita.jp +!pref.aomori.jp +!pref.chiba.jp +!pref.ehime.jp +!pref.fukui.jp +!pref.fukuoka.jp +!pref.fukushima.jp +!pref.gifu.jp +!pref.gunma.jp +!pref.hiroshima.jp +!pref.hokkaido.jp +!pref.hyogo.jp +!pref.ibaraki.jp +!pref.ishikawa.jp +!pref.iwate.jp +!pref.kagawa.jp +!pref.kagoshima.jp +!pref.kanagawa.jp +!pref.kochi.jp +!pref.kumamoto.jp +!pref.kyoto.jp +!pref.mie.jp +!pref.miyagi.jp +!pref.miyazaki.jp +!pref.nagano.jp +!pref.nagasaki.jp +!pref.nara.jp +!pref.niigata.jp +!pref.oita.jp +!pref.okayama.jp +!pref.okinawa.jp +!pref.osaka.jp +!pref.saga.jp +!pref.saitama.jp +!pref.shiga.jp +!pref.shimane.jp +!pref.shizuoka.jp +!pref.tochigi.jp +!pref.tokushima.jp +!pref.tottori.jp +!pref.toyama.jp +!pref.wakayama.jp +!pref.yamagata.jp +!pref.yamaguchi.jp +!pref.yamanashi.jp +!city.chiba.jp +!city.fukuoka.jp +!city.hiroshima.jp +!city.kawasaki.jp +!city.kitakyushu.jp +!city.kobe.jp +!city.kyoto.jp +!city.nagoya.jp +!city.niigata.jp +!city.okayama.jp +!city.osaka.jp +!city.saitama.jp +!city.sapporo.jp +!city.sendai.jp +!city.shizuoka.jp +!city.yokohama.jp + +// ke : http://www.kenic.or.ke/index.php?option=com_content&task=view&id=117&Itemid=145 +*.ke + +// kg : http://www.domain.kg/dmn_n.html +kg +org.kg +net.kg +com.kg +edu.kg +gov.kg +mil.kg + +// kh : http://www.mptc.gov.kh/dns_registration.htm +*.kh + +// ki : http://www.ki/dns/index.html +ki +edu.ki +biz.ki +net.ki +org.ki +gov.ki +info.ki +com.ki + +// km : http://en.wikipedia.org/wiki/.km +// 
http://www.domaine.km/documents/charte.doc +km +org.km +nom.km +gov.km +prd.km +tm.km +edu.km +mil.km +ass.km +com.km +// These are only mentioned as proposed suggestions at domaine.km, but +// http://en.wikipedia.org/wiki/.km says they're available for registration: +coop.km +asso.km +presse.km +medecin.km +notaires.km +pharmaciens.km +veterinaire.km +gouv.km + +// kn : http://en.wikipedia.org/wiki/.kn +// http://www.dot.kn/domainRules.html +kn +net.kn +org.kn +edu.kn +gov.kn + +// kp : http://www.kcce.kp/en_index.php +com.kp +edu.kp +gov.kp +org.kp +rep.kp +tra.kp + +// kr : http://en.wikipedia.org/wiki/.kr +// see also: http://domain.nida.or.kr/eng/registration.jsp +kr +ac.kr +co.kr +es.kr +go.kr +hs.kr +kg.kr +mil.kr +ms.kr +ne.kr +or.kr +pe.kr +re.kr +sc.kr +// kr geographical names +busan.kr +chungbuk.kr +chungnam.kr +daegu.kr +daejeon.kr +gangwon.kr +gwangju.kr +gyeongbuk.kr +gyeonggi.kr +gyeongnam.kr +incheon.kr +jeju.kr +jeonbuk.kr +jeonnam.kr +seoul.kr +ulsan.kr + +// kw : http://en.wikipedia.org/wiki/.kw +*.kw + +// ky : http://www.icta.ky/da_ky_reg_dom.php +// Confirmed by registry 2008-06-17 +ky +edu.ky +gov.ky +com.ky +org.ky +net.ky + +// kz : http://en.wikipedia.org/wiki/.kz +// see also: http://www.nic.kz/rules/index.jsp +kz +org.kz +edu.kz +net.kz +gov.kz +mil.kz +com.kz + +// la : http://en.wikipedia.org/wiki/.la +// Submitted by registry 2008-06-10 +la +int.la +net.la +info.la +edu.la +gov.la +per.la +com.la +org.la + +// lb : http://en.wikipedia.org/wiki/.lb +// Submitted by registry 2008-06-17 +com.lb +edu.lb +gov.lb +net.lb +org.lb + +// lc : http://en.wikipedia.org/wiki/.lc +// see also: http://www.nic.lc/rules.htm +lc +com.lc +net.lc +co.lc +org.lc +edu.lc +gov.lc + +// li : http://en.wikipedia.org/wiki/.li +li + +// lk : http://www.nic.lk/seclevpr.html +lk +gov.lk +sch.lk +net.lk +int.lk +com.lk +org.lk +edu.lk +ngo.lk +soc.lk +web.lk +ltd.lk +assn.lk +grp.lk +hotel.lk + +// lr : http://psg.com/dns/lr/lr.txt +// Submitted by registry 
2008-06-17 +com.lr +edu.lr +gov.lr +org.lr +net.lr + +// ls : http://en.wikipedia.org/wiki/.ls +ls +co.ls +org.ls + +// lt : http://en.wikipedia.org/wiki/.lt +lt +// gov.lt : http://www.gov.lt/index_en.php +gov.lt + +// lu : http://www.dns.lu/en/ +lu + +// lv : http://www.nic.lv/DNS/En/generic.php +lv +com.lv +edu.lv +gov.lv +org.lv +mil.lv +id.lv +net.lv +asn.lv +conf.lv + +// ly : http://www.nic.ly/regulations.php +ly +com.ly +net.ly +gov.ly +plc.ly +edu.ly +sch.ly +med.ly +org.ly +id.ly + +// ma : http://en.wikipedia.org/wiki/.ma +// http://www.anrt.ma/fr/admin/download/upload/file_fr782.pdf +ma +co.ma +net.ma +gov.ma +org.ma +ac.ma +press.ma + +// mc : http://www.nic.mc/ +mc +tm.mc +asso.mc + +// md : http://en.wikipedia.org/wiki/.md +md + +// me : http://en.wikipedia.org/wiki/.me +me +co.me +net.me +org.me +edu.me +ac.me +gov.me +its.me +priv.me + +// mg : http://www.nic.mg/tarif.htm +mg +org.mg +nom.mg +gov.mg +prd.mg +tm.mg +edu.mg +mil.mg +com.mg + +// mh : http://en.wikipedia.org/wiki/.mh +mh + +// mil : http://en.wikipedia.org/wiki/.mil +mil + +// mk : http://en.wikipedia.org/wiki/.mk +// see also: http://dns.marnet.net.mk/postapka.php +mk +com.mk +org.mk +net.mk +edu.mk +gov.mk +inf.mk +name.mk + +// ml : http://www.gobin.info/domainname/ml-template.doc +// see also: http://en.wikipedia.org/wiki/.ml +ml +com.ml +edu.ml +gouv.ml +gov.ml +net.ml +org.ml +presse.ml + +// mm : http://en.wikipedia.org/wiki/.mm +*.mm + +// mn : http://en.wikipedia.org/wiki/.mn +mn +gov.mn +edu.mn +org.mn + +// mo : http://www.monic.net.mo/ +mo +com.mo +net.mo +org.mo +edu.mo +gov.mo + +// mobi : http://en.wikipedia.org/wiki/.mobi +mobi + +// mp : http://www.dot.mp/ +// Confirmed by registry 2008-06-17 +mp + +// mq : http://en.wikipedia.org/wiki/.mq +mq + +// mr : http://en.wikipedia.org/wiki/.mr +mr +gov.mr + +// ms : http://en.wikipedia.org/wiki/.ms +ms + +// mt : https://www.nic.org.mt/dotmt/ +*.mt + +// mu : http://en.wikipedia.org/wiki/.mu +mu +com.mu +net.mu +org.mu 
+gov.mu +ac.mu +co.mu +or.mu + +// museum : http://about.museum/naming/ +// http://index.museum/ +museum +academy.museum +agriculture.museum +air.museum +airguard.museum +alabama.museum +alaska.museum +amber.museum +ambulance.museum +american.museum +americana.museum +americanantiques.museum +americanart.museum +amsterdam.museum +and.museum +annefrank.museum +anthro.museum +anthropology.museum +antiques.museum +aquarium.museum +arboretum.museum +archaeological.museum +archaeology.museum +architecture.museum +art.museum +artanddesign.museum +artcenter.museum +artdeco.museum +arteducation.museum +artgallery.museum +arts.museum +artsandcrafts.museum +asmatart.museum +assassination.museum +assisi.museum +association.museum +astronomy.museum +atlanta.museum +austin.museum +australia.museum +automotive.museum +aviation.museum +axis.museum +badajoz.museum +baghdad.museum +bahn.museum +bale.museum +baltimore.museum +barcelona.museum +baseball.museum +basel.museum +baths.museum +bauern.museum +beauxarts.museum +beeldengeluid.museum +bellevue.museum +bergbau.museum +berkeley.museum +berlin.museum +bern.museum +bible.museum +bilbao.museum +bill.museum +birdart.museum +birthplace.museum +bonn.museum +boston.museum +botanical.museum +botanicalgarden.museum +botanicgarden.museum +botany.museum +brandywinevalley.museum +brasil.museum +bristol.museum +british.museum +britishcolumbia.museum +broadcast.museum +brunel.museum +brussel.museum +brussels.museum +bruxelles.museum +building.museum +burghof.museum +bus.museum +bushey.museum +cadaques.museum +california.museum +cambridge.museum +can.museum +canada.museum +capebreton.museum +carrier.museum +cartoonart.museum +casadelamoneda.museum +castle.museum +castres.museum +celtic.museum +center.museum +chattanooga.museum +cheltenham.museum +chesapeakebay.museum +chicago.museum +children.museum +childrens.museum +childrensgarden.museum +chiropractic.museum +chocolate.museum +christiansburg.museum +cincinnati.museum +cinema.museum 
+circus.museum +civilisation.museum +civilization.museum +civilwar.museum +clinton.museum +clock.museum +coal.museum +coastaldefence.museum +cody.museum +coldwar.museum +collection.museum +colonialwilliamsburg.museum +coloradoplateau.museum +columbia.museum +columbus.museum +communication.museum +communications.museum +community.museum +computer.museum +computerhistory.museum +comunicações.museum +contemporary.museum +contemporaryart.museum +convent.museum +copenhagen.museum +corporation.museum +correios-e-telecomunicações.museum +corvette.museum +costume.museum +countryestate.museum +county.museum +crafts.museum +cranbrook.museum +creation.museum +cultural.museum +culturalcenter.museum +culture.museum +cyber.museum +cymru.museum +dali.museum +dallas.museum +database.museum +ddr.museum +decorativearts.museum +delaware.museum +delmenhorst.museum +denmark.museum +depot.museum +design.museum +detroit.museum +dinosaur.museum +discovery.museum +dolls.museum +donostia.museum +durham.museum +eastafrica.museum +eastcoast.museum +education.museum +educational.museum +egyptian.museum +eisenbahn.museum +elburg.museum +elvendrell.museum +embroidery.museum +encyclopedic.museum +england.museum +entomology.museum +environment.museum +environmentalconservation.museum +epilepsy.museum +essex.museum +estate.museum +ethnology.museum +exeter.museum +exhibition.museum +family.museum +farm.museum +farmequipment.museum +farmers.museum +farmstead.museum +field.museum +figueres.museum +filatelia.museum +film.museum +fineart.museum +finearts.museum +finland.museum +flanders.museum +florida.museum +force.museum +fortmissoula.museum +fortworth.museum +foundation.museum +francaise.museum +frankfurt.museum +franziskaner.museum +freemasonry.museum +freiburg.museum +fribourg.museum +frog.museum +fundacio.museum +furniture.museum +gallery.museum +garden.museum +gateway.museum +geelvinck.museum +gemological.museum +geology.museum +georgia.museum +giessen.museum +glas.museum +glass.museum 
+gorge.museum +grandrapids.museum +graz.museum +guernsey.museum +halloffame.museum +hamburg.museum +handson.museum +harvestcelebration.museum +hawaii.museum +health.museum +heimatunduhren.museum +hellas.museum +helsinki.museum +hembygdsforbund.museum +heritage.museum +histoire.museum +historical.museum +historicalsociety.museum +historichouses.museum +historisch.museum +historisches.museum +history.museum +historyofscience.museum +horology.museum +house.museum +humanities.museum +illustration.museum +imageandsound.museum +indian.museum +indiana.museum +indianapolis.museum +indianmarket.museum +intelligence.museum +interactive.museum +iraq.museum +iron.museum +isleofman.museum +jamison.museum +jefferson.museum +jerusalem.museum +jewelry.museum +jewish.museum +jewishart.museum +jfk.museum +journalism.museum +judaica.museum +judygarland.museum +juedisches.museum +juif.museum +karate.museum +karikatur.museum +kids.museum +koebenhavn.museum +koeln.museum +kunst.museum +kunstsammlung.museum +kunstunddesign.museum +labor.museum +labour.museum +lajolla.museum +lancashire.museum +landes.museum +lans.museum +läns.museum +larsson.museum +lewismiller.museum +lincoln.museum +linz.museum +living.museum +livinghistory.museum +localhistory.museum +london.museum +losangeles.museum +louvre.museum +loyalist.museum +lucerne.museum +luxembourg.museum +luzern.museum +mad.museum +madrid.museum +mallorca.museum +manchester.museum +mansion.museum +mansions.museum +manx.museum +marburg.museum +maritime.museum +maritimo.museum +maryland.museum +marylhurst.museum +media.museum +medical.museum +medizinhistorisches.museum +meeres.museum +memorial.museum +mesaverde.museum +michigan.museum +midatlantic.museum +military.museum +mill.museum +miners.museum +mining.museum +minnesota.museum +missile.museum +missoula.museum +modern.museum +moma.museum +money.museum +monmouth.museum +monticello.museum +montreal.museum +moscow.museum +motorcycle.museum +muenchen.museum +muenster.museum +mulhouse.museum 
+muncie.museum +museet.museum +museumcenter.museum +museumvereniging.museum +music.museum +national.museum +nationalfirearms.museum +nationalheritage.museum +nativeamerican.museum +naturalhistory.museum +naturalhistorymuseum.museum +naturalsciences.museum +nature.museum +naturhistorisches.museum +natuurwetenschappen.museum +naumburg.museum +naval.museum +nebraska.museum +neues.museum +newhampshire.museum +newjersey.museum +newmexico.museum +newport.museum +newspaper.museum +newyork.museum +niepce.museum +norfolk.museum +north.museum +nrw.museum +nuernberg.museum +nuremberg.museum +nyc.museum +nyny.museum +oceanographic.museum +oceanographique.museum +omaha.museum +online.museum +ontario.museum +openair.museum +oregon.museum +oregontrail.museum +otago.museum +oxford.museum +pacific.museum +paderborn.museum +palace.museum +paleo.museum +palmsprings.museum +panama.museum +paris.museum +pasadena.museum +pharmacy.museum +philadelphia.museum +philadelphiaarea.museum +philately.museum +phoenix.museum +photography.museum +pilots.museum +pittsburgh.museum +planetarium.museum +plantation.museum +plants.museum +plaza.museum +portal.museum +portland.museum +portlligat.museum +posts-and-telecommunications.museum +preservation.museum +presidio.museum +press.museum +project.museum +public.museum +pubol.museum +quebec.museum +railroad.museum +railway.museum +research.museum +resistance.museum +riodejaneiro.museum +rochester.museum +rockart.museum +roma.museum +russia.museum +saintlouis.museum +salem.museum +salvadordali.museum +salzburg.museum +sandiego.museum +sanfrancisco.museum +santabarbara.museum +santacruz.museum +santafe.museum +saskatchewan.museum +satx.museum +savannahga.museum +schlesisches.museum +schoenbrunn.museum +schokoladen.museum +school.museum +schweiz.museum +science.museum +scienceandhistory.museum +scienceandindustry.museum +sciencecenter.museum +sciencecenters.museum +science-fiction.museum +sciencehistory.museum +sciences.museum +sciencesnaturelles.museum 
+scotland.museum +seaport.museum +settlement.museum +settlers.museum +shell.museum +sherbrooke.museum +sibenik.museum +silk.museum +ski.museum +skole.museum +society.museum +sologne.museum +soundandvision.museum +southcarolina.museum +southwest.museum +space.museum +spy.museum +square.museum +stadt.museum +stalbans.museum +starnberg.museum +state.museum +stateofdelaware.museum +station.museum +steam.museum +steiermark.museum +stjohn.museum +stockholm.museum +stpetersburg.museum +stuttgart.museum +suisse.museum +surgeonshall.museum +surrey.museum +svizzera.museum +sweden.museum +sydney.museum +tank.museum +tcm.museum +technology.museum +telekommunikation.museum +television.museum +texas.museum +textile.museum +theater.museum +time.museum +timekeeping.museum +topology.museum +torino.museum +touch.museum +town.museum +transport.museum +tree.museum +trolley.museum +trust.museum +trustee.museum +uhren.museum +ulm.museum +undersea.museum +university.museum +usa.museum +usantiques.museum +usarts.museum +uscountryestate.museum +usculture.museum +usdecorativearts.museum +usgarden.museum +ushistory.museum +ushuaia.museum +uslivinghistory.museum +utah.museum +uvic.museum +valley.museum +vantaa.museum +versailles.museum +viking.museum +village.museum +virginia.museum +virtual.museum +virtuel.museum +vlaanderen.museum +volkenkunde.museum +wales.museum +wallonie.museum +war.museum +washingtondc.museum +watchandclock.museum +watch-and-clock.museum +western.museum +westfalen.museum +whaling.museum +wildlife.museum +williamsburg.museum +windmill.museum +workshop.museum +york.museum +yorkshire.museum +yosemite.museum +youth.museum +zoological.museum +zoology.museum +ירושלים.museum +иком.museum + +// mv : http://en.wikipedia.org/wiki/.mv +// "mv" included because, contra Wikipedia, google.mv exists. 
+mv +aero.mv +biz.mv +com.mv +coop.mv +edu.mv +gov.mv +info.mv +int.mv +mil.mv +museum.mv +name.mv +net.mv +org.mv +pro.mv + +// mw : http://www.registrar.mw/ +mw +ac.mw +biz.mw +co.mw +com.mw +coop.mw +edu.mw +gov.mw +int.mw +museum.mw +net.mw +org.mw + +// mx : http://www.nic.mx/ +// Submitted by registry 2008-06-19 +mx +com.mx +org.mx +gob.mx +edu.mx +net.mx + +// my : http://www.mynic.net.my/ +my +com.my +net.my +org.my +gov.my +edu.my +mil.my +name.my + +// mz : http://www.gobin.info/domainname/mz-template.doc +*.mz + +// na : http://www.na-nic.com.na/ +// http://www.info.na/domain/ +na +info.na +pro.na +name.na +school.na +or.na +dr.na +us.na +mx.na +ca.na +in.na +cc.na +tv.na +ws.na +mobi.na +co.na +com.na +org.na + +// name : has 2nd-level tlds, but there's no list of them +name + +// nc : http://www.cctld.nc/ +nc +asso.nc + +// ne : http://en.wikipedia.org/wiki/.ne +ne + +// net : http://en.wikipedia.org/wiki/.net +net + +// nf : http://en.wikipedia.org/wiki/.nf +nf +com.nf +net.nf +per.nf +rec.nf +web.nf +arts.nf +firm.nf +info.nf +other.nf +store.nf + +// ng : http://psg.com/dns/ng/ +// Submitted by registry 2008-06-17 +ac.ng +com.ng +edu.ng +gov.ng +net.ng +org.ng + +// ni : http://www.nic.ni/dominios.htm +*.ni + +// nl : http://www.domain-registry.nl/ace.php/c,728,122,,,,Home.html +// Confirmed by registry (with technical +// reservations) 2008-06-08 +nl + +// BV.nl will be a registry for dutch BV's (besloten vennootschap) +bv.nl + +// no : http://www.norid.no/regelverk/index.en.html +// The Norwegian registry has declined to notify us of updates. The web pages +// referenced below are the official source of the data. 
There is also an +// announce mailing list: +// https://postlister.uninett.no/sympa/info/norid-diskusjon +no +// Norid generic domains : http://www.norid.no/regelverk/vedlegg-c.en.html +fhs.no +vgs.no +fylkesbibl.no +folkebibl.no +museum.no +idrett.no +priv.no +// Non-Norid generic domains : http://www.norid.no/regelverk/vedlegg-d.en.html +mil.no +stat.no +dep.no +kommune.no +herad.no +// no geographical names : http://www.norid.no/regelverk/vedlegg-b.en.html +// counties +aa.no +ah.no +bu.no +fm.no +hl.no +hm.no +jan-mayen.no +mr.no +nl.no +nt.no +of.no +ol.no +oslo.no +rl.no +sf.no +st.no +svalbard.no +tm.no +tr.no +va.no +vf.no +// primary and lower secondary schools per county +gs.aa.no +gs.ah.no +gs.bu.no +gs.fm.no +gs.hl.no +gs.hm.no +gs.jan-mayen.no +gs.mr.no +gs.nl.no +gs.nt.no +gs.of.no +gs.ol.no +gs.oslo.no +gs.rl.no +gs.sf.no +gs.st.no +gs.svalbard.no +gs.tm.no +gs.tr.no +gs.va.no +gs.vf.no +// cities +akrehamn.no +åkrehamn.no +algard.no +ålgård.no +arna.no +brumunddal.no +bryne.no +bronnoysund.no +brønnøysund.no +drobak.no +drøbak.no +egersund.no +fetsund.no +floro.no +florø.no +fredrikstad.no +hokksund.no +honefoss.no +hønefoss.no +jessheim.no +jorpeland.no +jørpeland.no +kirkenes.no +kopervik.no +krokstadelva.no +langevag.no +langevåg.no +leirvik.no +mjondalen.no +mjøndalen.no +mo-i-rana.no +mosjoen.no +mosjøen.no +nesoddtangen.no +orkanger.no +osoyro.no +osøyro.no +raholt.no +råholt.no +sandnessjoen.no +sandnessjøen.no +skedsmokorset.no +slattum.no +spjelkavik.no +stathelle.no +stavern.no +stjordalshalsen.no +stjørdalshalsen.no +tananger.no +tranby.no +vossevangen.no +// communities +afjord.no +åfjord.no +agdenes.no +al.no +ål.no +alesund.no +ålesund.no +alstahaug.no +alta.no +áltá.no +alaheadju.no +álaheadju.no +alvdal.no +amli.no +åmli.no +amot.no +åmot.no +andebu.no +andoy.no +andøy.no +andasuolo.no +ardal.no +årdal.no +aremark.no +arendal.no +ås.no +aseral.no +åseral.no +asker.no +askim.no +askvoll.no +askoy.no +askøy.no +asnes.no +åsnes.no 
+audnedaln.no +aukra.no +aure.no +aurland.no +aurskog-holand.no +aurskog-høland.no +austevoll.no +austrheim.no +averoy.no +averøy.no +balestrand.no +ballangen.no +balat.no +bálát.no +balsfjord.no +bahccavuotna.no +báhccavuotna.no +bamble.no +bardu.no +beardu.no +beiarn.no +bajddar.no +bájddar.no +baidar.no +báidár.no +berg.no +bergen.no +berlevag.no +berlevåg.no +bearalvahki.no +bearalváhki.no +bindal.no +birkenes.no +bjarkoy.no +bjarkøy.no +bjerkreim.no +bjugn.no +bodo.no +bodø.no +badaddja.no +bådåddjå.no +budejju.no +bokn.no +bremanger.no +bronnoy.no +brønnøy.no +bygland.no +bykle.no +barum.no +bærum.no +bo.telemark.no +bø.telemark.no +bo.nordland.no +bø.nordland.no +bievat.no +bievát.no +bomlo.no +bømlo.no +batsfjord.no +båtsfjord.no +bahcavuotna.no +báhcavuotna.no +dovre.no +drammen.no +drangedal.no +dyroy.no +dyrøy.no +donna.no +dønna.no +eid.no +eidfjord.no +eidsberg.no +eidskog.no +eidsvoll.no +eigersund.no +elverum.no +enebakk.no +engerdal.no +etne.no +etnedal.no +evenes.no +evenassi.no +evenášši.no +evje-og-hornnes.no +farsund.no +fauske.no +fuossko.no +fuoisku.no +fedje.no +fet.no +finnoy.no +finnøy.no +fitjar.no +fjaler.no +fjell.no +flakstad.no +flatanger.no +flekkefjord.no +flesberg.no +flora.no +fla.no +flå.no +folldal.no +forsand.no +fosnes.no +frei.no +frogn.no +froland.no +frosta.no +frana.no +fræna.no +froya.no +frøya.no +fusa.no +fyresdal.no +forde.no +førde.no +gamvik.no +gangaviika.no +gáŋgaviika.no +gaular.no +gausdal.no +gildeskal.no +gildeskål.no +giske.no +gjemnes.no +gjerdrum.no +gjerstad.no +gjesdal.no +gjovik.no +gjøvik.no +gloppen.no +gol.no +gran.no +grane.no +granvin.no +gratangen.no +grimstad.no +grong.no +kraanghke.no +kråanghke.no +grue.no +gulen.no +hadsel.no +halden.no +halsa.no +hamar.no +hamaroy.no +habmer.no +hábmer.no +hapmir.no +hápmir.no +hammerfest.no +hammarfeasta.no +hámmárfeasta.no +haram.no +hareid.no +harstad.no +hasvik.no +aknoluokta.no +ákŋoluokta.no +hattfjelldal.no +aarborte.no +haugesund.no +hemne.no +hemnes.no 
+hemsedal.no +heroy.more-og-romsdal.no +herøy.møre-og-romsdal.no +heroy.nordland.no +herøy.nordland.no +hitra.no +hjartdal.no +hjelmeland.no +hobol.no +hobøl.no +hof.no +hol.no +hole.no +holmestrand.no +holtalen.no +holtålen.no +hornindal.no +horten.no +hurdal.no +hurum.no +hvaler.no +hyllestad.no +hagebostad.no +hægebostad.no +hoyanger.no +høyanger.no +hoylandet.no +høylandet.no +ha.no +hå.no +ibestad.no +inderoy.no +inderøy.no +iveland.no +jevnaker.no +jondal.no +jolster.no +jølster.no +karasjok.no +karasjohka.no +kárášjohka.no +karlsoy.no +galsa.no +gálsá.no +karmoy.no +karmøy.no +kautokeino.no +guovdageaidnu.no +klepp.no +klabu.no +klæbu.no +kongsberg.no +kongsvinger.no +kragero.no +kragerø.no +kristiansand.no +kristiansund.no +krodsherad.no +krødsherad.no +kvalsund.no +rahkkeravju.no +ráhkkerávju.no +kvam.no +kvinesdal.no +kvinnherad.no +kviteseid.no +kvitsoy.no +kvitsøy.no +kvafjord.no +kvæfjord.no +giehtavuoatna.no +kvanangen.no +kvænangen.no +navuotna.no +návuotna.no +kafjord.no +kåfjord.no +gaivuotna.no +gáivuotna.no +larvik.no +lavangen.no +lavagis.no +loabat.no +loabát.no +lebesby.no +davvesiida.no +leikanger.no +leirfjord.no +leka.no +leksvik.no +lenvik.no +leangaviika.no +leaŋgaviika.no +lesja.no +levanger.no +lier.no +lierne.no +lillehammer.no +lillesand.no +lindesnes.no +lindas.no +lindås.no +lom.no +loppa.no +lahppi.no +láhppi.no +lund.no +lunner.no +luroy.no +lurøy.no +luster.no +lyngdal.no +lyngen.no +ivgu.no +lardal.no +lerdal.no +lærdal.no +lodingen.no +lødingen.no +lorenskog.no +lørenskog.no +loten.no +løten.no +malvik.no +masoy.no +måsøy.no +muosat.no +muosát.no +mandal.no +marker.no +marnardal.no +masfjorden.no +meland.no +meldal.no +melhus.no +meloy.no +meløy.no +meraker.no +meråker.no +moareke.no +moåreke.no +midsund.no +midtre-gauldal.no +modalen.no +modum.no +molde.no +moskenes.no +moss.no +mosvik.no +malselv.no +målselv.no +malatvuopmi.no +málatvuopmi.no +namdalseid.no +aejrie.no +namsos.no +namsskogan.no +naamesjevuemie.no 
+nååmesjevuemie.no +laakesvuemie.no +nannestad.no +narvik.no +narviika.no +naustdal.no +nedre-eiker.no +nes.akershus.no +nes.buskerud.no +nesna.no +nesodden.no +nesseby.no +unjarga.no +unjárga.no +nesset.no +nissedal.no +nittedal.no +nord-aurdal.no +nord-fron.no +nord-odal.no +norddal.no +nordkapp.no +davvenjarga.no +davvenjárga.no +nordre-land.no +nordreisa.no +raisa.no +ráisa.no +nore-og-uvdal.no +notodden.no +naroy.no +nærøy.no +notteroy.no +nøtterøy.no +odda.no +oksnes.no +øksnes.no +oppdal.no +oppegard.no +oppegård.no +orkdal.no +orland.no +ørland.no +orskog.no +ørskog.no +orsta.no +ørsta.no +os.hedmark.no +os.hordaland.no +osen.no +osteroy.no +osterøy.no +ostre-toten.no +østre-toten.no +overhalla.no +ovre-eiker.no +øvre-eiker.no +oyer.no +øyer.no +oygarden.no +øygarden.no +oystre-slidre.no +øystre-slidre.no +porsanger.no +porsangu.no +porsáŋgu.no +porsgrunn.no +radoy.no +radøy.no +rakkestad.no +rana.no +ruovat.no +randaberg.no +rauma.no +rendalen.no +rennebu.no +rennesoy.no +rennesøy.no +rindal.no +ringebu.no +ringerike.no +ringsaker.no +rissa.no +risor.no +risør.no +roan.no +rollag.no +rygge.no +ralingen.no +rælingen.no +rodoy.no +rødøy.no +romskog.no +rømskog.no +roros.no +røros.no +rost.no +røst.no +royken.no +røyken.no +royrvik.no +røyrvik.no +rade.no +råde.no +salangen.no +siellak.no +saltdal.no +salat.no +sálát.no +sálat.no +samnanger.no +sande.more-og-romsdal.no +sande.møre-og-romsdal.no +sande.vestfold.no +sandefjord.no +sandnes.no +sandoy.no +sandøy.no +sarpsborg.no +sauda.no +sauherad.no +sel.no +selbu.no +selje.no +seljord.no +sigdal.no +siljan.no +sirdal.no +skaun.no +skedsmo.no +ski.no +skien.no +skiptvet.no +skjervoy.no +skjervøy.no +skierva.no +skiervá.no +skjak.no +skjåk.no +skodje.no +skanland.no +skånland.no +skanit.no +skánit.no +smola.no +smøla.no +snillfjord.no +snasa.no +snåsa.no +snoasa.no +snaase.no +snåase.no +sogndal.no +sokndal.no +sola.no +solund.no +songdalen.no +sortland.no +spydeberg.no +stange.no +stavanger.no +steigen.no 
+steinkjer.no +stjordal.no +stjørdal.no +stokke.no +stor-elvdal.no +stord.no +stordal.no +storfjord.no +omasvuotna.no +strand.no +stranda.no +stryn.no +sula.no +suldal.no +sund.no +sunndal.no +surnadal.no +sveio.no +svelvik.no +sykkylven.no +sogne.no +søgne.no +somna.no +sømna.no +sondre-land.no +søndre-land.no +sor-aurdal.no +sør-aurdal.no +sor-fron.no +sør-fron.no +sor-odal.no +sør-odal.no +sor-varanger.no +sør-varanger.no +matta-varjjat.no +mátta-várjjat.no +sorfold.no +sørfold.no +sorreisa.no +sørreisa.no +sorum.no +sørum.no +tana.no +deatnu.no +time.no +tingvoll.no +tinn.no +tjeldsund.no +dielddanuorri.no +tjome.no +tjøme.no +tokke.no +tolga.no +torsken.no +tranoy.no +tranøy.no +tromso.no +tromsø.no +tromsa.no +romsa.no +trondheim.no +troandin.no +trysil.no +trana.no +træna.no +trogstad.no +trøgstad.no +tvedestrand.no +tydal.no +tynset.no +tysfjord.no +divtasvuodna.no +divttasvuotna.no +tysnes.no +tysvar.no +tysvær.no +tonsberg.no +tønsberg.no +ullensaker.no +ullensvang.no +ulvik.no +utsira.no +vadso.no +vadsø.no +cahcesuolo.no +čáhcesuolo.no +vaksdal.no +valle.no +vang.no +vanylven.no +vardo.no +vardø.no +varggat.no +várggát.no +vefsn.no +vaapste.no +vega.no +vegarshei.no +vegårshei.no +vennesla.no +verdal.no +verran.no +vestby.no +vestnes.no +vestre-slidre.no +vestre-toten.no +vestvagoy.no +vestvågøy.no +vevelstad.no +vik.no +vikna.no +vindafjord.no +volda.no +voss.no +varoy.no +værøy.no +vagan.no +vågan.no +voagat.no +vagsoy.no +vågsøy.no +vaga.no +vågå.no +valer.ostfold.no +våler.østfold.no +valer.hedmark.no +våler.hedmark.no + +// np : http://www.mos.com.np/register.html +*.np + +// nr : http://cenpac.net.nr/dns/index.html +// Confirmed by registry 2008-06-17 +nr +biz.nr +info.nr +gov.nr +edu.nr +org.nr +net.nr +com.nr + +// nu : http://en.wikipedia.org/wiki/.nu +nu + +// nz : http://en.wikipedia.org/wiki/.nz +*.nz + +// om : http://en.wikipedia.org/wiki/.om +*.om +!mediaphone.om +!nawrastelecom.om +!nawras.om +!omanmobile.om +!omanpost.om +!omantel.om 
+!rakpetroleum.om +!siemens.om +!songfest.om +!statecouncil.om + +// org : http://en.wikipedia.org/wiki/.org +org + +// pa : http://www.nic.pa/ +// Some additional second level "domains" resolve directly as hostnames, such as +// pannet.pa, so we add a rule for "pa". +pa +ac.pa +gob.pa +com.pa +org.pa +sld.pa +edu.pa +net.pa +ing.pa +abo.pa +med.pa +nom.pa + +// pe : https://www.nic.pe/InformeFinalComision.pdf +pe +edu.pe +gob.pe +nom.pe +mil.pe +org.pe +com.pe +net.pe + +// pf : http://www.gobin.info/domainname/formulaire-pf.pdf +pf +com.pf +org.pf +edu.pf + +// pg : http://en.wikipedia.org/wiki/.pg +*.pg + +// ph : http://www.domains.ph/FAQ2.asp +// Submitted by registry 2008-06-13 +ph +com.ph +net.ph +org.ph +gov.ph +edu.ph +ngo.ph +mil.ph +i.ph + +// pk : http://pk5.pknic.net.pk/pk5/msgNamepk.PK +pk +com.pk +net.pk +edu.pk +org.pk +fam.pk +biz.pk +web.pk +gov.pk +gob.pk +gok.pk +gon.pk +gop.pk +gos.pk +info.pk + +// pl : http://www.dns.pl/english/ +pl +// NASK functional domains (nask.pl / dns.pl) : http://www.dns.pl/english/dns-funk.html +aid.pl +agro.pl +atm.pl +auto.pl +biz.pl +com.pl +edu.pl +gmina.pl +gsm.pl +info.pl +mail.pl +miasta.pl +media.pl +mil.pl +net.pl +nieruchomosci.pl +nom.pl +org.pl +pc.pl +powiat.pl +priv.pl +realestate.pl +rel.pl +sex.pl +shop.pl +sklep.pl +sos.pl +szkola.pl +targi.pl +tm.pl +tourism.pl +travel.pl +turystyka.pl +// ICM functional domains (icm.edu.pl) +6bone.pl +art.pl +mbone.pl +// Government domains (administred by ippt.gov.pl) +gov.pl +uw.gov.pl +um.gov.pl +ug.gov.pl +upow.gov.pl +starostwo.gov.pl +so.gov.pl +sr.gov.pl +po.gov.pl +pa.gov.pl +// other functional domains +ngo.pl +irc.pl +usenet.pl +// NASK geographical domains : http://www.dns.pl/english/dns-regiony.html +augustow.pl +babia-gora.pl +bedzin.pl +beskidy.pl +bialowieza.pl +bialystok.pl +bielawa.pl +bieszczady.pl +boleslawiec.pl +bydgoszcz.pl +bytom.pl +cieszyn.pl +czeladz.pl +czest.pl +dlugoleka.pl +elblag.pl +elk.pl +glogow.pl +gniezno.pl +gorlice.pl 
+grajewo.pl +ilawa.pl +jaworzno.pl +jelenia-gora.pl +jgora.pl +kalisz.pl +kazimierz-dolny.pl +karpacz.pl +kartuzy.pl +kaszuby.pl +katowice.pl +kepno.pl +ketrzyn.pl +klodzko.pl +kobierzyce.pl +kolobrzeg.pl +konin.pl +konskowola.pl +kutno.pl +lapy.pl +lebork.pl +legnica.pl +lezajsk.pl +limanowa.pl +lomza.pl +lowicz.pl +lubin.pl +lukow.pl +malbork.pl +malopolska.pl +mazowsze.pl +mazury.pl +mielec.pl +mielno.pl +mragowo.pl +naklo.pl +nowaruda.pl +nysa.pl +olawa.pl +olecko.pl +olkusz.pl +olsztyn.pl +opoczno.pl +opole.pl +ostroda.pl +ostroleka.pl +ostrowiec.pl +ostrowwlkp.pl +pila.pl +pisz.pl +podhale.pl +podlasie.pl +polkowice.pl +pomorze.pl +pomorskie.pl +prochowice.pl +pruszkow.pl +przeworsk.pl +pulawy.pl +radom.pl +rawa-maz.pl +rybnik.pl +rzeszow.pl +sanok.pl +sejny.pl +siedlce.pl +slask.pl +slupsk.pl +sosnowiec.pl +stalowa-wola.pl +skoczow.pl +starachowice.pl +stargard.pl +suwalki.pl +swidnica.pl +swiebodzin.pl +swinoujscie.pl +szczecin.pl +szczytno.pl +tarnobrzeg.pl +tgory.pl +turek.pl +tychy.pl +ustka.pl +walbrzych.pl +warmia.pl +warszawa.pl +waw.pl +wegrow.pl +wielun.pl +wlocl.pl +wloclawek.pl +wodzislaw.pl +wolomin.pl +wroclaw.pl +zachpomor.pl +zagan.pl +zarow.pl +zgora.pl +zgorzelec.pl +// TASK geographical domains (www.task.gda.pl/uslugi/dns) +gda.pl +gdansk.pl +gdynia.pl +med.pl +sopot.pl +// other geographical domains +gliwice.pl +krakow.pl +poznan.pl +wroc.pl +zakopane.pl + +// pm : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf +pm + +// pn : http://www.government.pn/PnRegistry/policies.htm +pn +gov.pn +co.pn +org.pn +edu.pn +net.pn + +// pr : http://www.nic.pr/index.asp?f=1 +pr +com.pr +net.pr +org.pr +gov.pr +edu.pr +isla.pr +pro.pr +biz.pr +info.pr +name.pr +// these aren't mentioned on nic.pr, but on http://en.wikipedia.org/wiki/.pr +est.pr +prof.pr +ac.pr + +// pro : http://www.nic.pro/support_faq.htm +pro +aca.pro +bar.pro +cpa.pro +jur.pro +law.pro +med.pro +eng.pro + +// ps : http://en.wikipedia.org/wiki/.ps +// 
http://www.nic.ps/registration/policy.html#reg +ps +edu.ps +gov.ps +sec.ps +plo.ps +com.ps +org.ps +net.ps + +// pt : http://online.dns.pt/dns/start_dns +pt +net.pt +gov.pt +org.pt +edu.pt +int.pt +publ.pt +com.pt +nome.pt + +// pw : http://en.wikipedia.org/wiki/.pw +pw +co.pw +ne.pw +or.pw +ed.pw +go.pw +belau.pw + +// py : http://www.nic.py/faq_a.html#faq_b +*.py + +// qa : http://domains.qa/en/ +qa +com.qa +edu.qa +gov.qa +mil.qa +name.qa +net.qa +org.qa +sch.qa + +// re : http://www.afnic.re/obtenir/chartes/nommage-re/annexe-descriptifs +re +com.re +asso.re +nom.re + +// ro : http://www.rotld.ro/ +ro +com.ro +org.ro +tm.ro +nt.ro +nom.ro +info.ro +rec.ro +arts.ro +firm.ro +store.ro +www.ro + +// rs : http://en.wikipedia.org/wiki/.rs +rs +co.rs +org.rs +edu.rs +ac.rs +gov.rs +in.rs + +// ru : http://www.cctld.ru/ru/docs/aktiv_8.php +// Industry domains +ru +ac.ru +com.ru +edu.ru +int.ru +net.ru +org.ru +pp.ru +// Geographical domains +adygeya.ru +altai.ru +amur.ru +arkhangelsk.ru +astrakhan.ru +bashkiria.ru +belgorod.ru +bir.ru +bryansk.ru +buryatia.ru +cbg.ru +chel.ru +chelyabinsk.ru +chita.ru +chukotka.ru +chuvashia.ru +dagestan.ru +dudinka.ru +e-burg.ru +grozny.ru +irkutsk.ru +ivanovo.ru +izhevsk.ru +jar.ru +joshkar-ola.ru +kalmykia.ru +kaluga.ru +kamchatka.ru +karelia.ru +kazan.ru +kchr.ru +kemerovo.ru +khabarovsk.ru +khakassia.ru +khv.ru +kirov.ru +koenig.ru +komi.ru +kostroma.ru +krasnoyarsk.ru +kuban.ru +kurgan.ru +kursk.ru +lipetsk.ru +magadan.ru +mari.ru +mari-el.ru +marine.ru +mordovia.ru +mosreg.ru +msk.ru +murmansk.ru +nalchik.ru +nnov.ru +nov.ru +novosibirsk.ru +nsk.ru +omsk.ru +orenburg.ru +oryol.ru +palana.ru +penza.ru +perm.ru +pskov.ru +ptz.ru +rnd.ru +ryazan.ru +sakhalin.ru +samara.ru +saratov.ru +simbirsk.ru +smolensk.ru +spb.ru +stavropol.ru +stv.ru +surgut.ru +tambov.ru +tatarstan.ru +tom.ru +tomsk.ru +tsaritsyn.ru +tsk.ru +tula.ru +tuva.ru +tver.ru +tyumen.ru +udm.ru +udmurtia.ru +ulan-ude.ru +vladikavkaz.ru +vladimir.ru +vladivostok.ru 
+volgograd.ru +vologda.ru +voronezh.ru +vrn.ru +vyatka.ru +yakutia.ru +yamal.ru +yaroslavl.ru +yekaterinburg.ru +yuzhno-sakhalinsk.ru +// More geographical domains +amursk.ru +baikal.ru +cmw.ru +fareast.ru +jamal.ru +kms.ru +k-uralsk.ru +kustanai.ru +kuzbass.ru +magnitka.ru +mytis.ru +nakhodka.ru +nkz.ru +norilsk.ru +oskol.ru +pyatigorsk.ru +rubtsovsk.ru +snz.ru +syzran.ru +vdonsk.ru +zgrad.ru +// State domains +gov.ru +mil.ru +// Technical domains +test.ru + +// rw : http://www.nic.rw/cgi-bin/policy.pl +rw +gov.rw +net.rw +edu.rw +ac.rw +com.rw +co.rw +int.rw +mil.rw +gouv.rw + +// sa : http://www.nic.net.sa/ +sa +com.sa +net.sa +org.sa +gov.sa +med.sa +pub.sa +edu.sa +sch.sa + +// sb : http://www.sbnic.net.sb/ +// Submitted by registry 2008-06-08 +sb +com.sb +edu.sb +gov.sb +net.sb +org.sb + +// sc : http://www.nic.sc/ +sc +com.sc +gov.sc +net.sc +org.sc +edu.sc + +// sd : http://www.isoc.sd/sudanic.isoc.sd/billing_pricing.htm +// Submitted by registry 2008-06-17 +sd +com.sd +net.sd +org.sd +edu.sd +med.sd +gov.sd +info.sd + +// se : http://en.wikipedia.org/wiki/.se +// Submitted by registry 2008-06-24 +se +a.se +ac.se +b.se +bd.se +brand.se +c.se +d.se +e.se +f.se +fh.se +fhsk.se +fhv.se +g.se +h.se +i.se +k.se +komforb.se +kommunalforbund.se +komvux.se +l.se +lanbib.se +m.se +n.se +naturbruksgymn.se +o.se +org.se +p.se +parti.se +pp.se +press.se +r.se +s.se +sshn.se +t.se +tm.se +u.se +w.se +x.se +y.se +z.se + +// sg : http://www.nic.net.sg/sub_policies_agreement/2ld.html +sg +com.sg +net.sg +org.sg +gov.sg +edu.sg +per.sg + +// sh : http://www.nic.sh/rules.html +// list of 2nd level domains ? +sh + +// si : http://en.wikipedia.org/wiki/.si +si + +// sj : No registrations at this time. +// Submitted by registry 2008-06-16 + +// sk : http://en.wikipedia.org/wiki/.sk +// list of 2nd level domains ? 
+sk + +// sl : http://www.nic.sl +// Submitted by registry 2008-06-12 +sl +com.sl +net.sl +edu.sl +gov.sl +org.sl + +// sm : http://en.wikipedia.org/wiki/.sm +sm + +// sn : http://en.wikipedia.org/wiki/.sn +sn +art.sn +com.sn +edu.sn +gouv.sn +org.sn +perso.sn +univ.sn + +// so : http://www.soregistry.com/ +so +com.so +net.so +org.so + +// sr : http://en.wikipedia.org/wiki/.sr +sr + +// st : http://www.nic.st/html/policyrules/ +st +co.st +com.st +consulado.st +edu.st +embaixada.st +gov.st +mil.st +net.st +org.st +principe.st +saotome.st +store.st + +// su : http://en.wikipedia.org/wiki/.su +su + +// sv : http://www.svnet.org.sv/svpolicy.html +*.sv + +// sy : http://en.wikipedia.org/wiki/.sy +// see also: http://www.gobin.info/domainname/sy.doc +sy +edu.sy +gov.sy +net.sy +mil.sy +com.sy +org.sy + +// sz : http://en.wikipedia.org/wiki/.sz +// http://www.sispa.org.sz/ +sz +co.sz +ac.sz +org.sz + +// tc : http://en.wikipedia.org/wiki/.tc +tc + +// td : http://en.wikipedia.org/wiki/.td +td + +// tel: http://en.wikipedia.org/wiki/.tel +// http://www.telnic.org/ +tel + +// tf : http://en.wikipedia.org/wiki/.tf +tf + +// tg : http://en.wikipedia.org/wiki/.tg +// http://www.nic.tg/nictg/index.php implies no reserved 2nd-level domains, +// although this contradicts wikipedia. +tg + +// th : http://en.wikipedia.org/wiki/.th +// Submitted by registry 2008-06-17 +th +ac.th +co.th +go.th +in.th +mi.th +net.th +or.th + +// tj : http://www.nic.tj/policy.htm +tj +ac.tj +biz.tj +co.tj +com.tj +edu.tj +go.tj +gov.tj +int.tj +mil.tj +name.tj +net.tj +nic.tj +org.tj +test.tj +web.tj + +// tk : http://en.wikipedia.org/wiki/.tk +tk + +// tl : http://en.wikipedia.org/wiki/.tl +tl +gov.tl + +// tm : http://www.nic.tm/rules.html +// list of 2nd level tlds ? 
+tm + +// tn : http://en.wikipedia.org/wiki/.tn +// http://whois.ati.tn/ +tn +com.tn +ens.tn +fin.tn +gov.tn +ind.tn +intl.tn +nat.tn +net.tn +org.tn +info.tn +perso.tn +tourism.tn +edunet.tn +rnrt.tn +rns.tn +rnu.tn +mincom.tn +agrinet.tn +defense.tn +turen.tn + +// to : http://en.wikipedia.org/wiki/.to +// Submitted by registry 2008-06-17 +to +com.to +gov.to +net.to +org.to +edu.to +mil.to + +// tr : http://en.wikipedia.org/wiki/.tr +*.tr +!nic.tr +// Used by government in the TRNC +// http://en.wikipedia.org/wiki/.nc.tr +gov.nc.tr + +// travel : http://en.wikipedia.org/wiki/.travel +travel + +// tt : http://www.nic.tt/ +tt +co.tt +com.tt +org.tt +net.tt +biz.tt +info.tt +pro.tt +int.tt +coop.tt +jobs.tt +mobi.tt +travel.tt +museum.tt +aero.tt +name.tt +gov.tt +edu.tt + +// tv : http://en.wikipedia.org/wiki/.tv +// Not listing any 2LDs as reserved since none seem to exist in practice, +// Wikipedia notwithstanding. +tv + +// tw : http://en.wikipedia.org/wiki/.tw +tw +edu.tw +gov.tw +mil.tw +com.tw +net.tw +org.tw +idv.tw +game.tw +ebiz.tw +club.tw +網路.tw +組織.tw +商業.tw + +// tz : http://en.wikipedia.org/wiki/.tz +// Submitted by registry 2008-06-17 +// Updated from http://www.tznic.or.tz/index.php/domains.html 2010-10-25 +ac.tz +co.tz +go.tz +mil.tz +ne.tz +or.tz +sc.tz + +// ua : http://www.nic.net.ua/ +ua +com.ua +edu.ua +gov.ua +in.ua +net.ua +org.ua +// ua geo-names +cherkassy.ua +chernigov.ua +chernovtsy.ua +ck.ua +cn.ua +crimea.ua +cv.ua +dn.ua +dnepropetrovsk.ua +donetsk.ua +dp.ua +if.ua +ivano-frankivsk.ua +kh.ua +kharkov.ua +kherson.ua +khmelnitskiy.ua +kiev.ua +kirovograd.ua +km.ua +kr.ua +ks.ua +kv.ua +lg.ua +lugansk.ua +lutsk.ua +lviv.ua +mk.ua +nikolaev.ua +od.ua +odessa.ua +pl.ua +poltava.ua +rovno.ua +rv.ua +sebastopol.ua +sumy.ua +te.ua +ternopil.ua +uzhgorod.ua +vinnica.ua +vn.ua +zaporizhzhe.ua +zp.ua +zhitomir.ua +zt.ua + +// Private registries in .ua +co.ua +pp.ua + +// ug : http://www.registry.co.ug/ +ug +co.ug +ac.ug +sc.ug +go.ug +ne.ug 
+or.ug + +// uk : http://en.wikipedia.org/wiki/.uk +*.uk +*.sch.uk +!bl.uk +!british-library.uk +!icnet.uk +!jet.uk +!mod.uk +!nel.uk +!nhs.uk +!nic.uk +!nls.uk +!national-library-scotland.uk +!parliament.uk +!police.uk + +// us : http://en.wikipedia.org/wiki/.us +us +dni.us +fed.us +isa.us +kids.us +nsn.us +// us geographic names +ak.us +al.us +ar.us +as.us +az.us +ca.us +co.us +ct.us +dc.us +de.us +fl.us +ga.us +gu.us +hi.us +ia.us +id.us +il.us +in.us +ks.us +ky.us +la.us +ma.us +md.us +me.us +mi.us +mn.us +mo.us +ms.us +mt.us +nc.us +nd.us +ne.us +nh.us +nj.us +nm.us +nv.us +ny.us +oh.us +ok.us +or.us +pa.us +pr.us +ri.us +sc.us +sd.us +tn.us +tx.us +ut.us +vi.us +vt.us +va.us +wa.us +wi.us +wv.us +wy.us +// The registrar notes several more specific domains available in each state, +// such as state.*.us, dst.*.us, etc., but resolution of these is somewhat +// haphazard; in some states these domains resolve as addresses, while in others +// only subdomains are available, or even nothing at all. We include the +// most common ones where it's clear that different sites are different +// entities. 
+k12.ak.us +k12.al.us +k12.ar.us +k12.as.us +k12.az.us +k12.ca.us +k12.co.us +k12.ct.us +k12.dc.us +k12.de.us +k12.fl.us +k12.ga.us +k12.gu.us +// k12.hi.us Hawaii has a state-wide DOE login: bug 614565 +k12.ia.us +k12.id.us +k12.il.us +k12.in.us +k12.ks.us +k12.ky.us +k12.la.us +k12.ma.us +k12.md.us +k12.me.us +k12.mi.us +k12.mn.us +k12.mo.us +k12.ms.us +k12.mt.us +k12.nc.us +k12.nd.us +k12.ne.us +k12.nh.us +k12.nj.us +k12.nm.us +k12.nv.us +k12.ny.us +k12.oh.us +k12.ok.us +k12.or.us +k12.pa.us +k12.pr.us +k12.ri.us +k12.sc.us +k12.sd.us +k12.tn.us +k12.tx.us +k12.ut.us +k12.vi.us +k12.vt.us +k12.va.us +k12.wa.us +k12.wi.us +k12.wv.us +k12.wy.us + +cc.ak.us +cc.al.us +cc.ar.us +cc.as.us +cc.az.us +cc.ca.us +cc.co.us +cc.ct.us +cc.dc.us +cc.de.us +cc.fl.us +cc.ga.us +cc.gu.us +cc.hi.us +cc.ia.us +cc.id.us +cc.il.us +cc.in.us +cc.ks.us +cc.ky.us +cc.la.us +cc.ma.us +cc.md.us +cc.me.us +cc.mi.us +cc.mn.us +cc.mo.us +cc.ms.us +cc.mt.us +cc.nc.us +cc.nd.us +cc.ne.us +cc.nh.us +cc.nj.us +cc.nm.us +cc.nv.us +cc.ny.us +cc.oh.us +cc.ok.us +cc.or.us +cc.pa.us +cc.pr.us +cc.ri.us +cc.sc.us +cc.sd.us +cc.tn.us +cc.tx.us +cc.ut.us +cc.vi.us +cc.vt.us +cc.va.us +cc.wa.us +cc.wi.us +cc.wv.us +cc.wy.us + +lib.ak.us +lib.al.us +lib.ar.us +lib.as.us +lib.az.us +lib.ca.us +lib.co.us +lib.ct.us +lib.dc.us +lib.de.us +lib.fl.us +lib.ga.us +lib.gu.us +lib.hi.us +lib.ia.us +lib.id.us +lib.il.us +lib.in.us +lib.ks.us +lib.ky.us +lib.la.us +lib.ma.us +lib.md.us +lib.me.us +lib.mi.us +lib.mn.us +lib.mo.us +lib.ms.us +lib.mt.us +lib.nc.us +lib.nd.us +lib.ne.us +lib.nh.us +lib.nj.us +lib.nm.us +lib.nv.us +lib.ny.us +lib.oh.us +lib.ok.us +lib.or.us +lib.pa.us +lib.pr.us +lib.ri.us +lib.sc.us +lib.sd.us +lib.tn.us +lib.tx.us +lib.ut.us +lib.vi.us +lib.vt.us +lib.va.us +lib.wa.us +lib.wi.us +lib.wv.us +lib.wy.us + +// k12.ma.us contains school districts in Massachusetts. The 4LDs are +// managed indepedently except for private (PVT), charter (CHTR) and +// parochial (PAROCH) schools. 
Those are delegated directly to the +// 5LD operators. +pvt.k12.ma.us +chtr.k12.ma.us +paroch.k12.ma.us + +// uy : http://www.antel.com.uy/ +*.uy + +// uz : http://www.reg.uz/registerr.html +// are there other 2nd level tlds ? +uz +com.uz +co.uz + +// va : http://en.wikipedia.org/wiki/.va +va + +// vc : http://en.wikipedia.org/wiki/.vc +// Submitted by registry 2008-06-13 +vc +com.vc +net.vc +org.vc +gov.vc +mil.vc +edu.vc + +// ve : http://registro.nic.ve/nicve/registro/index.html +*.ve + +// vg : http://en.wikipedia.org/wiki/.vg +vg + +// vi : http://www.nic.vi/newdomainform.htm +// http://www.nic.vi/Domain_Rules/body_domain_rules.html indicates some other +// TLDs are "reserved", such as edu.vi and gov.vi, but doesn't actually say they +// are available for registration (which they do not seem to be). +vi +co.vi +com.vi +k12.vi +net.vi +org.vi + +// vn : https://www.dot.vn/vnnic/vnnic/domainregistration.jsp +vn +com.vn +net.vn +org.vn +edu.vn +gov.vn +int.vn +ac.vn +biz.vn +info.vn +name.vn +pro.vn +health.vn + +// vu : http://en.wikipedia.org/wiki/.vu +// list of 2nd level tlds ? 
+vu + +// wf : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf +wf + +// ws : http://en.wikipedia.org/wiki/.ws +// http://samoanic.ws/index.dhtml +ws +com.ws +net.ws +org.ws +gov.ws +edu.ws + +// yt : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf +yt + +// IDN ccTLDs +// Please sort by ISO 3166 ccTLD, then punicode string +// when submitting patches and follow this format: +// ("" ) : +// [optional sponsoring org] +// + +// xn--mgbaam7a8h ("Emerat" Arabic) : AE +//http://nic.ae/english/arabicdomain/rules.jsp +امارات + +// xn--54b7fta0cc ("Bangla" Bangla) : BD +বাংলা + +// xn--fiqs8s ("China" Chinese-Han-Simplified <.Zhonggou>) : CN +// CNNIC +// http://cnnic.cn/html/Dir/2005/10/11/3218.htm +中国 + +// xn--fiqz9s ("China" Chinese-Han-Traditional <.Zhonggou>) : CN +// CNNIC +// http://cnnic.cn/html/Dir/2005/10/11/3218.htm +中國 + +// xn--lgbbat1ad8j ("Algeria / Al Jazair" Arabic) : DZ +الجزائر + +// xn--wgbh1c ("Egypt" Arabic .masr) : EG +// http://www.dotmasr.eg/ +مصر + +// xn--node ("ge" Georgian (Mkhedruli)) : GE +გე + +// xn--j6w193g ("Hong Kong" Chinese-Han) : HK +// https://www2.hkirc.hk/register/rules.jsp +香港 + +// xn--h2brj9c ("Bharat" Devanagari) : IN +// India +भारत + +// xn--mgbbh1a71e ("Bharat" Arabic) : IN +// India +بھارت + +// xn--fpcrj9c3d ("Bharat" Telugu) : IN +// India +భారత్ + +// xn--gecrj9c ("Bharat" Gujarati) : IN +// India +ભારત + +// xn--s9brj9c ("Bharat" Gurmukhi) : IN +// India +ਭਾਰਤ + +// xn--45brj9c ("Bharat" Bengali) : IN +// India +ভারত + +// xn--xkc2dl3a5ee0h ("India" Tamil) : IN +// India +இந்தியா + +// xn--mgba3a4f16a ("Iran" Persian) : IR +ایران + +// xn--mgba3a4fra ("Iran" Arabic) : IR +ايران + +//xn--mgbayh7gpa ("al-Ordon" Arabic) JO +//National Information Technology Center (NITC) +//Royal Scientific Society, Al-Jubeiha +الاردن + +// xn--3e0b707e ("Republic of Korea" Hangul) : KR +한국 + +// xn--fzc2c9e2c ("Lanka" Sinhalese-Sinhala) : LK +// http://nic.lk +ලංකා + +// xn--xkc2al3hye2a ("Ilangai" 
Tamil) : LK +// http://nic.lk +இலங்கை + +// xn--mgbc0a9azcg ("Morocco / al-Maghrib" Arabic) : MA +المغرب + +// xn--mgb9awbf ("Oman" Arabic) : OM +عمان + +// xn--ygbi2ammx ("Falasteen" Arabic) : PS +// The Palestinian National Internet Naming Authority (PNINA) +// http://www.pnina.ps +فلسطين + +// xn--90a3ac ("srb" Cyrillic) : RS +срб + +// xn--p1ai ("rf" Russian-Cyrillic) : RU +// http://www.cctld.ru/en/docs/rulesrf.php +рф + +// xn--wgbl6a ("Qatar" Arabic) : QA +// http://www.ict.gov.qa/ +قطر + +// xn--mgberp4a5d4ar ("AlSaudiah" Arabic) : SA +// http://www.nic.net.sa/ +السعودية + +// xn--mgberp4a5d4a87g ("AlSaudiah" Arabic) variant : SA +السعودیة + +// xn--mgbqly7c0a67fbc ("AlSaudiah" Arabic) variant : SA +السعودیۃ + +// xn--mgbqly7cvafr ("AlSaudiah" Arabic) variant : SA +السعوديه + +// xn--ogbpf8fl ("Syria" Arabic) : SY +سورية + +// xn--mgbtf8fl ("Syria" Arabic) variant : SY +سوريا + +// xn--yfro4i67o Singapore ("Singapore" Chinese-Han) : SG +新加坡 + +// xn--clchc0ea0b2g2a9gcd ("Singapore" Tamil) : SG +சிங்கப்பூர் + +// xn--o3cw4h ("Thai" Thai) : TH +// http://www.thnic.co.th +ไทย + +// xn--pgbs0dh ("Tunis") : TN +// http://nic.tn +تونس + +// xn--kpry57d ("Taiwan" Chinese-Han-Traditional) : TW +// http://www.twnic.net/english/dn/dn_07a.htm +台灣 + +// xn--kprw13d ("Taiwan" Chinese-Han-Simplified) : TW +// http://www.twnic.net/english/dn/dn_07a.htm +台湾 + +// xn--nnx388a ("Taiwan") variant : TW +臺灣 + +// xn--j1amh ("ukr" Cyrillic) : UA +укр + +// xn--mgb2ddes ("AlYemen" Arabic) : YE +اليمن + +// xxx : http://icmregistry.com +xxx + +// ye : http://www.y.net.ye/services/domain_name.htm +*.ye + +// za : http://www.zadna.org.za/slds.html +*.za + +// zm : http://en.wikipedia.org/wiki/.zm +*.zm + +// zw : http://en.wikipedia.org/wiki/.zw +*.zw + +// ===END ICANN DOMAINS=== +// ===BEGIN PRIVATE DOMAINS=== + +// info.at : http://www.info.at/ +biz.at +info.at + +// priv.at : http://www.nic.priv.at/ +// Submitted by registry 2008-06-09 +priv.at + +// co.ca : 
http://registry.co.ca +co.ca + +// CentralNic : http://www.centralnic.com/names/domains +// Confirmed by registry 2008-06-09 +ar.com +br.com +cn.com +de.com +eu.com +gb.com +gr.com +hu.com +jpn.com +kr.com +no.com +qc.com +ru.com +sa.com +se.com +uk.com +us.com +uy.com +za.com +gb.net +jp.net +se.net +uk.net +ae.org +us.org +com.de + +// Opera Software, A.S.A. +// Requested by Yngve Pettersen 2009-11-26 +operaunite.com + +// Google, Inc. +// Requested by Eduardo Vela 2010-09-06 +appspot.com + +// iki.fi : Submitted by Hannu Aronsson 2009-11-05 +iki.fi + +// c.la : http://www.c.la/ +c.la + +// ZaNiC : http://www.za.net/ +// Confirmed by registry 2009-10-03 +za.net +za.org + +// CoDNS B.V. +// Added 2010-05-23. +co.nl +co.no + +// Mainseek Sp. z o.o. : http://www.co.pl/ +co.pl + +// DynDNS.com : http://www.dyndns.com/services/dns/dyndns/ +dyndns-at-home.com +dyndns-at-work.com +dyndns-blog.com +dyndns-free.com +dyndns-home.com +dyndns-ip.com +dyndns-mail.com +dyndns-office.com +dyndns-pics.com +dyndns-remote.com +dyndns-server.com +dyndns-web.com +dyndns-wiki.com +dyndns-work.com +dyndns.biz +dyndns.info +dyndns.org +dyndns.tv +at-band-camp.net +ath.cx +barrel-of-knowledge.info +barrell-of-knowledge.info +better-than.tv +blogdns.com +blogdns.net +blogdns.org +blogsite.org +boldlygoingnowhere.org +broke-it.net +buyshouses.net +cechire.com +dnsalias.com +dnsalias.net +dnsalias.org +dnsdojo.com +dnsdojo.net +dnsdojo.org +does-it.net +doesntexist.com +doesntexist.org +dontexist.com +dontexist.net +dontexist.org +doomdns.com +doomdns.org +dvrdns.org +dyn-o-saur.com +dynalias.com +dynalias.net +dynalias.org +dynathome.net +dyndns.ws +endofinternet.net +endofinternet.org +endoftheinternet.org +est-a-la-maison.com +est-a-la-masion.com +est-le-patron.com +est-mon-blogueur.com +for-better.biz +for-more.biz +for-our.info +for-some.biz +for-the.biz +forgot.her.name +forgot.his.name +from-ak.com +from-al.com +from-ar.com +from-az.net +from-ca.com +from-co.net +from-ct.com 
+from-dc.com +from-de.com +from-fl.com +from-ga.com +from-hi.com +from-ia.com +from-id.com +from-il.com +from-in.com +from-ks.com +from-ky.com +from-la.net +from-ma.com +from-md.com +from-me.org +from-mi.com +from-mn.com +from-mo.com +from-ms.com +from-mt.com +from-nc.com +from-nd.com +from-ne.com +from-nh.com +from-nj.com +from-nm.com +from-nv.com +from-ny.net +from-oh.com +from-ok.com +from-or.com +from-pa.com +from-pr.com +from-ri.com +from-sc.com +from-sd.com +from-tn.com +from-tx.com +from-ut.com +from-va.com +from-vt.com +from-wa.com +from-wi.com +from-wv.com +from-wy.com +ftpaccess.cc +fuettertdasnetz.de +game-host.org +game-server.cc +getmyip.com +gets-it.net +go.dyndns.org +gotdns.com +gotdns.org +groks-the.info +groks-this.info +ham-radio-op.net +here-for-more.info +hobby-site.com +hobby-site.org +home.dyndns.org +homedns.org +homeftp.net +homeftp.org +homeip.net +homelinux.com +homelinux.net +homelinux.org +homeunix.com +homeunix.net +homeunix.org +iamallama.com +in-the-band.net +is-a-anarchist.com +is-a-blogger.com +is-a-bookkeeper.com +is-a-bruinsfan.org +is-a-bulls-fan.com +is-a-candidate.org +is-a-caterer.com +is-a-celticsfan.org +is-a-chef.com +is-a-chef.net +is-a-chef.org +is-a-conservative.com +is-a-cpa.com +is-a-cubicle-slave.com +is-a-democrat.com +is-a-designer.com +is-a-doctor.com +is-a-financialadvisor.com +is-a-geek.com +is-a-geek.net +is-a-geek.org +is-a-green.com +is-a-guru.com +is-a-hard-worker.com +is-a-hunter.com +is-a-knight.org +is-a-landscaper.com +is-a-lawyer.com +is-a-liberal.com +is-a-libertarian.com +is-a-linux-user.org +is-a-llama.com +is-a-musician.com +is-a-nascarfan.com +is-a-nurse.com +is-a-painter.com +is-a-patsfan.org +is-a-personaltrainer.com +is-a-photographer.com +is-a-player.com +is-a-republican.com +is-a-rockstar.com +is-a-socialist.com +is-a-soxfan.org +is-a-student.com +is-a-teacher.com +is-a-techie.com +is-a-therapist.com +is-an-accountant.com +is-an-actor.com +is-an-actress.com +is-an-anarchist.com 
+is-an-artist.com +is-an-engineer.com +is-an-entertainer.com +is-by.us +is-certified.com +is-found.org +is-gone.com +is-into-anime.com +is-into-cars.com +is-into-cartoons.com +is-into-games.com +is-leet.com +is-lost.org +is-not-certified.com +is-saved.org +is-slick.com +is-uberleet.com +is-very-bad.org +is-very-evil.org +is-very-good.org +is-very-nice.org +is-very-sweet.org +is-with-theband.com +isa-geek.com +isa-geek.net +isa-geek.org +isa-hockeynut.com +issmarterthanyou.com +isteingeek.de +istmein.de +kicks-ass.net +kicks-ass.org +knowsitall.info +land-4-sale.us +lebtimnetz.de +leitungsen.de +likes-pie.com +likescandy.com +merseine.nu +mine.nu +misconfused.org +mypets.ws +myphotos.cc +neat-url.com +office-on-the.net +on-the-web.tv +podzone.net +podzone.org +readmyblog.org +saves-the-whales.com +scrapper-site.net +scrapping.cc +selfip.biz +selfip.com +selfip.info +selfip.net +selfip.org +sells-for-less.com +sells-for-u.com +sells-it.net +sellsyourhome.org +servebbs.com +servebbs.net +servebbs.org +serveftp.net +serveftp.org +servegame.org +shacknet.nu +simple-url.com +space-to-rent.com +stuff-4-sale.org +stuff-4-sale.us +teaches-yoga.com +thruhere.net +traeumtgerade.de +webhop.biz +webhop.info +webhop.net +webhop.org +worse-than.tv +writesthisblog.com + +// ===END PRIVATE DOMAINS=== diff --git a/src/test/java/org/archive/url/PublicSuffixesTest.java b/src/test/java/org/archive/url/PublicSuffixesTest.java new file mode 100644 index 00000000..e2bb288a --- /dev/null +++ b/src/test/java/org/archive/url/PublicSuffixesTest.java @@ -0,0 +1,193 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.url; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.regex.Matcher; + +import junit.framework.TestCase; + +import org.archive.url.PublicSuffixes.Node; + +/** + * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches + * from constructed regex. + * + * @author gojomo + */ +public class PublicSuffixesTest extends TestCase { + // test of low level implementation + + public void testCompare() { + Node n = new Node("hoge"); + assertTrue(n.compareTo('a') > 0); + assertEquals(-1, n.compareTo('*')); + assertEquals(-1, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(-1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("*,"); + assertEquals(1, n.compareTo('a')); + assertEquals(0, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, n.compareTo(new Node("*,"))); + assertEquals(1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("!hoge"); + assertEquals(1, n.compareTo('a')); + assertEquals(-1, n.compareTo('*')); + assertEquals(0, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(0, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node(""); + assertEquals(1, n.compareTo('a')); + assertEquals(1, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, n.compareTo(new Node(""))); + } + + protected String dump(Node 
alt) { + StringWriter w = new StringWriter(); + PublicSuffixes.dump(alt, 0, new PrintWriter(w)); + return w.toString(); + } + public void testTrie1() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + // specifically, should not have empty string as match. + assertEquals("(null)\n" + + " \"ac,\"\n", dump(alt)); + alt.addBranch("ac,com,"); + assertEquals("(null)\n" + + " \"ac,\"\n" + + " \"com,\"\n" + + " \"\"\n", dump(alt)); + alt.addBranch("ac,edu,"); + assertEquals("(null)\n" + + " \"ac,\"\n" + + " \"com,\"\n" + + " \"edu,\"\n" + + " \"\"\n", dump(alt)); + } + public void testTrie2() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("*,"); + assertEquals("(null)\n" + + " \"ac,\"\n" + + " \"*,\"\n", dump(alt)); + } + + public void testTrie3() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("ac,!hoge,"); + alt.addBranch("ac,*,"); + // exception goes first. + assertEquals("(null)\n" + + " \"ac,\"\n" + + " \"!hoge,\"\n" + + " \"*,\"\n" + + " \"\"\n", dump(alt)); + } + + // test of higher-level functionality + + Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() + .matcher(""); + + public void testBasics() { + matchPrefix("com,example,www,", "com,example,"); + matchPrefix("com,example,", "com,example,"); + matchPrefix("org,archive,www,", "org,archive,"); + matchPrefix("org,archive,", "org,archive,"); + matchPrefix("fr,yahoo,www,", "fr,yahoo,"); + matchPrefix("fr,yahoo,", "fr,yahoo,"); + matchPrefix("au,com,foobar,www,", "au,com,foobar,"); + matchPrefix("au,com,foobar,", "au,com,foobar,"); + matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); + matchPrefix("uk,co,virgin,", "uk,co,virgin,"); + matchPrefix("au,com,example,www,", "au,com,example,"); + matchPrefix("au,com,example,", "au,com,example,"); + matchPrefix("jp,tokyo,public,assigned,www,", + "jp,tokyo,public,assigned,"); + matchPrefix("jp,tokyo,public,assigned,", "jp,tokyo,public,assigned,"); + } + + 
public void testDomainWithDash() { + matchPrefix("de,bad-site,www", "de,bad-site,"); + } + + public void testDomainWithNumbers() { + matchPrefix("de,archive4u,www", "de,archive4u,"); + } + + public void testIPV4() { + assertEquals("unexpected reduction", + "1.2.3.4", + PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); + } + + public void testIPV6() { + assertEquals("unexpected reduction", + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + PublicSuffixes.reduceSurtToAssignmentLevel( + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); + } + + public void testExceptions() { + matchPrefix("uk,bl,www,", "uk,bl,"); + matchPrefix("uk,bl,", "uk,bl,"); + matchPrefix("jp,tokyo,metro,subdomain,", "jp,tokyo,metro,"); + matchPrefix("jp,tokyo,metro,", "jp,tokyo,metro,"); + } + + public void testFakeTLD() { + // we assume any new/unknonwn TLD should be assumed as 2-level; + // this is preferable for our grouping purpose but might not be + // for a cookie-assigning browser (original purpose of publicsuffixlist) + matchPrefix("zzz,example,www,", "zzz,example,"); + } + + public void testUnsegmentedHostname() { + m.reset("example"); + assertFalse("unexpected match found in 'example'", m.find()); + } + + public void testTopmostAssignedCaching() { + assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); + assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); + } + + // TODO: test UTF domains? 
+ + protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { + m.reset(surtDomain); + assertTrue("expected match not found in '" + surtDomain, m.find()); + assertEquals("expected match not found", expectedAssignedPrefix, m + .group()); + } +} From 041d79fb20471f4028430fedfefcdc4c830dd45b Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 10 Oct 2013 13:46:27 +0100 Subject: [PATCH 04/28] Had to prevent old version of hsqldb being brought in by hadoop-commons. --- pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pom.xml b/pom.xml index a0389be1..f96bd1c6 100644 --- a/pom.xml +++ b/pom.xml @@ -93,6 +93,10 @@ tomcat jasper-compiler + + hsqldb + hsqldb + From b10a06f3c76e2ef0edd47a04c157cf5e60001549 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 25 Oct 2013 10:19:36 +0100 Subject: [PATCH 05/28] Updated to preferred name. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f96bd1c6..abe0fd25 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons - web-commons + commons-web 1.0.0-SNAPSHOT jar From 5cd5696f9e9c337a08dbfe6489cf750c7d6759f0 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:13:31 +0000 Subject: [PATCH 06/28] Added necessary info to POM. 
--- pom.xml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pom.xml b/pom.xml index abe0fd25..90297de8 100644 --- a/pom.xml +++ b/pom.xml @@ -17,6 +17,40 @@ iipc-web-commons https://github.com/iipc/iipc-web-commons + + The International Internet Preservation Consortium + http://netpreserve.org/ + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + many-devs + Many Others Developers Proceed Me + many@dev.org + + + anjackson + Andrew Jackson + Andrew.Jackson@bl.uk + + + + GitHub Issues + https://github.com/iipc/iipc-web-commons/issues + + + scm:git:git@github.com:iipc/iipc-web-commons.git + scm:git:git@github.com:iipc/iipc-web-commons.git + git@github.com:iipc/iipc-web-commons.git + + + UTF-8 ${maven.build.timestamp} From 0d4739a879d83226a39d72998b1710120ff385e0 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:18:35 +0000 Subject: [PATCH 07/28] [maven-release-plugin] prepare release commons-web-1.0.0 --- pom.xml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 90297de8..1f73b8e9 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,4 @@ - + 4.0.0 @@ -11,7 +10,7 @@ org.netpreserve.commons commons-web - 1.0.0-SNAPSHOT + 1.0.0 jar iipc-web-commons From 33910fcf562a29f7a3ca1a96dcd64039c4a22101 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:18:42 +0000 Subject: [PATCH 08/28] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1f73b8e9..b0258bc9 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons commons-web - 1.0.0 + 1.0.1-SNAPSHOT jar iipc-web-commons From 5c13423f99b7e11aaae97074a78cb8c6db48e575 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:26:30 +0000 Subject: [PATCH 09/28] [maven-release-plugin] rollback the release of 
commons-web-1.0.0 --- pom.xml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index b0258bc9..90297de8 100644 --- a/pom.xml +++ b/pom.xml @@ -1,4 +1,5 @@ - + 4.0.0 @@ -10,7 +11,7 @@ org.netpreserve.commons commons-web - 1.0.1-SNAPSHOT + 1.0.0-SNAPSHOT jar iipc-web-commons From 940bc1b14c7bbcf5493e318122ab3915932f5f2d Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:27:48 +0000 Subject: [PATCH 10/28] Removed erroneous repository declaration. --- pom.xml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pom.xml b/pom.xml index 90297de8..6ea15e57 100644 --- a/pom.xml +++ b/pom.xml @@ -225,12 +225,4 @@ - - - repository - - ${repository.url} - - - From da9f80e191a6986c681d17c97667488e50752ad9 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:30:21 +0000 Subject: [PATCH 11/28] [maven-release-plugin] prepare release commons-web-1.0.0 --- pom.xml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 6ea15e57..3949245a 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,4 @@ - + 4.0.0 @@ -11,7 +10,7 @@ org.netpreserve.commons commons-web - 1.0.0-SNAPSHOT + 1.0.0 jar iipc-web-commons From 33d0559d5c9011fe3a46c9acb0635b273ed5c698 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 8 Nov 2013 22:32:08 +0000 Subject: [PATCH 12/28] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3949245a..f285a382 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons commons-web - 1.0.0 + 1.0.1-SNAPSHOT jar iipc-web-commons From a732a9ee939fe44031fb6a493641598ca120b6dc Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 16:03:57 +0000 Subject: [PATCH 13/28] Removed older PublicSuffix code. 
--- .../java/org/archive/url/PublicSuffixes.java | 363 ------------------ .../org/archive/url/PublicSuffixesTest.java | 193 ---------- 2 files changed, 556 deletions(-) delete mode 100644 src/main/java/org/archive/url/PublicSuffixes.java delete mode 100644 src/test/java/org/archive/url/PublicSuffixesTest.java diff --git a/src/main/java/org/archive/url/PublicSuffixes.java b/src/main/java/org/archive/url/PublicSuffixes.java deleted file mode 100644 index 7c3df6b8..00000000 --- a/src/main/java/org/archive/url/PublicSuffixes.java +++ /dev/null @@ -1,363 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.url; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.FileInputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.commons.io.IOUtils; -import org.archive.util.TextUtils; - -/** - * Utility class for making use of the information about 'public suffixes' at - * http://publicsuffix.org. 
- * - * The public suffix list (once known as 'effective TLDs') was motivated by the - * need to decide on which broader domains a subdomain was allowed to set - * cookies. For example, a server at 'www.example.com' can set cookies for - * 'www.example.com' or 'example.com' but not 'com'. 'www.example.co.uk' can set - * cookies for 'www.example.co.uk' or 'example.co.uk' but not 'co.uk' or 'uk'. - * The number of rules for all top-level-domains and 2nd- or 3rd- level domains - * has become quite long; essentially the broadest domain a subdomain may assign - * to is the one that was sold/registered to a specific name registrant. - * - * This concept should be useful in other contexts, too. Grouping URIs (or - * queues of URIs to crawl) together with others sharing the same registered - * suffix may be useful for applying the same rules to all, such as assigning - * them to the same queue or crawler in a multi- machine setup. - * - * As of Heritrix3, we prefer the term 'Assignment Level Domain' (ALD) - * for such domains, by analogy to 'Top Level Domain' (TLD) or '2nd Level - * Domain' (2LD), etc. - * - * @author Gojomo - * - * this version of PublicSuffixes uses suffix-tree data structure for generating less - * redundant regular expression. It may be even possible to write a light-weight, - * thread-safe matcher based on this class. - * @author Kenji Nagahashi - */ -public class PublicSuffixes { - protected static Pattern topmostAssignedSurtPrefixPattern; - protected static String topmostAssignedSurtPrefixRegex; - - /** - * prefix tree node. each Node represents sequence of letters (prefix) - * and alternative sequences following it (list of Node's). Nodes in - * {@code branches} are sorted for skip list like lookup and for generating - * effective regular expression (see {@link #compareTo(Node)} and {@link #compareTo(char).) - * - * as is intended for internal use only, there's no access methods. 
procedures for updating - * prefix tree with new input are defined within this class ({@link #addBranch(CharSequence)}). - * - * terminal node could be represented in two different form: 1) Node with zero branches, - * or 2) Node with zero-length {@code cs}. So, root node must be initialized with empty (not null) - * {@code branches} unless empty string matches the overall pattern. - * {@code cs} must not be null except for root node. - */ - public static class Node implements Comparable { - protected CharSequence cs; - protected List branches; - public Node() { - this("", null); - } - protected Node(CharSequence cs) { - this(cs, null); - } - protected Node(CharSequence cs, List branches) { - this.cs = cs; - this.branches = branches; - } - public void addBranch(CharSequence s) { - if (branches == null) { - branches = new ArrayList(); - branches.add(new Node("", null)); - } - for (int i = 0; i < branches.size(); i++) { - Node alt = branches.get(i); - if (alt.add(s)) return; - if (alt.compareTo(s.charAt(0)) > 0) { - Node alt1 = new Node(s, null); - branches.add(i, alt1); - return; - } - } - Node alt2 = new Node(s, null); - branches.add(alt2); - } - public boolean add(CharSequence s) { - int l = Math.min(s.length(), cs.length()); - int i = 0; - while (i < l && s.charAt(i) == cs.charAt(i)) - i++; - // zero-length match holds only when both cs and s are empty. - if (i == 0) return cs.length() == 0 && s.length() == 0; - if (i < cs.length()) { - CharSequence cs0 = cs.subSequence(0, i); - CharSequence cs1 = cs.subSequence(i, cs.length()); - CharSequence cs2 = s.subSequence(i, s.length()); - cs = cs0; - Node alt1 = new Node(cs1, branches); - (branches = new ArrayList()).add(alt1); - addBranch(cs2); - } else { - assert i == cs.length(); - addBranch(s.subSequence(i, s.length())); - } - return true; - } - public int compareTo(Node other) { - if (other.cs == null || other.cs.length() == 0) - return (cs == null || cs.length() == 0) ? 
0 : -1; - return compareTo(other.cs.charAt(0)); - } - public int compareTo(char oc) { - if (cs == null || cs.length() == 0) return 1; - // '!' and '*' must come after ordinary letters, in this order, for regexp - // to work as intended. - char c = cs.charAt(0); - if (c == oc) return 0; - if (c == '!') return oc == '*' ? -1 : 1; - if (c == '*') return 1; - if (oc == '*' || oc == '!') return -1; - return Character.valueOf(c).compareTo(oc); - // for generating the same regexp as previous version. - //return Character.valueOf(oc).compareTo(c); - } - } - - /** - * Utility method for dumping a regex String, based on a published public - * suffix list, which matches any SURT-form hostname up through the broadest - * 'private' (assigned/sold) domain-segment. That is, for any of the - * SURT-form hostnames... - * - * com,example, com,example,www, com,example,california,www - * - * ...the regex will match 'com,example,'. - * - * @param args - * @throws IOException - */ - public static void main(String args[]) throws IOException { - InputStream is; - if (args.length == 0 || "=".equals(args[0])) { - // use bundled list - is = PublicSuffixes.class.getClassLoader().getResourceAsStream( - "effective_tld_names.dat"); - } else { - is = new FileInputStream(args[0]); - } - BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); - String regex = getTopmostAssignedSurtPrefixRegex(reader); - IOUtils.closeQuietly(is); - - boolean needsClose = false; - BufferedWriter writer; - if (args.length >= 2) { - // write to specified file - writer = new BufferedWriter(new FileWriter(args[1])); - needsClose = true; - } else { - // write to stdout - writer = new BufferedWriter(new OutputStreamWriter(System.out)); - } - writer.append(regex); - writer.flush(); - if (needsClose) { - writer.close(); - } - } - /** - * Reads a file of the format promulgated by publicsuffix.org, ignoring - * comments and '!' exceptions/notations, converting domain segments to - * SURT-ordering. 
Leaves glob-style '*' wildcarding in place. Returns root - * node of SURT-ordered prefix tree. - * - * @param reader - * @return root of prefix tree node. - * @throws IOException - */ - protected static Node readPublishedFileToSurtTrie(BufferedReader reader) throws IOException { - // initializing with empty Alt list prevents empty pattern from being - // created for the first addBranch() - Node alt = new Node(null, new ArrayList()); - String line; - while ((line = reader.readLine()) != null) { - // discard whitespace, empty lines, comments, exceptions - line = line.trim(); - if (line.length() == 0 || line.startsWith("//")) continue; - // discard utf8 notation after entry - line = line.split("\\s+")[0]; - // TODO: maybe we don't need to create lower-cased String - line = line.toLowerCase(); - // SURT-order domain segments - String[] segs = line.split("\\."); - StringBuilder sb = new StringBuilder(); - for (int i = segs.length - 1; i >= 0; i--) { - if (segs[i].length() == 0) continue; - sb.append(segs[i]).append(','); - } - alt.addBranch(sb.toString()); - } - return alt; - } - /** - * utility function for dumping prefix tree structure. intended for debug use. - * @param alt root of prefix tree. - * @param lv indent level. 0 for root (no indent). - * @param out writer to send output to. - */ - public static void dump(Node alt, int lv, PrintWriter out) { - for (int i = 0; i < lv; i++) - out.print(" "); - out.println(alt.cs != null ? ('"'+alt.cs.toString()+'"') : "(null)"); - if (alt.branches != null) { - for (Node br : alt.branches) { - dump(br, lv + 1, out); - } - } - } - /** - * bulids regular expression from prefix-tree {@code alt} into buffer {@code sb}. - * @param alt prefix tree root. - * @param sb StringBuffer to store regular expression. - */ - protected static void buildRegex(Node alt, StringBuilder sb) { - String close = null; - if (alt.cs != null) { - // actually '!' always be the first character, because it is - // always used along with '*'. 
- for (int i = 0; i < alt.cs.length(); i++) { - char c = alt.cs.charAt(i); - if (c == '!') { - if (close != null) - throw new RuntimeException("more than one '!'"); - sb.append("(?="); - close = ")"; - } else if (c == '*') { - sb.append("[-\\w]+"); - } else { - sb.append(c); - } - } - } - if (alt.branches != null) { - // alt.branches.size() should always be > 1 - if (alt.branches.size() > 1) { - sb.append("(?:"); - } - String sep = ""; - for (Node alt1 : alt.branches) { - sb.append(sep); sep = "|"; - buildRegex(alt1, sb); - } - if (alt.branches.size() > 1) { - sb.append(")"); - } - } - if (close != null) - sb.append(close); - } - - /** - * Converts SURT-ordered list of public prefixes into a Java regex which - * matches the public-portion "plus one" segment, giving the domain on which - * cookies can be set or other policy grouping should occur. Also adds to - * regex a fallback matcher that for any new/unknown TLDs assumes the - * second-level domain is assignable. (Eg: 'zzz,example,'). 
- * - * @param list - * @return - */ - private static String surtPrefixRegexFromTrie(Node trie) { - StringBuilder regex = new StringBuilder(); - regex.append("(?ix)^\n"); - trie.addBranch("*,"); // for new/unknown TLDs - buildRegex(trie, regex); - regex.append("\n([-\\w]+,)"); - return regex.toString(); - } - - public static synchronized Pattern getTopmostAssignedSurtPrefixPattern() { - if (topmostAssignedSurtPrefixPattern == null) { - topmostAssignedSurtPrefixPattern = Pattern - .compile(getTopmostAssignedSurtPrefixRegex()); - } - return topmostAssignedSurtPrefixPattern; - } - - public static synchronized String getTopmostAssignedSurtPrefixRegex() { - if (topmostAssignedSurtPrefixRegex == null) { - // use bundled list - try { - BufferedReader reader = new BufferedReader(new InputStreamReader( - PublicSuffixes.class.getClassLoader().getResourceAsStream( - "effective_tld_names.dat"), "UTF-8")); - topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader); - IOUtils.closeQuietly(reader); - } catch (UnsupportedEncodingException ex) { - // should never happen - throw new RuntimeException(ex); - } - } - return topmostAssignedSurtPrefixRegex; - } - - public static String getTopmostAssignedSurtPrefixRegex(BufferedReader reader) { - try { - Node trie = readPublishedFileToSurtTrie(reader); - return surtPrefixRegexFromTrie(trie); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - /** - * Truncate SURT to its topmost assigned domain segment; that is, - * the public suffix plus one segment, but as a SURT-ordered prefix. - * - * if the pattern doesn't match, the passed-in SURT is returned. 
- * - * @param surt SURT to truncate - * @return truncated-to-topmost-assigned SURT prefix - */ - public static String reduceSurtToAssignmentLevel(String surt) { - Matcher matcher = TextUtils.getMatcher( - getTopmostAssignedSurtPrefixRegex(), surt); - if (matcher.find()) { - surt = matcher.group(); - } - TextUtils.recycleMatcher(matcher); - return surt; - } -} diff --git a/src/test/java/org/archive/url/PublicSuffixesTest.java b/src/test/java/org/archive/url/PublicSuffixesTest.java deleted file mode 100644 index e2bb288a..00000000 --- a/src/test/java/org/archive/url/PublicSuffixesTest.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.url; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.ArrayList; -import java.util.regex.Matcher; - -import junit.framework.TestCase; - -import org.archive.url.PublicSuffixes.Node; - -/** - * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches - * from constructed regex. 
- * - * @author gojomo - */ -public class PublicSuffixesTest extends TestCase { - // test of low level implementation - - public void testCompare() { - Node n = new Node("hoge"); - assertTrue(n.compareTo('a') > 0); - assertEquals(-1, n.compareTo('*')); - assertEquals(-1, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(-1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("*,"); - assertEquals(1, n.compareTo('a')); - assertEquals(0, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node("*,"))); - assertEquals(1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("!hoge"); - assertEquals(1, n.compareTo('a')); - assertEquals(-1, n.compareTo('*')); - assertEquals(0, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(0, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node(""); - assertEquals(1, n.compareTo('a')); - assertEquals(1, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node(""))); - } - - protected String dump(Node alt) { - StringWriter w = new StringWriter(); - PublicSuffixes.dump(alt, 0, new PrintWriter(w)); - return w.toString(); - } - public void testTrie1() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - // specifically, should not have empty string as match. 
- assertEquals("(null)\n" + - " \"ac,\"\n", dump(alt)); - alt.addBranch("ac,com,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"\"\n", dump(alt)); - alt.addBranch("ac,edu,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"edu,\"\n" + - " \"\"\n", dump(alt)); - } - public void testTrie2() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("*,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"*,\"\n", dump(alt)); - } - - public void testTrie3() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("ac,!hoge,"); - alt.addBranch("ac,*,"); - // exception goes first. - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"!hoge,\"\n" + - " \"*,\"\n" + - " \"\"\n", dump(alt)); - } - - // test of higher-level functionality - - Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() - .matcher(""); - - public void testBasics() { - matchPrefix("com,example,www,", "com,example,"); - matchPrefix("com,example,", "com,example,"); - matchPrefix("org,archive,www,", "org,archive,"); - matchPrefix("org,archive,", "org,archive,"); - matchPrefix("fr,yahoo,www,", "fr,yahoo,"); - matchPrefix("fr,yahoo,", "fr,yahoo,"); - matchPrefix("au,com,foobar,www,", "au,com,foobar,"); - matchPrefix("au,com,foobar,", "au,com,foobar,"); - matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); - matchPrefix("uk,co,virgin,", "uk,co,virgin,"); - matchPrefix("au,com,example,www,", "au,com,example,"); - matchPrefix("au,com,example,", "au,com,example,"); - matchPrefix("jp,tokyo,public,assigned,www,", - "jp,tokyo,public,assigned,"); - matchPrefix("jp,tokyo,public,assigned,", "jp,tokyo,public,assigned,"); - } - - public void testDomainWithDash() { - matchPrefix("de,bad-site,www", "de,bad-site,"); - } - - public void testDomainWithNumbers() { - matchPrefix("de,archive4u,www", "de,archive4u,"); - } - - public void testIPV4() { - assertEquals("unexpected reduction", - "1.2.3.4", - 
PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); - } - - public void testIPV6() { - assertEquals("unexpected reduction", - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", - PublicSuffixes.reduceSurtToAssignmentLevel( - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); - } - - public void testExceptions() { - matchPrefix("uk,bl,www,", "uk,bl,"); - matchPrefix("uk,bl,", "uk,bl,"); - matchPrefix("jp,tokyo,metro,subdomain,", "jp,tokyo,metro,"); - matchPrefix("jp,tokyo,metro,", "jp,tokyo,metro,"); - } - - public void testFakeTLD() { - // we assume any new/unknonwn TLD should be assumed as 2-level; - // this is preferable for our grouping purpose but might not be - // for a cookie-assigning browser (original purpose of publicsuffixlist) - matchPrefix("zzz,example,www,", "zzz,example,"); - } - - public void testUnsegmentedHostname() { - m.reset("example"); - assertFalse("unexpected match found in 'example'", m.find()); - } - - public void testTopmostAssignedCaching() { - assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); - assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); - } - - // TODO: test UTF domains? - - protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { - m.reset(surtDomain); - assertTrue("expected match not found in '" + surtDomain, m.find()); - assertEquals("expected match not found", expectedAssignedPrefix, m - .group()); - } -} From 5226f18045b2f6eb01e3526198f79e8149964cc1 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 20:21:21 +0000 Subject: [PATCH 14/28] Added config for deploying snapshots. 
--- .travis.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 52ea3bf1..c2fc63fd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,20 @@ language: java jdk: - oraclejdk7 +before_install: "git clone git@github.com:iipc/travis.git target/travis" before_script: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" - "echo $JAVA_OPTS" - - "ulimit -a" - - "ulimit -u 2048" - - "ulimit -a" +script: "target/travis/deploy-if.sh" + +# whitelist +branches: + only: + - master + +env: + global: + - secure: "qDKjVdoe4Qcz4WfXiQydU7tyl51T62FUJrjqu4FUPBcgeQhFQiggwhpaE6xCOzOpxbsuBi2R1c8gMQf5esE5iDL5jZMu+kz++dYbuzMTd13ttvZWMW5wRPH0H8iHk609FP/RDtVKKBr7WO0JvvIAZEhWNHZrLXBrrKgdTey171g=" + - secure: "FXGBKJNP9X7ePJfS4eYTZtoFo4RT1sxor34XxncSJr7uV6ggtZb4B4WNd16IlLcDk6E32sx8YoWdltaOGwQ5Vg/kux5Ko/wKZCoccS018Ln1bRT86dD1KoPY34rGoNJVQxe7J/1MPqpBKwmi2XCKfzpsEh3W7bbIqg8w9MEOOZA=" + From 70a49689f3db72649079208e5fe1947b25f90f5b Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 21:03:53 +0000 Subject: [PATCH 15/28] Switched to HTTPS. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c2fc63fd..9d7d2e50 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: java jdk: - oraclejdk7 -before_install: "git clone git@github.com:iipc/travis.git target/travis" +before_install: "git clone https://github.com/iipc/travis.git target/travis" before_script: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" From 8e98a2a32069e1e6b606cf1a6fed7ae130b8e888 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 21:27:33 +0000 Subject: [PATCH 16/28] Attempting rebuild. 
--- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 9d7d2e50..fc98b3c9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,9 @@ language: java jdk: - oraclejdk7 + before_install: "git clone https://github.com/iipc/travis.git target/travis" + before_script: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" From 1f246c9257d6a8828150a165a2b3cb43a07508b2 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 22:01:59 +0000 Subject: [PATCH 17/28] whitespace tidy up --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fc98b3c9..88c5f0fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ language: java + jdk: - oraclejdk7 @@ -10,7 +11,7 @@ before_script: - "echo $JAVA_OPTS" script: "target/travis/deploy-if.sh" -# whitelist +# whitelist in the master branch only branches: only: - master From 59045f16438881a0e67ba3846c8420e56c27c74b Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 11 Dec 2013 22:47:39 +0000 Subject: [PATCH 18/28] One more tweak. --- .travis.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 88c5f0fb..c1dbf348 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,13 +3,16 @@ language: java jdk: - oraclejdk7 -before_install: "git clone https://github.com/iipc/travis.git target/travis" +before_install: + - "git clone https://github.com/iipc/travis.git target/travis" before_script: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" - "echo $JAVA_OPTS" -script: "target/travis/deploy-if.sh" + +script: + - "target/travis/deploy-if.sh" # whitelist in the master branch only branches: From ac864906fc365c7812e2209f3db18722517d141e Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 09:09:09 +0000 Subject: [PATCH 19/28] Upped number of open files allowed. 
--- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index c1dbf348..1cdf7339 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,9 @@ before_script: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" - "echo $JAVA_OPTS" + - "ulimit -a" + - "ulimit -u 2048" + - "ulimit -a" script: - "target/travis/deploy-if.sh" From a3e17dd37d9472c1f5d6c29b3b0feeb44cf22efd Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 09:33:08 +0000 Subject: [PATCH 20/28] Added build status. --- .travis.yml | 5 +---- README.md | 2 ++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1cdf7339..0dfd3f7f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,12 +7,9 @@ before_install: - "git clone https://github.com/iipc/travis.git target/travis" before_script: - - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx1024m" - - "echo $JAVA_OPTS" - - "ulimit -a" + - "export MAVEN_OPTS=-Xmx512m" - "ulimit -u 2048" - - "ulimit -a" script: - "target/travis/deploy-if.sh" diff --git a/README.md b/README.md index b70f8318..ae865f7e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ OpenWayback Web Commons ======================= +[![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) + This repository contains common utility code for the OpenWayback project. From 7653cc0dbc5dc75167761a98de01bec79bf530bd Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 09:44:14 +0000 Subject: [PATCH 21/28] Added link to parent project. 
--- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ae865f7e..9bd2e12a 100644 --- a/README.md +++ b/README.md @@ -3,4 +3,6 @@ OpenWayback Web Commons [![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) -This repository contains common utility code for the OpenWayback project. +This repository contains common utility code for the [OpenWayback][1] project. + +[1]: https://github.com/iipc/openwayback From 3f397e63873d5c7ed38c2190d55f2870498c6ea5 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 09:49:11 +0000 Subject: [PATCH 22/28] Bumped version number due to large number of changes adding functionality. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a1d3de27..0c1a06d4 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons commons-web - 1.0.1-SNAPSHOT + 1.1.0-SNAPSHOT jar iipc-web-commons From a75863022c02659e6eb590b586a4822757dc8d8f Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 11:12:12 +0000 Subject: [PATCH 23/28] [maven-release-plugin] prepare release commons-web-1.1.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0c1a06d4..cfb3a0cb 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons commons-web - 1.1.0-SNAPSHOT + 1.1.0 jar iipc-web-commons From d898af145b827d4ebf08fdc858d90dc24b702485 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Thu, 12 Dec 2013 11:12:16 +0000 Subject: [PATCH 24/28] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index cfb3a0cb..b6c8994d 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.netpreserve.commons commons-web - 1.1.0 + 1.1.1-SNAPSHOT jar iipc-web-commons From 
c94e14b4f0ce990a147b306a9f6986f9f79183b1 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 16 Jan 2014 18:45:02 -0800 Subject: [PATCH 25/28] use iipc-web-commons consistently as the project name --- README.md | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9bd2e12a..ee3f62ce 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -OpenWayback Web Commons +IIPC Web Commons ======================= [![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) -This repository contains common utility code for the [OpenWayback][1] project. +This repository contains common utility code for [OpenWayback][1] and other projects. [1]: https://github.com/iipc/openwayback diff --git a/pom.xml b/pom.xml index b6c8994d..21bca897 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons - commons-web + iipc-web-commons 1.1.1-SNAPSHOT jar From 7bc7cccc2d8a934c9eddbfdb44c1e06a60fcd075 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 16 Jan 2014 19:22:34 -0800 Subject: [PATCH 26/28] Merge from iipc master --- .travis.yml | 26 +++ LICENSE | 191 ++++++++++++++++++ README.md | 8 + pom.xml | 92 +++++---- .../org/archive/url/UsableURIFactory.java | 2 +- 5 files changed, 281 insertions(+), 38 deletions(-) create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 README.md diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..0dfd3f7f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,26 @@ +language: java + +jdk: + - oraclejdk7 + +before_install: + - "git clone https://github.com/iipc/travis.git target/travis" + +before_script: + - "export JAVA_OPTS=-Xmx1024m" + - "export MAVEN_OPTS=-Xmx512m" + - "ulimit -u 2048" + +script: + - "target/travis/deploy-if.sh" + +# whitelist in the master branch only +branches: + only: + - master + +env: + global: + - secure: 
"qDKjVdoe4Qcz4WfXiQydU7tyl51T62FUJrjqu4FUPBcgeQhFQiggwhpaE6xCOzOpxbsuBi2R1c8gMQf5esE5iDL5jZMu+kz++dYbuzMTd13ttvZWMW5wRPH0H8iHk609FP/RDtVKKBr7WO0JvvIAZEhWNHZrLXBrrKgdTey171g=" + - secure: "FXGBKJNP9X7ePJfS4eYTZtoFo4RT1sxor34XxncSJr7uV6ggtZb4B4WNd16IlLcDk6E32sx8YoWdltaOGwQ5Vg/kux5Ko/wKZCoccS018Ln1bRT86dD1KoPY34rGoNJVQxe7J/1MPqpBKwmi2XCKfzpsEh3W7bbIqg8w9MEOOZA=" + diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..37ec93a1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. +For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. 
+ +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. 
+ +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. 
+ +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. 
+ +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 00000000..9bd2e12a --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +OpenWayback Web Commons +======================= + +[![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) + +This repository contains common utility code for the [OpenWayback][1] project. + +[1]: https://github.com/iipc/openwayback diff --git a/pom.xml b/pom.xml index 4a7290c7..b6c8994d 100644 --- a/pom.xml +++ b/pom.xml @@ -1,15 +1,55 @@ - + 4.0.0 - org.archive - ia-web-commons + + org.sonatype.oss + oss-parent + 7 + + + + org.netpreserve.commons + commons-web 1.1.1-SNAPSHOT jar - ia-web-commons - http://maven.apache.org - + iipc-web-commons + https://github.com/iipc/iipc-web-commons + + + The International Internet Preservation Consortium + http://netpreserve.org/ + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + many-devs + Many Others Developers Proceed Me + many@dev.org + + + anjackson + Andrew Jackson + Andrew.Jackson@bl.uk + + + + GitHub Issues + https://github.com/iipc/iipc-web-commons/issues + + + scm:git:git@github.com:iipc/iipc-web-commons.git + scm:git:git@github.com:iipc/iipc-web-commons.git + git@github.com:iipc/iipc-web-commons.git + + + UTF-8 ${maven.build.timestamp} @@ -42,7 +82,7 @@ - org.mozilla + 
com.googlecode.juniversalchardet juniversalchardet 1.0.3 @@ -86,6 +126,10 @@ tomcat jasper-compiler + + hsqldb + hsqldb + @@ -115,8 +159,8 @@ it.unimi.dsi - mg4j - 1.0.1 + dsiutils + 2.0.12 compile @@ -155,7 +199,7 @@ jar-with-dependencies - ia-web-commons + iipc-web-commons @@ -176,24 +220,6 @@ - - internetarchive - Internet Archive Maven Repository - http://builds.archive.org:8080/maven2 - default - - - true - daily - warn - - - true - daily - warn - - - cloudera Cloudera Hadoop @@ -214,12 +240,4 @@ - - - repository - - ${repository.url} - - - diff --git a/src/main/java/org/archive/url/UsableURIFactory.java b/src/main/java/org/archive/url/UsableURIFactory.java index 46b8e119..9118b850 100644 --- a/src/main/java/org/archive/url/UsableURIFactory.java +++ b/src/main/java/org/archive/url/UsableURIFactory.java @@ -20,7 +20,7 @@ import gnu.inet.encoding.IDNA; import gnu.inet.encoding.IDNAException; -import it.unimi.dsi.mg4j.util.MutableString; +import it.unimi.dsi.lang.MutableString; import java.io.UnsupportedEncodingException; import java.util.BitSet; From 2d5ab076e8d440f2352c8fed0874f5eb5c548383 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Tue, 28 Jan 2014 16:18:31 +0000 Subject: [PATCH 27/28] Switched to webarchive-commons naming. 
--- README.md | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ee3f62ce..fc45fdb3 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -IIPC Web Commons -======================= +IIPC Web Archive Commons +======================== [![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) diff --git a/pom.xml b/pom.xml index 21bca897..5855e09b 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons - iipc-web-commons + webarchive-commons 1.1.1-SNAPSHOT jar From d05415f1a20dc42d091ed9b84bc70d14a234cd25 Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Wed, 12 Feb 2014 16:34:07 +0000 Subject: [PATCH 28/28] Updated to new name. --- README.md | 2 +- pom.xml | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index fc45fdb3..72858a52 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ IIPC Web Archive Commons ======================== -[![Build Status](https://travis-ci.org/iipc/iipc-web-commons.png?branch=master)](https://travis-ci.org/iipc/iipc-web-commons/) +[![Build Status](https://travis-ci.org/iipc/webarchive-commons.png?branch=master)](https://travis-ci.org/iipc/webarchive-commons/) This repository contains common utility code for [OpenWayback][1] and other projects. 
diff --git a/pom.xml b/pom.xml index 5855e09b..cfd201b0 100644 --- a/pom.xml +++ b/pom.xml @@ -13,8 +13,8 @@ 1.1.1-SNAPSHOT jar - iipc-web-commons - https://github.com/iipc/iipc-web-commons + webarchive-commons + https://github.com/iipc/webarchive-commons The International Internet Preservation Consortium @@ -41,12 +41,12 @@ GitHub Issues - https://github.com/iipc/iipc-web-commons/issues + https://github.com/iipc/webarchive-commons/issues - scm:git:git@github.com:iipc/iipc-web-commons.git - scm:git:git@github.com:iipc/iipc-web-commons.git - git@github.com:iipc/iipc-web-commons.git + scm:git:git@github.com:iipc/webarchive-commons.git + scm:git:git@github.com:iipc/webarchive-commons.git + git@github.com:iipc/webarchive-commons.git @@ -199,7 +199,7 @@ jar-with-dependencies - iipc-web-commons + webarchive-commons