From c2530d77b73838c31f4e83f2be941ec61032ebb2 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 16 Mar 2021 11:58:11 +0100 Subject: [PATCH 001/169] Fix InterruptibleCharSequenceTest (testInterruptibility) to run on JDK 11 - if thread running the regexp matching is already finished after the initial/current sleeping time, rerun the test again with a shorter sleeping time until the expected RuntimeException is hit --- .../util/InterruptibleCharSequenceTest.java | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/test/java/org/archive/util/InterruptibleCharSequenceTest.java b/src/test/java/org/archive/util/InterruptibleCharSequenceTest.java index a3a5f180..8b5c5d1b 100644 --- a/src/test/java/org/archive/util/InterruptibleCharSequenceTest.java +++ b/src/test/java/org/archive/util/InterruptibleCharSequenceTest.java @@ -107,14 +107,24 @@ public void testNoninterruptible() throws InterruptedException { } public void testInterruptibility() throws InterruptedException { - BlockingQueue q = new LinkedBlockingQueue(); - Thread t = tryMatchInThread(new InterruptibleCharSequence(INPUT), BACKTRACKER, q); - Thread.sleep(500); - t.interrupt(); - Object result = q.take(); - if(result instanceof Boolean) { - System.err.println(result+" match beat interrupt"); + long sleepMillis = 512; + while (sleepMillis > 0) { + BlockingQueue q = new LinkedBlockingQueue(); + Thread t = tryMatchInThread(new InterruptibleCharSequence(INPUT), BACKTRACKER, q); + Thread.sleep(sleepMillis); + if (t.getState() == Thread.State.TERMINATED) { + sleepMillis /= 2; + System.err.println("already done, retrying with shorter sleep time: " + sleepMillis + "ms"); + continue; + } + t.interrupt(); + Object result = q.take(); + if(result instanceof Boolean) { + System.err.println(result+" match beat interrupt"); + } + assertTrue("exception not thrown",result instanceof RuntimeException); + return; } - assertTrue("exception not thrown",result instanceof RuntimeException); 
+ fail("failed to interrupt InterruptibleCharSequence with given sleeping intervals"); } } From ad6904bc0d43538806d3a4c00c636183e40392a4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Apr 2021 17:20:03 +0000 Subject: [PATCH 002/169] Bump commons-io from 2.4 to 2.7 Bumps commons-io from 2.4 to 2.7. Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5ca7e1a3..67785dd2 100644 --- a/pom.xml +++ b/pom.xml @@ -152,7 +152,7 @@ commons-io commons-io - 2.4 + 2.7 From efbd7616bbc4b7d700b3923a52ade2de2f5a00e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristinn=20Sigur=C3=B0sson?= Date: Fri, 3 Nov 2023 09:32:36 +0000 Subject: [PATCH 003/169] Update to dsiutils 2.2.8 Oldest version to not depend on log4j 1 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5ca7e1a3..067ae72b 100644 --- a/pom.xml +++ b/pom.xml @@ -163,7 +163,7 @@ it.unimi.dsi dsiutils - 2.0.12 + 2.2.8 compile From 4a9f2807396a6199b9681a02dceb1b63e59f4863 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 10 Sep 2024 14:03:58 +0900 Subject: [PATCH 004/169] Add github action to run tests --- .github/workflows/maven.yml | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/maven.yml diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml new file mode 100644 index 00000000..ea8a34e9 --- /dev/null +++ b/.github/workflows/maven.yml @@ -0,0 +1,38 @@ +name: Java CI with Maven + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + strategy: + matrix: + jdk: [8, 11, 17, 21, 22] + + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.jdk }} + uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.jdk }} + distribution: 'temurin' + 
cache: maven + - name: Cache local Maven repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + - name: Build with Maven + run: mvn -B package --file pom.xml + + # Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive + - name: Update dependency graph + uses: advanced-security/maven-dependency-submission-action@571e99aab1055c2e71a1e2309b9691de18d6b7d6 From b389fe5b3b880f9eeb7a6b3612a91724ad968347 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 10 Sep 2024 14:10:57 +0900 Subject: [PATCH 005/169] Update source & target version from 1.6 to 8 1.6 isn't supported on newer JDK versions. --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 5ca7e1a3..3272c7b6 100644 --- a/pom.xml +++ b/pom.xml @@ -204,8 +204,8 @@ maven-compiler-plugin 2.3.2 - 1.6 - 1.6 + 8 + 8 From 7f9dc992fe374d3230f77e979971e6971414f86e Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 10 Sep 2024 14:38:48 +0900 Subject: [PATCH 006/169] Add matrix config to maven-dependency-submission-action --- .github/workflows/maven.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index ea8a34e9..8b675913 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -36,3 +36,6 @@ jobs: # Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive - name: Update dependency graph uses: advanced-security/maven-dependency-submission-action@571e99aab1055c2e71a1e2309b9691de18d6b7d6 + with: + directory: ${{ matrix.directory }} + correlator: ${{ github.job }}-${{ matrix.directory }} \ No newline at end of file From db88f33fb492f0f0a98ff7d851ff9cb06e0a5ec5 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 10 Sep 2024 
14:41:47 +0900 Subject: [PATCH 007/169] Update to maven-dependency-submission-action@v4.1.1 and remove matrix stuff I don't think the matrix stuff actually fixes the problem. --- .github/workflows/maven.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 8b675913..db0d8677 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -35,7 +35,4 @@ jobs: # Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive - name: Update dependency graph - uses: advanced-security/maven-dependency-submission-action@571e99aab1055c2e71a1e2309b9691de18d6b7d6 - with: - directory: ${{ matrix.directory }} - correlator: ${{ github.job }}-${{ matrix.directory }} \ No newline at end of file + uses: advanced-security/maven-dependency-submission-action@v4.1.1 \ No newline at end of file From 0f0d68bb771d5e2fb66590c782aa6da50043599a Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 10 Sep 2024 14:46:03 +0900 Subject: [PATCH 008/169] Only run maven-dependency-submission-action on push Hopefully fixes test failures on PRs. 
--- .github/workflows/maven.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index db0d8677..8bb55c4e 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -35,4 +35,5 @@ jobs: # Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive - name: Update dependency graph + if: ${{ github.event_name == 'push' }} uses: advanced-security/maven-dependency-submission-action@v4.1.1 \ No newline at end of file From cddea766638c92bd74072346de3466b2a570c714 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 10 Sep 2024 05:51:53 +0000 Subject: [PATCH 009/169] Bump org.json:json from 20131018 to 20231013 Bumps [org.json:json](https://github.com/douglascrockford/JSON-java) from 20131018 to 20231013. - [Release notes](https://github.com/douglascrockford/JSON-java/releases) - [Changelog](https://github.com/stleary/JSON-java/blob/master/docs/RELEASES.md) - [Commits](https://github.com/douglascrockford/JSON-java/commits) --- updated-dependencies: - dependency-name: org.json:json dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index fee61789..a496d3b8 100644 --- a/pom.xml +++ b/pom.xml @@ -76,7 +76,7 @@ org.json json - 20131018 + 20231013 org.htmlparser From cb1b2af3de78289c648d3856692a188d19e5a412 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 11 Sep 2024 14:13:02 +0900 Subject: [PATCH 010/169] Bump httpclient from 3.1 to 4.5.14 --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index a496d3b8..4c325df8 100644 --- a/pom.xml +++ b/pom.xml @@ -91,9 +91,9 @@ - commons-httpclient - commons-httpclient - 3.1 + org.apache.httpcomponents + httpclient + 4.5.14 From 573443b0a8d1541fe1a164dbf43ce81aa69e3c04 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 11 Sep 2024 14:16:19 +0900 Subject: [PATCH 011/169] Bump guava from 17.0 to 33.3.0-jre --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 4c325df8..83d34490 100644 --- a/pom.xml +++ b/pom.xml @@ -70,7 +70,7 @@ com.google.guava guava - 17.0 + 33.3.0-jre From 282cecce3fc92cfc891624c4575ce49a63669f8d Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 11 Sep 2024 14:39:18 +0900 Subject: [PATCH 012/169] Bump hadoop from 0.20.2-cdh3u4 to 3.4.0 and mark optional Most dependent applications don't actually use hadoop and those that do will likely want to specify their own version of it. It also pulls in a lot of transitive dependencies that consuming projects often have to exclude. 
--- pom.xml | 66 +++++++++------------------------------------------------ 1 file changed, 10 insertions(+), 56 deletions(-) diff --git a/pom.xml b/pom.xml index 83d34490..8f4dba1a 100644 --- a/pom.xml +++ b/pom.xml @@ -98,42 +98,16 @@ org.apache.hadoop - hadoop-core - 0.20.2-cdh3u4 - - - commons-httpclient - commons-httpclient - - - javax.servlet - servlet-api - - - javax.servlet.jsp - jsp-api - - - org.mortbay.jetty - jetty - - - org.mortbay.jetty - jetty-util - - - tomcat - jasper-runtime - - - tomcat - jasper-compiler - - - hsqldb - hsqldb - - + hadoop-common + 3.4.0 + true + + + + org.apache.hadoop + hadoop-mapreduce-client-core + 3.4.0 + true @@ -257,26 +231,6 @@ - - - cloudera - Cloudera Hadoop - https://repository.cloudera.com/artifactory/cloudera-repos/ - default - - - true - daily - warn - - - true - daily - warn - - - - ) + * |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->) * +---+---+---+---+---+---+---+---+---+---+ */ public class GZIPStaticHeader implements GZIPConstants { diff --git a/src/main/java/org/archive/io/ReplayCharSequence.java b/src/main/java/org/archive/io/ReplayCharSequence.java index aa9b9587..e456e293 100644 --- a/src/main/java/org/archive/io/ReplayCharSequence.java +++ b/src/main/java/org/archive/io/ReplayCharSequence.java @@ -59,7 +59,7 @@ public interface ReplayCharSequence extends CharSequence, Closeable { public long getDecodeExceptionCount(); /** - * Return the first coding-exception encountered, if the count > 0. + * Return the first coding-exception encountered, if the count > 0. * @return CharacterCodingException */ public CharacterCodingException getCodingException(); diff --git a/src/main/java/org/archive/io/arc/ARCWriter.java b/src/main/java/org/archive/io/arc/ARCWriter.java index 0bd0ef9b..c7042943 100644 --- a/src/main/java/org/archive/io/arc/ARCWriter.java +++ b/src/main/java/org/archive/io/arc/ARCWriter.java @@ -86,7 +86,7 @@ * write our own GZIP*Streams, ones that resettable and consious of gzip * members. * - *

This class will write until we hit >= maxSize. The check is done at + *

This class will write until we hit >= maxSize. The check is done at * record boundary. Records do not span ARC files. We will then close current * file and open another and then continue writing. * @@ -95,9 +95,9 @@ * alexa * ARC c-tools: *

- * % av_procarc hx20040109230030-0.arc.gz | av_ziparc > \
+ * % av_procarc hx20040109230030-0.arc.gz | av_ziparc > \
  *     /tmp/hx20040109230030-0.dat.gz
- * % av_ripdat /tmp/hx20040109230030-0.dat.gz > /tmp/hx20040109230030-0.cdx
+ * % av_ripdat /tmp/hx20040109230030-0.dat.gz > /tmp/hx20040109230030-0.cdx
  * 
* Examine the produced cdx file to make sure it makes sense. Search * for 'no-type 0'. If found, then we're opening a gzip record w/o data to diff --git a/src/main/java/org/archive/util/DateUtils.java b/src/main/java/org/archive/util/DateUtils.java index 0be20e63..7d6a7c98 100755 --- a/src/main/java/org/archive/util/DateUtils.java +++ b/src/main/java/org/archive/util/DateUtils.java @@ -557,7 +557,7 @@ private static String doubleToString(double val, int maxFractionDigits, int minF * Takes a byte size and formats it for display with 'friendly' units. *

* This involves converting it to the largest unit - * (of B, KiB, MiB, GiB, TiB) for which the amount will be > 1. + * (of B, KiB, MiB, GiB, TiB) for which the amount will be > 1. *

* Additionally, at least 2 significant digits are always displayed. *

From 0d881e967daf2a023006032dd0d015b714821b11 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 15 Oct 2024 17:42:23 +0900 Subject: [PATCH 023/169] [maven-release-plugin] prepare release webarchive-commons-1.1.10 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f0c6ac73..2dd9223b 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.10-SNAPSHOT + 1.1.10 jar webarchive-commons From 76d95ccd75ddc31c5b8c3e9136f9e422ab528898 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 15 Oct 2024 17:42:28 +0900 Subject: [PATCH 024/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2dd9223b..dc3088f0 100644 --- a/pom.xml +++ b/pom.xml @@ -9,7 +9,7 @@ org.netpreserve.commons webarchive-commons - 1.1.10 + 1.1.11-SNAPSHOT jar webarchive-commons From 835f4e115b2cd288bed3f703136a7325c81fa751 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Sat, 9 Nov 2024 20:27:47 +0100 Subject: [PATCH 025/169] Make MetaData multi-valued to preserve values of repeating WARC and HTTP headers - code cleanup: fix indentation, remove unneeded return statements --- src/main/java/org/archive/resource/MetaData.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/archive/resource/MetaData.java b/src/main/java/org/archive/resource/MetaData.java index 30ce849b..fb3b24a4 100755 --- a/src/main/java/org/archive/resource/MetaData.java +++ b/src/main/java/org/archive/resource/MetaData.java @@ -83,7 +83,6 @@ public int optInt(String key, int defaultValue) { return super.getInt(key); } catch(JSONException e) { LOG.severe(e.getMessage()); - return defaultValue; } } return defaultValue; @@ -106,7 +105,6 @@ public long optLong(String key, long defaultValue) { return super.getLong(key); } catch(JSONException e) { LOG.severe(e.getMessage()); - return 
defaultValue; } } return defaultValue; @@ -167,10 +165,10 @@ public JSONObject put(String key, Object value) { ((JSONArray) super.get(key)).put(value); return this; } else { - JSONArray array = new JSONArray(); - array.put(super.get(key)); - array.put(value); - super.put(key, array); + JSONArray array = new JSONArray(); + array.put(super.get(key)); + array.put(value); + super.put(key, array); } return super.accumulate(key, value); } From a4748d9e79abb972a6571f5f4d46951be6049b1a Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Wed, 27 Nov 2024 13:24:17 +0100 Subject: [PATCH 026/169] URLParser and WaybackURLKeyMaker fail on URLs with IPv6 address hostname --- src/main/java/org/archive/url/URLParser.java | 11 ++++++++++- .../java/org/archive/url/URLRegexTransformer.java | 4 ++++ src/test/java/org/archive/url/URLParserTest.java | 3 +++ .../java/org/archive/url/WaybackURLKeyMakerTest.java | 3 +++ 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/archive/url/URLParser.java b/src/main/java/org/archive/url/URLParser.java index a7860b02..bcd0b7fb 100644 --- a/src/main/java/org/archive/url/URLParser.java +++ b/src/main/java/org/archive/url/URLParser.java @@ -226,7 +226,16 @@ public static HandyURL parse(String urlString) throws URISyntaxException { String colonPort = null; int atIndex = uriAuthority.indexOf(COMMERCIAL_AT); - int portColonIndex = uriAuthority.indexOf(COLON,(atIndex<0)?0:atIndex); + int portColonIndex = -1; + int startColonIndex = 0; + if (atIndex > -1) { + startColonIndex = atIndex; + } + if (uriAuthority.charAt(startColonIndex) == '[') { + // IPv6 address + startColonIndex = uriAuthority.indexOf(']', (startColonIndex + 1)); + } + portColonIndex = uriAuthority.indexOf(COLON, startColonIndex); if(atIndex<0 && portColonIndex<0) { // most common case: neither userinfo nor port diff --git a/src/main/java/org/archive/url/URLRegexTransformer.java b/src/main/java/org/archive/url/URLRegexTransformer.java index 617e0225..5f31c81c 
100644 --- a/src/main/java/org/archive/url/URLRegexTransformer.java +++ b/src/main/java/org/archive/url/URLRegexTransformer.java @@ -121,6 +121,10 @@ public static String hostToSURT(String host) { // TODO: ensure we DONT reverse IP addresses! String parts[] = host.split("\\.",-1); if(parts.length == 1) { + // strip enclosing "[" and "]" from IPv6 hosts + if (host.charAt(0) == '[' && host.charAt(host.length() - 1) == ']') { + return host.substring(1, host.length() - 1); + } return host; } StringBuilder sb = new StringBuilder(host.length()); diff --git a/src/test/java/org/archive/url/URLParserTest.java b/src/test/java/org/archive/url/URLParserTest.java index b060ffa7..68dfcd23 100644 --- a/src/test/java/org/archive/url/URLParserTest.java +++ b/src/test/java/org/archive/url/URLParserTest.java @@ -86,6 +86,9 @@ public void testParse() throws UnsupportedEncodingException, URISyntaxException checkParse(" \n http://:****@www.archive.org:8080/inde\rx.html?query#foo \r\n \t ", null, "http", "", "****", "www.archive.org", 8080, "/index.html", "query", "foo", "http://:****@www.archive.org:8080/index.html?query#foo", "/index.html?query"); + checkParse("https://[2600:1f18:200d:fb00:2b74:867c:ab0c:150a]/robots.txt", null, "https", null, null, + "[2600:1f18:200d:fb00:2b74:867c:ab0c:150a]", -1, "/robots.txt", null, null, + "https://[2600:1f18:200d:fb00:2b74:867c:ab0c:150a]/robots.txt", "/robots.txt"); } private void checkParse(String s, String opaque, String scheme, String authUser, diff --git a/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java b/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java index 26161456..1a1403ee 100644 --- a/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java +++ b/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java @@ -23,6 +23,9 @@ public void testMakeKey() throws URISyntaxException { assertEquals("org,archive)/goo?a&b", km.makeKey("http://archive.org/goo/?b&a")); assertEquals("org,archive)/goo?a=1&a=2&b", 
km.makeKey("http://archive.org/goo/?a=2&b&a=1")); assertEquals("org,archive)/", km.makeKey("http://archive.org:/")); + assertEquals("192,211,203,34)/robots.txt", km.makeKey("https://34.203.211.192/robots.txt")); + assertEquals("2600:1f18:200d:fb00:2b74:867c:ab0c:150a)/robots.txt", + km.makeKey("https://[2600:1f18:200d:fb00:2b74:867c:ab0c:150a]/robots.txt")); } } From 8e89847d79ea2882bc55e2d00939fd8a2ca21865 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 27 Nov 2024 21:51:58 +0900 Subject: [PATCH 027/169] Update release plugins --- pom.xml | 110 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 67 insertions(+), 43 deletions(-) diff --git a/pom.xml b/pom.xml index dc3088f0..048787a5 100644 --- a/pom.xml +++ b/pom.xml @@ -1,12 +1,6 @@ 4.0.0 - - org.sonatype.oss - oss-parent - 7 - - org.netpreserve.commons webarchive-commons 1.1.11-SNAPSHOT @@ -45,19 +39,13 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git - git@github.com:iipc/webarchive-commons.git + https://github.com/iipc/webarchive-commons UTF-8 ${maven.build.timestamp} yyyyMMddhhmmss - - - sonatype-nexus-staging - https://oss.sonatype.org/service/local/staging/deploy/maven2/ - sonatype-nexus-snapshots - https://oss.sonatype.org/content/repositories/snapshots/ @@ -201,24 +189,6 @@ 8 - - maven-assembly-plugin - 2.4 - - - jar-with-dependencies - - webarchive-commons - - - - package - - single - - - - org.apache.maven.plugins maven-enforcer-plugin @@ -251,17 +221,71 @@ - + + + release + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.7 + true + + ossrh + https://oss.sonatype.org/ + true + + + + org.apache.maven.plugins + maven-source-plugin + 2.2.1 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.9.1 + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 1.5 + + 
+ sign-artifacts + verify + + sign + + + + + + + + From 829566b1385a8dae6bc9774cd1299469f37e78c3 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 27 Nov 2024 21:53:22 +0900 Subject: [PATCH 028/169] [maven-release-plugin] prepare release webarchive-commons-1.1.11 --- pom.xml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 048787a5..28bd9145 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 1.1.11-SNAPSHOT + 1.1.11 jar webarchive-commons @@ -40,7 +40,8 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - + webarchive-commons-1.1.11 + UTF-8 From 9b0bbcfdeea7a9c2ac9a28b245bce2f8e9df5dce Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 27 Nov 2024 21:53:27 +0900 Subject: [PATCH 029/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 28bd9145..c86add9f 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 1.1.11 + 1.1.12-SNAPSHOT jar webarchive-commons @@ -40,7 +40,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-1.1.11 + HEAD From 9e4723b313a542320a4f09f4b4e2dbccdc0f58ac Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 27 Nov 2024 21:58:55 +0900 Subject: [PATCH 030/169] Update CHANGES.md --- CHANGES.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 6fe7c4bd..579b659f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,7 +1,14 @@ +1.1.11 +------ + +#### Bug fixes + +* Fixed URLParser and WaybackURLKeyMaker failing on URLs with IPv6 address hostnames [#100](https://github.com/iipc/webarchive-commons/pull/100) + 1.1.10 ------ -#### Fixes 
+#### Bug fixes * [WAT extractor: do not fail on missing WARC-Filename in warcinfo record](https://github.com/iipc/webarchive-commons/pull/89) * [ExtractingParseObserver: extract rel, hreflang and type attributes](https://github.com/iipc/webarchive-commons/pull/86) From cd2da63f1f56d41705e014e2c3290635fcc99099 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 27 Nov 2024 22:00:18 +0900 Subject: [PATCH 031/169] Add description to pom.xml (now mandatory for central) --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index c86add9f..18aca329 100644 --- a/pom.xml +++ b/pom.xml @@ -8,6 +8,7 @@ webarchive-commons https://github.com/iipc/webarchive-commons + Common web archive utility code The International Internet Preservation Consortium From 7b6df0c619899ae70e350fb0d955c00b59ba68e5 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 27 Nov 2024 22:02:31 +0900 Subject: [PATCH 032/169] [maven-release-plugin] prepare release webarchive-commons-1.1.11 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 18aca329..a57230d9 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 1.1.12-SNAPSHOT + 1.1.11 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - HEAD + webarchive-commons-1.1.11 From a70f23e8b654d3a661877641f2fa7e51d696ceeb Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 27 Nov 2024 22:02:36 +0900 Subject: [PATCH 033/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index a57230d9..18aca329 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 1.1.11 + 1.1.12-SNAPSHOT jar webarchive-commons @@ -41,7 +41,7 @@ 
scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-1.1.11 + HEAD From 0514b2387decaf5e40e24bcda0f7c70b438d0997 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 27 Nov 2024 22:08:04 +0900 Subject: [PATCH 034/169] Add Maven Central and Javadoc shields to README --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 72858a52..55be6e68 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ IIPC Web Archive Commons ======================== - -[![Build Status](https://travis-ci.org/iipc/webarchive-commons.png?branch=master)](https://travis-ci.org/iipc/webarchive-commons/) +[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.netpreserve.commons/webarchive-commons/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.netpreserve.commons/webarchive-commons) [![Javadoc](https://javadoc.io/badge2/org.netpreserve.commons/webarchive-commons/javadoc.svg)](https://www.javadoc.io/doc/org.netpreserve.commons/webarchive-commons) This repository contains common utility code for [OpenWayback][1] and other projects. 
From c6095082fdecadd6882456a51c5f91b8a3d4faa5 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 15:42:05 +0900 Subject: [PATCH 035/169] Bump guava from 33.3.0-jre to 33.3.1-jre --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 18aca329..0ac11df9 100644 --- a/pom.xml +++ b/pom.xml @@ -60,7 +60,7 @@ com.google.guava guava - 33.3.0-jre + 33.3.1-jre From 23c8887c2a3eb4d4d5b0bac0cf805c71fcaeabaf Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 15:42:41 +0900 Subject: [PATCH 036/169] Bump commons-io from 2.14.0 to 2.18.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0ac11df9..84822a4f 100644 --- a/pom.xml +++ b/pom.xml @@ -140,7 +140,7 @@ commons-io commons-io - 2.14.0 + 2.18.0 From f13c7b2a3b254a83827ad5a1c27131c6980c79eb Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 15:47:28 +0900 Subject: [PATCH 037/169] Bump commons-lang from 2.5 to 2.6 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 84822a4f..3d5f995f 100644 --- a/pom.xml +++ b/pom.xml @@ -134,7 +134,7 @@ commons-lang commons-lang - 2.5 + 2.6 From 5528afc05f77189b7ef59dbb9cdcce2bd35656e7 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 16:24:04 +0900 Subject: [PATCH 038/169] Bump junit from 4.13.1 to 4.13.2 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3d5f995f..46f26766 100644 --- a/pom.xml +++ b/pom.xml @@ -54,7 +54,7 @@ junit junit - 4.13.1 + 4.13.2 From 7426c563310f73a0820a9af729b5f3621cea57f4 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 16:24:52 +0900 Subject: [PATCH 039/169] Bump hadoop from 3.4.0 to 3.4.1 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 46f26766..c1bc7798 100644 --- a/pom.xml +++ b/pom.xml @@ -95,7 +95,7 @@ org.apache.hadoop 
hadoop-common - 3.4.0 + 3.4.1 true @@ -108,7 +108,7 @@ org.apache.hadoop hadoop-mapreduce-client-core - 3.4.0 + 3.4.1 true From 88607b2ed67c8c73e8b199adf85ac1ddf2fcdddb Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 16:26:16 +0900 Subject: [PATCH 040/169] Bump httpcore from 4.3 to 4.4.16 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c1bc7798..a993945e 100644 --- a/pom.xml +++ b/pom.xml @@ -176,7 +176,7 @@ org.apache.httpcomponents httpcore - 4.3 + 4.4.16 From 0256ae6131e80c49e1ed4a16e5631ccff0d74e36 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 16:27:04 +0900 Subject: [PATCH 041/169] Bump htmlparser from 1.6 to 2.1 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a993945e..ce0a2aec 100644 --- a/pom.xml +++ b/pom.xml @@ -71,7 +71,7 @@ org.htmlparser htmlparser - 1.6 + 2.1 From e1d458a86a2203ca1cd5cab967fb17f268994082 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 16:27:31 +0900 Subject: [PATCH 042/169] Bump json from 20231013 to 20240303 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ce0a2aec..1023560c 100644 --- a/pom.xml +++ b/pom.xml @@ -66,7 +66,7 @@ org.json json - 20231013 + 20240303 org.htmlparser From c839700d472bac5b4625ea4fe10ef47ee02a5a31 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 16:35:12 +0900 Subject: [PATCH 043/169] Update CHANGES.md --- CHANGES.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 579b659f..e3afd137 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,21 @@ +1.2.0 +----- + +#### New features + +* MetaData is now multivalued to support repeated WARC and HTTP headers. 
[#98](https://github.com/iipc/webarchive-commons/pull/98/files) + +#### Dependency upgrades + +* commons-io 2.18.0 +* commons-lang 2.6 +* guava 33.3.1-jre +* hadoop 3.4.1 +* htmlparser 2.1 +* httpcore 4.4.16 +* json 20240303 +* junit 4.13.2 + 1.1.11 ------ From 91c01ddb0561798d204c957fefafa782c0b53921 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 16:37:15 +0900 Subject: [PATCH 044/169] [maven-release-plugin] prepare release webarchive-commons-1.2.0 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 1023560c..12dfae9f 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 1.1.12-SNAPSHOT + 1.2.0 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - HEAD + webarchive-commons-1.2.0 From f37418d08d8fa7fd4ccad4fbb919cc0fc371f2f2 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 29 Nov 2024 16:37:20 +0900 Subject: [PATCH 045/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 12dfae9f..0d84b0d2 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 1.2.0 + 1.2.1-SNAPSHOT jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-1.2.0 + HEAD From 3ae5720ad43e2e80b5ab853078e891ee53641a3c Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 3 Dec 2024 20:22:10 +0900 Subject: [PATCH 046/169] Remove dependency on dsiutils --- pom.xml | 16 ++-------------- .../java/org/archive/url/UsableURIFactory.java | 5 ++--- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/pom.xml b/pom.xml index 0d84b0d2..da2e14da 
100644 --- a/pom.xml +++ b/pom.xml @@ -150,20 +150,8 @@ it.unimi.dsi - dsiutils - 2.2.8 - compile - - - ch.qos.logback - logback-classic - - - - commons-collections - commons-collections - - + fastutil + 7.0.10 diff --git a/src/main/java/org/archive/url/UsableURIFactory.java b/src/main/java/org/archive/url/UsableURIFactory.java index d44b5c84..3dfc33a7 100644 --- a/src/main/java/org/archive/url/UsableURIFactory.java +++ b/src/main/java/org/archive/url/UsableURIFactory.java @@ -20,7 +20,6 @@ import gnu.inet.encoding.IDNA; import gnu.inet.encoding.IDNAException; -import it.unimi.dsi.lang.MutableString; import java.io.UnsupportedEncodingException; import java.util.BitSet; @@ -485,7 +484,7 @@ private String fixup(String uri, final URI base, final String charset) // Preallocate. The '1's and '2's in below are space for ':', // '//', etc. URI characters. - MutableString s = new MutableString( + StringBuilder s = new StringBuilder( ((uriScheme != null)? uriScheme.length(): 0) + 1 // ';' + ((uriAuthority != null)? uriAuthority.length(): 0) @@ -707,7 +706,7 @@ private String checkPort(String uriAuthority) * @param substr Suffix or prefix to use if str is not null. * @param suffix True if substr is a suffix. 
*/ - private void appendNonNull(MutableString b, String str, String substr, + private void appendNonNull(StringBuilder b, String str, String substr, boolean suffix) { if (str != null && str.length() > 0) { if (!suffix) { From 33556bf741eaa10421b9214bbbd69f40618d27d1 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 3 Dec 2024 20:38:46 +0900 Subject: [PATCH 047/169] Remove pom-cdh4.xml --- pom-cdh4.xml | 229 --------------------------------------------------- 1 file changed, 229 deletions(-) delete mode 100644 pom-cdh4.xml diff --git a/pom-cdh4.xml b/pom-cdh4.xml deleted file mode 100644 index de19d8d0..00000000 --- a/pom-cdh4.xml +++ /dev/null @@ -1,229 +0,0 @@ - - 4.0.0 - - org.archive - ia-web-commons - 1.0-SNAPSHOT - jar - - ia-web-commons - http://maven.apache.org - - - UTF-8 - ${maven.build.timestamp} - yyyyMMddhhmmss - - - - - junit - junit - 3.8.1 - test - - - - com.google.guava - guava - 14.0.1 - - - - org.json - json - 20090211 - - - org.htmlparser - htmlparser - 1.6 - - - - org.mozilla - juniversalchardet - 1.0.3 - - - - commons-httpclient - commons-httpclient - 3.1 - - - - org.apache.hadoop - hadoop-core - 2.0.0-mr1-cdh4.2.0 - - - commons-httpclient - commons-httpclient - - - javax.servlet - servlet-api - - - javax.servlet.jsp - jsp-api - - - org.mortbay.jetty - jetty - - - org.mortbay.jetty - jetty-util - - - tomcat - jasper-runtime - - - tomcat - jasper-compiler - - - - - org.apache.hadoop - hadoop-common - 2.0.0-cdh4.2.0 - - - org.apache.hadoop - hadoop-mapreduce-client-common - 2.0.0-cdh4.2.0 - - - org.apache.hadoop - hadoop-mapreduce-client-core - 2.0.0-cdh4.2.0 - - - - org.apache.pig - pig - 0.11.1 - provided - - - - commons-lang - commons-lang - 2.5 - - - - commons-io - commons-io - 2.4 - - - - org.gnu.inet - libidn - 1.15 - - - it.unimi.dsi - mg4j - 1.0.1 - compile - - - org.apache.httpcomponents - httpcore - 4.3 - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 2.3.2 - - 1.6 - 1.6 - - - - maven-assembly-plugin - 2.4 - - - 
jar-with-dependencies - - ia-web-commons - - - - package - - single - - - - - - - - src/main/resources - true - - - - - - - internetarchive - Internet Archive Maven Repository - http://builds.archive.org:8080/maven2 - default - - - true - daily - warn - - - true - daily - warn - - - - - cloudera - Cloudera Hadoop - https://repository.cloudera.com/artifactory/cloudera-repos/ - default - - - true - daily - warn - - - true - daily - warn - - - - - - - - repository - - ${repository.url} - - - - From 4bb03baec41d90795e312e4a2865abb0395670f3 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 3 Dec 2024 20:42:29 +0900 Subject: [PATCH 048/169] Use Files.createLink instead of shelling out to ln --- .../io/ObjectPlusFilesOutputStream.java | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/archive/io/ObjectPlusFilesOutputStream.java b/src/main/java/org/archive/io/ObjectPlusFilesOutputStream.java index 224f24e7..bd5c1eea 100644 --- a/src/main/java/org/archive/io/ObjectPlusFilesOutputStream.java +++ b/src/main/java/org/archive/io/ObjectPlusFilesOutputStream.java @@ -18,10 +18,8 @@ */ package org.archive.io; -import java.io.File; -import java.io.IOException; -import java.io.ObjectOutputStream; -import java.io.OutputStream; +import java.io.*; +import java.nio.file.Files; import java.util.LinkedList; import org.archive.util.FileUtils; @@ -116,19 +114,10 @@ public void snapshotAppendOnlyFile(File file) throws IOException { * @throws IOException */ private void hardlinkOrCopy(File file, File destination) throws IOException { - // For Linux/UNIX, try a hard link first. 
- Process link = Runtime.getRuntime().exec("ln "+file.getAbsolutePath()+" "+destination.getAbsolutePath()); - // TODO NTFS also supports hard links; add appropriate try try { - link.waitFor(); - } catch (InterruptedException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - if(link.exitValue()!=0) { - // hard link failed + Files.createLink(destination.toPath(), file.toPath()); + } catch (UnsupportedOperationException | IOException e) { FileUtils.copyFile(file,destination); } } - } From 328aef2788313a2abc6123c385f9c31b863d6f1b Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 4 Dec 2024 15:07:23 +0900 Subject: [PATCH 049/169] Remove dependency on fastutil Fastutil is our largest dependency and consumes a third of the overall Heritrix distribution size. If we update to the latest version it will be even larger. But we're only using two tiny classes from it: the trivial RepositionableStream interface and the unsynchronized FastBufferedOutputStream. Some downstream users (e.g. lockss-core) actually implement RepositionableStream, so to preserve API compatibility this change includes a copy of just that interface while keeping the same package name. Regarding FastBufferedOutputStream, for WARC writing the outer GZIPOutputStream is synchronized anyway. And RecordingOutputStream will typically be doing moderately large writes copying from the network. So in both usages it seems unlikely that there's much practical benefit in using it here over the standard BufferedOutputStream. The JVM JIT has a lot of optimizations for synchronized these days too.
--- pom.xml | 5 --- .../dsi/fastutil/io/RepositionableStream.java | 42 +++++++++++++++++++ .../org/archive/io/RecordingOutputStream.java | 5 +-- .../java/org/archive/io/WriterPoolMember.java | 5 +-- 4 files changed, 46 insertions(+), 11 deletions(-) create mode 100644 src/main/java/it/unimi/dsi/fastutil/io/RepositionableStream.java diff --git a/pom.xml b/pom.xml index da2e14da..5e5fa419 100644 --- a/pom.xml +++ b/pom.xml @@ -148,11 +148,6 @@ libidn 1.15 - - it.unimi.dsi - fastutil - 7.0.10 - diff --git a/src/main/java/it/unimi/dsi/fastutil/io/RepositionableStream.java b/src/main/java/it/unimi/dsi/fastutil/io/RepositionableStream.java new file mode 100644 index 00000000..a81645f0 --- /dev/null +++ b/src/main/java/it/unimi/dsi/fastutil/io/RepositionableStream.java @@ -0,0 +1,42 @@ +// copied from fastutil, keeping the original package name to avoid breaking +// compatibility with existing user code that implements this interface +package it.unimi.dsi.fastutil.io; + +/* + * Copyright (C) 2005-2015 Sebastiano Vigna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** A basic interface specifying positioning methods for a byte stream. + * + * @author Sebastiano Vigna + * @since 4.4 + */ + +public interface RepositionableStream { + + /** Sets the current stream position. + * + * @param newPosition the new stream position. + */ + void position( long newPosition ) throws java.io.IOException; + + /** Returns the current stream position. 
+ * + * @return the current stream position. + */ + long position() throws java.io.IOException; + +} diff --git a/src/main/java/org/archive/io/RecordingOutputStream.java b/src/main/java/org/archive/io/RecordingOutputStream.java index 7d2ff212..6c77997b 100644 --- a/src/main/java/org/archive/io/RecordingOutputStream.java +++ b/src/main/java/org/archive/io/RecordingOutputStream.java @@ -19,8 +19,7 @@ package org.archive.io; -import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; - +import java.io.BufferedOutputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; @@ -207,7 +206,7 @@ public void open(OutputStream wrappedStream) throws IOException { protected OutputStream ensureDiskStream() throws FileNotFoundException { if (this.diskStream == null) { FileOutputStream fis = new FileOutputStream(this.backingFilename); - this.diskStream = new FastBufferedOutputStream(fis); + this.diskStream = new BufferedOutputStream(fis); } return this.diskStream; } diff --git a/src/main/java/org/archive/io/WriterPoolMember.java b/src/main/java/org/archive/io/WriterPoolMember.java index 893007ec..e10d443b 100644 --- a/src/main/java/org/archive/io/WriterPoolMember.java +++ b/src/main/java/org/archive/io/WriterPoolMember.java @@ -19,8 +19,7 @@ package org.archive.io; -import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; - +import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -200,7 +199,7 @@ protected String createFile(final File file) throws IOException { close(); this.f = file; FileOutputStream fos = new FileOutputStream(this.f); - this.countOut = new MiserOutputStream(new FastBufferedOutputStream(fos),settings.getFrequentFlushes()); + this.countOut = new MiserOutputStream(new BufferedOutputStream(fos),settings.getFrequentFlushes()); this.out = this.countOut; logger.fine("Opened " + this.f.getAbsolutePath()); return this.f.getName(); From 
8988fbbc3528afcc7f792bcc967189311e8a1286 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 4 Dec 2024 16:54:03 +0900 Subject: [PATCH 050/169] Deprecate some classes specific to HttpClient 3 These are intended to be removed in webarchive-commons 2. #78 --- .../java/org/archive/httpclient/HttpRecorderGetMethod.java | 2 ++ src/main/java/org/archive/httpclient/HttpRecorderMethod.java | 2 ++ .../java/org/archive/httpclient/HttpRecorderPostMethod.java | 2 ++ .../org/archive/httpclient/SingleHttpConnectionManager.java | 2 ++ .../archive/httpclient/ThreadLocalHttpConnectionManager.java | 4 +++- .../util/binsearch/impl/HTTPSeekableLineReaderFactory.java | 1 + .../archive/util/binsearch/impl/http/ApacheHttp31SLR.java | 4 ++++ .../util/binsearch/impl/http/ApacheHttp31SLRFactory.java | 5 +++++ 8 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java b/src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java index ef241b48..1a94af1f 100644 --- a/src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java +++ b/src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java @@ -70,7 +70,9 @@ * * @author stack * @version $Revision$, $Date$ + * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 */ +@Deprecated public class HttpRecorderGetMethod extends GetMethod { protected static Logger logger = diff --git a/src/main/java/org/archive/httpclient/HttpRecorderMethod.java b/src/main/java/org/archive/httpclient/HttpRecorderMethod.java index 932e7e98..b08bc0bd 100644 --- a/src/main/java/org/archive/httpclient/HttpRecorderMethod.java +++ b/src/main/java/org/archive/httpclient/HttpRecorderMethod.java @@ -34,7 +34,9 @@ * * @author stack * @version $Revision$, $Date$ + * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 */ +@Deprecated public class HttpRecorderMethod { protected static Logger logger = 
Logger.getLogger(HttpRecorderMethod.class.getName()); diff --git a/src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java b/src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java index 20f1bfd1..d55d816a 100644 --- a/src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java +++ b/src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java @@ -36,7 +36,9 @@ * * @author stack * @version $Date$ $Revision$ + * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 */ +@Deprecated public class HttpRecorderPostMethod extends PostMethod { /** * Instance of http recorder method. diff --git a/src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java b/src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java index 4ba6a837..d6cf27ab 100644 --- a/src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java +++ b/src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java @@ -32,7 +32,9 @@ * with external mechanisms. * * @author gojomo + * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 */ +@Deprecated public class SingleHttpConnectionManager extends SimpleHttpConnectionManager { public SingleHttpConnectionManager() { diff --git a/src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java b/src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java index 91e850ea..16821b36 100644 --- a/src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java +++ b/src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java @@ -36,8 +36,10 @@ * * Java >= 1.4 is recommended. 
* - * @author Christian Kohlschuetter + * @author Christian Kohlschuetter + * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 */ +@Deprecated public final class ThreadLocalHttpConnectionManager implements HttpConnectionManager { diff --git a/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java b/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java index b4a23db0..68ee6551 100644 --- a/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java +++ b/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java @@ -20,6 +20,7 @@ protected HTTPSeekableLineReaderFactory() public enum HttpLibs { + @Deprecated APACHE_31, APACHE_43, URLCONN, diff --git a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java index c4fdbba8..124d3d03 100644 --- a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java +++ b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java @@ -14,6 +14,10 @@ import org.apache.commons.io.input.CountingInputStream; import org.archive.util.binsearch.impl.HTTPSeekableLineReader; +/** + * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 + */ +@Deprecated public class ApacheHttp31SLR extends HTTPSeekableLineReader { private HttpClient http; diff --git a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java index bc5b83f4..2af03dab 100644 --- a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java +++ b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java @@ -15,6 +15,11 @@ import org.archive.util.binsearch.impl.HTTPSeekableLineReader; import 
org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory; +/** + * + * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 + */ +@Deprecated public class ApacheHttp31SLRFactory extends HTTPSeekableLineReaderFactory { private final static Logger LOGGER = Logger.getLogger(ApacheHttp31SLRFactory.class.getName()); From b8a91bb3b7e8a36b2162251314ff52b42a379221 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Thu, 5 Dec 2024 07:49:10 +0900 Subject: [PATCH 051/169] Remove unused dependency on commons-collections --- pom.xml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pom.xml b/pom.xml index 5e5fa419..6dec154c 100644 --- a/pom.xml +++ b/pom.xml @@ -149,13 +149,6 @@ 1.15 - - - commons-collections - commons-collections - 3.2.2 - - org.apache.httpcomponents httpcore From a80b98dfe4b1c2a7556e7df2574c16426849f6d9 Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Sat, 26 Aug 2023 20:05:34 -0400 Subject: [PATCH 052/169] Add failing test from Sebastian's issue --- src/test/java/org/archive/url/BasicURLCanonicalizerTest.java | 3 +++ src/test/java/org/archive/url/WaybackURLKeyMakerTest.java | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java index c21bcbe8..cc100e4c 100644 --- a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java @@ -143,6 +143,9 @@ public void testUnescapeRepeatedly() { assertEquals("%",guc.unescapeRepeatedly("%25%32%35")); assertEquals("168.188.99.26",guc.unescapeRepeatedly("%31%36%38%2e%31%38%38%2e%39%39%2e%32%36")); + + assertEquals("tag=%E4%EE%F8%EA%EE%EB%FC%ED%EE%E5", + guc.unescapeRepeatedly("tag=%E4%EE%F8%EA%EE%EB%FC%ED%EE%E5")); } public void testAttemptIPFormats() throws URIException { diff --git a/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java 
b/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java index 1a1403ee..86250972 100644 --- a/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java +++ b/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java @@ -26,6 +26,10 @@ public void testMakeKey() throws URISyntaxException { assertEquals("192,211,203,34)/robots.txt", km.makeKey("https://34.203.211.192/robots.txt")); assertEquals("2600:1f18:200d:fb00:2b74:867c:ab0c:150a)/robots.txt", km.makeKey("https://[2600:1f18:200d:fb00:2b74:867c:ab0c:150a]/robots.txt")); + assertEquals("ua,1kr)/newslist.html?tag=%e4%ee%f8%ea%ee%eb%fc%ed%ee%e5", + km.makeKey("http://1kr.ua/newslist.html?tag=%E4%EE%F8%EA%EE%EB%FC%ED%EE%E5")); + assertEquals("com,aluroba)/tags/%c3%ce%ca%c7%d1%e5%c7.htm", + km.makeKey("http://www.aluroba.com/tags/%C3%CE%CA%C7%D1%E5%C7.htm")); } } From 5161306d9ec993d1986f0d092c056f33ba3abdfe Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Sun, 27 Aug 2023 13:01:19 -0400 Subject: [PATCH 053/169] Add non-UTF-8 encoded test from mailing list --- src/test/java/org/archive/url/WaybackURLKeyMakerTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java b/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java index 86250972..26371ba8 100644 --- a/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java +++ b/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java @@ -30,6 +30,8 @@ public void testMakeKey() throws URISyntaxException { km.makeKey("http://1kr.ua/newslist.html?tag=%E4%EE%F8%EA%EE%EB%FC%ED%EE%E5")); assertEquals("com,aluroba)/tags/%c3%ce%ca%c7%d1%e5%c7.htm", km.makeKey("http://www.aluroba.com/tags/%C3%CE%CA%C7%D1%E5%C7.htm")); + assertEquals("ac,insbase)/xoops2/modules/xpwiki?%a4%d5%a4%af%a4%aa%a4%ab%b8%a9%a4%aa%a4%aa%a4%ce%a4%b8%a4%e7%a4%a6%bb%d4", + km.makeKey("https://www.insbase.ac/xoops2/modules/xpwiki/?%A4%D5%A4%AF%A4%AA%A4%AB%B8%A9%A4%AA%A4%AA%A4%CE%A4%B8%A4%E7%A4%A6%BB%D4")); } } From 
f7be47bc523c4d06cc7960dc2d3b1b58f9580906 Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Sun, 27 Aug 2023 13:11:30 -0400 Subject: [PATCH 054/169] Handle non-UTF-8 encoded characters. Fixes #6 --- .../archive/url/BasicURLCanonicalizer.java | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/archive/url/BasicURLCanonicalizer.java b/src/main/java/org/archive/url/BasicURLCanonicalizer.java index c09ad6e6..37b448c1 100644 --- a/src/main/java/org/archive/url/BasicURLCanonicalizer.java +++ b/src/main/java/org/archive/url/BasicURLCanonicalizer.java @@ -15,18 +15,18 @@ /** * Canonicalizer that does more or less basic fixup. Based initially on rules * specified at https://developers.google.com/safe-browsing/developers_guide_v2# - * Canonicalization. These rules are designed for clients of google's + * Canonicalization. These rules are designed for clients of Google's * "experimental" Safe Browsing API to "check URLs against Google's * constantly-updated blacklists of suspected phishing and malware pages". * *

- * This class differs from google in treatment of non-ascii input. Google's + * This class differs from Google in treatment of non-ascii input. Google's * rules don't really address this except with one example test case, which * seems to suggest taking raw input bytes and pct-encoding them byte for byte. * Since the input to this class consists of java strings, not raw bytes, that - * wouldn't be possible, even if deemed preferable. Instead + * wouldn't be possible, even if deemed preferable. Instead, * BasicURLCanonicalizer expresses non-ascii characters pct-encoded UTF-8. */ public class BasicURLCanonicalizer implements URLCanonicalizer { @@ -212,6 +212,10 @@ protected static Charset UTF8() { return _UTF8; } + /** + * @param input String to be percent-encoded. Assumed to be fully unescaped. + * @return percent-encoded string + */ public String escapeOnce(String input) { if (input == null) { return null; @@ -243,6 +247,19 @@ public String escapeOnce(String input) { */ sb = new StringBuilder(input.substring(0, i)); } + if (b == '%' && i < utf8bytes.length - 2) { + // Any hex escapes left at this point represent non-UTF-8 encoded characters + // Unescape them, so they don't get double escaped + int hex1 = getHex(utf8bytes[i + 1]); + if (hex1 >= 0) { + int hex2 = getHex(utf8bytes[i + 2]); + if (hex2 >= 0) { + i = i+2; + b = hex1 * 16 + hex2; + } + } + + } sb.append("%"); String hex = Integer.toHexString(b).toUpperCase(); if (hex.length() == 1) { @@ -337,7 +354,7 @@ public String decode(String input) { * Decodes bytes in bbuf as utf-8 and appends decoded characters to sb. If * decoding of any portion fails, appends the un-decodable %xx%xx sequence * extracted from inputStr instead of decoded characters. See "bad unicode" - * tests in GoogleCanonicalizerTest#testDecode(). Variables only make sense + * tests in BasicURLCanonicalizerTest#testDecode(). Variables only make sense * within context of {@link #decode(String)}. 
* * @param sb From 6a3cf1b317c87305d05faee73d2c3ee3f5ec08b0 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Wed, 11 Dec 2024 21:14:06 +0100 Subject: [PATCH 055/169] WAT: Duplicated payload metadata values for "Actual-Content-Length" and "Trailing-Slop-Length" --- .../org/archive/resource/arc/ARCResource.java | 2 + .../http/HTTPHeadersResourceFactory.java | 11 +++-- .../archive/resource/warc/WARCResource.java | 14 ++++-- .../record/WARCMetaDataResourceFactory.java | 10 +++- .../archive/resource/arc/ARCResourceTest.java | 48 +++++++++++++++++++ .../resource/warc/WARCResourceTest.java | 46 ++++++++++++++++++ 6 files changed, 123 insertions(+), 8 deletions(-) create mode 100644 src/test/java/org/archive/resource/arc/ARCResourceTest.java create mode 100644 src/test/java/org/archive/resource/warc/WARCResourceTest.java diff --git a/src/main/java/org/archive/resource/arc/ARCResource.java b/src/main/java/org/archive/resource/arc/ARCResource.java index b6e0a1c1..b0195f08 100644 --- a/src/main/java/org/archive/resource/arc/ARCResource.java +++ b/src/main/java/org/archive/resource/arc/ARCResource.java @@ -64,10 +64,12 @@ public ARCResource(MetaData metaData, ResourceContainer container, } } + @Override public InputStream getInputStream() { return new EOFNotifyingInputStream(digIS, this); } + @Override public void notifyEOF() throws IOException { metaData.putLong(PAYLOAD_LENGTH, countingIS.getCount()); String digString = Base32.encode(digIS.getMessageDigest().digest()); diff --git a/src/main/java/org/archive/resource/http/HTTPHeadersResourceFactory.java b/src/main/java/org/archive/resource/http/HTTPHeadersResourceFactory.java index 79805090..eb25d821 100644 --- a/src/main/java/org/archive/resource/http/HTTPHeadersResourceFactory.java +++ b/src/main/java/org/archive/resource/http/HTTPHeadersResourceFactory.java @@ -31,6 +31,7 @@ public HTTPHeadersResourceFactory(String name, String type) { parser = new HttpHeaderParser(); } + @Override public Resource 
getResource(InputStream is, MetaData parentMetaData, ResourceContainer container) throws ResourceParseException, IOException { @@ -40,9 +41,13 @@ public Resource getResource(InputStream is, MetaData parentMetaData, if(headers.isCorrupt()) { parentMetaData.putBoolean(HTTP_HEADERS_CORRUPT, true); } - parentMetaData.putLong(PAYLOAD_LENGTH, bytes); - - parentMetaData.putLong(PAYLOAD_SLOP_BYTES, StreamCopy.readToEOF(is)); + if (!parentMetaData.has(PAYLOAD_LENGTH) || bytes != parentMetaData.getLong(PAYLOAD_LENGTH)) { + parentMetaData.putLong(PAYLOAD_LENGTH, bytes); + } + long trailingSlopBytes = StreamCopy.readToEOF(is); + if (!parentMetaData.has(PAYLOAD_SLOP_BYTES) || trailingSlopBytes > 0) { + parentMetaData.putLong(PAYLOAD_SLOP_BYTES, trailingSlopBytes); + } if(type != null) { parentMetaData.putString(PAYLOAD_CONTENT_TYPE, type); } diff --git a/src/main/java/org/archive/resource/warc/WARCResource.java b/src/main/java/org/archive/resource/warc/WARCResource.java index d538a25d..a9c3fcc3 100644 --- a/src/main/java/org/archive/resource/warc/WARCResource.java +++ b/src/main/java/org/archive/resource/warc/WARCResource.java @@ -53,7 +53,7 @@ public WARCResource(MetaData metaData, ResourceContainer container, countingIS = new CountingInputStream( ByteStreams.limit(response, length)); } else { - throw new ResourceParseException(null); + throw new ResourceParseException(new Exception("Zero or negative length: " + length)); } try { digIS = new DigestInputStream(countingIS, @@ -63,14 +63,18 @@ public WARCResource(MetaData metaData, ResourceContainer container, } } + @Override public InputStream getInputStream() { return new EOFNotifyingInputStream(digIS, this); } + @Override public void notifyEOF() throws IOException { String digString = Base32.encode(digIS.getMessageDigest().digest()); if(container.isCompressed()) { - metaData.putLong(PAYLOAD_LENGTH, countingIS.getCount()); + if (!metaData.has(PAYLOAD_LENGTH) || countingIS.getCount() != metaData.getLong(PAYLOAD_LENGTH)) { + 
metaData.putLong(PAYLOAD_LENGTH, countingIS.getCount()); + } metaData.putLong(PAYLOAD_SLOP_BYTES, StreamCopy.readToEOF(response)); metaData.putString(PAYLOAD_DIGEST, "sha1:"+digString); } else { @@ -81,13 +85,17 @@ public void notifyEOF() throws IOException { (PushBackOneByteInputStream) raw; long numNewlines = StreamCopy.skipChars(pb1bis, CR_NL_CHARS); if(numNewlines > 0) { - metaData.putLong(PAYLOAD_LENGTH, countingIS.getCount()); + long payloadLength = countingIS.getCount(); + if (!metaData.has(PAYLOAD_LENGTH) || payloadLength != metaData.getLong(PAYLOAD_LENGTH)) { + metaData.putLong(PAYLOAD_LENGTH, payloadLength); + } metaData.putLong(PAYLOAD_SLOP_BYTES, numNewlines); metaData.putString(PAYLOAD_DIGEST, "sha1:"+digString); } } } } + public MetaData getEnvelopeMetaData() { return envelope; } diff --git a/src/main/java/org/archive/resource/warc/record/WARCMetaDataResourceFactory.java b/src/main/java/org/archive/resource/warc/record/WARCMetaDataResourceFactory.java index 0dfb2834..ba8a35da 100644 --- a/src/main/java/org/archive/resource/warc/record/WARCMetaDataResourceFactory.java +++ b/src/main/java/org/archive/resource/warc/record/WARCMetaDataResourceFactory.java @@ -21,6 +21,7 @@ public WARCMetaDataResourceFactory() { parser = new HttpHeaderParser(); } + @Override public Resource getResource(InputStream is, MetaData parentMetaData, ResourceContainer container) throws ResourceParseException, IOException { @@ -33,8 +34,13 @@ public Resource getResource(InputStream is, MetaData parentMetaData, if(headers.isCorrupt()) { md.putBoolean(WARC_META_FIELDS_CORRUPT, true); } - parentMetaData.putLong(PAYLOAD_SLOP_BYTES, StreamCopy.readToEOF(is)); - parentMetaData.putLong(PAYLOAD_LENGTH, bytes); + long trailingSlopBytes = StreamCopy.readToEOF(is); + if (!parentMetaData.has(PAYLOAD_SLOP_BYTES) || trailingSlopBytes > 0) { + parentMetaData.putLong(PAYLOAD_SLOP_BYTES, trailingSlopBytes); + } + if (!parentMetaData.has(PAYLOAD_LENGTH) || bytes != 
parentMetaData.getLong(PAYLOAD_LENGTH)) { + parentMetaData.putLong(PAYLOAD_LENGTH, bytes); + } return new WARCMetaDataResource(md,container, headers); } catch (HttpParseException e) { diff --git a/src/test/java/org/archive/resource/arc/ARCResourceTest.java b/src/test/java/org/archive/resource/arc/ARCResourceTest.java new file mode 100644 index 00000000..43116af7 --- /dev/null +++ b/src/test/java/org/archive/resource/arc/ARCResourceTest.java @@ -0,0 +1,48 @@ +package org.archive.resource.arc; + + +import static org.archive.resource.ResourceConstants.PAYLOAD_LENGTH; +import static org.archive.resource.ResourceConstants.PAYLOAD_SLOP_BYTES; + +import java.io.IOException; + +import org.archive.extract.ExtractingResourceFactoryMapper; +import org.archive.extract.ExtractingResourceProducer; +import org.archive.extract.ProducerUtils; +import org.archive.extract.ResourceFactoryMapper; +import org.archive.resource.Resource; +import org.archive.resource.ResourceParseException; +import org.archive.resource.ResourceProducer; +import org.archive.util.StreamCopy; + +import org.json.JSONObject; + +import junit.framework.TestCase; + +public class ARCResourceTest extends TestCase { + + public void testARCResource() throws ResourceParseException, IOException { + String testFileName = "../../format/arc/IAH-20080430204825-00000-blackbook-truncated.arc"; + ResourceProducer producer = ProducerUtils.getProducer(getClass().getResource(testFileName).getPath()); + ResourceFactoryMapper mapper = new ExtractingResourceFactoryMapper(); + ExtractingResourceProducer extractor = new ExtractingResourceProducer(producer, mapper); + + Resource resource = extractor.getNext(); + + while (resource != null) { + JSONObject payloadMD = resource.getMetaData().getTopMetaData().getJSONObject("Envelope") + .getJSONObject("Payload-Metadata"); + System.err.println(payloadMD); + + if (payloadMD.has(PAYLOAD_LENGTH)) { + assertTrue(payloadMD.getLong(PAYLOAD_LENGTH) != -1); + } + if 
(payloadMD.has(PAYLOAD_SLOP_BYTES)) { + // does not occur with the tested ARC file + } + + StreamCopy.readToEOF(resource.getInputStream()); + resource = extractor.getNext(); + } + } +} diff --git a/src/test/java/org/archive/resource/warc/WARCResourceTest.java b/src/test/java/org/archive/resource/warc/WARCResourceTest.java new file mode 100644 index 00000000..1b935405 --- /dev/null +++ b/src/test/java/org/archive/resource/warc/WARCResourceTest.java @@ -0,0 +1,46 @@ +package org.archive.resource.warc; + +import static org.archive.resource.ResourceConstants.PAYLOAD_LENGTH; +import static org.archive.resource.ResourceConstants.PAYLOAD_SLOP_BYTES; + +import java.io.IOException; + +import org.archive.extract.ExtractingResourceFactoryMapper; +import org.archive.extract.ExtractingResourceProducer; +import org.archive.extract.ProducerUtils; +import org.archive.extract.ResourceFactoryMapper; +import org.archive.resource.Resource; +import org.archive.resource.ResourceParseException; +import org.archive.resource.ResourceProducer; +import org.archive.util.StreamCopy; + +import org.json.JSONObject; + +import junit.framework.TestCase; + +public class WARCResourceTest extends TestCase { + + public void testWARCResource() throws ResourceParseException, IOException { + String testFileName = "../../format/warc/IAH-urls-wget.warc"; + ResourceProducer producer = ProducerUtils.getProducer(getClass().getResource(testFileName).getPath()); + ResourceFactoryMapper mapper = new ExtractingResourceFactoryMapper(); + ExtractingResourceProducer extractor = new ExtractingResourceProducer(producer, mapper); + + Resource resource = extractor.getNext(); + + while (resource != null) { + JSONObject payloadMD = resource.getMetaData().getTopMetaData().getJSONObject("Envelope") + .getJSONObject("Payload-Metadata"); + + if (payloadMD.has(PAYLOAD_LENGTH)) { + assertTrue(payloadMD.getLong(PAYLOAD_LENGTH) != -1); + } + if (payloadMD.has(PAYLOAD_SLOP_BYTES)) { + assertEquals(4, 
payloadMD.getLong(PAYLOAD_SLOP_BYTES)); + } + + StreamCopy.readToEOF(resource.getInputStream()); + resource = extractor.getNext(); + } + } +} From c5b779128edd1f0fad2709d4ab1b797326c2cb6c Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 20 Dec 2024 14:10:44 +0900 Subject: [PATCH 056/169] Update CHANGES.md for 1.3.0 --- CHANGES.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index e3afd137..8a0a7d20 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,40 @@ +1.3.0 +----- + +#### URL Canonicalization Changed + +The output of WaybackURLKeyMaker and other canonicalizers based on BasicURLCanonicalizer has changed for URLs that +contain non UTF-8 percent encoded sequences. For example when a URL contains "%C3%23" it will now be normalised to +"%c3%23" whereas previous releases produced "%25c3%23". This change brings webarchive-commons more inline with pywb, +surt (Python), warcio.js and RFC 3986. While CDX file compatibility with these newer tools should improve, note that CDX +files generated by the new release which contain such URLs may not work correctly with existing versions of +OpenWayback that use the older webarchive-commons. [#102](https://github.com/iipc/webarchive-commons/pull/102) + +#### Bug fixes + +* WAT: Duplicated payload metadata values for "Actual-Content-Length" and "Trailing-Slop-Length" [#103](https://github.com/iipc/webarchive-commons/pull/103) +* ObjectPlusFilesOutputStream.hardlinkOrCopy now uses `Files.createLink()` instead of executing `ln`. This + prevents the potential for security vulnerabilities from command line option injection and improves portability. + +#### Dependency upgrades + +* fastutil removed +* dsiutils removed + +#### Deprecations + +The following classes and enum members have been marked deprecated as a step towards removal of the dependency on +Apache Commons HttpClient 3.1. 
+ +* org.archive.httpclient.HttpRecorderGetMethod +* org.archive.httpclient.HttpRecorderMethod +* org.archive.httpclient.HttpRecorderPostMethod +* org.archive.httpclient.SingleHttpConnectionManager +* org.archive.httpclient.ThreadLocalHttpConnectionManager +* org.archive.util.binsearch.impl.http.ApacheHttp31SLR +* org.archive.util.binsearch.impl.http.ApacheHttp31SLRFactory +* org.archive.util.binsearch.impl.http.HTTPSeekableLineReaderFactory.HttpLibs.APACHE_31 + 1.2.0 ----- From eee48cc18017dde59b1d12f11654a2c752c63d45 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 20 Dec 2024 14:12:09 +0900 Subject: [PATCH 057/169] [maven-release-plugin] prepare release webarchive-commons-1.3.0 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 6dec154c..f489826c 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 1.2.1-SNAPSHOT + 1.3.0 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - HEAD + webarchive-commons-1.3.0 From a8fd8a74b83d3327bc074cf783f6315659fbc715 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 20 Dec 2024 14:12:13 +0900 Subject: [PATCH 058/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index f489826c..74a4bbe6 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 1.3.0 + 1.3.1-SNAPSHOT jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-1.3.0 + HEAD From a3a39598fc7b6947e38161e9f27f6842eed95456 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 11 Mar 2025 10:20:00 +0100 Subject: [PATCH 059/169] 
Upgrade GitHub workflow actions cache --- .github/workflows/maven.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 8bb55c4e..60fac096 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -24,7 +24,7 @@ jobs: distribution: 'temurin' cache: maven - name: Cache local Maven repository - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} From c427a12e82f3cebd6ba57152209d0bb5b9de2619 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Sun, 18 May 2025 09:39:48 +0900 Subject: [PATCH 060/169] Upgrade to JUnit 5 --- CHANGES.md | 7 + pom.xml | 7 +- .../java/org/archive/util/TmpDirTestCase.java | 119 ---- .../extract/RealCDXExtractorOutputTest.java | 31 +- .../format/dns/DNSResponseParserTest.java | 10 +- .../format/gzip/GZIPMemberSeriesTest.java | 38 +- .../format/gzip/GZIPMemberWriterTest.java | 5 +- .../format/gzip/zipnum/ZipNumWriterTest.java | 16 +- .../http/HttpRequestMessageParserTest.java | 12 +- .../format/http/HttpResponseParserTest.java | 14 +- .../json/CompoundORJSONPathSpecTest.java | 5 +- .../format/json/JSONPathSpecFactoryTest.java | 5 +- .../org/archive/format/json/JSONViewTest.java | 9 +- .../format/json/SimpleJSONPathSpecTest.java | 5 +- .../format/text/html/CDATALexerTest.java | 14 +- .../archive/io/ArchiveReaderFactoryTest.java | 27 +- .../io/BufferedSeekInputStreamTest.java | 9 +- .../archive/io/HeaderedArchiveRecordTest.java | 22 +- .../archive/io/RecordingInputStreamTest.java | 39 +- .../archive/io/RecordingOutputStreamTest.java | 74 ++- .../archive/io/ReplayCharSequenceTest.java | 110 ++-- .../io/RepositionableInputStreamTest.java | 20 +- .../archive/io/arc/ARCReaderFactoryTest.java | 13 +- .../org/archive/io/arc/ARCWriterPoolTest.java | 41 +- .../org/archive/io/arc/ARCWriterTest.java | 121 ++-- .../io/warc/WARCReaderFactoryTest.java | 7 +- 
.../org/archive/io/warc/WARCWriterTest.java | 67 ++- .../org/archive/net/PublicSuffixesTest.java | 55 +- .../org/archive/resource/MetaDataTest.java | 21 +- .../archive/resource/arc/ARCResourceTest.java | 6 +- .../html/ExtractingParseObserverTest.java | 24 +- .../resource/html/HTMLMetaDataTest.java | 12 +- .../resource/warc/WARCResourceTest.java | 7 +- .../org/archive/uid/UUIDGeneratorTest.java | 7 +- .../url/AggressiveIAURLCanonicalizerTest.java | 9 +- .../url/BasicURLCanonicalizerTest.java | 39 +- .../java/org/archive/url/HandyURLTest.java | 13 +- .../archive/url/IAURLCanonicalizerTest.java | 13 +- .../url/OrdinaryIAURLCanonicalizerTest.java | 10 +- .../java/org/archive/url/URLParserTest.java | 11 +- .../archive/url/URLRegexTransformerTest.java | 45 +- .../org/archive/url/UsableURIFactoryTest.java | 564 +++++++++--------- .../java/org/archive/url/UsableURITest.java | 16 +- .../archive/url/WaybackURLKeyMakerTest.java | 7 +- .../org/archive/util/ArchiveUtilsTest.java | 231 ++++--- .../java/org/archive/util/ByteOpTest.java | 14 +- .../org/archive/util/CrossProductTest.java | 8 +- .../java/org/archive/util/FileUtilsTest.java | 69 ++- .../util/InterruptibleCharSequenceTest.java | 21 +- .../org/archive/util/MimetypeUtilsTest.java | 63 +- .../org/archive/util/PropertyUtilsTest.java | 11 +- .../util/StringFieldExtractorTest.java | 10 +- src/test/java/org/archive/util/TestUtils.java | 17 +- .../org/archive/util/anvl/ANVLRecordTest.java | 56 +- .../util/binsearch/SortedTextFileTest.java | 8 +- .../iterator/CachingStringFilterTest.java | 5 +- .../iterator/FilterStringIteratorTest.java | 25 +- .../iterator/SortedCompositeIteratorTest.java | 8 +- .../util/zip/GZIPMembersInputStreamTest.java | 157 ++--- 59 files changed, 1236 insertions(+), 1173 deletions(-) delete mode 100644 src/main/java/org/archive/util/TmpDirTestCase.java diff --git a/CHANGES.md b/CHANGES.md index 8a0a7d20..478238bf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,10 @@ +Unreleased +---------- + +#### 
Dependency upgrades + +- **junit**: 4.13.2 → 5.12.2 + 1.3.0 ----- diff --git a/pom.xml b/pom.xml index 74a4bbe6..c70a2cd7 100644 --- a/pom.xml +++ b/pom.xml @@ -52,9 +52,10 @@ - junit - junit - 4.13.2 + org.junit.jupiter + junit-jupiter + 5.12.2 + test diff --git a/src/main/java/org/archive/util/TmpDirTestCase.java b/src/main/java/org/archive/util/TmpDirTestCase.java deleted file mode 100644 index 09ec345b..00000000 --- a/src/main/java/org/archive/util/TmpDirTestCase.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.util; - -import java.io.File; -import java.io.IOException; - -import junit.framework.TestCase; - - -/** - * Base class for TestCases that want access to a tmp dir for the writing - * of files. - * - * @author stack - */ -public abstract class TmpDirTestCase extends TestCase -{ - /** - * Name of the system property that holds pointer to tmp directory into - * which we can safely write files. - */ - public static final String TEST_TMP_SYSTEM_PROPERTY_NAME = "testtmpdir"; - - /** - * Default test tmp. - */ - public static final String DEFAULT_TEST_TMP_DIR = File.separator + "tmp" + - File.separator + "heritrix-junit-tests"; - - /** - * Directory to write temporary files to. 
- */ - private File tmpDir = null; - - - public TmpDirTestCase() - { - super(); - } - - public TmpDirTestCase(String testName) - { - super(testName); - } - - /* - * @see TestCase#setUp() - */ - protected void setUp() throws Exception { - super.setUp(); - this.tmpDir = tmpDir(); - } - - /** - * @return Returns the tmpDir. - */ - public File getTmpDir() - { - return this.tmpDir; - } - - /** - * Delete any files left over from previous run. - * - * @param basename Base name of files we're to clean up. - */ - public void cleanUpOldFiles(String basename) { - cleanUpOldFiles(getTmpDir(), basename); - } - - /** - * Delete any files left over from previous run. - * - * @param prefix Base name of files we're to clean up. - * @param basedir Directory to start cleaning in. - */ - public void cleanUpOldFiles(File basedir, String prefix) { - File [] files = FileUtils.getFilesWithPrefix(basedir, prefix); - if (files != null) { - for (int i = 0; i < files.length; i++) { - org.apache.commons.io.FileUtils.deleteQuietly(files[i]); - } - } - } - - - public static File tmpDir() throws IOException { - String tmpDirStr = System.getProperty(TEST_TMP_SYSTEM_PROPERTY_NAME); - tmpDirStr = (tmpDirStr == null)? 
DEFAULT_TEST_TMP_DIR: tmpDirStr; - File tmpDir = new File(tmpDirStr); - FileUtils.ensureWriteableDirectory(tmpDir); - - if (!tmpDir.canWrite()) - { - throw new IOException(tmpDir.getAbsolutePath() + - " is unwriteable."); - } - - return tmpDir; - } -} diff --git a/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java b/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java index 14f8489d..a716df82 100644 --- a/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java +++ b/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java @@ -1,28 +1,29 @@ package org.archive.extract; -import java.net.MalformedURLException; import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.net.URLEncoder; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; -public class RealCDXExtractorOutputTest extends TestCase { +public class RealCDXExtractorOutputTest { + + @Test public void testEscapeResolvedUrl() throws Exception { - String context ="http://www.uni-giessen.de/cms/studium/dateien/informationberatung/merkblattpdf"; - String spec = "http://fss.plone.uni-giessen.de/fß/studium/dateien/informationberatung/merkblattpdf/file/Mérkblatt zur Gestaltung von Nachteilsausgleichen.pdf?föo=bar#änchor"; - String escaped = RealCDXExtractorOutput.resolve(context, spec); - assertTrue(escaped.indexOf(" ") < 0); - URI parsed = new URI(escaped); - assertEquals("änchor", parsed.getFragment()); + String context = "http://www.uni-giessen.de/cms/studium/dateien/informationberatung/merkblattpdf"; + String spec = "http://fss.plone.uni-giessen.de/fß/studium/dateien/informationberatung/merkblattpdf/file/Mérkblatt zur Gestaltung von Nachteilsausgleichen.pdf?föo=bar#änchor"; + String escaped = RealCDXExtractorOutput.resolve(context, spec); + assertTrue(escaped.indexOf(" ") < 0); + URI parsed = new 
URI(escaped); + assertEquals("änchor", parsed.getFragment()); } + @Test public void testNoDoubleEscaping() throws Exception { - String spec = "https://www.google.com/search?q=java+escape+url+spaces&ie=utf-8&oe=utf-8"; - String resolved = RealCDXExtractorOutput.resolve(spec, spec); - assertTrue(spec.equals(resolved)); + String spec = "https://www.google.com/search?q=java+escape+url+spaces&ie=utf-8&oe=utf-8"; + String resolved = RealCDXExtractorOutput.resolve(spec, spec); + assertTrue(spec.equals(resolved)); } } diff --git a/src/test/java/org/archive/format/dns/DNSResponseParserTest.java b/src/test/java/org/archive/format/dns/DNSResponseParserTest.java index 27d0fdad..7ade0ad5 100644 --- a/src/test/java/org/archive/format/dns/DNSResponseParserTest.java +++ b/src/test/java/org/archive/format/dns/DNSResponseParserTest.java @@ -3,15 +3,13 @@ import java.io.ByteArrayInputStream; import java.io.IOException; -import org.archive.format.dns.DNSParseException; -import org.archive.format.dns.DNSRecord; -import org.archive.format.dns.DNSResponse; -import org.archive.format.dns.DNSResponseParser; +import org.junit.jupiter.api.Test; -import junit.framework.TestCase; +import static org.junit.jupiter.api.Assertions.assertEquals; -public class DNSResponseParserTest extends TestCase { +public class DNSResponseParserTest { DNSResponseParser parser = new DNSResponseParser(); + @Test public void testParse() throws DNSParseException, IOException { verifyResults("20110328212258\nfarm6.static.flickr.a06.yahoodns.net.\t300\tIN\tA\t98.136.170.121\n", "20110328212258",new String[][] {{"farm6.static.flickr.a06.yahoodns.net.","300","IN","A","98.136.170.121"}}); diff --git a/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java b/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java index 2eec46ec..6f218ebb 100644 --- a/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java +++ b/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java @@ -9,9 +9,6 @@ import 
org.archive.util.ByteOp; import org.archive.util.IAUtils; import org.archive.util.TestUtils; -import org.archive.format.gzip.GZIPFormatException; -import org.archive.format.gzip.GZIPMemberSeries; -import org.archive.format.gzip.GZIPSeriesMember; import org.archive.streamcontext.ByteArrayWrappedStream; import org.archive.streamcontext.SimpleStream; import org.archive.streamcontext.Stream; @@ -19,10 +16,13 @@ import com.google.common.io.ByteStreams; import com.google.common.primitives.Bytes; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class GZIPMemberSeriesTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; +public class GZIPMemberSeriesTest { + + @Test public void testSingle() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); @@ -38,6 +38,7 @@ public void testSingle() throws IndexOutOfBoundsException, FileNotFoundException assertNull(s.getNextMember()); } + @Test public void testSingleEmpty() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("empty.gz"); @@ -59,6 +60,7 @@ public void testSingleEmpty() throws IndexOutOfBoundsException, FileNotFoundExce assertTrue(s.gotEOF()); } + @Test public void testDouble() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); @@ -81,14 +83,14 @@ public void testDouble() throws IndexOutOfBoundsException, FileNotFoundException assertNull(s.getNextMember()); } - + @Test public void testSingleCRCStrict() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); byte abcd[] = ByteStreams.toByteArray(is); byte oldb = abcd[abcd.length-1]; abcd[abcd.length-1] = (byte) (abcd[abcd.length-1] + 1); - assertFalse(oldb == abcd[abcd.length-1]); + assertNotEquals(oldb, abcd[abcd.length - 1]); 
ByteArrayInputStream bais = new ByteArrayInputStream(abcd); Stream stream = new SimpleStream(bais); @@ -117,14 +119,15 @@ public void testSingleCRCStrict() throws IndexOutOfBoundsException, FileNotFound } assertNotNull(e); } - + + @Test public void testSingleCRCLAX() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); byte abcd[] = ByteStreams.toByteArray(is); byte oldb = abcd[abcd.length-1]; abcd[abcd.length-1] = (byte) (abcd[abcd.length-1] + 1); - assertFalse(oldb == abcd[abcd.length-1]); + assertNotEquals(oldb, abcd[abcd.length - 1]); ByteArrayInputStream bais = new ByteArrayInputStream(abcd); Stream stream = new SimpleStream(bais); @@ -154,7 +157,8 @@ public void testSingleCRCLAX() throws IndexOutOfBoundsException, FileNotFoundExc assertNull(e); assertNull(s.getNextMember()); } - + + @Test public void testDoubleCRC1LAX() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); @@ -162,7 +166,7 @@ public void testDoubleCRC1LAX() throws IndexOutOfBoundsException, FileNotFoundEx byte abcdorig[] = ByteOp.copy(abcd); byte oldb = abcd[abcd.length-1]; abcd[abcd.length-1] = (byte) (abcd[abcd.length-1] + 1); - assertFalse(oldb == abcd[abcd.length-1]); + assertNotEquals(oldb, abcd[abcd.length - 1]); byte both[] = Bytes.concat(abcd,abcdorig); @@ -195,7 +199,8 @@ public void testDoubleCRC1LAX() throws IndexOutOfBoundsException, FileNotFoundEx assertNotNull(m); TestUtils.assertStreamEquals(m,"abcd".getBytes(IAUtils.UTF8)); } - + + @Test public void testSingleDeflateError() throws IndexOutOfBoundsException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); @@ -240,7 +245,7 @@ public void testSingleDeflateError() throws IndexOutOfBoundsException, IOExcepti assertNull(m); } - + @Test public void testDoubleDeflateError() throws IndexOutOfBoundsException, IOException { InputStream is = 
getClass().getResourceAsStream("abcd.gz"); @@ -290,7 +295,8 @@ public void testDoubleDeflateError() throws IndexOutOfBoundsException, IOExcepti assertFalse(s.gotIOError()); } - + + @Test public void testDoubleBiggerDeflateErrOnFirst() throws IOException { String resource = "double-single-inflate-error.gz"; InputStream is = getClass().getResourceAsStream(resource); @@ -333,7 +339,8 @@ public void testDoubleBiggerDeflateErrOnFirst() throws IOException { } - + + @Test public void testAutoSkip() throws IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); byte abcd[] = ByteStreams.toByteArray(is); @@ -375,6 +382,7 @@ public void testAutoSkip() throws IOException { assertTrue(s.gotEOF()); } + @Test public void testWgetProblem() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("IAH-urls-wget.warc.gz"); new GZIPDecoder().parseHeader(is); diff --git a/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java b/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java index 483d2baf..45bc18e4 100644 --- a/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java +++ b/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java @@ -7,10 +7,11 @@ import org.archive.util.IAUtils; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class GZIPMemberWriterTest extends TestCase { +public class GZIPMemberWriterTest { + @Test public void testWrite() throws IOException { File outFile = File.createTempFile("tmp", ".gz"); GZIPMemberWriter gzw = new GZIPMemberWriter(new FileOutputStream(outFile)); diff --git a/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java b/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java index cfadbd79..25a5eaa7 100644 --- a/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java +++ b/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java @@ -10,19 +10,21 @@ import 
java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import org.archive.format.gzip.GZIPMemberSeries; import org.archive.format.gzip.GZIPSeriesMember; import org.archive.streamcontext.SimpleStream; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class ZipNumWriterTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class ZipNumWriterTest { + + @Test public void testAddRecord() throws IOException { - Charset UTF8 = Charset.forName("UTF-8"); - File main = File.createTempFile("test-znw",".main"); + File main = File.createTempFile("test-znw",".main"); File summ = File.createTempFile("test-znw",".summ"); main.deleteOnExit(); summ.deleteOnExit(); @@ -31,11 +33,11 @@ public void testAddRecord() throws IOException { ZipNumWriter znw = new ZipNumWriter(new FileOutputStream(main,false), new FileOutputStream(summ,false), limit); for(int i = 0; i < 1000; i++) { - znw.addRecord(String.format("%06d\n",i).getBytes(UTF8)); + znw.addRecord(String.format("%06d\n",i).getBytes(StandardCharsets.UTF_8)); } znw.close(); InputStreamReader isr = - new InputStreamReader(new FileInputStream(summ),UTF8); + new InputStreamReader(new FileInputStream(summ), StandardCharsets.UTF_8); BufferedReader br = new BufferedReader(isr); String line = null; int count = 0; diff --git a/src/test/java/org/archive/format/http/HttpRequestMessageParserTest.java b/src/test/java/org/archive/format/http/HttpRequestMessageParserTest.java index 50df9dde..9a5d69af 100644 --- a/src/test/java/org/archive/format/http/HttpRequestMessageParserTest.java +++ b/src/test/java/org/archive/format/http/HttpRequestMessageParserTest.java @@ -3,16 +3,16 @@ import java.io.ByteArrayInputStream; import java.io.IOException; -import org.archive.format.http.HttpConstants; -import org.archive.format.http.HttpParseException; -import 
org.archive.format.http.HttpRequestMessage; -import org.archive.format.http.HttpRequestMessageParser; import org.archive.util.IAUtils; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class HttpRequestMessageParserTest extends TestCase implements HttpConstants { +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class HttpRequestMessageParserTest implements HttpConstants { HttpRequestMessageParser parser = new HttpRequestMessageParser(); + + @Test public void testParse() throws IOException { assertParse("GET / HTTP/1.0\r\n", METHOD_GET, "/", VERSION_0); assertParse("GET / HTTP/1.1\r\n", METHOD_GET, "/", VERSION_1); diff --git a/src/test/java/org/archive/format/http/HttpResponseParserTest.java b/src/test/java/org/archive/format/http/HttpResponseParserTest.java index ea076a69..631d67c7 100644 --- a/src/test/java/org/archive/format/http/HttpResponseParserTest.java +++ b/src/test/java/org/archive/format/http/HttpResponseParserTest.java @@ -5,16 +5,14 @@ import org.archive.util.IAUtils; import org.archive.util.TestUtils; -import org.archive.format.http.HttpHeader; -import org.archive.format.http.HttpHeaders; -import org.archive.format.http.HttpParseException; -import org.archive.format.http.HttpResponse; -import org.archive.format.http.HttpResponseParser; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class HttpResponseParserTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; +public class HttpResponseParserTest { + + @Test public void testParse() throws IOException { HttpResponseParser parser = new HttpResponseParser(); @@ -38,6 +36,7 @@ public void testParse() throws IOException { } + @Test public void testParseWithLf() throws IOException { HttpResponseParser parser = new HttpResponseParser(); @@ -57,6 +56,7 @@ public void testParseWithLf() throws IOException { } + @Test public void testParseEmptyHeaderField() throws IOException { HttpResponseParser parser = new 
HttpResponseParser(); diff --git a/src/test/java/org/archive/format/json/CompoundORJSONPathSpecTest.java b/src/test/java/org/archive/format/json/CompoundORJSONPathSpecTest.java index 57c21965..ef8c2fa0 100644 --- a/src/test/java/org/archive/format/json/CompoundORJSONPathSpecTest.java +++ b/src/test/java/org/archive/format/json/CompoundORJSONPathSpecTest.java @@ -6,11 +6,12 @@ import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class CompoundORJSONPathSpecTest extends TestCase { +public class CompoundORJSONPathSpecTest { String json1S = "{\"a\":\"A\"}"; String json2S = "{\"b\":\"B\"}"; + @Test public void testExtract() throws JSONException { JSONObject json1 = new JSONObject(json1S); JSONObject json2 = new JSONObject(json2S); diff --git a/src/test/java/org/archive/format/json/JSONPathSpecFactoryTest.java b/src/test/java/org/archive/format/json/JSONPathSpecFactoryTest.java index ab999dca..257cb112 100644 --- a/src/test/java/org/archive/format/json/JSONPathSpecFactoryTest.java +++ b/src/test/java/org/archive/format/json/JSONPathSpecFactoryTest.java @@ -4,9 +4,9 @@ import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class JSONPathSpecFactoryTest extends TestCase { +public class JSONPathSpecFactoryTest { String json1S = "{\"a\":\"A\"}"; String json2S = "{\"b\":\"B\"}"; @@ -14,6 +14,7 @@ public class JSONPathSpecFactoryTest extends TestCase { String json4S = "{\"b\":[{\"x\":\"x1\", \"y\":\"y1\"},{\"x\":\"x2\", \"y\":\"y2\"}]}"; + @Test public void testGet() throws JSONException { JSONObject json1 = new JSONObject(json1S); JSONObject json2 = new JSONObject(json2S); diff --git a/src/test/java/org/archive/format/json/JSONViewTest.java b/src/test/java/org/archive/format/json/JSONViewTest.java index 20bd4fe6..aabbe7df 100644 --- a/src/test/java/org/archive/format/json/JSONViewTest.java +++ 
b/src/test/java/org/archive/format/json/JSONViewTest.java @@ -4,14 +4,15 @@ import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class JSONViewTest extends TestCase { +public class JSONViewTest { public int getInt(byte b[]) { return b[0] & 0xff; } - + + @Test public void testBytes() throws JSONException { JSONObject o = new JSONObject(); o.append("name1", "val\\rue1"); @@ -28,6 +29,8 @@ public void testBytes() throws JSONException { System.out.format("I(%d) gi(%d)\n",i,gi); } } + + @Test public void testApply() throws JSONException { String json1S = "{\"url\":\"a\",\"link\":[{\"zz\":\"1\",\"qq\":\"qa\"},{\"zz2\":\"2\",\"qq\":\"qb\"},{\"zz\":\"3\",\"qq\":\"qc\"},{\"zz\":\"4\"}]}"; JSONObject json1 = new JSONObject(json1S); diff --git a/src/test/java/org/archive/format/json/SimpleJSONPathSpecTest.java b/src/test/java/org/archive/format/json/SimpleJSONPathSpecTest.java index a703b49a..640a5a80 100644 --- a/src/test/java/org/archive/format/json/SimpleJSONPathSpecTest.java +++ b/src/test/java/org/archive/format/json/SimpleJSONPathSpecTest.java @@ -4,15 +4,16 @@ import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class SimpleJSONPathSpecTest extends TestCase { +public class SimpleJSONPathSpecTest { String json1 = "{\"a\": { \"b\": \"Foo\" }}"; String json2 = "{\"a\": { \"b\": [{\"a\":\"1\"},{\"a\":\"2\"}] }}"; String json3 = "{\"a\": { \"b\": {\"A\":\"11\",\"B\":\"22\"} }}"; String json4 = "{\"a\": { \"b\": [{\"A\":\"11\",\"B\":\"22\"},{\"A\":\"33\",\"B\":\"44\"}] }}"; + @Test public void testExtract() throws JSONException { JSONObject json = new JSONObject(json1); JSONPathSpec spec = new SimpleJSONPathSpec("a.b"); diff --git a/src/test/java/org/archive/format/text/html/CDATALexerTest.java b/src/test/java/org/archive/format/text/html/CDATALexerTest.java index 481a3eda..856576ba 100644 --- 
a/src/test/java/org/archive/format/text/html/CDATALexerTest.java +++ b/src/test/java/org/archive/format/text/html/CDATALexerTest.java @@ -1,17 +1,16 @@ package org.archive.format.text.html; -import org.archive.format.text.html.CDATALexer; -import org.archive.format.text.html.NodeUtils; import org.htmlparser.Node; import org.htmlparser.lexer.Page; -//import org.htmlparser.nodes.RemarkNode; import org.htmlparser.nodes.TagNode; import org.htmlparser.nodes.TextNode; import org.htmlparser.util.ParserException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class CDATALexerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; + +public class CDATALexerTest { CDATALexer l; Node n; private CDATALexer makeLexer(String html) { @@ -19,7 +18,8 @@ private CDATALexer makeLexer(String html) { t.setPage(new Page(html)); return t; } - + + @Test public void testNextNode() throws ParserException { l = makeLexer("blem"); n = l.nextNode(); @@ -35,6 +35,7 @@ public void testNextNode() throws ParserException { assertNull(l.nextNode()); } + @Test public void testInJS() throws ParserException { l = makeLexer(""); assertFalse(l.inCSS()); @@ -54,6 +55,7 @@ public void testInJS() throws ParserException { assertTrue(NodeUtils.isCloseTagNodeNamed(n, "SCRIPT")); } + @Test public void testInCSS() throws ParserException { l = makeLexer(""); assertFalse(l.inCSS()); diff --git a/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java b/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java index 2313868c..f7ad75d2 100644 --- a/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java +++ b/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java @@ -21,29 +21,34 @@ import java.io.File; import java.io.IOException; -import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; import org.apache.commons.lang.StringUtils; -import org.archive.io.ArchiveRecord; import org.archive.io.arc.ARCWriterTest; -import 
org.archive.util.TmpDirTestCase; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class ArchiveReaderFactoryTest { + @TempDir + File tempDir; -public class ArchiveReaderFactoryTest extends TmpDirTestCase { /** * Test local file as URL * @throws IOException */ + @Test public void testGetFileURL() throws IOException { - File arc = ARCWriterTest.createARCFile(getTmpDir(), true); + File arc = ARCWriterTest.createARCFile(tempDir, true); ArchiveReader reader = null; try { reader = ArchiveReaderFactory. get(new URL("file:////" + arc.getAbsolutePath())); for (Iterator i = reader.iterator(); i.hasNext();) { ArchiveRecord r = (ArchiveRecord)i.next(); - assertTrue("mime unread",StringUtils.isNotBlank(r.getHeader().getMimetype())); + assertTrue(StringUtils.isNotBlank(r.getHeader().getMimetype()),"mime unread"); } } finally { if (reader != null) { @@ -56,14 +61,15 @@ public void testGetFileURL() throws IOException { * Test local file as File * @throws IOException */ + @Test public void testGetFile() throws IOException { - File arc = ARCWriterTest.createARCFile(getTmpDir(), true); + File arc = ARCWriterTest.createARCFile(tempDir, true); ArchiveReader reader = null; try { reader = ArchiveReaderFactory.get(arc.getAbsoluteFile()); for (Iterator i = reader.iterator(); i.hasNext();) { ArchiveRecord r = (ArchiveRecord)i.next(); - assertTrue("mime unread",StringUtils.isNotBlank(r.getHeader().getMimetype())); + assertTrue(StringUtils.isNotBlank(r.getHeader().getMimetype()),"mime unread"); } } finally { if (reader != null) { @@ -76,14 +82,15 @@ public void testGetFile() throws IOException { * Test local file as String path * @throws IOException */ + @Test public void testGetPath() throws IOException { - File arc = ARCWriterTest.createARCFile(getTmpDir(), true); + File arc = ARCWriterTest.createARCFile(tempDir, true); ArchiveReader reader = null; try { reader = 
ArchiveReaderFactory.get(arc.getAbsoluteFile().getAbsolutePath()); for (Iterator i = reader.iterator(); i.hasNext();) { ArchiveRecord r = (ArchiveRecord)i.next(); - assertTrue("mime unread",StringUtils.isNotBlank(r.getHeader().getMimetype())); + assertTrue(StringUtils.isNotBlank(r.getHeader().getMimetype()),"mime unread"); } } finally { if (reader != null) { diff --git a/src/test/java/org/archive/io/BufferedSeekInputStreamTest.java b/src/test/java/org/archive/io/BufferedSeekInputStreamTest.java index 270e45e0..f7e8e0b2 100644 --- a/src/test/java/org/archive/io/BufferedSeekInputStreamTest.java +++ b/src/test/java/org/archive/io/BufferedSeekInputStreamTest.java @@ -18,9 +18,11 @@ */ package org.archive.io; +import org.junit.jupiter.api.Test; + import java.util.Random; -import junit.framework.TestCase; +import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -29,11 +31,12 @@ * * @author pjack */ -public class BufferedSeekInputStreamTest extends TestCase { +public class BufferedSeekInputStreamTest { private static byte[] TEST_DATA = makeTestData(); - + + @Test public void testPosition() throws Exception { Random random = new Random(); ArraySeekInputStream asis = new ArraySeekInputStream(TEST_DATA); diff --git a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java index 9f7e2a15..7988cb2b 100644 --- a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java +++ b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java @@ -26,13 +26,15 @@ import java.util.Map; import java.util.Set; -import junit.framework.TestCase; - import org.apache.commons.httpclient.Header; import org.archive.io.arc.ARCRecord; import org.archive.io.warc.WARCRecord; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; -public class HeaderedArchiveRecordTest extends TestCase { +public class 
HeaderedArchiveRecordTest { private static final String HTTPHEADER = "HTTP/1.1 200 OK\r\n" + "Last-Modified: Sun, 28 Aug 2005 14:10:55 GMT\r\n" + "Content-Length: 108\r\n" + "Connection: close\r\n" @@ -41,6 +43,7 @@ public class HeaderedArchiveRecordTest extends TestCase { + " Neue Seite 1\r\n" + " \r\n" + " \r\n" + " \r\n" + ""; + @Test public void testParseHttpHeadersInWARC() throws IOException { final String url = "http://foo.maths.uq.edu.au/index.html"; // final String warcHeader = "WARC/0.10 000000000486 response " + @@ -76,8 +79,8 @@ public void testParseHttpHeadersInWARC() throws IOException { String bodyRead = new String(b); assertEquals(BODY, bodyRead); assertHeaderCorrectlyParsed(har.getContentHeaders()); - assertEquals("failed to retrieve Url from metadata", har.getHeader() - .getUrl(), url); + assertEquals(har.getHeader().getUrl(), url, + "failed to retrieve Url from metadata"); } public void testParseHttpHeadersInARC() throws IOException { @@ -165,6 +168,7 @@ public String getVersion() { assertHeaderCorrectlyParsed(har.getContentHeaders()); } + @Test public void testEasierParseHttpHeadersInARC() throws IOException { final String url = "http://www.archive.org/index.htm"; final String arcHeader = url @@ -181,14 +185,13 @@ public void testEasierParseHttpHeadersInARC() throws IOException { String bodyRead = new String(b); assertEquals(BODY, bodyRead); assertHeaderCorrectlyParsed(har.getContentHeaders()); - assertEquals("failed to retrieve Url from metadata", har.getHeader() - .getUrl(), url); + assertEquals(har.getHeader().getUrl(), url, "failed to retrieve Url from metadata"); } private void assertHeaderCorrectlyParsed(Header[] headers) { final List orgHeaders = Arrays.asList(HTTPHEADER.split("\r\n")); - assertEquals("not all HTTP header entries have been retrieved", - orgHeaders.size(), headers.length + 1); + assertEquals(orgHeaders.size(), headers.length + 1, + "not all HTTP header entries have been retrieved"); for (Header header : headers) { 
assertTrue(orgHeaders.contains(header.getName() + ": " @@ -196,6 +199,7 @@ private void assertHeaderCorrectlyParsed(Header[] headers) { } } + @Test public void testNoheaderWARC() throws IOException { String b = "hello world"; String c = "WARC/0.12\r\nContent-Type: text/plain\r\n" diff --git a/src/test/java/org/archive/io/RecordingInputStreamTest.java b/src/test/java/org/archive/io/RecordingInputStreamTest.java index 20a8b8b3..9ddc7457 100644 --- a/src/test/java/org/archive/io/RecordingInputStreamTest.java +++ b/src/test/java/org/archive/io/RecordingInputStreamTest.java @@ -25,7 +25,11 @@ import java.io.PipedInputStream; import java.io.PipedOutputStream; -import org.archive.util.TmpDirTestCase; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -33,17 +37,10 @@ * * @author gojomo */ -public class RecordingInputStreamTest extends TmpDirTestCase -{ - +public class RecordingInputStreamTest { + @TempDir + File tempDir; - /* - * @see TmpDirTestCase#setUp() - */ - protected void setUp() throws Exception - { - super.setUp(); - } /** * Test readFullyOrUntil soft (no exception) and hard (exception) @@ -53,10 +50,11 @@ protected void setUp() throws Exception * @throws InterruptedException * @throws RecorderTimeoutException */ + @Test public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException, InterruptedException { RecordingInputStream ris = new RecordingInputStream(16384, (new File( - getTmpDir(), "testReadFullyOrUntil").getAbsolutePath())); + tempDir, "testReadFullyOrUntil").getAbsolutePath())); ByteArrayInputStream bais = new ByteArrayInputStream( "abcdefghijklmnopqrstuvwxyz".getBytes()); // test soft max @@ -67,7 +65,7 @@ public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException, ReplayInputStream res = ris.getReplayInputStream(); ByteArrayOutputStream baos = new 
ByteArrayOutputStream(); res.readFullyTo(baos); - assertEquals("soft max cutoff","abcdefg",new String(baos.toByteArray())); + assertEquals("abcdefg",new String(baos.toByteArray()),"soft max cutoff"); // test hard max bais.reset(); baos.reset(); @@ -79,25 +77,26 @@ public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException, } catch (RecorderLengthExceededException ex) { exceptionThrown = true; } - assertTrue("hard max exception",exceptionThrown); + assertTrue(exceptionThrown,"hard max exception"); ris.close(); res = ris.getReplayInputStream(); res.readFullyTo(baos); - assertEquals("hard max cutoff","abcdefghijk", - new String(baos.toByteArray())); + assertEquals("abcdefghijk",new String(baos.toByteArray()), + "hard max cutoff"); // test timeout PipedInputStream pin = new PipedInputStream(); PipedOutputStream pout = new PipedOutputStream(pin); ris.open(pin); exceptionThrown = false; trickle("abcdefghijklmnopqrstuvwxyz".getBytes(),pout); + int timeout = 200; try { - ris.setLimits(0,5000,0); + ris.setLimits(0, timeout,0); ris.readFullyOrUntil(0); } catch (RecorderTimeoutException ex) { exceptionThrown = true; } - assertTrue("timeout exception",exceptionThrown); + assertTrue(exceptionThrown,"timeout exception"); ris.close(); // test rate limit bais = new ByteArrayInputStream(new byte[1024*2*5]); @@ -107,7 +106,7 @@ public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException, ris.readFullyOrUntil(0); long endTime = System.currentTimeMillis(); long duration = endTime - startTime; - assertTrue("read too fast: "+duration,duration>=5000); + assertTrue(duration>= timeout,"read too fast: "+duration); ris.close(); } @@ -116,7 +115,7 @@ protected void trickle(final byte[] bytes, final PipedOutputStream pout) { public void run() { try { for (int i = 0; i < bytes.length; i++) { - Thread.sleep(1000); + Thread.sleep(200); pout.write(bytes[i]); } pout.close(); diff --git a/src/test/java/org/archive/io/RecordingOutputStreamTest.java 
b/src/test/java/org/archive/io/RecordingOutputStreamTest.java index f697ff31..c94f8245 100644 --- a/src/test/java/org/archive/io/RecordingOutputStreamTest.java +++ b/src/test/java/org/archive/io/RecordingOutputStreamTest.java @@ -25,7 +25,11 @@ import java.io.IOException; import org.archive.util.Base32; -import org.archive.util.TmpDirTestCase; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; /** @@ -33,8 +37,7 @@ * * @author stack */ -public class RecordingOutputStreamTest extends TmpDirTestCase -{ +public class RecordingOutputStreamTest { /** * Size of buffer used in tests. */ @@ -45,14 +48,9 @@ public class RecordingOutputStreamTest extends TmpDirTestCase */ private static final int WRITE_TOTAL = 10; + @TempDir + File tempDir; - /* - * @see TmpDirTestCase#setUp() - */ - protected void setUp() throws Exception - { - super.setUp(); - } /** * Test reusing instance of RecordingOutputStream. @@ -60,13 +58,13 @@ protected void setUp() throws Exception * @throws IOException Failed open of backing file or opening of * input streams verifying recording. */ + @Test public void testReuse() throws IOException { final String BASENAME = "testReuse"; - cleanUpOldFiles(BASENAME); RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, - (new File(getTmpDir(), BASENAME + "Bkg.txt")).getAbsolutePath()); + (new File(tempDir, BASENAME + "Bkg.txt")).getAbsolutePath()); for (int i = 0; i < 3; i++) { reuse(BASENAME, ros, i); @@ -92,13 +90,13 @@ private void reuse(String baseName, RecordingOutputStream ros, int index) * @throws IOException Failed open of backing file or opening of * input streams verifying recording. 
*/ + @Test public void testWriteint() throws IOException { final String BASENAME = "testWriteint"; - cleanUpOldFiles(BASENAME); RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, - (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + (new File(tempDir, BASENAME + "Backing.txt")).getAbsolutePath()); File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL); verifyRecording(ros, f, WRITE_TOTAL); // Do again to test that I can get a new ReplayInputStream on same @@ -114,13 +112,13 @@ public void testWriteint() * @throws IOException Failed open of backing file or opening of * input streams verifying recording. */ + @Test public void testWritebytearray() throws IOException { final String BASENAME = "testWritebytearray"; - cleanUpOldFiles(BASENAME); RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, - (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + (new File(tempDir, BASENAME + "Backing.txt")).getAbsolutePath()); File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL); verifyRecording(ros, f, WRITE_TOTAL); // Do again to test that I can get a new ReplayInputStream on same @@ -132,12 +130,12 @@ public void testWritebytearray() * Test mark and reset. * @throws IOException */ + @Test public void testMarkReset() throws IOException { final String BASENAME = "testMarkReset"; - cleanUpOldFiles(BASENAME); RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, - (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + (new File(tempDir, BASENAME + "Backing.txt")).getAbsolutePath()); File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL); verifyRecording(ros, f, WRITE_TOTAL); ReplayInputStream ris = ros.getReplayInputStream(); @@ -148,15 +146,15 @@ public void testMarkReset() throws IOException ris.read(); // Reset it. It should be back at zero. 
ris.reset(); - assertEquals("Reset to zero", ris.read(), 0); - assertEquals("Reset to zero char 1", ris.read(), 1); - assertEquals("Reset to zero char 2", ris.read(), 2); + assertEquals(0, ris.read(), "Reset to zero"); + assertEquals(1, ris.read(), "Reset to zero char 1"); + assertEquals(2, ris.read(), "Reset to zero char 2"); // Mark stream. Here. Next character should be '3'. ris.mark(10 /* Arbitrary value*/); ris.read(); ris.read(); ris.reset(); - assertEquals("Reset to zero char 3", ris.read(), 3); + assertEquals(3, ris.read(), "Reset to zero char 3"); } /** @@ -179,7 +177,7 @@ private File writeIntRecordedFile(RecordingOutputStream ros, String basename, int size) throws IOException { - File f = new File(getTmpDir(), basename + ".txt"); + File f = new File(tempDir, basename + ".txt"); FileOutputStream fos = new FileOutputStream(f); ros.open(fos); for (int i = 0; i < WRITE_TOTAL; i++) @@ -188,8 +186,8 @@ private File writeIntRecordedFile(RecordingOutputStream ros, } ros.close(); fos.close(); - assertEquals("Content-Length test", size, - ros.getResponseContentLength()); + assertEquals(size, ros.getResponseContentLength(), + "Content-Length test"); return f; } @@ -213,7 +211,7 @@ private File writeByteRecordedFile(RecordingOutputStream ros, String basename, int size) throws IOException { - File f = new File(getTmpDir(), basename + ".txt"); + File f = new File(tempDir, basename + ".txt"); FileOutputStream fos = new FileOutputStream(f); ros.open(fos); byte [] b = new byte[size]; @@ -224,8 +222,8 @@ private File writeByteRecordedFile(RecordingOutputStream ros, ros.write(b); ros.close(); fos.close(); - assertEquals("Content-Length test", size, - ros.getResponseContentLength()); + assertEquals(size, ros.getResponseContentLength(), + "Content-Length test"); return f; } @@ -243,28 +241,28 @@ private File writeByteRecordedFile(RecordingOutputStream ros, private void verifyRecording(RecordingOutputStream ros, File f, int size) throws IOException { - assertEquals("Recorded 
file size.", size, f.length()); + assertEquals(size, f.length(), "Recorded file size."); FileInputStream fis = new FileInputStream(f); - assertNotNull("FileInputStream not null", fis); + assertNotNull(fis, "FileInputStream not null"); ReplayInputStream ris = ros.getReplayInputStream(); - assertNotNull("ReplayInputStream not null", ris); + assertNotNull(ris, "ReplayInputStream not null"); for (int i = 0; i < size; i++) { - assertEquals("ReplayInputStream content verification", i, - ris.read()); - assertEquals("Recorded file content verification", i, - fis.read()); + assertEquals(i, ris.read(), + "ReplayInputStream content verification"); + assertEquals(i, fis.read(), + "Recorded file content verification"); } - assertEquals("ReplayInputStream at EOF", -1, ris.read()); + assertEquals(-1, ris.read(), "ReplayInputStream at EOF"); fis.close(); ris.close(); } + @Test public void testMessageBodyBegin() throws IOException { final String BASENAME = "testMessageBodyBegin"; - cleanUpOldFiles(BASENAME); RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, - (new File(getTmpDir(), BASENAME + "Backing.txt")).getAbsolutePath()); + (new File(tempDir, BASENAME + "Backing.txt")).getAbsolutePath()); ros.setSha1Digest(); ros.open(new ByteArrayOutputStream()); diff --git a/src/test/java/org/archive/io/ReplayCharSequenceTest.java b/src/test/java/org/archive/io/ReplayCharSequenceTest.java index 9208594a..3234259c 100644 --- a/src/test/java/org/archive/io/ReplayCharSequenceTest.java +++ b/src/test/java/org/archive/io/ReplayCharSequenceTest.java @@ -19,17 +19,24 @@ package org.archive.io; +import java.io.File; import java.io.IOException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.text.NumberFormat; import java.util.Date; import java.util.Random; import java.util.logging.Logger; import org.archive.util.FileUtils; -import org.archive.util.TmpDirTestCase; import com.google.common.base.Charsets; +import 
org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.*; /** * Test ReplayCharSequences. @@ -37,8 +44,7 @@ * @author stack, gojomo * @version $Revision$, $Date$ */ -public class ReplayCharSequenceTest extends TmpDirTestCase -{ +public class ReplayCharSequenceTest { /** * Logger. */ @@ -56,16 +62,17 @@ public class ReplayCharSequenceTest extends TmpDirTestCase */ private byte [] regularBuffer = null; - /* - * @see TestCase#setUp() - */ + @TempDir + File tempDir; + + @BeforeEach protected void setUp() throws Exception { - super.setUp(); this.regularBuffer = fillBufferWithRegularContent(new byte [BUFFER_SIZE]); } - + + @Test public void testShiftjis() throws IOException { // Here's the bytes for the JIS encoding of the Japanese form of Nihongo @@ -86,19 +93,18 @@ public void testShiftjis() throws IOException { // Now check that start of the rcs comes back in as nihongo string. String rcsStr = rcs.subSequence(0, nihongo.length()).toString(); - assertTrue("Nihongo " + nihongo + " does not equal converted string" + - " from rcs " + rcsStr, - nihongo.equals(rcsStr)); + assertEquals(nihongo, rcsStr, "Nihongo " + nihongo + " does not equal converted string" + + " from rcs " + rcsStr); // And assert next string is also properly nihongo. 
if (rcs.length() >= (nihongo.length() * 2)) { rcsStr = rcs.subSequence(nihongo.length(), nihongo.length() + nihongo.length()).toString(); - assertTrue("Nihongo " + nihongo + " does not equal converted " + - " string from rcs (2nd time)" + rcsStr, - nihongo.equals(rcsStr)); + assertEquals(nihongo, rcsStr, "Nihongo " + nihongo + " does not equal converted " + + " string from rcs (2nd time)" + rcsStr); } } + @Test public void testGetReplayCharSequenceByteZeroOffset() throws IOException { RecordingOutputStream ros = writeTestStream( @@ -120,7 +126,7 @@ private ReplayCharSequence getReplayCharSequence(RecordingOutputStream ros, Char ros.getBufferLength()/2, ros.backingFilename, charset); } - + @Test public void testGetReplayCharSequenceMultiByteZeroOffset() throws IOException { @@ -133,7 +139,8 @@ public void testGetReplayCharSequenceMultiByteZeroOffset() accessingCharacters(rcs); } } - + + @Test public void testReplayCharSequenceByteToString() throws IOException { String fileContent = "Some file content"; byte [] buffer = fileContent.getBytes(); @@ -142,7 +149,7 @@ public void testReplayCharSequenceByteToString() throws IOException { "testReplayCharSequenceByteToString.txt",0); ReplayCharSequence rcs = getReplayCharSequence(ros); String result = rcs.toString(); - assertEquals("Strings don't match",result,fileContent); + assertEquals(fileContent, result,"Strings don't match"); } private String toHexString(String str) @@ -160,7 +167,8 @@ private String toHexString(String str) else return "null"; } - + + @Test public void testSingleByteEncodings() throws IOException { byte[] bytes = { (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, @@ -175,7 +183,7 @@ public void testSingleByteEncodings() throws IOException { String result = rcs.toString(); logger.fine("latin1[0] " + toHexString(latin1String)); logger.fine("latin1[1] " + toHexString(result)); - assertEquals("latin1 strings don't match", result, latin1String); + assertEquals(result, latin1String, "latin1 strings don't 
match"); String w1252String = new String(bytes, "windows-1252"); ros = writeTestStream( @@ -184,18 +192,19 @@ public void testSingleByteEncodings() throws IOException { result = rcs.toString(); logger.fine("windows-1252[0] " + toHexString(w1252String)); logger.fine("windows-1252[1] " + toHexString(result)); - assertEquals("windows-1252 strings don't match", result, w1252String); + assertEquals(result, w1252String, "windows-1252 strings don't match"); - String asciiString = new String(bytes, "ascii"); + String asciiString = new String(bytes, StandardCharsets.US_ASCII); ros = writeTestStream( bytes, 1, "testSingleByteEncodings-ascii.txt", 0); - rcs = getReplayCharSequence(ros,Charset.forName("ascii")); + rcs = getReplayCharSequence(ros, StandardCharsets.US_ASCII); result = rcs.toString(); logger.fine("ascii[0] " + toHexString(asciiString)); logger.fine("ascii[1] " + toHexString(result)); - assertEquals("ascii strings don't match", result, asciiString); + assertEquals(result, asciiString, "ascii strings don't match"); } - + + @Test public void testReplayCharSequenceByteToStringOverflow() throws IOException { String fileContent = "Some file content. 
"; // ascii byte [] buffer = fileContent.getBytes(); @@ -212,15 +221,16 @@ public void testReplayCharSequenceByteToStringOverflow() throws IOException { ReplayCharSequence rcs1252 = getReplayCharSequence(ros,Charset.forName("windows-1252")); String result = rcsUtf8.toString(); - assertEquals("Strings don't match", expectedContent, result); + assertEquals(expectedContent, result, "Strings don't match"); result = rcs1252.toString(); - assertEquals("Strings don't match", expectedContent, result); + assertEquals(expectedContent, result, "Strings don't match"); } - + + @Test public void testReplayCharSequenceByteToStringMulti() throws IOException { String fileContent = "Some file content"; - byte [] buffer = fileContent.getBytes("UTF-8"); + byte [] buffer = fileContent.getBytes(StandardCharsets.UTF_8); final int MULTIPLICAND = 10; StringBuilder sb = new StringBuilder(MULTIPLICAND * fileContent.length()); @@ -232,15 +242,17 @@ public void testReplayCharSequenceByteToStringMulti() throws IOException { buffer,1, "testReplayCharSequenceByteToStringMulti.txt",MULTIPLICAND-1); for (int i = 0; i < 3; i++) { - ReplayCharSequence rcs = getReplayCharSequence(ros,Charsets.UTF_8); + ReplayCharSequence rcs = getReplayCharSequence(ros,StandardCharsets.UTF_8); String result = rcs.toString(); - assertEquals("Strings don't match", result, expectedResult); + assertEquals(result, expectedResult, "Strings don't match"); rcs.close(); System.gc(); System.runFinalization(); } } - + + @Test + @Disabled public void xestHugeReplayCharSequence() throws IOException { String fileContent = "01234567890123456789"; String characterEncoding = "ascii"; @@ -255,14 +267,13 @@ public void xestHugeReplayCharSequence() throws IOException { ReplayCharSequence rcs = getReplayCharSequence(ros,Charset.forName(characterEncoding)); if (reps * fileContent.length() > (long) Integer.MAX_VALUE) { - assertTrue("ReplayCharSequence has wrong length (length()=" - + rcs.length() + ") (should be " + Integer.MAX_VALUE + ")", 
- rcs.length() == Integer.MAX_VALUE); + assertEquals(Integer.MAX_VALUE, rcs.length(), "ReplayCharSequence has wrong length (length()=" + + rcs.length() + ") (should be " + Integer.MAX_VALUE + ")"); } else { - assertEquals("ReplayCharSequence has wrong length (length()=" + assertEquals(rcs.length(), reps * (long) fileContent.length(), + "ReplayCharSequence has wrong length (length()=" + rcs.length() + ") (should be " - + (reps * fileContent.length()) + ")", (long) rcs.length(), - reps * (long) fileContent.length()); + + (reps * fileContent.length()) + ")"); } // boundary cases or something @@ -270,10 +281,9 @@ public void xestHugeReplayCharSequence() throws IOException { rcs.length() - 1, rcs.length() / 4 }) { // logger.info("testing char at index=" + // NumberFormat.getInstance().format(index)); - assertEquals("Characters don't match (index=" - + NumberFormat.getInstance().format(index) + ")", - fileContent.charAt(index % fileContent.length()), rcs - .charAt(index)); + assertEquals(fileContent.charAt(index % fileContent.length()), + rcs.charAt(index), "Characters don't match (index=" + + NumberFormat.getInstance().format(index) + ")"); } // check that out of bounds indices throw exception @@ -295,10 +305,9 @@ public void xestHugeReplayCharSequence() throws IOException { int index = rand.nextInt(rcs.length()); // logger.info(i + ". 
testing char at index=" + // NumberFormat.getInstance().format(index)); - assertEquals("Characters don't match (index=" - + NumberFormat.getInstance().format(index) + ")", - fileContent.charAt(index % fileContent.length()), rcs - .charAt(index)); + assertEquals(fileContent.charAt(index % fileContent.length()), + rcs.charAt(index), "Characters don't match (index=" + + NumberFormat.getInstance().format(index) + ")"); } } @@ -338,8 +347,8 @@ private void accessingCharacters(CharSequence rcs) { */ private void checkCharacter(CharSequence rcs, int i) { int c = rcs.charAt(i); - assertTrue("Character " + Integer.toString(c) + " at offset " + i + - " unexpected.", (c % SEQUENCE_LENGTH) == (i % SEQUENCE_LENGTH)); + assertEquals((c % SEQUENCE_LENGTH), (i % SEQUENCE_LENGTH), "Character " + Integer.toString(c) + " at offset " + i + + " unexpected."); } /** @@ -349,7 +358,7 @@ private void checkCharacter(CharSequence rcs, int i) { */ private RecordingOutputStream writeTestStream(byte[] content, int memReps, String baseName, long fileReps) throws IOException { - String backingFilename = FileUtils.maybeRelative(getTmpDir(),baseName).getAbsolutePath(); + String backingFilename = FileUtils.maybeRelative(tempDir,baseName).getAbsolutePath(); RecordingOutputStream ros = new RecordingOutputStream( content.length * memReps, backingFilename); @@ -383,9 +392,4 @@ private RecordingOutputStream writeTestStream(byte[] content, } return buffer; } - - public void testCheckParameters() - { - // TODO. 
- } } diff --git a/src/test/java/org/archive/io/RepositionableInputStreamTest.java b/src/test/java/org/archive/io/RepositionableInputStreamTest.java index 1c7cc74c..228c9042 100644 --- a/src/test/java/org/archive/io/RepositionableInputStreamTest.java +++ b/src/test/java/org/archive/io/RepositionableInputStreamTest.java @@ -23,23 +23,29 @@ import java.io.FileOutputStream; import java.io.PrintWriter; -import org.archive.util.TmpDirTestCase; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; -public class RepositionableInputStreamTest extends TmpDirTestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class RepositionableInputStreamTest { private File testFile; private static final String LINE = "0123456789abcdefghijklmnopqrstuv"; + @TempDir + File tempDir; + + @BeforeEach protected void setUp() throws Exception { - super.setUp(); - this.testFile = new File(getTmpDir(), this.getClass().getName()); + this.testFile = new File(tempDir, this.getClass().getName()); PrintWriter pw = new PrintWriter(new FileOutputStream(testFile)); for (int i = 0; i < 100; i++) { pw.print(LINE); } pw.close(); } - protected void tearDown() throws Exception { - super.tearDown(); - } + + @Test public void testname() throws Exception { // Make buffer awkward size so we run into buffers spanning issues. 
RepositionableInputStream ris = diff --git a/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java index 0721f795..25d5218e 100644 --- a/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java +++ b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java @@ -2,14 +2,16 @@ import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.InputStream; import java.io.RandomAccessFile; import org.archive.io.ArchiveReader; import org.archive.io.ArchiveRecord; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; /** * @@ -18,7 +20,7 @@ * @author csr@statsbiblioteket.dk (Colin Rosenthal) * */ -public class ARCReaderFactoryTest extends TestCase { +public class ARCReaderFactoryTest { private File testfile1 = new File("src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc"); @@ -27,6 +29,7 @@ public class ARCReaderFactoryTest extends TestCase { * https://github.com/iipc/openwayback/issues/101 * @throws Exception */ + @Test public void testGetResource() throws Exception { this.offsetResourceTest(testfile1, 1515, "http://www.archive.org/robots.txt" ); this.offsetResourceTest(testfile1, 36420, "http://www.archive.org/services/collection-rss.php" ); @@ -43,11 +46,11 @@ private void offsetResourceTest( File testfile, long offset, String uri ) throws ArchiveRecord record = reader.get(); final String url = record.getHeader().getUrl(); - assertEquals("URL of record is not as expected.", uri, url); + assertEquals(uri, url, "URL of record is not as expected."); final long position = record.getPosition(); final long recordLength = record.getHeader().getLength(); - assertTrue("Position " + position + " is after end of record " + recordLength, position <= recordLength); + assertTrue(position <= 
recordLength, "Position " + position + " is after end of record " + recordLength); // Clean up: if( raf != null ) diff --git a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java index f0be6506..07548b4c 100644 --- a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java +++ b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java @@ -21,26 +21,31 @@ import java.io.ByteArrayOutputStream; import java.io.File; +import java.nio.file.Path; import java.util.Arrays; import org.archive.io.WriterPool; import org.archive.io.WriterPoolMember; import org.archive.io.WriterPoolSettings; -import org.archive.util.TmpDirTestCase; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.assertEquals; /** * Test ARCWriterPool */ @SuppressWarnings("deprecation") -public class ARCWriterPoolTest extends TmpDirTestCase { - private static final String PREFIX = "TEST"; - +public class ARCWriterPoolTest { + @TempDir + Path tempDir; + + @Test public void testARCWriterPool() throws Exception { final int MAX_ACTIVE = 3; final int MAX_WAIT_MILLISECONDS = 100; - cleanUpOldFiles(PREFIX); WriterPool pool = new ARCWriterPool(getSettings(true), MAX_ACTIVE, MAX_WAIT_MILLISECONDS); WriterPoolMember [] writers = new WriterPoolMember[MAX_ACTIVE]; @@ -49,7 +54,7 @@ public void testARCWriterPool() baos.write(CONTENT.getBytes()); for (int i = 0; i < MAX_ACTIVE; i++) { writers[i] = pool.borrowFile(); - assertEquals("Number active", i + 1, pool.getNumActive()); + assertEquals(i + 1, pool.getNumActive(), "Number active"); ((ARCWriter)writers[i]).write("http://one.two.three", "no-type", "0.0.0.0", 1234567890, CONTENT.length(), baos); } @@ -60,17 +65,17 @@ public void testARCWriterPool() for (int i = (MAX_ACTIVE - 1); i >= 0; i--) { pool.returnFile(writers[i]); - assertEquals("Number active", i, pool.getNumActive()); - assertEquals("Number idle", MAX_ACTIVE - 
pool.getNumActive(), - pool.getNumIdle()); + assertEquals(i, pool.getNumActive(), "Number active"); + assertEquals(MAX_ACTIVE - pool.getNumActive(), pool.getNumIdle(), + "Number idle"); } pool.close(); } - + + @Test public void testInvalidate() throws Exception { final int MAX_ACTIVE = 3; final int MAX_WAIT_MILLISECONDS = 100; - cleanUpOldFiles(PREFIX); WriterPool pool = new ARCWriterPool(getSettings(true), MAX_ACTIVE, MAX_WAIT_MILLISECONDS); WriterPoolMember [] writers = new WriterPoolMember[MAX_ACTIVE]; @@ -79,7 +84,7 @@ public void testInvalidate() throws Exception { baos.write(CONTENT.getBytes()); for (int i = 0; i < MAX_ACTIVE; i++) { writers[i] = pool.borrowFile(); - assertEquals("Number active", i + 1, pool.getNumActive()); + assertEquals(i + 1, pool.getNumActive(), "Number active"); ((ARCWriter)writers[i]).write("http://one.two.three", "no-type", "0.0.0.0", 1234567890, CONTENT.length(), baos); } @@ -96,23 +101,23 @@ public void testInvalidate() throws Exception { for (int i = 0; i < MAX_ACTIVE; i++) { writers[i] = pool.borrowFile(); - assertEquals("Number active", i + 1, pool.getNumActive()); + assertEquals(i + 1, pool.getNumActive(), "Number active"); ((ARCWriter)writers[i]).write("http://one.two.three", "no-type", "0.0.0.0", 1234567890, CONTENT.length(), baos); } for (int i = (MAX_ACTIVE - 1); i >= 0; i--) { pool.returnFile(writers[i]); - assertEquals("Number active", i, pool.getNumActive()); - assertEquals("Number idle", MAX_ACTIVE - pool.getNumActive(), - pool.getNumIdle()); + assertEquals(i, pool.getNumActive(), "Number active"); + assertEquals(MAX_ACTIVE - pool.getNumActive(), pool.getNumIdle(), + "Number idle"); } pool.close(); } private WriterPoolSettings getSettings(final boolean isCompressed) { - File [] files = {getTmpDir()}; + File [] files = {tempDir.toFile()}; return new WriterPoolSettingsData( - PREFIX, + "TEST", "${prefix}-${timestamp17}-${serialno}-${heritrix.hostname}", ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE, isCompressed, diff --git 
a/src/test/java/org/archive/io/arc/ARCWriterTest.java b/src/test/java/org/archive/io/arc/ARCWriterTest.java index f6e2bf6a..84539391 100644 --- a/src/test/java/org/archive/io/arc/ARCWriterTest.java +++ b/src/test/java/org/archive/io/arc/ARCWriterTest.java @@ -42,9 +42,12 @@ import org.archive.io.WriterPoolMember; import org.archive.io.WriterPoolSettings; import org.archive.util.ArchiveUtils; -import org.archive.util.TmpDirTestCase; import com.google.common.io.Closeables; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.*; /** @@ -55,8 +58,7 @@ * * @author stack */ -public class ARCWriterTest -extends TmpDirTestCase implements ARCConstants { +public class ARCWriterTest implements ARCConstants { /** * Utility class for writing bad ARCs (with trailing junk) */ @@ -90,20 +92,13 @@ public void setEndJunk(byte[] b) throws IOException { private static final AtomicInteger SERIAL_NO = new AtomicInteger(); - /* - * @see TestCase#setUp() - */ - protected void setUp() throws Exception { - super.setUp(); - } + @TempDir + File tempDir; - /* - * @see TestCase#tearDown() - */ - protected void tearDown() throws Exception { - super.tearDown(); + protected static String getContent(int index) { + return getContent(Integer.toString(index)); } - + protected static String getContent() { return getContent(null); } @@ -140,8 +135,7 @@ protected int writeRandomHTTPRecord(ARCWriter arcWriter, int index) private File writeRecords(String baseName, boolean compress, long maxSize, int recordCount) throws IOException { - cleanUpOldFiles(baseName); - File [] files = {getTmpDir()}; + File [] files = {tempDir}; ARCWriter arcWriter = new ARCWriter( SERIAL_NO, @@ -157,9 +151,9 @@ private File writeRecords(String baseName, boolean compress, writeRandomHTTPRecord(arcWriter, i); } arcWriter.close(); - assertTrue("Doesn't exist: " + - arcWriter.getFile().getAbsolutePath(), - arcWriter.getFile().exists()); + 
assertTrue(arcWriter.getFile().exists(), + "Doesn't exist: " + + arcWriter.getFile().getAbsolutePath()); return arcWriter.getFile(); } @@ -183,34 +177,38 @@ private void validate(File arcFile, int recordCount) ARCRecordMetaData meta = (ARCRecordMetaData)metaDatas.get(i); ArchiveRecord r = reader.get(meta.getOffset()); String mimeType = r.getHeader().getMimetype(); - assertTrue("Record is bogus", - mimeType != null && mimeType.length() > 0); + assertTrue(mimeType != null && mimeType.length() > 0, + "Record is bogus"); reader.close(); } - assertEquals("Metadata count not as expected",recordCount, metaDatas.size()); + assertEquals(recordCount,metaDatas.size(), "Metadata count not as expected"); for (Iterator i = metaDatas.iterator(); i.hasNext();) { ARCRecordMetaData r = (ARCRecordMetaData)i.next(); - assertTrue("Record is empty", r.getLength() > 0); + assertTrue(r.getLength() > 0, "Record is empty"); } } + @Test public void testCheckARCFileSize() throws IOException { runCheckARCFileSizeTest("checkARCFileSize", false); } + @Test public void testCheckARCFileSizeCompressed() throws IOException { runCheckARCFileSizeTest("checkARCFileSize", true); } + @Test public void testWriteRecord() throws IOException { final int recordCount = 2; File arcFile = writeRecords("writeRecord", false, DEFAULT_MAX_ARC_FILE_SIZE, recordCount); validate(arcFile, recordCount + 1); // Header record. 
} - + + @Test public void testRandomAccess() throws IOException { final int recordCount = 3; File arcFile = writeRecords("writeRecord", true, @@ -252,6 +250,7 @@ public void testRandomAccess() throws IOException { assertEquals(totalRecords - 1, count); } + @Test public void testWriteRecordCompressed() throws IOException { final int recordCount = 2; File arcFile = writeRecords("writeRecordCompressed", true, @@ -291,7 +290,7 @@ private void runCheckARCFileSizeTest(String baseName, boolean compress) } protected CorruptibleARCWriter createARCWriter(String name, boolean compress) { - File [] files = {getTmpDir()}; + File [] files = {tempDir}; return new CorruptibleARCWriter( SERIAL_NO, new WriterPoolSettingsData( @@ -326,8 +325,8 @@ protected int iterateRecords(ARCReader r) ARCRecord rec = (ARCRecord)i.next(); rec.close(); if (count != 0) { - assertTrue("Unexpected URL " + rec.getMetaData().getUrl(), - rec.getMetaData().getUrl().startsWith(SOME_URL)); + assertTrue(rec.getMetaData().getUrl().startsWith(SOME_URL), + "Unexpected URL " + rec.getMetaData().getUrl()); } count++; } @@ -343,7 +342,8 @@ protected CorruptibleARCWriter createArcWithOneRecord(String name, content.length(), getBais(content)); return writer; } - + + @Test public void testSpaceInURL() { String eMessage = null; try { @@ -351,10 +351,11 @@ public void testSpaceInURL() { } catch (IOException e) { eMessage = e.getMessage(); } - assertTrue("Didn't get expected exception: " + eMessage, - eMessage.startsWith("Metadata line doesn't match")); + assertTrue(eMessage.startsWith("Metadata line doesn't match"), + "Didn't get expected exception: " + eMessage); } + @Test public void testTabInURL() { String eMessage = null; try { @@ -362,8 +363,8 @@ public void testTabInURL() { } catch (IOException e) { eMessage = e.getMessage(); } - assertTrue("Didn't get expected exception: " + eMessage, - eMessage.startsWith("Metadata line doesn't match")); + assertTrue(eMessage.startsWith("Metadata line doesn't match"), + "Didn't 
get expected exception: " + eMessage); } protected void holeyUrl(String name, boolean compress, String urlInsert) @@ -385,11 +386,13 @@ protected void holeyUrl(String name, boolean compress, String urlInsert) // public void testLengthTooShort() throws IOException { // lengthTooShort("testLengthTooShort-" + PREFIX, false); // } - + + @Test public void testLengthTooShortCompressed() throws IOException { lengthTooShort("testLengthTooShortCompressed", true, false); } - + + @Test public void testLengthTooShortCompressedStrict() throws IOException { String eMessage = null; @@ -399,8 +402,8 @@ public void testLengthTooShortCompressedStrict() } catch (RuntimeException e) { eMessage = e.getMessage(); } - assertTrue("Didn't get expected exception: " + eMessage, - eMessage.startsWith("java.io.IOException: Record STARTING at")); + assertTrue(eMessage.startsWith("java.io.IOException: Record STARTING at"), + "Didn't get expected exception: " + eMessage); } protected void lengthTooShort(String name, boolean compress, boolean strict) @@ -430,13 +433,13 @@ protected void lengthTooShort(String name, boolean compress, boolean strict) r = ARCReaderFactory.get(writer.getFile()); r.setStrict(strict); int count = iterateRecords(r); - assertTrue("Count wrong " + count, count == 4); + assertTrue(count == 4, "Count wrong " + count); // Make sure we get the warning string which complains about the // trailing bytes. 
String err = os.toString(); - assertTrue("No message " + err, err.startsWith("WARNING") && - (err.indexOf("Record STARTING at") > 0)); + assertTrue(err.startsWith("WARNING") && + (err.indexOf("Record STARTING at") > 0), "No message " + err); r.close(); } finally { Closeables.close(r, true); @@ -451,13 +454,15 @@ protected void lengthTooShort(String name, boolean compress, boolean strict) // lengthTooLong("testLengthTooLongCompressed-" + PREFIX, // false, false); // } - + + @Test public void testLengthTooLongCompressed() throws IOException { lengthTooLong("testLengthTooLongCompressed", true, false); } - + + @Test public void testLengthTooLongCompressedStrict() { String eMessage = null; try { @@ -466,8 +471,8 @@ public void testLengthTooLongCompressedStrict() { } catch (IOException e) { eMessage = e.getMessage(); } - assertTrue("Didn't get expected exception: " + eMessage, - eMessage.startsWith("Premature EOF before end-of-record")); + assertTrue(eMessage.startsWith("Premature EOF before end-of-record"), + "Didn't get expected exception: " + eMessage); } protected void lengthTooLong(String name, boolean compress, @@ -493,19 +498,20 @@ protected void lengthTooLong(String name, boolean compress, r = ARCReaderFactory.get(writer.getFile()); r.setStrict(strict); int count = iterateRecords(r); - assertTrue("Count wrong " + count, count == 4); + assertTrue(count == 4, "Count wrong " + count); // Make sure we get the warning string which complains about the // trailing bytes. 
String err = os.toString(); - assertTrue("No message " + err, - err.startsWith("WARNING Premature EOF before end-of-record")); + assertTrue(err.startsWith("WARNING Premature EOF before end-of-record"), + "No message " + err); } finally { Closeables.close(r, true); System.setErr(origErr); } } - + + @Test public void testGapError() throws IOException { ARCWriter writer = createArcWithOneRecord("testGapError", true); String content = getContent(); @@ -527,9 +533,9 @@ public long remaining() { IOUtils.closeQuietly(ris); } writer.close(); - assertTrue("No gap when should be", - message != null && - message.indexOf("Gap between expected and actual") >= 0); + assertTrue(message != null && + message.indexOf("Gap between expected and actual") >= 0, + "No gap when should be"); } /** @@ -570,8 +576,8 @@ public static File createARCFile(File arcdir, boolean compress) // writer.close(); // logger.info("Finished speed write test."); // } - - + + @Test public void testValidateMetaLine() throws Exception { final String line = "http://www.aandw.net/images/walden2.png " + "128.197.34.86 20060111174224 image/png 2160"; @@ -584,7 +590,8 @@ public void testValidateMetaLine() throws Exception { w.close(); } } - + + @Test public void testArcRecordOffsetReads() throws Exception { ARCReader r = getSingleRecordReader("testArcRecordInBufferStream"); ARCRecord ar = getSingleRecord(r); @@ -603,6 +610,7 @@ public void testArcRecordOffsetReads() throws Exception { } // available should always be >= 0; extra read()s should all give EOF + @Test public void testArchiveRecordAvailableConsistent() throws Exception { // first test reading byte-at-a-time via no-param read() ARCReader r = getSingleRecordReader("testArchiveRecordAvailableConsistent"); @@ -613,13 +621,14 @@ public void testArchiveRecordAvailableConsistent() throws Exception { } // consecutive reads after EOR should always give -1, still show zero available() for (int i=0; i<5; i++) { - assertTrue("available negative:"+record.available(), 
record.available()>=0); + assertTrue(record.available()>=0, "available negative:"+record.available()); assertEquals(-1, record.read()); } r.close(); } // should always give -1 on repeated reads past EOR + @Test public void testArchiveRecordEORConsistent() throws Exception { ARCReader r = getSingleRecordReader("testArchiveRecordEORConsistent"); ARCRecord record = getSingleRecord(r); @@ -633,6 +642,7 @@ public void testArchiveRecordEORConsistent() throws Exception { // should not throw premature EOF when wrapped with BufferedInputStream // [HER-1450] showed this was the case using Apache Tika + @Test public void testArchiveRecordMarkSupport() throws Exception { ARCReader r = getSingleRecordReader("testArchiveRecordMarkSupport"); ARCRecord record = getSingleRecord(r); @@ -657,6 +667,7 @@ public void testArchiveRecordMarkSupport() throws Exception { * * @throws IOException */ + @Test public void testReadIterator() throws IOException { final int recordCount = 3; File arcFile = writeRecords("writeRecord", true, diff --git a/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java b/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java index 25028797..c6617559 100644 --- a/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java +++ b/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java @@ -8,9 +8,11 @@ import org.archive.io.ArchiveReader; import org.archive.io.ArchiveRecord; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class WARCReaderFactoryTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class WARCReaderFactoryTest { // Test files: String[] files = new String[] { @@ -18,6 +20,7 @@ public class WARCReaderFactoryTest extends TestCase { "src/test/resources/org/archive/format/warc/IAH-urls-wget.warc" }; + @Test public void testGetStringInputstreamBoolean() throws IOException { // Check the test files can be opened: for( String file : files ) { diff --git 
a/src/test/java/org/archive/io/warc/WARCWriterTest.java b/src/test/java/org/archive/io/warc/WARCWriterTest.java index 35c68714..1039119e 100644 --- a/src/test/java/org/archive/io/warc/WARCWriterTest.java +++ b/src/test/java/org/archive/io/warc/WARCWriterTest.java @@ -38,16 +38,18 @@ import org.archive.uid.RecordIDGenerator; import org.archive.uid.UUIDGenerator; import org.archive.util.ArchiveUtils; -import org.archive.util.TmpDirTestCase; import org.archive.util.anvl.ANVLRecord; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.*; /** * Test Writer and Reader. * @author stack * @version $Date: 2006-08-29 19:35:48 -0700 (Tue, 29 Aug 2006) $ $Version$ */ -public class WARCWriterTest -extends TmpDirTestCase implements WARCConstants { +public class WARCWriterTest implements WARCConstants { private static final AtomicInteger SERIAL_NO = new AtomicInteger(); @@ -59,8 +61,12 @@ public class WARCWriterTest private static final String SUFFIX = "JUNIT"; private static final String SOME_URL = "http://www.archive.org/test/"; - + + @TempDir + File tempDir; + @SuppressWarnings("unchecked") + @Test public void testCheckHeaderLineValue() throws Exception { WARCWriter writer = new WARCWriter( SERIAL_NO, @@ -85,21 +91,23 @@ public void testCheckHeaderLineValue() throws Exception { } @SuppressWarnings("unchecked") + @Test public void testMimetypes() throws IOException { WARCWriter writer = new WARCWriter(SERIAL_NO, new WARCWriterPoolSettingsData( "m","testM",1,false,Collections.EMPTY_LIST,Collections.EMPTY_LIST,generator)); writer.checkHeaderLineMimetypeParameter("text/xml"); writer.checkHeaderLineMimetypeParameter("text/xml+rdf"); - assertEquals(writer.checkHeaderLineMimetypeParameter( - "text/plain; charset=SHIFT-JIS"), "text/plain; charset=SHIFT-JIS"); - assertEquals(writer.checkHeaderLineMimetypeParameter( - "multipart/mixed; \r\n boundary=\"simple boundary\""), - "multipart/mixed; boundary=\"simple 
boundary\""); + assertEquals("text/plain; charset=SHIFT-JIS", writer.checkHeaderLineMimetypeParameter( + "text/plain; charset=SHIFT-JIS")); + assertEquals("multipart/mixed; boundary=\"simple boundary\"", + writer.checkHeaderLineMimetypeParameter( + "multipart/mixed; \r\n boundary=\"simple boundary\"")); } - + + @Test public void testWriteRecord() throws IOException { - File [] files = {getTmpDir()}; + File [] files = {tempDir}; // Write uncompressed. WARCWriter writer = @@ -245,8 +253,7 @@ protected int writeRandomHTTPRecord(WARCWriter w, int index) private File writeRecords(String baseName, boolean compress, int maxSize, int recordCount) throws IOException { - cleanUpOldFiles(baseName); - File [] files = {getTmpDir()}; + File [] files = {tempDir}; WARCWriter w = new WARCWriter(SERIAL_NO, new WARCWriterPoolSettingsData( baseName + '-' + SUFFIX, "${prefix}", maxSize, compress, Arrays.asList(files), null, generator)); @@ -255,8 +262,8 @@ private File writeRecords(String baseName, boolean compress, writeRandomHTTPRecord(w, i); } w.close(); - assertTrue("Doesn't exist: " + w.getFile().getAbsolutePath(), - w.getFile().exists()); + assertTrue(w.getFile().exists(), + "Doesn't exist: " + w.getFile().getAbsolutePath()); return w.getFile(); } @@ -288,18 +295,19 @@ private void validate(File f, int recordCount) ArchiveRecordHeader h = (ArchiveRecordHeader)headers.get(i); ArchiveRecord r = reader.get(h.getOffset()); String mimeType = r.getHeader().getMimetype(); - assertTrue("Record is bogus", - mimeType != null && mimeType.length() > 0); + assertTrue(mimeType != null && mimeType.length() > 0, + "Record is bogus"); reader.close(); } - assertTrue("Metadatas not equal", headers.size() == recordCount); + assertTrue(headers.size() == recordCount, "Metadatas not equal"); for (Iterator i = headers.iterator(); i.hasNext();) { ArchiveRecordHeader r = (ArchiveRecordHeader)i.next(); - assertTrue("Record is empty", r.getLength() > 0); + assertTrue(r.getLength() > 0, "Record is empty"); } 
} + @Test public void testWriteRecords() throws IOException { final int recordCount = 2; File f = writeRecords("writeRecords", false, DEFAULT_MAX_WARC_FILE_SIZE, @@ -307,6 +315,7 @@ public void testWriteRecords() throws IOException { validate(f, recordCount + 1); // Header record. } + @Test public void testRandomAccess() throws IOException { final int recordCount = 3; File f = writeRecords("randomAccess", true, DEFAULT_MAX_WARC_FILE_SIZE, @@ -348,7 +357,8 @@ public void testRandomAccess() throws IOException { reader.close(); assertEquals(totalRecords - 1, count); } - + + @Test public void testWriteRecordCompressed() throws IOException { final int recordCount = 2; File arcFile = writeRecords("writeRecordCompressed", true, @@ -358,7 +368,7 @@ public void testWriteRecordCompressed() throws IOException { protected WARCWriter createWARCWriter(String name, boolean compress) { - File [] files = {getTmpDir()}; + File [] files = {tempDir}; return new WARCWriter(SERIAL_NO, new WARCWriterPoolSettingsData( name, @@ -401,8 +411,8 @@ protected int iterateRecords(WARCReader r) ArchiveRecord ar = i.next(); ar.close(); if (count != 0) { - assertTrue("Unexpected URL " + ar.getHeader().getUrl(), - ar.getHeader().getUrl().equals(SOME_URL)); + assertTrue(ar.getHeader().getUrl().equals(SOME_URL), + "Unexpected URL " + ar.getHeader().getUrl()); } count++; } @@ -418,15 +428,17 @@ protected WARCWriter createWithOneRecord(String name, content.length(), getBaos(content)); return writer; } - + + @Test public void testSpaceInURL() throws IOException { long bytesWritten = holeyUrl("testSpaceInURL", false, " "); - assertEquals("Unexpected successful writing occurred",0,bytesWritten); + assertEquals(0,bytesWritten,"Unexpected successful writing occurred"); } + @Test public void testTabInURL() throws IOException { long bytesWritten = holeyUrl("testTabInURL", false, "\t"); - assertEquals("Unexpected successful writing occurred",0,bytesWritten); + assertEquals(0,bytesWritten,"Unexpected successful 
writing occurred"); } protected long holeyUrl(String name, boolean compress, String urlInsert) @@ -483,7 +495,8 @@ public static File createWARCFile(File arcdir, boolean compress) // writer.close(); // logger.info("Finished speed write test."); // } - + + @Test public void testArcRecordOffsetReads() throws Exception { // Get an ARC with one record. WriterPoolMember w = diff --git a/src/test/java/org/archive/net/PublicSuffixesTest.java b/src/test/java/org/archive/net/PublicSuffixesTest.java index ca6e6408..758d7f46 100644 --- a/src/test/java/org/archive/net/PublicSuffixesTest.java +++ b/src/test/java/org/archive/net/PublicSuffixesTest.java @@ -24,9 +24,10 @@ import java.util.ArrayList; import java.util.regex.Matcher; -import junit.framework.TestCase; - import org.archive.net.PublicSuffixes.Node; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; /** * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches @@ -34,10 +35,11 @@ * * @author gojomo */ -public class PublicSuffixesTest extends TestCase { +public class PublicSuffixesTest { // test of low level implementation private final String NL = System.getProperty("line.separator"); - + + @Test public void testCompare() { Node n = new Node("hoge"); assertTrue(n.compareTo('a') > 0); @@ -75,6 +77,8 @@ protected String dump(Node alt) { PublicSuffixes.dump(alt, 0, new PrintWriter(w)); return w.toString(); } + + @Test public void testTrie1() { Node alt = new Node(null, new ArrayList()); alt.addBranch("ac,"); @@ -92,6 +96,8 @@ public void testTrie1() { " \"edu,\"" + NL + " \"\"" + NL, dump(alt)); } + + @Test public void testTrie2() { Node alt = new Node(null, new ArrayList()); alt.addBranch("ac,"); @@ -101,6 +107,7 @@ public void testTrie2() { " \"*,\"" + NL, dump(alt)); } + @Test public void testTrie3() { Node alt = new Node(null, new ArrayList()); alt.addBranch("ac,"); @@ -119,6 +126,7 @@ public void testTrie3() { Matcher m = 
PublicSuffixes.getTopmostAssignedSurtPrefixPattern() .matcher(""); + @Test public void testBasics() { matchPrefix("com,example,www,", "com,example,"); matchPrefix("com,example,", "com,example,"); @@ -137,27 +145,32 @@ public void testBasics() { matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); } + @Test public void testDomainWithDash() { matchPrefix("de,bad-site,www", "de,bad-site,"); } - + + @Test public void testDomainWithNumbers() { matchPrefix("de,archive4u,www", "de,archive4u,"); } - + + @Test public void testIPV4() { - assertEquals("unexpected reduction", - "1.2.3.4", - PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); + assertEquals("1.2.3.4", + PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4"), + "unexpected reduction"); } - + + @Test public void testIPV6() { - assertEquals("unexpected reduction", - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + assertEquals("[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", PublicSuffixes.reduceSurtToAssignmentLevel( - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]"), + "unexpected reduction"); } - + + @Test public void testExceptions() { matchPrefix("uk,bl,www,", "uk,bl,"); matchPrefix("uk,bl,", "uk,bl,"); @@ -165,6 +178,7 @@ public void testExceptions() { matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); } + @Test public void testFakeTLD() { // we assume any new/unknonwn TLD should be assumed as 2-level; // this is preferable for our grouping purpose but might not be @@ -172,22 +186,23 @@ public void testFakeTLD() { matchPrefix("zzz,example,www,", "zzz,example,"); } + @Test public void testUnsegmentedHostname() { m.reset("example"); - assertFalse("unexpected match found in 'example'", m.find()); + assertFalse(m.find(), "unexpected match found in 'example'"); } + @Test public void testTopmostAssignedCaching() { - assertSame("topmostAssignedSurtPrefixPattern not 
cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); - assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); + assertSame(PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),"topmostAssignedSurtPrefixPattern not cached"); + assertSame(PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),"topmostAssignedSurtPrefixRegex not cached"); } // TODO: test UTF domains? protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { m.reset(surtDomain); - assertTrue("expected match not found in '" + surtDomain, m.find()); - assertEquals("expected match not found", expectedAssignedPrefix, m - .group()); + assertTrue(m.find(), "expected match not found in '" + surtDomain); + assertEquals(expectedAssignedPrefix, m.group(), "expected match not found"); } } diff --git a/src/test/java/org/archive/resource/MetaDataTest.java b/src/test/java/org/archive/resource/MetaDataTest.java index ea66135a..88b8cd10 100644 --- a/src/test/java/org/archive/resource/MetaDataTest.java +++ b/src/test/java/org/archive/resource/MetaDataTest.java @@ -10,9 +10,11 @@ import org.json.JSONArray; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class MetaDataTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; + +public class MetaDataTest { private static String[] testFilePaths = { "src/test/resources/org/archive/format/warc/IAH-urls-wget.warc", @@ -59,13 +61,13 @@ private MetaData putMetaData(MetaData m) { private void verifyMultiValuedMetaData(MetaData m) { // boolean assertEquals(JSONArray.class, m.get("boolean-1").getClass()); - assertEquals(false, ((JSONArray) m.get("boolean-1")).getBoolean(0)); - assertEquals(true, ((JSONArray) 
m.get("boolean-1")).getBoolean(1)); - assertEquals(true, m.getBoolean("boolean-2")); - assertEquals(true, m.getBoolean("boolean-3")); + assertFalse(((JSONArray) m.get("boolean-1")).getBoolean(0)); + assertTrue(((JSONArray) m.get("boolean-1")).getBoolean(1)); + assertTrue(m.getBoolean("boolean-2")); + assertTrue(m.getBoolean("boolean-3")); assertEquals(Boolean.class, m.get("boolean-3").getClass()); - assertEquals(true, m.optBoolean("boolean-3", false)); - assertEquals(false, m.optBoolean("boolean-99", false)); + assertTrue(m.optBoolean("boolean-3", false)); + assertFalse(m.optBoolean("boolean-99", false)); // double assertEquals(JSONArray.class, m.get("double-1").getClass()); @@ -121,6 +123,7 @@ private void verifyMultiValuedMetaData(MetaData m) { assertEquals("world", ((JSONObject) m.get("obj-2")).get("hello")); } + @Test public void testMultiValued() { MetaData m = new MetaData(); m = putMetaData(m); @@ -151,6 +154,7 @@ private MetaData readNextWARCResponseAsMetaData(String filePath) throws IOExcept * Verify that in the legacy test file all WARC and HTTP headers are * single-valued, i.e. {@linkplain String}s. 
*/ + @Test public void testSingleHeaders() throws IOException, ResourceParseException { MetaData m = readNextWARCResponseAsMetaData(testFilePaths[0]); @@ -166,6 +170,7 @@ public void testSingleHeaders() throws IOException, ResourceParseException { } } + @Test public void testMultipleHeaders() throws IOException, ResourceParseException { MetaData m = readNextWARCResponseAsMetaData(testFilePaths[1]); diff --git a/src/test/java/org/archive/resource/arc/ARCResourceTest.java b/src/test/java/org/archive/resource/arc/ARCResourceTest.java index 43116af7..e92d07be 100644 --- a/src/test/java/org/archive/resource/arc/ARCResourceTest.java +++ b/src/test/java/org/archive/resource/arc/ARCResourceTest.java @@ -3,6 +3,7 @@ import static org.archive.resource.ResourceConstants.PAYLOAD_LENGTH; import static org.archive.resource.ResourceConstants.PAYLOAD_SLOP_BYTES; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; @@ -17,10 +18,11 @@ import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class ARCResourceTest extends TestCase { +public class ARCResourceTest { + @Test public void testARCResource() throws ResourceParseException, IOException { String testFileName = "../../format/arc/IAH-20080430204825-00000-blackbook-truncated.arc"; ResourceProducer producer = ProducerUtils.getProducer(getClass().getResource(testFileName).getPath()); diff --git a/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java b/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java index 4828ad64..157499ff 100644 --- a/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java +++ b/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java @@ -21,13 +21,16 @@ import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Multimap; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class 
ExtractingParseObserverTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; + +public class ExtractingParseObserverTest { private static final Logger LOG = Logger.getLogger(ExtractingParseObserverTest.class.getName()); + @Test public void testHandleStyleNodeExceptions() throws Exception { String[] tests = { "some css", @@ -58,6 +61,7 @@ public void testHandleStyleNodeExceptions() throws Exception { } } + @Test public void testHandleStyleNode() throws Exception { String[][] tests = { {""}, @@ -80,6 +84,7 @@ public void testHandleStyleNode() throws Exception { * Test whether the pattern matcher does extract nothing and also does not * not hang-up if an overlong CSS link is truncated. */ + @Test public void testHandleStyleNodeNoHangupTruncated() throws Exception { StringBuilder sb = new StringBuilder(); sb.append("url("); @@ -113,22 +118,22 @@ private void checkExtract(String[] data) throws JSONException { assertTrue(o instanceof JSONObject); JSONObject jo = (JSONObject) o; - assertEquals("CSS link extraction failed for <" + css + ">", - data[i], jo.getString("href")); + assertEquals(data[i], jo.getString("href"), + "CSS link extraction failed for <" + css + ">"); } } else { - assertNull("Expected no extracted link for <" + css + ">", a); + assertNull(a, "Expected no extracted link for <" + css + ">"); } } private void checkLink(Multimap links, String url, String path) { - assertTrue("Link with URL " + url + " not found", links.containsKey(url)); - assertTrue("Wrong path " + path + " for " + url, links.get(url).contains(path)); + assertTrue(links.containsKey(url), "Link with URL " + url + " not found"); + assertTrue(links.get(url).contains(path), "Wrong path " + path + " for " + url); } private void checkLinks(Resource resource, String[][] expectedLinks) { assertNotNull(resource); - assertTrue("Wrong instance type of Resource: " + resource.getClass(), resource instanceof HTMLResource); + assertInstanceOf(HTMLResource.class, resource, "Wrong 
instance type of Resource: " + resource.getClass()); MetaData md = resource.getMetaData(); LOG.info(md.toString()); Multimap links = ArrayListMultimap.create(); @@ -178,12 +183,13 @@ private void checkLinks(Resource resource, String[][] expectedLinks) { } } } - assertEquals("Unexpected number of links", expectedLinks.length, links.size()); + assertEquals(expectedLinks.length, links.size(), "Unexpected number of links"); for (String[] l : expectedLinks) { checkLink(links, l[0], l[1]); } } + @Test public void testLinkExtraction() throws ResourceParseException, IOException { String testFileName = "link-extraction-test.warc"; ResourceProducer producer = ProducerUtils.getProducer(getClass().getResource(testFileName).getPath()); diff --git a/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java b/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java index fb255d3c..3b4193b9 100644 --- a/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java +++ b/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java @@ -4,13 +4,11 @@ import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class HTMLMetaDataTest extends TestCase { +public class HTMLMetaDataTest { - public void testHTMLParseData() { -// fail("Not yet implemented"); - } + @Test public void testJSON() throws JSONException { JSONObject data = new JSONObject(); JSONObject links = new JSONObject(); @@ -30,6 +28,8 @@ public void testJSON() throws JSONException { System.out.println(data.toString()); } + + @Test public void testJSON2() throws JSONException { String sa[][] = {{"one","1"},{"two","2"},{"three","3"}}; JSONObject jo = new JSONObject(); @@ -37,6 +37,8 @@ public void testJSON2() throws JSONException { appendStrArr(jo,sa); System.out.println(jo.toString(1)); } + + @Test public void testJSON3() throws JSONException { JSONObject jo = new JSONObject(); appendStrArr2(jo,"k",new String[] {"1","2","3","4"}); diff 
--git a/src/test/java/org/archive/resource/warc/WARCResourceTest.java b/src/test/java/org/archive/resource/warc/WARCResourceTest.java index 1b935405..71c2a4ee 100644 --- a/src/test/java/org/archive/resource/warc/WARCResourceTest.java +++ b/src/test/java/org/archive/resource/warc/WARCResourceTest.java @@ -2,6 +2,8 @@ import static org.archive.resource.ResourceConstants.PAYLOAD_LENGTH; import static org.archive.resource.ResourceConstants.PAYLOAD_SLOP_BYTES; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; @@ -16,10 +18,11 @@ import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class WARCResourceTest extends TestCase { +public class WARCResourceTest { + @Test public void testWARCResource() throws ResourceParseException, IOException { String testFileName = "../../format/warc/IAH-urls-wget.warc"; ResourceProducer producer = ProducerUtils.getProducer(getClass().getResource(testFileName).getPath()); diff --git a/src/test/java/org/archive/uid/UUIDGeneratorTest.java b/src/test/java/org/archive/uid/UUIDGeneratorTest.java index 79e98fb6..66fbf7a8 100644 --- a/src/test/java/org/archive/uid/UUIDGeneratorTest.java +++ b/src/test/java/org/archive/uid/UUIDGeneratorTest.java @@ -23,13 +23,16 @@ import java.util.HashMap; import java.util.Map; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertNotSame; /** * @author stack * @version $Revision$ $Date$ */ -public class UUIDGeneratorTest extends TestCase { +public class UUIDGeneratorTest { + @Test public void testQualifyRecordID() throws URISyntaxException { RecordIDGenerator g = new UUIDGenerator(); URI uri = g.getRecordID(); diff --git a/src/test/java/org/archive/url/AggressiveIAURLCanonicalizerTest.java b/src/test/java/org/archive/url/AggressiveIAURLCanonicalizerTest.java index 711dbede..fff1ea1f 100644 --- 
a/src/test/java/org/archive/url/AggressiveIAURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/AggressiveIAURLCanonicalizerTest.java @@ -2,10 +2,13 @@ import java.net.URISyntaxException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class AggressiveIAURLCanonicalizerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class AggressiveIAURLCanonicalizerTest { static AggressiveIAURLCanonicalizer ia = new AggressiveIAURLCanonicalizer(); + @Test public void testCanonicalize() throws URISyntaxException { // FULL end-to-end tests: check("http://www.alexa.com/","http://alexa.com/"); @@ -26,6 +29,6 @@ private static void check(String orig, String want) throws URISyntaxException { HandyURL u2 = URLParser.parse(got); ia.canonicalize(u2); String got2 = u2.getURLString(); - assertEquals("Second passs changed!",got,got2); + assertEquals(got,got2,"Second passs changed!"); } } diff --git a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java index cc100e4c..dc000265 100644 --- a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java @@ -4,11 +4,15 @@ import org.apache.commons.httpclient.URIException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class BasicURLCanonicalizerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +public class BasicURLCanonicalizerTest { BasicURLCanonicalizer guc = new BasicURLCanonicalizer(); - + + @Test public void testGetHex() { assertEquals(0,guc.getHex('0')); assertEquals(1,guc.getHex('1')); @@ -37,7 +41,8 @@ public void testGetHex() { assertEquals(-1,guc.getHex('q')); assertEquals(-1,guc.getHex(' ')); } - + + @Test public void testDecode() { assertEquals("A",guc.decode("A")); 
assertEquals("AA",guc.decode("AA")); @@ -131,7 +136,7 @@ public void testDecode() { assertEquals("\u2691%E2%9A!\u2691%E2%9A", guc.decode("%E2%9A%91%E2%9A%21%E2%9A%91%E2%9A")); } - + @Test public void testUnescapeRepeatedly() { assertEquals("%!A!!%",guc.unescapeRepeatedly("%!A%21%21%25")); assertEquals("%",guc.unescapeRepeatedly("%")); @@ -147,10 +152,11 @@ public void testUnescapeRepeatedly() { assertEquals("tag=%E4%EE%F8%EA%EE%EB%FC%ED%EE%E5", guc.unescapeRepeatedly("tag=%E4%EE%F8%EA%EE%EB%FC%ED%EE%E5")); } - + + @Test public void testAttemptIPFormats() throws URIException { - assertEquals(null,guc.attemptIPFormats(null)); - assertEquals(null,guc.attemptIPFormats("www.foo.com")); + assertNull(guc.attemptIPFormats(null)); + assertNull(guc.attemptIPFormats("www.foo.com")); assertEquals("127.0.0.1",guc.attemptIPFormats("127.0.0.1")); assertEquals("15.0.0.1",guc.attemptIPFormats("017.0.0.1")); assertEquals("168.188.99.26",guc.attemptIPFormats("168.188.99.26")); @@ -190,11 +196,12 @@ In specifying the inet_addr() API, the POSIX standard [IEEE-1003.1] * For now, we'll enforce some strictness: */ - assertEquals(null,guc.attemptIPFormats("10.0.258")); - assertEquals(null,guc.attemptIPFormats("1.2.3.256")); + assertNull(guc.attemptIPFormats("10.0.258")); + assertNull(guc.attemptIPFormats("1.2.3.256")); } - + + @Test public void testFoo() { String path = "/a/b/c/"; String[] paths = path.split("/",-1); @@ -212,6 +219,7 @@ public void testFoo() { /* * Tests copied from https://developers.google.com/safe-browsing/developers_guide_v2#Canonicalization */ + @Test public void testGoogleExamples() throws URISyntaxException { checkCanonicalization("http://host/%25%32%35", "http://host/%25"); checkCanonicalization("http://host/%25%32%35%25%32%35", "http://host/%25%25"); @@ -249,19 +257,22 @@ public void testGoogleExamples() throws URISyntaxException { checkCanonicalization("http://host.com/ab%23cd", "http://host.com/ab%23cd"); 
checkCanonicalization("http://host.com//twoslashes?more//slashes", "http://host.com/twoslashes?more//slashes"); } - + + @Test public void testStraySpacing() throws URISyntaxException { checkCanonicalization("http://example.org/\u2028", "http://example.org/"); checkCanonicalization("\nhttp://examp\rle.org/", "http://example.org/"); checkCanonicalization("\nhttp://examp\u2029\t\rle.org/ ", "http://example.org/"); } - + + @Test public void testSchemeCapitalsPreserved() throws URISyntaxException { checkCanonicalization("Http://example.com", "Http://example.com/"); checkCanonicalization("HTTP://example.com", "HTTP://example.com/"); checkCanonicalization("ftP://example.com", "ftP://example.com/"); } - + + @Test public void testUnicodeEscaping() throws URISyntaxException { checkCanonicalization("http://example.org/\u2691", "http://example.org/%E2%9A%91"); checkCanonicalization("http://example.org/%e2%9a%91", "http://example.org/%E2%9A%91"); diff --git a/src/test/java/org/archive/url/HandyURLTest.java b/src/test/java/org/archive/url/HandyURLTest.java index 28edff77..ad108db5 100644 --- a/src/test/java/org/archive/url/HandyURLTest.java +++ b/src/test/java/org/archive/url/HandyURLTest.java @@ -1,9 +1,12 @@ package org.archive.url; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class HandyURLTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class HandyURLTest { + + @Test public void testGetPublicSuffix() { HandyURL h = new HandyURL(); h.setHost("www.fool.com"); @@ -23,10 +26,4 @@ public void testGetPublicSuffix() { assertEquals("funky-images",h.getPublicPrefix()); } - - public void testGetPublicPrefix() { -// -// fail("Not yet implemented"); - } - } diff --git a/src/test/java/org/archive/url/IAURLCanonicalizerTest.java b/src/test/java/org/archive/url/IAURLCanonicalizerTest.java index e2c46258..974bdd22 100644 --- a/src/test/java/org/archive/url/IAURLCanonicalizerTest.java +++ 
b/src/test/java/org/archive/url/IAURLCanonicalizerTest.java @@ -2,10 +2,13 @@ import java.net.URISyntaxException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class IAURLCanonicalizerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class IAURLCanonicalizerTest { + + @Test public void testFull() throws URISyntaxException { IAURLCanonicalizer iaC = new IAURLCanonicalizer(new DefaultIACanonicalizerRules()); compCan(iaC,"http://www.archive.org:80/","http://archive.org/"); @@ -26,6 +29,7 @@ private void compCan(URLCanonicalizer c, String orig, String want) throws URISyn assertEquals(want,got); } + @Test public void testAlphaReorderQuery() { assertEquals(null,IAURLCanonicalizer.alphaReorderQuery(null)); assertEquals("",IAURLCanonicalizer.alphaReorderQuery("")); @@ -41,6 +45,7 @@ public void testAlphaReorderQuery() { assertEquals("a=a&a=b&b=a&b=b",IAURLCanonicalizer.alphaReorderQuery("b=b&a=b&b=a&a=a")); } + @Test public void testMassageHost() { assertEquals("foo.com",IAURLCanonicalizer.massageHost("foo.com")); assertEquals("foo.com",IAURLCanonicalizer.massageHost("www.foo.com")); @@ -49,12 +54,14 @@ public void testMassageHost() { assertEquals("www2foo.com",IAURLCanonicalizer.massageHost("www2.www2foo.com")); } + @Test public void testGetDefaultPort() { assertEquals(0,IAURLCanonicalizer.getDefaultPort("foo")); assertEquals(80,IAURLCanonicalizer.getDefaultPort("http")); assertEquals(443,IAURLCanonicalizer.getDefaultPort("https")); } - + + @Test public void testStripSessionId() throws URISyntaxException { IAURLCanonicalizer iaC = new IAURLCanonicalizer(new DefaultIACanonicalizerRules()); compCan(iaC, diff --git a/src/test/java/org/archive/url/OrdinaryIAURLCanonicalizerTest.java b/src/test/java/org/archive/url/OrdinaryIAURLCanonicalizerTest.java index 3c131105..175491fd 100644 --- a/src/test/java/org/archive/url/OrdinaryIAURLCanonicalizerTest.java +++ 
b/src/test/java/org/archive/url/OrdinaryIAURLCanonicalizerTest.java @@ -2,11 +2,14 @@ import java.net.URISyntaxException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class OrdinaryIAURLCanonicalizerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class OrdinaryIAURLCanonicalizerTest { private OrdinaryIAURLCanonicalizer canon = new OrdinaryIAURLCanonicalizer(); - + + @Test public void testMisc() throws URISyntaxException { checkCanonicalization("http://...host..com..", "http://host.com/"); checkCanonicalization("http://example.org:80/", "http://example.org/"); @@ -17,6 +20,7 @@ public void testMisc() throws URISyntaxException { checkCanonicalization("http://example.org/foo/?", "http://example.org/foo/"); } + @Test public void testSchemeCapitals() throws URISyntaxException { checkCanonicalization("Http://example.com", "http://example.com/"); checkCanonicalization("HTTP://example.com", "http://example.com/"); diff --git a/src/test/java/org/archive/url/URLParserTest.java b/src/test/java/org/archive/url/URLParserTest.java index 68dfcd23..ff99fe38 100644 --- a/src/test/java/org/archive/url/URLParserTest.java +++ b/src/test/java/org/archive/url/URLParserTest.java @@ -4,13 +4,15 @@ import java.net.URISyntaxException; import java.net.URLDecoder; -import junit.framework.TestCase; - import org.apache.commons.httpclient.URIException; import com.google.common.net.InetAddresses; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; -public class URLParserTest extends TestCase { +public class URLParserTest { + @Test public void testGuava() throws URIException, UnsupportedEncodingException { Long l = Long.parseLong("3279880203"); int i2 = l.intValue(); @@ -18,6 +20,7 @@ public void testGuava() throws URIException, UnsupportedEncodingException { System.err.format("FromNum(%s)\n", InetAddresses.fromInteger(i2).getHostAddress()); } + @Test public void 
testAddDefaultSchemeIfNeeded() { assertEquals(null,URLParser.addDefaultSchemeIfNeeded(null)); assertEquals("http://",URLParser.addDefaultSchemeIfNeeded("")); @@ -27,7 +30,7 @@ public void testAddDefaultSchemeIfNeeded() { assertEquals("http://www.fool.com/",URLParser.addDefaultSchemeIfNeeded("www.fool.com/")); } - + @Test public void testParse() throws UnsupportedEncodingException, URISyntaxException { System.out.format("O(%s) E(%s)\n","%66",URLDecoder.decode("%66","UTF-8")); checkParse("http://www.archive.org/index.html#foo", diff --git a/src/test/java/org/archive/url/URLRegexTransformerTest.java b/src/test/java/org/archive/url/URLRegexTransformerTest.java index 71979d06..01e97aac 100644 --- a/src/test/java/org/archive/url/URLRegexTransformerTest.java +++ b/src/test/java/org/archive/url/URLRegexTransformerTest.java @@ -3,10 +3,13 @@ import org.apache.commons.httpclient.URIException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class URLRegexTransformerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class URLRegexTransformerTest { + + @Test public void testStripPathSessionID() { // strip jsessionid // String sid1 = "jsessionid=0123456789abcdefghijklemopqrstuv"; @@ -48,115 +51,117 @@ public void testStripPathSessionID() { private static void checkStripPathSessionID(String orig, String want) { String got = URLRegexTransformer.stripPathSessionID(orig); - assertTrue(String.format("FAIL Orig(%s) Got(%s) Want(%s)",orig,got,want),want.equals(got)); + assertEquals(want, got, String.format("FAIL Orig(%s) Got(%s) Want(%s)", orig, got, want)); } // private static final String BASE = "http://www.archive.org/index.html"; private static final String BASE = ""; + @Test public void testStripQuerySessionID() throws URIException { String str32id = "0123456789abcdefghijklemopqrstuv"; String url = BASE + "?jsessionid=" + str32id; String expectedResult = BASE + "?"; String result = 
URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test that we don't strip if not 32 chars only. url = BASE + "?jsessionid=" + str32id + '0'; expectedResult = url; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test what happens when followed by another key/value pair. url = BASE + "?jsessionid=" + str32id + "&x=y"; expectedResult = BASE + "?x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed (" + result + ")", expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed (" + result + ")"); // Test what happens when followed by another key/value pair and // prefixed by a key/value pair. url = BASE + "?one=two&jsessionid=" + str32id + "&x=y"; expectedResult = BASE + "?one=two&x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test what happens when prefixed by a key/value pair. url = BASE + "?one=two&jsessionid=" + str32id; expectedResult = BASE + "?one=two&"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test aspsession. url = BASE + "?aspsessionidABCDEFGH=" + "ABCDEFGHIJKLMNOPQRSTUVWX" + "&x=y"; expectedResult = BASE + "?x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test archive phpsession. 
url = BASE + "?phpsessid=" + str32id + "&x=y"; expectedResult = BASE + "?x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // With prefix too. url = BASE + "?one=two&phpsessid=" + str32id + "&x=y"; expectedResult = BASE + "?one=two&x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // With only prefix url = BASE + "?one=two&phpsessid=" + str32id; expectedResult = BASE + "?one=two&"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test sid. url = BASE + "?" + "sid=9682993c8daa2c5497996114facdc805" + "&x=y"; expectedResult = BASE + "?x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Igor test. url = BASE + "?" 
+ "sid=9682993c8daa2c5497996114facdc805" + "&" + "jsessionid=" + str32id; expectedResult = BASE + "?"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); url = "?CFID=1169580&CFTOKEN=48630702&dtstamp=22%2F08%2F2006%7C06%3A58%3A11"; expectedResult = "?dtstamp=22%2F08%2F2006%7C06%3A58%3A11"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); url = "?CFID=12412453&CFTOKEN=15501799&dt=19_08_2006_22_39_28"; expectedResult = "?dt=19_08_2006_22_39_28"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); url = "?CFID=14475712&CFTOKEN=2D89F5AF-3048-2957-DA4EE4B6B13661AB&r=468710288378&m=forgotten"; expectedResult = "?r=468710288378&m=forgotten"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); url = "?CFID=16603925&CFTOKEN=2AE13EEE-3048-85B0-56CEDAAB0ACA44B8"; expectedResult = "?"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); url = "?CFID=4308017&CFTOKEN=63914124&requestID=200608200458360%2E39414378"; expectedResult = "?requestID=200608200458360%2E39414378"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); } - + + @Test public void testSURT() { assertEquals("org,archive,www",URLRegexTransformer.hostToSURT("www.archive.org")); } diff --git a/src/test/java/org/archive/url/UsableURIFactoryTest.java 
b/src/test/java/org/archive/url/UsableURIFactoryTest.java index 73f2b6db..368cc93d 100644 --- a/src/test/java/org/archive/url/UsableURIFactoryTest.java +++ b/src/test/java/org/archive/url/UsableURIFactoryTest.java @@ -19,15 +19,13 @@ package org.archive.url; -import java.util.Iterator; import java.util.TreeMap; -import junit.framework.TestCase; - import org.apache.commons.httpclient.URIException; import org.apache.commons.lang.SerializationUtils; -import org.archive.url.UsableURI; -import org.archive.url.UsableURIFactory; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; /** * Test UURIFactory for proper UURI creation across variety of @@ -37,8 +35,9 @@ * * @author igor stack gojomo */ -public class UsableURIFactoryTest extends TestCase { - +public class UsableURIFactoryTest { + + @Test public final void testEscaping() throws URIException { // Note: single quote is not being escaped by URI class. final String ESCAPED_URISTR = "http://archive.org/" + @@ -64,44 +63,46 @@ public final void testEscaping() throws URIException { UsableURI uuri = UsableURIFactory.getInstance(URISTR); final String uuriStr = uuri.toString(); - assertEquals("expected escaping", ESCAPED_URISTR, uuriStr); + assertEquals(ESCAPED_URISTR, uuriStr, "expected escaping"); } + @Test public final void testUnderscoreMakesPortParseFail() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://one-two_three:8080/index.html"); int port = uuri.getPort(); - assertTrue("Failed find of port " + uuri, port == 8080); + assertEquals(8080, port, "Failed find of port " + uuri); } - + + @Test public final void testRelativeURIWithTwoSlashes() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.archive.org"); UsableURI uuri = UsableURIFactory.getInstance(base, "one//index.html"); - assertTrue("Doesn't do right thing with two slashes " + uuri, - uuri.toString().equals( - "http://www.archive.org/one//index.html")); + 
assertEquals("http://www.archive.org/one//index.html", uuri.toString(), + "Doesn't do right thing with two slashes " + uuri); } - + + @Test public final void testSchemelessURI() throws URIException { UsableURI base = UsableURIFactory.getInstance("https://www.archive.org"); UsableURI uuri = UsableURIFactory.getInstance(base, "//example.com/monkey?this:uri:has:colons"); - assertTrue("Doesn't do right thing with a schemeless URI " + uuri, - uuri.toString().equals( - "https://example.com/monkey?this:uri:has:colons")); + assertEquals("https://example.com/monkey?this:uri:has:colons", uuri.toString(), + "Doesn't do right thing with a schemeless URI " + uuri); } - + + @Test public final void testTrailingEncodedSpace() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://www.nps-shoes.co.uk%20"); - assertTrue("Doesn't strip trailing encoded space 1 " + uuri, - uuri.toString().equals("http://www.nps-shoes.co.uk/")); + assertEquals("http://www.nps-shoes.co.uk/", uuri.toString(), + "Doesn't strip trailing encoded space 1 " + uuri); uuri = UsableURIFactory.getInstance("http://www.nps-shoes.co.uk%20%20%20"); - assertTrue("Doesn't strip trailing encoded space 2 " + uuri, - uuri.toString().equals("http://www.nps-shoes.co.uk/")); + assertEquals("http://www.nps-shoes.co.uk/", uuri.toString(), + "Doesn't strip trailing encoded space 2 " + uuri); } - + + @Test public final void testPort0080is80() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://archive.org:0080"); - assertTrue("Doesn't strip leading zeros " + uuri, - uuri.toString().equals("http://archive.org/")); + assertEquals("http://archive.org/", uuri.toString(), "Doesn't strip leading zeros " + uuri); } // DISABLING TEST AS PRECURSOR TO ELIMINATION @@ -127,13 +128,15 @@ public final void testPort0080is80() throws URIException { // } // assertNotNull("Didn't get expected exception.", message); // } - + + @Test public final void testEscapeEncoding() throws URIException { UsableURI 
uuri = UsableURIFactory.getInstance("http://www.y1y1.com/" + "albums/userpics/11111/normal_%E3%E4%EC%EC%EC.jpg", "windows-1256"); uuri.getPath(); } - + + @Test public final void testTooLongAfterEscaping() { StringBuffer buffer = new StringBuffer("http://www.archive.org/a/"); // Append bunch of spaces. When escaped, they'll triple in size. @@ -147,36 +150,36 @@ public final void testTooLongAfterEscaping() { } catch (URIException e) { message = e.getMessage(); } - assertTrue("Wrong or no exception: " + message, (message != null) && - message.startsWith("Created (escaped) uuri >")); + assertTrue((message != null) && message.startsWith("Created (escaped) uuri >"), + "Wrong or no exception: " + message); } - + + @Test public final void testFtpUris() throws URIException { final String FTP = "ftp"; final String AUTHORITY = "pfbuser:pfbuser@mprsrv.agri.gov.cn"; final String PATH = "/clzreceive/"; final String uri = FTP + "://" + AUTHORITY + PATH; UsableURI uuri = UsableURIFactory.getInstance(uri); - assertTrue("Failed to get matching scheme: " + uuri.getScheme(), - (uuri.getScheme()).equals(FTP)); - assertTrue("Failed to get matching authority: " + - uuri.getAuthority(), (uuri.getAuthority()).equals(AUTHORITY)); - assertTrue("Failed to get matching path: " + - uuri.getPath(), (uuri.getPath()).equals(PATH)); + assertEquals(FTP, (uuri.getScheme()), "Failed to get matching scheme: " + uuri.getScheme()); + assertEquals(AUTHORITY, (uuri.getAuthority()), "Failed to get matching authority: " + + uuri.getAuthority()); + assertEquals(PATH, (uuri.getPath()), "Failed to get matching path: " + + uuri.getPath()); } - + + @Test public final void testWhitespaceEscaped() throws URIException { // Test that we get all whitespace even if the uri is // already escaped. 
String uri = "http://archive.org/index%25 .html"; String tgtUri = "http://archive.org/index%25%20.html"; UsableURI uuri = UsableURIFactory.getInstance(uri); - assertTrue("Not equal " + uuri.toString(), - uuri.toString().equals(tgtUri)); + assertEquals(tgtUri, uuri.toString(), "Not equal " + uuri); uri = "http://archive.org/index%25\u001D.html"; tgtUri = "http://archive.org/index%25%1D.html"; uuri = UsableURIFactory.getInstance(uri); - assertEquals("whitespace escaping", tgtUri, uuri.toString()); + assertEquals(tgtUri, uuri.toString(), "whitespace escaping"); uri = "http://gemini.info.usaid.gov/directory/" + "pbResults.cfm?&urlNameLast=Rumplestiltskin"; tgtUri = "http://gemini.info.usaid.gov/directory/faxResults.cfm?" + @@ -184,13 +187,13 @@ public final void testWhitespaceEscaped() throws URIException { uuri = UsableURIFactory.getInstance(UsableURIFactory.getInstance(uri), "faxResults.cfm?name=Ebenezer +Rumplestiltskin,&location=" + "RRB%20%20%20%205%2E08%2D006"); - assertEquals("whitespace escaping", tgtUri, uuri.toString()); + assertEquals(tgtUri, uuri.toString(), "whitespace escaping"); // https://webarchive.jira.com/browse/HER-2089 uri = "http://archive.org/index%25\u3000.html"; tgtUri = "http://archive.org/index%25%E3%80%80.html"; uuri = UsableURIFactory.getInstance(uri); - assertEquals("U+3000 ideographic space escaping", tgtUri, uuri.toString()); + assertEquals(tgtUri, uuri.toString(), "U+3000 ideographic space escaping"); } // public final void testFailedGetPath() throws URIException { @@ -203,44 +206,48 @@ public final void testWhitespaceEscaped() throws URIException { // String foundPath = uuri.getPath(); // assertEquals("unexpected path", path, foundPath); // } - + + @Test public final void testDnsHost() throws URIException { String uri = "dns://ads.nandomedia.com:81/one.html"; UsableURI uuri = UsableURIFactory.getInstance(uri); String host = uuri.getReferencedHost(); - assertTrue("Host is wrong " + host, host.equals("ads.nandomedia.com")); + 
assertEquals("ads.nandomedia.com", host, "Host is wrong " + host); uri = "dns:ads.nandomedia.com"; uuri = UsableURIFactory.getInstance(uri); host = uuri.getReferencedHost(); - assertTrue("Host is wrong " + host, host.equals("ads.nandomedia.com")); + assertEquals("ads.nandomedia.com", host, "Host is wrong " + host); uri = "dns:ads.nandomedia.com?a=b"; uuri = UsableURIFactory.getInstance(uri); host = uuri.getReferencedHost(); - assertTrue("Host is wrong " + host, host.equals("ads.nandomedia.com")); + assertEquals("ads.nandomedia.com", host, "Host is wrong " + host); } - + + @Test public final void testPercentEscaping() throws URIException { final String uri = "http://archive.org/%a%%%%%.html"; // tests indicate firefox (1.0.6) does not encode '%' at all final String tgtUri = "http://archive.org/%a%%%%%.html"; UsableURI uuri = UsableURIFactory.getInstance(uri); - assertEquals("Not equal",tgtUri, uuri.toString()); + assertEquals(tgtUri,uuri.toString(), "Not equal"); } - + + @Test public final void testRelativeDblPathSlashes() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.archive.org/index.html"); UsableURI uuri = UsableURIFactory.getInstance(base, "JIGOU//KYC//INDEX.HTM"); - assertTrue("Double slash not working " + uuri.toString(), - uuri.getPath().equals("/JIGOU//KYC//INDEX.HTM")); + assertEquals("/JIGOU//KYC//INDEX.HTM", uuri.getPath(), "Double slash not working " + uuri); } - + + @Test public final void testRelativeWithScheme() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.example.com/some/page"); UsableURI uuri = UsableURIFactory.getInstance(base, "http:boo"); - assertTrue("Relative with scheme not working " + uuri.toString(), - uuri.toString().equals("http://www.example.com/some/boo")); + assertEquals("http://www.example.com/some/boo", uuri.toString(), + "Relative with scheme not working " + uuri); } - + + @Test public final void testBadBaseResolve() throws URIException { UsableURI base = 
UsableURIFactory.getInstance("http://license.joins.com/board/" + "etc_board_list.asp?board_name=new_main&b_type=&nPage=" + @@ -248,29 +255,29 @@ public final void testBadBaseResolve() throws URIException { "notice&gate=02"); UsableURIFactory.getInstance(base, "http://www.changeup.com/...[ 1010966 ] crawl.log has URIs with spaces in them. - * @throws URIException */ + @Test public final void testSpaceDoubleEncoding() throws URIException { final String uri = "http://www.brook.edu/i.html? %20taxonomy=Politics"; final String encodedUri = "http://www.brook.edu/i.html?%20%20taxonomy=Politics"; UsableURI uuri = UsableURIFactory.getInstance(uri, "ISO-8859-1"); - assertTrue("Not equal " + uuri.toString(), - uuri.toString().equals(encodedUri)); + assertEquals(encodedUri, uuri.toString(), "Not equal " + uuri.toString()); } /** * Test for doubly-encoded sequences. * See [ 966219 ] UURI doubly-encodes %XX sequences. - * @throws URIException */ + @Test public final void testDoubleEncoding() throws URIException { final char ae = '\u00E6'; final String uri = "http://archive.org/DIR WITH SPACES/home" + @@ -316,20 +322,20 @@ public final void testDoubleEncoding() throws URIException { final String encodedUri = "http://archive.org/DIR%20WITH%20SPACES/home%E6.html"; UsableURI uuri = UsableURIFactory.getInstance(uri, "ISO-8859-1"); - assertEquals("single encoding", encodedUri, uuri.toString()); + assertEquals(encodedUri, uuri.toString(), "single encoding"); // Dbl-encodes. uuri = UsableURIFactory.getInstance(uuri.toString(), "ISO-8859-1"); uuri = UsableURIFactory.getInstance(uuri.toString(), "ISO-8859-1"); - assertEquals("double encoding", encodedUri, uuri.toString()); + assertEquals(encodedUri, uuri.toString(), "double encoding"); // Do default utf-8 test. 
uuri = UsableURIFactory.getInstance(uri); final String encodedUtf8Uri = "http://archive.org/DIR%20WITH%20SPACES/home%C3%A6.html"; - assertEquals("Not equal utf8", encodedUtf8Uri, uuri.toString()); + assertEquals(encodedUtf8Uri, uuri.toString(), "Not equal utf8"); // Now dbl-encode. uuri = UsableURIFactory.getInstance(uuri.toString()); uuri = UsableURIFactory.getInstance(uuri.toString()); - assertEquals("Not equal (dbl-encoding) utf8", encodedUtf8Uri, uuri.toString()); + assertEquals(encodedUtf8Uri, uuri.toString(), "Not equal (dbl-encoding) utf8"); } /** @@ -337,26 +343,25 @@ public final void testDoubleEncoding() throws URIException { * @see [ 788219 ] URI Syntax Errors stop page parsing * @throws URIException */ + @Test public final void testThreeSlashes() throws URIException { UsableURI goodURI = UsableURIFactory. getInstance("http://lcweb.loc.gov/rr/goodtwo.html"); String uuri = "http:///lcweb.loc.gov/rr/goodtwo.html"; UsableURI rewrittenURI = UsableURIFactory.getInstance(uuri); - assertTrue("Not equal " + goodURI + ", " + uuri, - goodURI.toString().equals(rewrittenURI.toString())); + assertEquals(goodURI.toString(), rewrittenURI.toString(), "Not equal " + goodURI + ", " + uuri); uuri = "http:////lcweb.loc.gov/rr/goodtwo.html"; rewrittenURI = UsableURIFactory.getInstance(uuri); - assertTrue("Not equal " + goodURI + ", " + uuri, - goodURI.toString().equals(rewrittenURI.toString())); + assertEquals(goodURI.toString(), rewrittenURI.toString(), "Not equal " + goodURI + ", " + uuri); // Check https. goodURI = UsableURIFactory. 
getInstance("https://lcweb.loc.gov/rr/goodtwo.html"); uuri = "https:////lcweb.loc.gov/rr/goodtwo.html"; rewrittenURI = UsableURIFactory.getInstance(uuri); - assertTrue("Not equal " + goodURI + ", " + uuri, - goodURI.toString().equals(rewrittenURI.toString())); + assertEquals(goodURI.toString(), rewrittenURI.toString(), "Not equal " + goodURI + ", " + uuri); } - + + @Test public final void testNoScheme() { boolean expectedException = false; String uuri = "www.loc.gov/rr/european/egw/polishex.html"; @@ -366,10 +371,10 @@ public final void testNoScheme() { // Expected exception. expectedException = true; } - assertTrue("Didn't get expected exception: " + uuri, - expectedException); + assertTrue(expectedException, "Didn't get expected exception: " + uuri); } - + + @Test public final void testRelative() throws URIException { UsableURI uuriTgt = UsableURIFactory. getInstance("http://archive.org:83/home.html"); @@ -377,26 +382,25 @@ public final void testRelative() throws URIException { getInstance("http://archive.org:83/one/two/three.html"); UsableURI uuri = UsableURIFactory. 
getInstance(uri, "/home.html"); - assertTrue("Not equal", - uuriTgt.toString().equals(uuri.toString())); + assertEquals(uuriTgt.toString(), uuri.toString(), "Not equal"); } - + + @Test public void testSchemelessRelative() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.itsnicethat.com/articles/laura-hobson"); UsableURI test1 = UsableURIFactory.getInstance(base, "//www.facebook.com/plugins/like.php"); - assertEquals("schemaless relative 1", "http://www.facebook.com/plugins/like.php", test1.toString()); + assertEquals("http://www.facebook.com/plugins/like.php", test1.toString(), "schemaless relative 1"); // reported by Erin Staniland UsableURI test2 = UsableURIFactory.getInstance(base, "//www.facebook.com/plugins/like.php?href=http://www.itsnicethat.com/articles/laura-hobson"); - assertEquals("schemeless relative 2", "http://www.facebook.com/plugins/like.php?href=http://www.itsnicethat.com/articles/laura-hobson", - test2.toString()); + assertEquals("http://www.facebook.com/plugins/like.php?href=http://www.itsnicethat.com/articles/laura-hobson", test2.toString(), + "schemeless relative 2"); } /** * Test that an empty uuri does the right thing -- that we get back the * base. - * - * @throws URIException */ + @Test public final void testRelativeEmpty() throws URIException { UsableURI uuriTgt = UsableURIFactory. getInstance("http://archive.org:83/one/two/three.html"); @@ -404,10 +408,10 @@ public final void testRelativeEmpty() throws URIException { getInstance("http://archive.org:83/one/two/three.html"); UsableURI uuri = UsableURIFactory. getInstance(uri, ""); - assertTrue("Empty length don't work", - uuriTgt.toString().equals(uuri.toString())); + assertEquals(uuriTgt.toString(), uuri.toString(), "Empty length don't work"); } - + + @Test public final void testAbsolute() throws URIException { UsableURI uuriTgt = UsableURIFactory. 
getInstance("http://archive.org:83/home.html"); @@ -415,14 +419,14 @@ public final void testAbsolute() throws URIException { getInstance("http://archive.org:83/one/two/three.html"); UsableURI uuri = UsableURIFactory. getInstance(uri, "http://archive.org:83/home.html"); - assertTrue("Not equal", - uuriTgt.toString().equals(uuri.toString())); + assertEquals(uuriTgt.toString(), uuri.toString(), "Not equal"); } /** * Test for [ 962892 ] UURI accepting/creating unUsable URIs (bad hosts). * @see [ 962892 ] UURI accepting/creating unUsable URIs (bad hosts) */ + @Test public final void testHostWithLessThan() { checkExceptionOnIllegalDomainlabel("http://www.betamobile.com[ 1012520 ] UURI.length() > 2k */ + @Test public final void test2kURI() throws URIException { final StringBuffer buffer = new StringBuffer("http://a.b"); final String subPath = "/123456789"; @@ -451,8 +455,7 @@ public final void test2kURI() throws URIException { } catch (URIException e) { gotException = true; } - assertTrue("No expected exception complaining about long URI", - gotException); + assertTrue(gotException, "No expected exception complaining about long URI"); } private void checkExceptionOnIllegalDomainlabel(String uuri) { @@ -463,23 +466,21 @@ private void checkExceptionOnIllegalDomainlabel(String uuri) { // Expected exception. expectedException = true; } - assertTrue("Didn't get expected exception: " + uuri, - expectedException); + assertTrue(expectedException, "Didn't get expected exception: " + uuri); } /** * Test for doing separate DNS lookup for same host * * @see [ 788277 ] Doing separate DNS lookup for same host - * @throws URIException */ + @Test public final void testHostWithPeriod() throws URIException { UsableURI uuri1 = UsableURIFactory. getInstance("http://www.loc.gov./index.html"); UsableURI uuri2 = UsableURIFactory. 
getInstance("http://www.loc.gov/index.html"); - assertEquals("Failed equating hosts with dot", - uuri1.getHost(), uuri2.getHost()); + assertEquals(uuri1.getHost(), uuri2.getHost(), "Failed equating hosts with dot"); } /** @@ -488,12 +489,12 @@ public final void testHostWithPeriod() throws URIException { * @see [ 874220 ] NPE in java.net.URI.encode * @throws URIException */ + @Test public final void testHostEncodedChars() throws URIException { String s = "http://g.msn.co.kr/0nwkokr0/00/19??" + "PS=10274&NC=10009&CE=42&CP=949&HL=" + "���?��"; - assertNotNull("Encoded chars " + s, - UsableURIFactory.getInstance(s)); + assertNotNull(UsableURIFactory.getInstance(s), "Encoded chars " + s); } /** @@ -501,6 +502,7 @@ public final void testHostEncodedChars() throws URIException { * * See [ 927940 ] java.net.URI parses %20 but getHost null */ + @Test public final void testSpaceInHost() { boolean expectedException = false; try { @@ -510,7 +512,7 @@ public final void testSpaceInHost() { } catch (URIException e) { expectedException = true; } - assertTrue("Did not fail with escaped space.", expectedException); + assertTrue(expectedException, "Did not fail with escaped space."); expectedException = false; try { @@ -520,26 +522,27 @@ public final void testSpaceInHost() { } catch (URIException e) { expectedException = true; } - assertTrue("Did not fail with real space.", expectedException); + assertTrue(expectedException, "Did not fail with real space."); } /** * Test for java.net.URI chokes on hosts_with_underscores. 
* * @see [ 808270 ] java.net.URI chokes on hosts_with_underscores - * @throws URIException - */ + */ + @Test public final void testHostWithUnderscores() throws URIException { UsableURI uuri = UsableURIFactory.getInstance( "http://x_underscore_underscore.2u.com.tw/nonexistent_page.html"); - assertEquals("Failed get of host with underscore", - "x_underscore_underscore.2u.com.tw", uuri.getHost()); + assertEquals("x_underscore_underscore.2u.com.tw", + uuri.getHost(), "Failed get of host with underscore"); } /** * Two dots for igor. */ + @Test public final void testTwoDots() { boolean expectedException = false; try { @@ -548,20 +551,19 @@ public final void testTwoDots() { } catch (URIException e) { expectedException = true; } - assertTrue("Two dots did not throw exception", expectedException); + assertTrue(expectedException, "Two dots did not throw exception"); } /** * Test for java.net.URI#getHost fails when leading digit. * * @see [ 910120 ] java.net.URI#getHost fails when leading digit. - * @throws URIException */ + @Test public final void testHostWithDigit() throws URIException { UsableURI uuri = UsableURIFactory. getInstance("http://0204chat.2u.com.tw/nonexistent_page.html"); - assertEquals("Failed get of host with digit", - "0204chat.2u.com.tw", uuri.getHost()); + assertEquals("0204chat.2u.com.tw", uuri.getHost(), "Failed get of host with digit"); } /** @@ -569,6 +571,7 @@ public final void testHostWithDigit() throws URIException { * * @see [ 949548 ] Constraining java URI class */ + @Test public final void testPort() { checkBadPort("http://www.tyopaikat.com:a/robots.txt"); checkBadPort("http://158.144.21.3:80808/robots.txt"); @@ -591,19 +594,18 @@ private void checkBadPort(String uri) { catch (URIException e) { exception = true; } - assertTrue("Didn't throw exception: " + uri, exception); + assertTrue(exception, "Didn't throw exception: " + uri); } /** * Preserve userinfo capitalization. 
- * @throws URIException */ + @Test public final void testUserinfo() throws URIException { final String authority = "stack:StAcK@www.tyopaikat.com"; final String uri = "http://" + authority + "/robots.txt"; UsableURI uuri = UsableURIFactory.getInstance(uri); - assertEquals("Authority not equal", uuri.getAuthority(), - authority); + assertEquals(authority, uuri.getAuthority(), "Authority not equal"); /* String tmp = uuri.toString(); assertTrue("URI not equal", tmp.equals(uri)); @@ -612,8 +614,8 @@ public final void testUserinfo() throws URIException { /** * Test user info + port - * @throws URIException */ + @Test public final void testUserinfoPlusPort() throws URIException { final String userInfo = "stack:StAcK"; final String authority = "www.tyopaikat.com"; @@ -621,14 +623,13 @@ public final void testUserinfoPlusPort() throws URIException { final String uri = "http://" + userInfo + "@" + authority + ":" + port + "/robots.txt"; UsableURI uuri = UsableURIFactory.getInstance(uri); - assertEquals("Host not equal", authority,uuri.getHost()); - assertEquals("Userinfo Not equal",userInfo,uuri.getUserinfo()); - assertEquals("Port not equal",port,uuri.getPort()); - assertEquals("Authority wrong","stack:StAcK@www.tyopaikat.com:8080", - uuri.getAuthority()); - assertEquals("AuthorityMinusUserinfo wrong","www.tyopaikat.com:8080", - uuri.getAuthorityMinusUserinfo()); - + assertEquals(authority, uuri.getHost(),"Host not equal"); + assertEquals(userInfo,uuri.getUserinfo(),"Userinfo Not equal"); + assertEquals(port,uuri.getPort(),"Port not equal"); + assertEquals("stack:StAcK@www.tyopaikat.com:8080",uuri.getAuthority(), + "Authority wrong"); + assertEquals("www.tyopaikat.com:8080",uuri.getAuthorityMinusUserinfo(), + "AuthorityMinusUserinfo wrong"); } public final void testRFC3986RelativeChange() throws URIException { @@ -664,9 +665,8 @@ public final void testRFC3986RelativeChange() throws URIException { * "../../" = "http://a/" * "../../g" = "http://a/g" * - * - * @throws 
URIException */ + @Test public final void testRFC3986Relative() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://a/b/c/d;p?q"); tryRelative(base, "g:h", "g:h"); @@ -697,9 +697,8 @@ public final void testRFC3986Relative() throws URIException { protected void tryRelative(UsableURI base, String relative, String expected) throws URIException { UsableURI uuri = UsableURIFactory.getInstance(base, relative); - assertEquals("Derelativized " + relative + " gave " - + uuri + " not " + expected, - UsableURIFactory.getInstance(expected),uuri); + assertEquals(UsableURIFactory.getInstance(expected), uuri,"Derelativized " + relative + " gave " + + uuri + " not " + expected); } /** @@ -730,9 +729,8 @@ protected void tryRelative(UsableURI base, String relative, String expected) * ../../ = http://a/ * ../../g = http://a/g * - * - * @throws URIException */ + @Test public final void testRFC2396Relative() throws URIException { UsableURI base = UsableURIFactory. getInstance("http://a/b/c/d;p?q"); @@ -764,13 +762,11 @@ public final void testRFC2396Relative() throws URIException { m.put("/../../../../../../../../g", "http://a/g"); m.put("../../../../../../../../g", "http://a/g"); m.put("../G", "http://a/b/G"); - for (Iterator i = m.keySet().iterator(); i.hasNext();) { - String key = (String)i.next(); - String value = (String)m.get(key); - UsableURI uuri = UsableURIFactory.getInstance(base, key); - assertTrue("Unexpected " + key + " " + value + " " + uuri, - uuri.equals(UsableURIFactory.getInstance(value))); - } + for (String key : m.keySet()) { + String value = m.get(key); + UsableURI uuri = UsableURIFactory.getInstance(base, key); + assertEquals(uuri, UsableURIFactory.getInstance(value), "Unexpected " + key + " " + value + " " + uuri); + } } /** @@ -778,14 +774,13 @@ public final void testRFC2396Relative() throws URIException { * unused and irrelevant for network fetches. 
* * See [ 970666 ] #anchor links not trimmed, and thus recrawled - * - * @throws URIException */ + @Test public final void testAnchors() throws URIException { UsableURI uuri = UsableURIFactory. getInstance("http://www.example.com/path?query#anchor"); - assertEquals("Not equal", "http://www.example.com/path?query", - uuri.toString()); + assertEquals("http://www.example.com/path?query", uuri.toString(), + "Not equal"); } @@ -793,50 +788,47 @@ public final void testAnchors() throws URIException { * Ensure that URI strings beginning with a colon are treated * the same as browsers do (as relative, rather than as absolute * with zero-length scheme). - * - * @throws URIException */ + @Test public void testStartsWithColon() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.example.com/path/page"); UsableURI uuri = UsableURIFactory.getInstance(base,":foo"); - assertEquals("derelativize starsWithColon", + assertEquals("http://www.example.com/path/:foo", uuri.getURI(), - "http://www.example.com/path/:foo"); + "derelativize starsWithColon"); } /** * Ensure that relative URIs with colons in late positions * aren't mistakenly interpreted as absolute URIs with long, * illegal schemes. 
- * - * @throws URIException */ + @Test public void testLateColon() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.example.com/path/page"); UsableURI uuri1 = UsableURIFactory.getInstance(base,"example.html;jsessionid=deadbeef:deadbeed?parameter=this:value"); - assertEquals("derelativize lateColon", + assertEquals("http://www.example.com/path/example.html;jsessionid=deadbeef:deadbeed?parameter=this:value", uuri1.getURI(), - "http://www.example.com/path/example.html;jsessionid=deadbeef:deadbeed?parameter=this:value"); + "derelativize lateColon"); UsableURI uuri2 = UsableURIFactory.getInstance(base,"example.html?parameter=this:value"); - assertEquals("derelativize lateColon", + assertEquals("http://www.example.com/path/example.html?parameter=this:value", uuri2.getURI(), - "http://www.example.com/path/example.html?parameter=this:value"); + "derelativize lateColon"); } /** * Ensure that stray trailing '%' characters do not prevent * UURI instances from being created, and are reasonably * escaped when encountered. - * - * @throws URIException */ + @Test public void testTrailingPercents() throws URIException { String plainPath = "http://www.example.com/path%"; UsableURI plainPathUuri = UsableURIFactory.getInstance(plainPath); - assertEquals("plainPath getURI", plainPath, plainPathUuri.getURI()); - assertEquals("plainPath getEscapedURI", - "http://www.example.com/path%", // browsers don't escape '%' - plainPathUuri.getEscapedURI()); + assertEquals(plainPath, plainPathUuri.getURI(), "plainPath getURI"); + assertEquals("http://www.example.com/path%", + plainPathUuri.getEscapedURI(), // browsers don't escape '%' + "plainPath getEscapedURI"); String partiallyEscapedPath = "http://www.example.com/pa%20th%"; UsableURI partiallyEscapedPathUuri = UsableURIFactory.getInstance( @@ -845,9 +837,9 @@ public void testTrailingPercents() throws URIException { // "http://www.example.com/pa th%", // TODO: is this desirable? 
//// partiallyEscapedPath, // partiallyEscapedPathUuri.getURI()); - assertEquals("partiallyEscapedPath getEscapedURI", - "http://www.example.com/pa%20th%", - partiallyEscapedPathUuri.getEscapedURI()); + assertEquals("http://www.example.com/pa%20th%", + partiallyEscapedPathUuri.getEscapedURI(), + "partiallyEscapedPath getEscapedURI"); String plainQueryString = "http://www.example.com/path?q=foo%"; UsableURI plainQueryStringUuri = UsableURIFactory.getInstance( @@ -855,58 +847,58 @@ public void testTrailingPercents() throws URIException { // assertEquals("plainQueryString getURI", // plainQueryString, // plainQueryStringUuri.getURI()); - assertEquals("plainQueryString getEscapedURI", - "http://www.example.com/path?q=foo%", - plainQueryStringUuri.getEscapedURI()); + assertEquals("http://www.example.com/path?q=foo%", + plainQueryStringUuri.getEscapedURI(), + "plainQueryString getEscapedURI"); String partiallyEscapedQueryString = "http://www.example.com/pa%20th?q=foo%"; UsableURI partiallyEscapedQueryStringUuri = UsableURIFactory.getInstance( partiallyEscapedQueryString); - assertEquals("partiallyEscapedQueryString getURI", - "http://www.example.com/pa th?q=foo%", - partiallyEscapedQueryStringUuri.getURI()); - assertEquals("partiallyEscapedQueryString getEscapedURI", - "http://www.example.com/pa%20th?q=foo%", - partiallyEscapedQueryStringUuri.getEscapedURI()); + assertEquals("http://www.example.com/pa th?q=foo%", + partiallyEscapedQueryStringUuri.getURI(), + "partiallyEscapedQueryString getURI"); + assertEquals("http://www.example.com/pa%20th?q=foo%", + partiallyEscapedQueryStringUuri.getEscapedURI(), + "partiallyEscapedQueryString getEscapedURI"); } /** * Ensure that stray '%' characters do not prevent * UURI instances from being created, and are reasonably * escaped when encountered. 
- * - * @throws URIException */ + @Test public void testStrayPercents() throws URIException { String oneStray = "http://www.example.com/pa%th"; UsableURI oneStrayUuri = UsableURIFactory.getInstance(oneStray); - assertEquals("oneStray getURI", oneStray, oneStrayUuri.getURI()); - assertEquals("oneStray getEscapedURI", - "http://www.example.com/pa%th", // browsers don't escape '%' - oneStrayUuri.getEscapedURI()); + assertEquals(oneStray, oneStrayUuri.getURI(), "oneStray getURI"); + assertEquals("http://www.example.com/pa%th", + oneStrayUuri.getEscapedURI(), // browsers don't escape '%' + "oneStray getEscapedURI"); String precededByValidEscape = "http://www.example.com/pa%20th%way"; UsableURI precededByValidEscapeUuri = UsableURIFactory.getInstance( precededByValidEscape); - assertEquals("precededByValidEscape getURI", - "http://www.example.com/pa th%way", // getURI interprets escapes - precededByValidEscapeUuri.getURI()); - assertEquals("precededByValidEscape getEscapedURI", - "http://www.example.com/pa%20th%way", - precededByValidEscapeUuri.getEscapedURI()); + assertEquals("http://www.example.com/pa th%way", + precededByValidEscapeUuri.getURI(), // getURI interprets escapes + "precededByValidEscape getURI"); + assertEquals("http://www.example.com/pa%20th%way", + precededByValidEscapeUuri.getEscapedURI(), + "precededByValidEscape getEscapedURI"); String followedByValidEscape = "http://www.example.com/pa%th%20way"; UsableURI followedByValidEscapeUuri = UsableURIFactory.getInstance( followedByValidEscape); - assertEquals("followedByValidEscape getURI", - "http://www.example.com/pa%th way", // getURI interprets escapes - followedByValidEscapeUuri.getURI()); - assertEquals("followedByValidEscape getEscapedURI", - "http://www.example.com/pa%th%20way", - followedByValidEscapeUuri.getEscapedURI()); + assertEquals("http://www.example.com/pa%th way", + followedByValidEscapeUuri.getURI(), // getURI interprets escapes + "followedByValidEscape getURI"); + 
assertEquals("http://www.example.com/pa%th%20way", + followedByValidEscapeUuri.getEscapedURI(), + "followedByValidEscape getEscapedURI"); } - + + @Test public void testEscapingNotNecessary() throws URIException { String escapesUnnecessary = "http://www.example.com/misc;reserved:chars@that&don't=need" @@ -914,42 +906,46 @@ public void testEscapingNotNecessary() throws URIException { // expect everything but the #fragment String expected = escapesUnnecessary.substring(0, escapesUnnecessary .length() - 3); - assertEquals("escapes unnecessary", - expected, - UsableURIFactory.getInstance(escapesUnnecessary).toString()); + assertEquals(expected, + UsableURIFactory.getInstance(escapesUnnecessary).toString(), + "escapes unnecessary"); } - + + @Test public void testIdn() throws URIException { // See http://www.josefsson.org/idn.php. // http://räksmörgås.josefßon.org/ String idn1 = "http://r\u00e4ksm\u00f6rg\u00e5s.josef\u00dfon.org/"; String puny1 = "http://xn--rksmrgs-5wao1o.josefsson.org/"; - assertEquals("encoding of " + idn1, puny1, UsableURIFactory - .getInstance(idn1).toString()); + assertEquals(puny1, UsableURIFactory + .getInstance(idn1).toString(), "encoding of " + idn1); // http://www.pølse.dk/ String idn2 = "http://www.p\u00f8lse.dk/"; String puny2 = "http://www.xn--plse-gra.dk/"; - assertEquals("encoding of " + idn2, puny2, UsableURIFactory - .getInstance(idn2).toString()); + assertEquals(puny2, UsableURIFactory + .getInstance(idn2).toString(), "encoding of " + idn2); // http://例子.測試 String idn3 = "http://\u4F8B\u5B50.\u6E2C\u8A66"; String puny3 = "http://xn--fsqu00a.xn--g6w251d/"; - assertEquals("encoding of " + idn3, puny3, UsableURIFactory - .getInstance(idn3).toString()); + assertEquals(puny3, UsableURIFactory + .getInstance(idn3).toString(), "encoding of " + idn3); } - + + @Test public void testNewLineInURL() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://www.ar\rchive\n." 
+ "org/i\n\n\r\rndex.html"); assertEquals("http://www.archive.org/index.html", uuri.toString()); } - + + @Test public void testTabsInURL() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://www.ar\tchive\t." + "org/i\t\r\n\tndex.html"); assertEquals("http://www.archive.org/index.html", uuri.toString()); } - + + @Test public void testQueryEscaping() throws URIException { UsableURI uuri = UsableURIFactory.getInstance( "http://www.yahoo.com/foo?somechars!@$%^&*()_-+={[}]|\'\";:/?.>,<"); @@ -963,50 +959,44 @@ public void testQueryEscaping() throws URIException { * Check that our 'normalization' does same as Nutch's * Below before-and-afters were taken from the nutch urlnormalizer-basic * TestBasicURLNormalizer class (December 2006, Nutch 0.9-dev). - * @throws URIException */ + @Test public void testSameAsNutchURLFilterBasic() throws URIException { - assertEquals(UsableURIFactory.getInstance(" http://foo.com/ ").toString(), - "http://foo.com/"); + assertEquals("http://foo.com/", + UsableURIFactory.getInstance(" http://foo.com/ ").toString()); // check that protocol is lower cased - assertEquals(UsableURIFactory.getInstance("HTTP://foo.com/").toString(), - "http://foo.com/"); + assertEquals("http://foo.com/", + UsableURIFactory.getInstance("HTTP://foo.com/").toString()); // check that host is lower cased - assertEquals(UsableURIFactory. - getInstance("http://Foo.Com/index.html").toString(), - "http://foo.com/index.html"); - assertEquals(UsableURIFactory. - getInstance("http://Foo.Com/index.html").toString(), - "http://foo.com/index.html"); + assertEquals("http://foo.com/index.html", + UsableURIFactory.getInstance("http://Foo.Com/index.html").toString()); + assertEquals("http://foo.com/index.html", + UsableURIFactory.getInstance("http://Foo.Com/index.html").toString()); // check that port number is normalized - assertEquals(UsableURIFactory. 
- getInstance("http://foo.com:80/index.html").toString(), - "http://foo.com/index.html"); - assertEquals(UsableURIFactory.getInstance("http://foo.com:81/").toString(), - "http://foo.com:81/"); + assertEquals("http://foo.com/index.html", + UsableURIFactory.getInstance("http://foo.com:80/index.html").toString()); + assertEquals("http://foo.com:81/", + UsableURIFactory.getInstance("http://foo.com:81/").toString()); // check that null path is normalized - assertEquals(UsableURIFactory.getInstance("http://foo.com").toString(), - "http://foo.com/"); + assertEquals("http://foo.com/", + UsableURIFactory.getInstance("http://foo.com").toString()); // check that references are removed - assertEquals(UsableURIFactory. - getInstance("http://foo.com/foo.html#ref").toString(), - "http://foo.com/foo.html"); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/foo.html#ref").toString()); // // check that encoding is normalized // normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html"); // check that unnecessary "../" are removed - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/../").toString(), - "http://foo.com/" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/bb/../").toString(), - "http://foo.com/aa/"); + assertEquals("http://foo.com/", + UsableURIFactory.getInstance("http://foo.com/aa/../").toString()); + assertEquals("http://foo.com/aa/", + UsableURIFactory.getInstance("http://foo.com/aa/bb/../").toString()); /* We fail this one. Here we produce: 'http://foo.com/'. assertEquals(UURIFactory. @@ -1014,45 +1004,33 @@ public void testSameAsNutchURLFilterBasic() throws URIException { "http://foo.com/aa/.."); */ - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/bb/cc/../../foo.html").toString(), - "http://foo.com/aa/foo.html"); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/bb/../cc/dd/../ee/foo.html"). 
- toString(), - "http://foo.com/aa/cc/ee/foo.html"); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/../../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/../aa/../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/../../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/../bb/../foo.html/../../"). - toString(), - "http://foo.com/" ); - assertEquals(UsableURIFactory.getInstance("http://foo.com/../aa/foo.html"). - toString(), "http://foo.com/aa/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/../aa/../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/a..a/foo.html").toString(), - "http://foo.com/a..a/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/a..a/../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. 
- getInstance("http://foo.com/foo.foo/../foo.html").toString(), - "http://foo.com/foo.html" ); + assertEquals("http://foo.com/aa/foo.html", + UsableURIFactory.getInstance("http://foo.com/aa/bb/cc/../../foo.html").toString()); + assertEquals("http://foo.com/aa/cc/ee/foo.html", + UsableURIFactory.getInstance("http://foo.com/aa/bb/../cc/dd/../ee/foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/../foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/../../foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/../aa/../foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/aa/../../foo.html").toString()); + assertEquals("http://foo.com/", + UsableURIFactory.getInstance("http://foo.com/aa/../bb/../foo.html/../../").toString()); + assertEquals("http://foo.com/aa/foo.html", + UsableURIFactory.getInstance("http://foo.com/../aa/foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/../aa/../foo.html").toString()); + assertEquals("http://foo.com/a..a/foo.html", + UsableURIFactory.getInstance("http://foo.com/a..a/foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/a..a/../foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/foo.foo/../foo.html").toString()); } - + + @Test public void testHttpSchemeColonSlash() { boolean exception = false; try { @@ -1060,16 +1038,17 @@ public void testHttpSchemeColonSlash() { } catch (URIException e) { exception = true; } - assertTrue("Didn't throw exception when one expected", exception); + assertTrue(exception, "Didn't throw exception when one expected"); exception = false; try { UsableURIFactory.getInstance("http://"); } catch 
(URIException e) { exception = true; } - assertTrue("Didn't throw exception when one expected", exception); + assertTrue(exception, "Didn't throw exception when one expected"); } - + + @Test public void testNakedHttpsSchemeColon() { boolean exception = false; try { @@ -1077,7 +1056,7 @@ public void testNakedHttpsSchemeColon() { } catch (URIException e) { exception = true; } - assertTrue("Didn't throw exception when one expected", exception); + assertTrue(exception, "Didn't throw exception when one expected"); exception = false; try { UsableURI base = UsableURIFactory.getInstance("http://www.example.com"); @@ -1085,15 +1064,14 @@ public void testNakedHttpsSchemeColon() { } catch (URIException e) { exception = true; } - assertTrue("Didn't throw exception when one expected", exception); + assertTrue(exception, "Didn't throw exception when one expected"); } /** * Test motivated by [#HER-616] The UURI class may throw * NullPointerException in getReferencedHost() - * - * @throws URIException */ + @Test public void testMissingHttpColon() throws URIException { String suspectUri = "http//www.test.foo"; UsableURI base = UsableURIFactory.getInstance("http://www.example.com"); @@ -1105,7 +1083,7 @@ public void testMissingHttpColon() throws URIException { // should get relative-uri-no-base exception exceptionThrown = true; } finally { - assertTrue("expected exception not thrown",exceptionThrown); + assertTrue(exceptionThrown,"expected exception not thrown"); } UsableURI goodUuri = UsableURIFactory.getInstance(base,suspectUri); goodUuri.getReferencedHost(); @@ -1114,33 +1092,31 @@ public void testMissingHttpColon() throws URIException { /** * A UURI's string representation should be same after a * serialization roundtrip. - * - * @throws URIException */ + @Test public final void testSerializationRoundtrip() throws URIException { UsableURI uuri = UsableURIFactory. 
getInstance("http://www.example.com/path?query#anchor"); UsableURI uuri2 = (UsableURI) SerializationUtils.deserialize( SerializationUtils.serialize(uuri)); - assertEquals("Not equal", uuri.toString(), uuri2.toString()); + assertEquals(uuri.toString(), uuri2.toString(), "Not equal"); uuri = UsableURIFactory. getInstance("file://///boo_hoo/wwwroot/CMS/Images1/Banner.gif"); uuri2 = (UsableURI) SerializationUtils.deserialize( SerializationUtils.serialize(uuri)); - assertEquals("Not equal", uuri.toString(), uuri2.toString()); + assertEquals(uuri.toString(), uuri2.toString(), "Not equal"); } /** * A UURI's string representation should be same after a * toCustomString-getInstance roundtrip. - * - * @throws URIException */ + @Test public final void testToCustomStringRoundtrip() throws URIException { UsableURI uuri = UsableURIFactory. getInstance("http://www.example.com/path?query#anchor"); UsableURI uuri2 = UsableURIFactory.getInstance(uuri.toCustomString()); - assertEquals("Not equal", uuri.toString(), uuri2.toString()); + assertEquals(uuri.toString(), uuri2.toString(), "Not equal"); // TODO: fix // see [HER-1470] UURI String roundtrip (UURIFactory.getInstance(uuri.toString()) results in different URI for file: (and perhaps other) URIs // http://webteam.archive.org/jira/browse/HER-1470 @@ -1153,9 +1129,8 @@ public final void testToCustomStringRoundtrip() throws URIException { /** * A UURI's string representation should be same after a * toCustomString-getInstance roundtrip. - * - * @throws URIException */ + @Test public final void testHostnamePortRoundtrip() throws URIException { UsableURI base = UsableURIFactory. 
getInstance("http://www.example.com/path?query#anchor"); @@ -1163,13 +1138,14 @@ public final void testHostnamePortRoundtrip() throws URIException { System.out.println("scheme:"+test.getScheme()); System.out.println(test.toCustomString()); UsableURI roundtrip = UsableURIFactory.getInstance(test.toCustomString()); - assertEquals("Not equal", test.toString(), roundtrip.toString()); + assertEquals(test.toString(), roundtrip.toString(), "Not equal"); } /** * Test bad port throws URIException not NumberFormatException */ + @Test public void testExtremePort() { try { UsableURI uuri = UsableURIFactory.getInstance("http://Tel.:010101010101"); @@ -1183,9 +1159,8 @@ public void testExtremePort() { /** * Bars ('|') in path-segments aren't encoded by FF, preferred by some * RESTful-URI-ideas guides, so should work without error. - * - * @throws URIException */ + @Test public void testBarsInRelativePath() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.example.com"); String relative = "foo/bar|baz|yorple"; @@ -1197,9 +1172,8 @@ public void testBarsInRelativePath() throws URIException { * To match IE behavior, backslashes in path-info (really, anywhere before * query string) assumed to be slashes, to match IE behavior. In * query-string, they are escaped to %5C. 
- * - * @throws URIException */ + @Test public void testBackslashes() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http:\\/www.example.com\\a/b\\c/d?q\\r\\|s/t\\v"); String expected = "http://www.example.com/a/b/c/d?q%5Cr%5C|s/t%5Cv"; diff --git a/src/test/java/org/archive/url/UsableURITest.java b/src/test/java/org/archive/url/UsableURITest.java index 2a2f41f5..9a4c1860 100644 --- a/src/test/java/org/archive/url/UsableURITest.java +++ b/src/test/java/org/archive/url/UsableURITest.java @@ -22,16 +22,20 @@ import org.apache.commons.httpclient.URIException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class UsableURITest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; + +public class UsableURITest { + @Test public void testHasScheme() { assertTrue(UsableURI.hasScheme("http://www.archive.org")); assertTrue(UsableURI.hasScheme("http:")); assertFalse(UsableURI.hasScheme("ht/tp://www.archive.org")); assertFalse(UsableURI.hasScheme("/tmp")); } - + + @Test public void testGetFileName() throws URISyntaxException { final String filename = "x.arc.gz"; assertEquals(filename, @@ -43,11 +47,12 @@ public void testGetFileName() throws URISyntaxException { UsableURI.parseFilename("rsync://archive.org/tmp/one.two/" + filename)); } - + + @Test public void testSchemalessRelative() throws URIException { UsableURI base = new UsableURI("http://www.archive.org/a", true, "UTF-8"); UsableURI relative = new UsableURI("//www.facebook.com/?href=http://www.archive.org/a", true, "UTF-8"); - assertEquals(null, relative.getScheme()); + assertNull(relative.getScheme()); assertEquals("www.facebook.com", relative.getAuthority()); UsableURI test = new UsableURI(base, relative); assertEquals("http://www.facebook.com/?href=http://www.archive.org/a", test.toString()); @@ -56,6 +61,7 @@ public void testSchemalessRelative() throws URIException { /** * Test of toUnicodeHostString method, of class UsableURI. 
*/ + @Test public void testToUnicodeHostString() throws URIException { assertEquals("http://øx.dk", new UsableURI("http://xn--x-4ga.dk", true, "UTF-8").toUnicodeHostString()); assertEquals("xn--x-4ga.dk", new UsableURI("xn--x-4ga.dk", true, "UTF-8").toUnicodeHostString()); diff --git a/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java b/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java index 26371ba8..28a00422 100644 --- a/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java +++ b/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java @@ -2,10 +2,13 @@ import java.net.URISyntaxException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class WaybackURLKeyMakerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class WaybackURLKeyMakerTest { + + @Test public void testMakeKey() throws URISyntaxException { WaybackURLKeyMaker km = new WaybackURLKeyMaker(); assertEquals("-", km.makeKey(null)); diff --git a/src/test/java/org/archive/util/ArchiveUtilsTest.java b/src/test/java/org/archive/util/ArchiveUtilsTest.java index 586a1821..18337498 100644 --- a/src/test/java/org/archive/util/ArchiveUtilsTest.java +++ b/src/test/java/org/archive/util/ArchiveUtilsTest.java @@ -19,15 +19,16 @@ package org.archive.util; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfSystemProperty; + import java.text.ParseException; import java.util.Date; import java.util.HashSet; import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicInteger; -import junit.framework.Test; -import junit.framework.TestCase; -import junit.framework.TestSuite; +import static org.junit.jupiter.api.Assertions.*; /** * JUnit test suite for ArchiveUtils @@ -35,47 +36,21 @@ * @author James Casey * @version $Id$ */ -public class ArchiveUtilsTest extends TestCase { - - /** - * Create a new ArchiveUtilsTest object - * - * @param testName the name of the test - */ - public 
ArchiveUtilsTest(final String testName) { - super(testName); - } - - /** - * run all the tests for ArchiveUtilsTest - * - * @param argv the command line arguments - */ - public static void main(String argv[]) { - junit.textui.TestRunner.run(suite()); - } - - /** - * return the suite of tests for ArchiveUtilsTest - * - * @return the suite of test - */ - public static Test suite() { - return new TestSuite(ArchiveUtilsTest.class); - } +public class ArchiveUtilsTest { /** check the getXXDigitDate() methods produce valid dates*/ + @Test public void testGetXXDigitDate() { // TODO - we only really test the date lengths here. How to test // other stuff well ? final String date12 = ArchiveUtils.get12DigitDate(); - assertEquals("12 digits", 12, date12.length()); + assertEquals(12, (Object) date12.length(), "12 digits"); final String date14 = ArchiveUtils.get14DigitDate(); - assertEquals("14 digits", 14, date14.length()); + assertEquals(14, (Object) date14.length(), "14 digits"); final String date17 = ArchiveUtils.get17DigitDate(); - assertEquals("17 digits", 17, date17.length()); + assertEquals(17, (Object) date17.length(), "17 digits"); // now parse, and check they're all within 1 minute @@ -93,18 +68,19 @@ public void testGetXXDigitDate() { } /** check that getXXDigitDate(long) does the right thing */ + @Test public void testGetXXDigitDateLong() { final long now = System.currentTimeMillis(); final String date12 = ArchiveUtils.get12DigitDate(now); - assertEquals("12 digits", 12, date12.length()); + assertEquals(12, (Object) date12.length(), "12 digits"); final String date14 = ArchiveUtils.get14DigitDate(now); - assertEquals("14 digits", 14, date14.length()); - assertEquals("first twelve digits same as date12", date12, date14.substring(0, 12)); + assertEquals(14, (Object) date14.length(), "14 digits"); + assertEquals(date12, date14.substring(0, 12), "first twelve digits same as date12"); final String date17 = ArchiveUtils.get17DigitDate(now); - assertEquals("17 digits", 17, 
date17.length()); - assertEquals("first twelve digits same as date12", date12, date17.substring(0, 12)); - assertEquals("first fourteen digits same as date14", date14, date17.substring(0, 14)); + assertEquals(17, (Object) date17.length(), "17 digits"); + assertEquals(date12, date17.substring(0, 12), "first twelve digits same as date12"); + assertEquals(date14, date17.substring(0, 14), "first fourteen digits same as date14"); } /** @@ -112,6 +88,7 @@ public void testGetXXDigitDateLong() { * * @throws ParseException */ + @Test public void testParseXXDigitDate() throws ParseException { // given a date, check it get resolved properly // It's 02 Jan 2004, 12:40:02.111 @@ -128,7 +105,8 @@ public void testParseXXDigitDate() throws ParseException { fail("Could not parse a date : " + e.getMessage()); } } - + + @Test public void testTooShortParseDigitDate() throws ParseException { String d = "X"; boolean b = false; @@ -157,6 +135,7 @@ public void testTooShortParseDigitDate() throws ParseException { } /** check that parse12DigitDate doesn't accept a bad date */ + @Test public void testBad12Date() { // now try a badly formed dates assertBad12DigitDate("a-stringy-digit-date"); @@ -166,6 +145,7 @@ public void testBad12Date() { /** * check that parse14DigitDate doesn't accept a bad date */ + @Test public void testBad14Date() { // now try a badly formed dates assertBad14DigitDate("a-stringy-digit-date"); @@ -175,6 +155,7 @@ public void testBad14Date() { /** * check that parse12DigitDate doesn't accept a bad date */ + @Test public void testBad17Date() { // now try a badly formed dates assertBad17DigitDate("a-stringy-digit-date"); @@ -184,27 +165,30 @@ public void testBad17Date() { } /** check that padTo(String) works */ + @Test public void testPadToString() { - assertEquals("pad to one (smaller)", "foo", ArchiveUtils.padTo("foo", 1)); - assertEquals("pad to 0 (no sense)", "foo", ArchiveUtils.padTo("foo", 0)); - assertEquals("pad to neg (nonsense)", "foo", ArchiveUtils.padTo("foo", 
0)); - assertEquals("pad to 4", " foo", ArchiveUtils.padTo("foo", 4)); - assertEquals("pad to 10", " foo", ArchiveUtils.padTo("foo", 10)); + assertEquals("foo", ArchiveUtils.padTo("foo", 1), "pad to one (smaller)"); + assertEquals("foo", ArchiveUtils.padTo("foo", 0), "pad to 0 (no sense)"); + assertEquals("foo", ArchiveUtils.padTo("foo", 0), "pad to neg (nonsense)"); + assertEquals(" foo", ArchiveUtils.padTo("foo", 4), "pad to 4"); + assertEquals(" foo", ArchiveUtils.padTo("foo", 10), "pad to 10"); } /** * check that padTo(int) works */ + @Test public void testPadToInt() { - assertEquals("pad to one (smaller)", "123", ArchiveUtils.padTo(123, 1)); - assertEquals("pad to 0 (no sense)", "123", ArchiveUtils.padTo(123, 0)); - assertEquals("pad to neg (nonsense)", "123", ArchiveUtils.padTo(123, 0)); - assertEquals("pad to 4", " 123", ArchiveUtils.padTo(123, 4)); - assertEquals("pad to 10", " 123", ArchiveUtils.padTo(123, 10)); - assertEquals("pad -123 to 10", " -123", ArchiveUtils.padTo(-123, 10)); + assertEquals("123", ArchiveUtils.padTo(123, 1), "pad to one (smaller)"); + assertEquals("123", ArchiveUtils.padTo(123, 0), "pad to 0 (no sense)"); + assertEquals("123", ArchiveUtils.padTo(123, 0), "pad to neg (nonsense)"); + assertEquals(" 123", ArchiveUtils.padTo(123, 4), "pad to 4"); + assertEquals(" 123", ArchiveUtils.padTo(123, 10), "pad to 10"); + assertEquals(" -123", ArchiveUtils.padTo(-123, 10), "pad -123 to 10"); } /** check that byteArrayEquals() works */ + @Test public void testByteArrayEquals() { // foo == foo2, foo != bar, foo != bar2 byte[] foo = new byte[10], bar = new byte[20]; @@ -214,62 +198,52 @@ public void testByteArrayEquals() { foo[i] = foo2[i] = bar[i] = i; bar2[i] = (byte)(01 + i); } - assertTrue("two nulls", ArchiveUtils.byteArrayEquals(null, null)); - assertFalse("lhs null", ArchiveUtils.byteArrayEquals(null, foo)); - assertFalse("rhs null", ArchiveUtils.byteArrayEquals(foo, null)); + assertTrue(ArchiveUtils.byteArrayEquals(null, null), "two 
nulls"); + assertFalse(ArchiveUtils.byteArrayEquals(null, foo), "lhs null"); + assertFalse(ArchiveUtils.byteArrayEquals(foo, null), "rhs null"); // now check with same length, with same (foo2) and different (bar2) // contents - assertFalse("different lengths", ArchiveUtils.byteArrayEquals(foo, bar)); + assertFalse(ArchiveUtils.byteArrayEquals(foo, bar), "different lengths"); - assertTrue("same to itself", ArchiveUtils.byteArrayEquals(foo, foo)); - assertTrue("same contents", ArchiveUtils.byteArrayEquals(foo, foo2)); - assertFalse("different contents", ArchiveUtils.byteArrayEquals(foo, bar2)); + assertTrue(ArchiveUtils.byteArrayEquals(foo, foo), "same to itself"); + assertTrue(ArchiveUtils.byteArrayEquals(foo, foo2), "same contents"); + assertFalse(ArchiveUtils.byteArrayEquals(foo, bar2), "different contents"); } /** test doubleToString() */ + @Test public void testDoubleToString(){ double test = 12.121d; - assertEquals( - "cecking zero precision", - "12", - ArchiveUtils.doubleToString(test, 0)); - assertEquals( - "cecking 2 character precision", - "12.12", - ArchiveUtils.doubleToString(test, 2)); - assertEquals( - "cecking precision higher then the double has", - "12.121", - ArchiveUtils.doubleToString(test, 65)); + assertEquals("12", ArchiveUtils.doubleToString(test, 0), "cecking zero precision"); + assertEquals("12.12", ArchiveUtils.doubleToString(test, 2), "cecking 2 character precision"); + assertEquals("12.121", ArchiveUtils.doubleToString(test, 65), "cecking precision higher then the double has"); } + @Test public void testFormatBytesForDisplayPrecise(){ - assertEquals("formating negative number", "0 B", ArchiveUtils - .formatBytesForDisplay(-1)); - assertEquals("0 bytes", "0 B", ArchiveUtils - .formatBytesForDisplay(0)); - assertEquals("1 B", ArchiveUtils.formatBytesForDisplay(1)); - assertEquals("9 B", ArchiveUtils.formatBytesForDisplay(9)); - assertEquals("512 B", ArchiveUtils.formatBytesForDisplay(512)); - assertEquals("1023 bytes", "1,023 B", 
ArchiveUtils - .formatBytesForDisplay(1023)); - assertEquals("1025 bytes", "1.0 KiB", ArchiveUtils - .formatBytesForDisplay(1025)); + assertEquals("0 B", ArchiveUtils + .formatBytesForDisplay(-1), "formating negative number"); + assertEquals("0 B", ArchiveUtils + .formatBytesForDisplay(0), "0 bytes"); + Object a2 = ArchiveUtils.formatBytesForDisplay(1); + assertEquals("1 B", a2); + Object a1 = ArchiveUtils.formatBytesForDisplay(9); + assertEquals("9 B", a1); + Object a = ArchiveUtils.formatBytesForDisplay(512); + assertEquals( "512 B", a); + assertEquals("1,023 B", ArchiveUtils + .formatBytesForDisplay(1023), "1023 bytes"); + assertEquals("1.0 KiB", ArchiveUtils + .formatBytesForDisplay(1025), "1025 bytes"); // expected display values taken from Google calculator - assertEquals("10,000 bytes", "9.8 KiB", - ArchiveUtils.formatBytesForDisplay(10000)); - assertEquals("1,000,000 bytes", "977 KiB", - ArchiveUtils.formatBytesForDisplay(1000000)); - assertEquals("100,000,000 bytes", "95 MiB", - ArchiveUtils.formatBytesForDisplay(100000000)); - assertEquals("100,000,000,000 bytes", "93 GiB", - ArchiveUtils.formatBytesForDisplay(100000000000L)); - assertEquals("100,000,000,000,000 bytes", "91 TiB", - ArchiveUtils.formatBytesForDisplay(100000000000000L)); - assertEquals("100,000,000,000,000,000 bytes", "90,949 TiB", - ArchiveUtils.formatBytesForDisplay(100000000000000000L)); + assertEquals("9.8 KiB", ArchiveUtils.formatBytesForDisplay(10000), "10,000 bytes"); + assertEquals("977 KiB", ArchiveUtils.formatBytesForDisplay(1000000), "1,000,000 bytes"); + assertEquals("95 MiB", ArchiveUtils.formatBytesForDisplay(100000000), "100,000,000 bytes"); + assertEquals("93 GiB", ArchiveUtils.formatBytesForDisplay(100000000000L), "100,000,000,000 bytes"); + assertEquals("91 TiB", ArchiveUtils.formatBytesForDisplay(100000000000000L), "100,000,000,000,000 bytes"); + assertEquals("90,949 TiB", ArchiveUtils.formatBytesForDisplay(100000000000000000L), "100,000,000,000,000,000 bytes"); } /* @@ 
-321,11 +295,12 @@ private void assertBad17DigitDate(final String date) { /** check that two longs are within a given delta */ private void assertClose(String desc, long date1, long date2, long delta) { - assertTrue(desc, date1 == date2 || + assertTrue(date1 == date2 || (date1 < date2 && date2 < (date1 + delta)) || - (date2 < date1 && date1 < (date2 + delta))); + (date2 < date1 && date1 < (date2 + delta)), desc); } - + + @Test public void testArrayToLong() { testOneArrayToLong(-1); testOneArrayToLong(1); @@ -339,19 +314,23 @@ private void testOneArrayToLong(final long testValue) { final long l = ArchiveUtils.byteArrayIntoLong(a, 0); assertEquals(testValue, l); } - + + @Test public void testSecondsSinceEpochCalculation() throws ParseException { - assertEquals(ArchiveUtils.secondsSinceEpoch("20010909014640"), - "1000000000"); - assertEquals(ArchiveUtils.secondsSinceEpoch("20010909014639"), - "0999999999"); - assertEquals(ArchiveUtils.secondsSinceEpoch("19700101"), - "0000000000"); - assertEquals(ArchiveUtils.secondsSinceEpoch("2005"), "1104537600"); - assertEquals(ArchiveUtils.secondsSinceEpoch("200501"), "1104537600"); - assertEquals(ArchiveUtils.secondsSinceEpoch("20050101"), "1104537600"); - assertEquals(ArchiveUtils.secondsSinceEpoch("2005010100"), - "1104537600"); + String m6 = ArchiveUtils.secondsSinceEpoch("20010909014640"); + assertEquals("1000000000", m6); + String m5 = ArchiveUtils.secondsSinceEpoch("20010909014639"); + assertEquals("0999999999", m5); + String m4 = ArchiveUtils.secondsSinceEpoch("19700101"); + assertEquals("0000000000", m4); + String m3 = ArchiveUtils.secondsSinceEpoch("2005"); + assertEquals("1104537600", m3); + String m2 = ArchiveUtils.secondsSinceEpoch("200501"); + assertEquals("1104537600", m2); + String m1 = ArchiveUtils.secondsSinceEpoch("20050101"); + assertEquals("1104537600", m1); + String m = ArchiveUtils.secondsSinceEpoch("2005010100"); + assertEquals("1104537600", m); boolean eThrown = false; try { 
ArchiveUtils.secondsSinceEpoch("20050"); @@ -360,10 +339,13 @@ public void testSecondsSinceEpochCalculation() throws ParseException { } assertTrue(eThrown); } - - public static void testZeroPadInteger() { - assertEquals(ArchiveUtils.zeroPadInteger(1), "0000000001"); - assertEquals(ArchiveUtils.zeroPadInteger(1000000000), "1000000000"); + + @Test + public void testZeroPadInteger() { + String m1 = ArchiveUtils.zeroPadInteger(1); + assertEquals("0000000001", m1); + String m = ArchiveUtils.zeroPadInteger(1000000000); + assertEquals("1000000000", m); } /** @@ -371,7 +353,9 @@ public static void testZeroPadInteger() { * * @throws InterruptedException */ - public static void testDateFormatConcurrency() throws InterruptedException { + @Test + @EnabledIfSystemProperty(named = "runSlowTests", matches = "true") + public void testDateFormatConcurrency() throws InterruptedException { final int COUNT = 1000; Thread [] ts = new Thread[COUNT]; final Semaphore allDone = new Semaphore(-COUNT+1); @@ -403,24 +387,29 @@ public void run() { while(!ts[i].isAlive()) /* Wait for thread to spin up*/; } allDone.acquire(); // wait for all threads to finish - assertEquals(failures.get()+" format mismatches",0,failures.get()); + String m = failures.get()+" format mismatches"; + assertEquals(0, (Object) failures.get(), m); } - + + @Test public void testIsTld() { - assertTrue("TLD test problem", ArchiveUtils.isTld("com")); - assertTrue("TLD test problem", ArchiveUtils.isTld("COM")); + assertTrue(ArchiveUtils.isTld("com"), "TLD test problem"); + assertTrue(ArchiveUtils.isTld("COM"), "TLD test problem"); } - + + @Test public void testUnique17() { HashSet uniqueTimestamps = new HashSet(); for(int i = 0; i<10; i++) { - assertTrue("timestamp17 repeated",uniqueTimestamps.add(ArchiveUtils.getUnique17DigitDate())); + assertTrue(uniqueTimestamps.add(ArchiveUtils.getUnique17DigitDate()),"timestamp17 repeated"); } } + + @Test public void testUnique14() { HashSet uniqueTimestamps = new HashSet(); for(int i = 
0; i<10; i++) { - assertTrue("timestamp14 repeated",uniqueTimestamps.add(ArchiveUtils.getUnique14DigitDate())); + assertTrue(uniqueTimestamps.add(ArchiveUtils.getUnique14DigitDate()),"timestamp14 repeated"); } } } diff --git a/src/test/java/org/archive/util/ByteOpTest.java b/src/test/java/org/archive/util/ByteOpTest.java index de6a164f..49781c36 100644 --- a/src/test/java/org/archive/util/ByteOpTest.java +++ b/src/test/java/org/archive/util/ByteOpTest.java @@ -5,14 +5,15 @@ import java.io.DataInputStream; import java.io.IOException; -import org.archive.util.ByteOp; - import com.google.common.io.LittleEndianDataOutputStream; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; -public class ByteOpTest extends TestCase { +public class ByteOpTest { + @Test public void testReadShort() throws IOException { byte a[] = new byte[]{0,1,2,3}; ByteArrayInputStream bais = new ByteArrayInputStream(a); @@ -31,6 +32,7 @@ public void testReadShort() throws IOException { } } + @Test public void testAppend() { byte a[] = new byte[]{1}; byte b[] = new byte[]{2}; @@ -48,8 +50,4 @@ public void testAppend() { assertEquals(5,n2[4]); } - - public void testReadInt() { - } - } diff --git a/src/test/java/org/archive/util/CrossProductTest.java b/src/test/java/org/archive/util/CrossProductTest.java index edadb859..211fa65e 100644 --- a/src/test/java/org/archive/util/CrossProductTest.java +++ b/src/test/java/org/archive/util/CrossProductTest.java @@ -3,11 +3,9 @@ import java.util.ArrayList; import java.util.List; -import org.archive.util.CrossProduct; +import org.junit.jupiter.api.Test; -import junit.framework.TestCase; - -public class CrossProductTest extends TestCase { +public class CrossProductTest { private void dumpC(List a) { StringBuilder sb = new StringBuilder(); boolean first = false; @@ -26,10 +24,12 @@ private void dumpLOL(List> coc) { dumpC(co); } } + @Test public void testVersion() { String version = 
IAUtils.loadCommonsVersion(); System.out.format("Loaded version(%s)\n", version); } + @Test public void testCrossProduct() { ArrayList> input = new ArrayList>(); CrossProduct xp = new CrossProduct(); diff --git a/src/test/java/org/archive/util/FileUtilsTest.java b/src/test/java/org/archive/util/FileUtilsTest.java index 01625627..6142913f 100644 --- a/src/test/java/org/archive/util/FileUtilsTest.java +++ b/src/test/java/org/archive/util/FileUtilsTest.java @@ -21,12 +21,20 @@ import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.Collections; import java.util.LinkedList; import java.util.List; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.math.LongRange; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; /** @@ -36,7 +44,7 @@ * @author gojomo * @version $Date$, $Revision$ */ -public class FileUtilsTest extends TmpDirTestCase { +public class FileUtilsTest { private String srcDirName = FileUtilsTest.class.getName() + ".srcdir"; private File srcDirFile = null; private String tgtDirName = FileUtilsTest.class.getName() + ".tgtdir"; @@ -51,13 +59,15 @@ public class FileUtilsTest extends TmpDirTestCase { protected File largeLinesWindows; protected File nakedLastLineUnix; protected File nakedLastLineWindows; - - + + @TempDir + Path tempDir; + + @BeforeEach protected void setUp() throws Exception { - super.setUp(); - this.srcDirFile = new File(getTmpDir(), srcDirName); + this.srcDirFile = new File(tempDir.toFile(), srcDirName); FileUtils.ensureWriteableDirectory(srcDirFile); - this.tgtDirFile = new File(getTmpDir(), tgtDirName); + this.tgtDirFile = new File(tempDir.toFile(), tgtDirName); FileUtils.ensureWriteableDirectory(tgtDirFile); addFiles(); @@ -76,7 +86,7 @@ protected void 
setUp() throws Exception { } private void addFiles() throws IOException { - addFiles(3, this.getName()); + addFiles(3, FileUtilsTest.class.getName()); } private void addFiles(final int howMany, final String baseName) @@ -104,9 +114,9 @@ private File setUpLinesFile(String name, int minLineSize, int maxLineSize, int l return file; } - + + @AfterEach protected void tearDown() throws Exception { - super.tearDown(); org.apache.commons.io.FileUtils.deleteQuietly(this.srcDirFile); org.apache.commons.io.FileUtils.deleteQuietly(this.tgtDirFile); org.apache.commons.io.FileUtils.deleteQuietly(zeroLengthLinesUnix); @@ -119,7 +129,8 @@ protected void tearDown() throws Exception { org.apache.commons.io.FileUtils.deleteQuietly(nakedLastLineWindows); } - + + @Test public void testCopyFile() { // Test exception copying nonexistent file. File [] srcFiles = this.srcDirFile.listFiles(); @@ -131,37 +142,45 @@ public void testCopyFile() { } catch (IOException ioe) { e = ioe; } - assertNotNull("Didn't get expected IOE", e); + assertNotNull(e, "Didn't get expected IOE"); } - + + @Test public void testTailLinesZeroLengthUnix() throws IOException { verifyTailLines(zeroLengthLinesUnix); } - + + @Test public void testTailLinesZeroLengthWindows() throws IOException { verifyTailLines(zeroLengthLinesWindows); } - + + @Test public void testTailLinesSmallUnix() throws IOException { verifyTailLines(smallLinesUnix); } + @Test public void testTailLinesLargeUnix() throws IOException { verifyTailLines(largeLinesUnix); } + @Test public void testTailLinesSmallWindows() throws IOException { verifyTailLines(smallLinesWindows); } + @Test public void testTailLinesLargeWindows() throws IOException { verifyTailLines(largeLinesWindows); } + @Test public void testTailLinesNakedUnix() throws IOException { verifyTailLines(nakedLastLineUnix); } + @Test public void testTailLinesNakedWindows() throws IOException { verifyTailLines(nakedLastLineWindows); } @@ -185,8 +204,8 @@ private void verifyTailLines(File file) 
throws IOException { private void verifyTailLines(File file, List lines, int count, int estimate) throws IOException { List testLines; testLines = getTestTailLines(file,count,estimate); - assertEquals("line counts not equal:"+file.getName()+" "+count+" "+estimate,lines.size(),testLines.size()); - assertEquals("lines not equal: "+file.getName()+" "+count+" "+estimate,lines,testLines); + assertEquals(lines.size(),testLines.size(),"line counts not equal:"+file.getName()+" "+count+" "+estimate); + assertEquals(lines,testLines,"lines not equal: "+file.getName()+" "+count+" "+estimate); } private List getTestTailLines(File file, int count, int estimate) throws IOException { @@ -202,35 +221,43 @@ private List getTestTailLines(File file, int count, int estimate) throws Collections.reverse(testLines); return testLines; } - + + @Test public void testHeadLinesZeroLengthUnix() throws IOException { verifyHeadLines(zeroLengthLinesUnix); } - + + @Test public void testHeadLinesZeroLengthWindows() throws IOException { verifyHeadLines(zeroLengthLinesWindows); } - + + @Test public void testHeadLinesSmallUnix() throws IOException { verifyHeadLines(smallLinesUnix); } + @Test public void testHeadLinesLargeUnix() throws IOException { verifyHeadLines(largeLinesUnix); } + @Test public void testHeadLinesSmallWindows() throws IOException { verifyHeadLines(smallLinesWindows); } + @Test public void testHeadLinesLargeWindows() throws IOException { verifyHeadLines(largeLinesWindows); } + @Test public void testHeadLinesNakedUnix() throws IOException { verifyHeadLines(nakedLastLineUnix); } + @Test public void testHeadLinesNakedWindows() throws IOException { verifyHeadLines(nakedLastLineWindows); } @@ -255,8 +282,8 @@ private void verifyHeadLines(File file) throws IOException { private void verifyHeadLines(File file, List lines, int count, int estimate) throws IOException { List testLines; testLines = getTestHeadLines(file,count,estimate); - assertEquals("line counts not equal:"+file.getName()+" 
"+count+" "+estimate,lines.size(),testLines.size()); - assertEquals("lines not equal: "+file.getName()+" "+count+" "+estimate,lines,testLines); + assertEquals(lines.size(),testLines.size(),"line counts not equal:"+file.getName()+" "+count+" "+estimate); + assertEquals(lines,testLines,"lines not equal: "+file.getName()+" "+count+" "+estimate); } private List getTestHeadLines(File file, int count, int estimate) throws IOException { diff --git a/src/test/java/org/archive/util/InterruptibleCharSequenceTest.java b/src/test/java/org/archive/util/InterruptibleCharSequenceTest.java index 8b5c5d1b..858faa40 100644 --- a/src/test/java/org/archive/util/InterruptibleCharSequenceTest.java +++ b/src/test/java/org/archive/util/InterruptibleCharSequenceTest.java @@ -22,13 +22,16 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.regex.Pattern; -import junit.framework.TestCase; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; /** * Tests (and * @author gojomo */ -public class InterruptibleCharSequenceTest extends TestCase { +public class InterruptibleCharSequenceTest { // this regex takes many seconds to fail on the input // (~20 seconds on 2Ghz Athlon64 JDK 1.6) public static String BACKTRACKER = "^(((((a+)*)*)*)*)*$"; @@ -45,6 +48,8 @@ public class InterruptibleCharSequenceTest extends TestCase { * The runtime overhead of checking interrupt status in this * extreme case is around 5% in my tests. 
*/ + @Test + @Disabled public void xestOverhead() { String regex = BACKTRACKER; String inputNormal = INPUT; @@ -96,16 +101,18 @@ public void run() { t.start(); return t; } - + + @Test public void testNoninterruptible() throws InterruptedException { BlockingQueue q = new LinkedBlockingQueue(); Thread t = tryMatchInThread(INPUT, BACKTRACKER, q); Thread.sleep(1000); t.interrupt(); - Object result = q.take(); - assertTrue("mismatch uncompleted",Boolean.FALSE.equals(result)); + Object result = q.take(); + assertEquals(Boolean.FALSE, result, "mismatch uncompleted"); } - + + @Test public void testInterruptibility() throws InterruptedException { long sleepMillis = 512; while (sleepMillis > 0) { @@ -122,7 +129,7 @@ public void testInterruptibility() throws InterruptedException { if(result instanceof Boolean) { System.err.println(result+" match beat interrupt"); } - assertTrue("exception not thrown",result instanceof RuntimeException); + assertTrue(result instanceof RuntimeException,"exception not thrown"); return; } fail("failed to interrupt InterruptibleCharSequence with given sleeping intervals"); diff --git a/src/test/java/org/archive/util/MimetypeUtilsTest.java b/src/test/java/org/archive/util/MimetypeUtilsTest.java index 564b7762..1ed19616 100644 --- a/src/test/java/org/archive/util/MimetypeUtilsTest.java +++ b/src/test/java/org/archive/util/MimetypeUtilsTest.java @@ -18,46 +18,51 @@ */ package org.archive.util; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertTrue; /** * @author stack * @version $Date$, $Revision$ */ -public class MimetypeUtilsTest extends TestCase { +public class MimetypeUtilsTest { + @Test public void testStraightTruncate() { - assertTrue("Straight broken", - MimetypeUtils.truncate("text/html").equals("text/html")); + assertTrue(MimetypeUtils.truncate("text/html").equals("text/html"), + "Straight broken"); } - + + @Test public void testWhitespaceTruncate() { - 
assertTrue("Null broken", - MimetypeUtils.truncate(null).equals("no-type")); - assertTrue("Empty broken", - MimetypeUtils.truncate("").equals("no-type")); - assertTrue("Tab broken", - MimetypeUtils.truncate(" ").equals("no-type")); - assertTrue("Multispace broken", - MimetypeUtils.truncate(" ").equals("no-type")); - assertTrue("NL broken", - MimetypeUtils.truncate("\n").equals("no-type")); + assertTrue(MimetypeUtils.truncate(null).equals("no-type"), + "Null broken"); + assertTrue(MimetypeUtils.truncate("").equals("no-type"), + "Empty broken"); + assertTrue(MimetypeUtils.truncate(" ").equals("no-type"), + "Tab broken"); + assertTrue(MimetypeUtils.truncate(" ").equals("no-type"), + "Multispace broken"); + assertTrue(MimetypeUtils.truncate("\n").equals("no-type"), + "NL broken"); } - + + @Test public void testCommaTruncate() { - assertTrue("Comma broken", - MimetypeUtils.truncate("text/html,text/html").equals("text/html")); - assertTrue("Comma space broken", - MimetypeUtils.truncate("text/html, text/html"). - equals("text/html")); - assertTrue("Charset broken", - MimetypeUtils.truncate("text/html;charset=iso9958-1"). - equals("text/html")); - assertTrue("Charset space broken", - MimetypeUtils.truncate("text/html; charset=iso9958-1"). - equals("text/html")); - assertTrue("dbl text/html space charset broken", MimetypeUtils. + assertTrue(MimetypeUtils.truncate("text/html,text/html").equals("text/html"), + "Comma broken"); + assertTrue(MimetypeUtils.truncate("text/html, text/html"). + equals("text/html"), + "Comma space broken"); + assertTrue(MimetypeUtils.truncate("text/html;charset=iso9958-1"). + equals("text/html"), + "Charset broken"); + assertTrue(MimetypeUtils.truncate("text/html; charset=iso9958-1"). + equals("text/html"), + "Charset space broken"); + assertTrue(MimetypeUtils. truncate("text/html, text/html; charset=iso9958-1"). 
- equals("text/html")); + equals("text/html"), "dbl text/html space charset broken"); } } diff --git a/src/test/java/org/archive/util/PropertyUtilsTest.java b/src/test/java/org/archive/util/PropertyUtilsTest.java index fb73656b..7f703ee5 100644 --- a/src/test/java/org/archive/util/PropertyUtilsTest.java +++ b/src/test/java/org/archive/util/PropertyUtilsTest.java @@ -23,7 +23,9 @@ import java.io.IOException; import java.util.Properties; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; /** @@ -32,14 +34,15 @@ * @author gojomo * @version $Date: 2009-11-19 14:39:53 -0800 (Thu, 19 Nov 2009) $, $Revision: 6674 $ */ -public class PropertyUtilsTest extends TestCase { - +public class PropertyUtilsTest { + + @Test public void testSimpleInterpolate() throws IOException { Properties props = new Properties(); props.put("foo", "OOF"); props.put("bar","RAB"); String original = "FOO|${foo} BAR|${bar}"; String expected = "FOO|OOF BAR|RAB"; - assertEquals("interpalation problem",expected,PropertyUtils.interpolateWithProperties(original,props)); + assertEquals(expected,PropertyUtils.interpolateWithProperties(original,props),"interpalation problem"); } } diff --git a/src/test/java/org/archive/util/StringFieldExtractorTest.java b/src/test/java/org/archive/util/StringFieldExtractorTest.java index 5f0b4464..7ecb4279 100644 --- a/src/test/java/org/archive/util/StringFieldExtractorTest.java +++ b/src/test/java/org/archive/util/StringFieldExtractorTest.java @@ -2,10 +2,13 @@ import org.archive.util.StringFieldExtractor.StringTuple; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class StringFieldExtractorTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class StringFieldExtractorTest { + + @Test public void testExtract() { StringFieldExtractor ex1 = new StringFieldExtractor(' ', 0); StringFieldExtractor ex2 = new 
StringFieldExtractor(' ', 1); @@ -29,7 +32,8 @@ private void checkSplit(String f, String s,StringTuple t) { assertEquals(f,t.first); assertEquals(s,t.second); } - + + @Test public void testSplit() { StringFieldExtractor sfx = new StringFieldExtractor(' ',2); checkSplit("a b","x y",sfx.split("a b x y")); diff --git a/src/test/java/org/archive/util/TestUtils.java b/src/test/java/org/archive/util/TestUtils.java index 81fd6fd6..01b0d099 100644 --- a/src/test/java/org/archive/util/TestUtils.java +++ b/src/test/java/org/archive/util/TestUtils.java @@ -4,15 +4,12 @@ import java.io.InputStream; import java.util.List; -import junit.framework.TestCase; - - import com.google.common.io.ByteStreams; -public class TestUtils extends TestCase { - public void testNothing() { - assertEquals(2,1+1); - } +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestUtils { public static void dumpMatch(String context, List> res) { System.out.format("Context(%s) Found (%d) matches\n", context, res.size()); @@ -21,7 +18,7 @@ public static void dumpMatch(String context, List> res) { } } - public static void assertLoLMatches(String want[][], List> got) { + public static void assertLoLMatches(String[][] want, List> got) { assertEquals(want.length,got.size()); for(int i = 0; i < want.length; i++) { String [] wantSub = want[i]; @@ -32,8 +29,8 @@ public static void assertLoLMatches(String want[][], List> got) { } } } - public static void assertStreamEquals(InputStream is,byte b[]) throws IOException { - byte got[] = ByteStreams.toByteArray(is); + public static void assertStreamEquals(InputStream is, byte[] b) throws IOException { + byte[] got = ByteStreams.toByteArray(is); assertEquals(got.length,b.length); assertTrue(ByteOp.cmp(got,b)); } diff --git a/src/test/java/org/archive/util/anvl/ANVLRecordTest.java b/src/test/java/org/archive/util/anvl/ANVLRecordTest.java index b31640a5..1889a156 100644 --- 
a/src/test/java/org/archive/util/anvl/ANVLRecordTest.java +++ b/src/test/java/org/archive/util/anvl/ANVLRecordTest.java @@ -22,15 +22,19 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.logging.Logger; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class ANVLRecordTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; + +public class ANVLRecordTest { private final Logger logger = Logger.getLogger(this.getClass().getName()); - public void testAdd() throws Exception { + @Test + public void testAdd() { ANVLRecord am = new ANVLRecord(); am.add(new Element(new Label("entry"))); am.add(new Element(new Label("who"), @@ -49,14 +53,16 @@ public void testAdd() throws Exception { Map m = am.asMap(); logger.fine(m.toString()); } - + + @Test public void testEmptyRecord() throws Exception { byte [] b = ANVLRecord.EMPTY_ANVL_RECORD.getUTF8Bytes(); - assertEquals(b.length, 2); - assertEquals(b[0], '\r'); - assertEquals(b[1], '\n'); + assertEquals(2, b.length); + assertEquals('\r', b[0]); + assertEquals('\n', b[1]); } - + + @Test public void testFolding() throws Exception { ANVLRecord am = new ANVLRecord(); Exception e = null; @@ -65,42 +71,45 @@ public void testFolding() throws Exception { } catch (IllegalArgumentException iae) { e = iae; } - assertTrue(e != null && e instanceof IllegalArgumentException); + assertInstanceOf(IllegalArgumentException.class, e); am.addLabelValue("label", "value with \n in it"); } - + + @Test public void testParse() throws UnsupportedEncodingException, IOException { String record = " a: b\r\n#c#\r\nc:d\r\n \t\t\r\t\n\te" + "\r\nx:\r\n # z\r\n\r\n"; ANVLRecord r = ANVLRecord.load(new ByteArrayInputStream( - record.getBytes("ISO-8859-1"))); + record.getBytes(StandardCharsets.ISO_8859_1))); logger.fine(r.toString()); - assertEquals(r.get(0).toString(), "a: b"); 
+ assertEquals("a: b", r.get(0).toString()); record = " a: b\r\n\r\nsdfsdsdfds"; r = ANVLRecord.load(new ByteArrayInputStream( - record.getBytes("ISO-8859-1"))); + record.getBytes(StandardCharsets.ISO_8859_1))); logger.fine(r.toString()); record = "x:\r\n # z\r\ny:\r\n\r\n"; r = ANVLRecord.load(new ByteArrayInputStream( - record.getBytes("ISO-8859-1"))); + record.getBytes(StandardCharsets.ISO_8859_1))); logger.fine(r.toString()); - assertEquals(r.get(0).toString(), "x:"); + assertEquals("x:", r.get(0).toString()); } - + + @Test public void testExampleParse() - throws UnsupportedEncodingException, IOException { + throws IOException { final String sample = "entry:\t\t\r\n# first ###draft\r\n" + "who:\tGilbert, W.S. | Sullivan, Arthur\r\n" + "what:\tThe Yeoman of\r\n" + "\t\tthe Guard\r\n" + "when/created:\t 1888\r\n\r\n"; ANVLRecord r = ANVLRecord.load(new ByteArrayInputStream( - sample.getBytes("ISO-8859-1"))); + sample.getBytes(StandardCharsets.ISO_8859_1))); logger.fine(r.toString()); } - + + @Test public void testPoundLabel() - throws UnsupportedEncodingException, IOException { + throws IOException { final String sample = "ent#ry:\t\t\r\n# first ###draft\r\n" + "who:\tGilbert, W.S. | Sullivan, Arthur\r\n" + "what:\tThe Yeoman of\r\n" + @@ -109,9 +118,10 @@ public void testPoundLabel() ANVLRecord r = ANVLRecord.load(sample); logger.fine(r.toString()); } - + + @Test public void testNewlineLabel() - throws UnsupportedEncodingException, IOException { + throws IOException { final String sample = "ent\nry:\t\t\r\n# first ###draft\r\n" + "who:\tGilbert, W.S. 
| Sullivan, Arthur\r\n" + "what:\tThe Yeoman of\r\n" + @@ -123,6 +133,6 @@ public void testNewlineLabel() } catch(IllegalArgumentException e) { iae = e; } - assertTrue(iae != null); + assertNotNull(iae); } } diff --git a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java index 8f812b75..5e8889e5 100644 --- a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java +++ b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java @@ -7,10 +7,12 @@ import org.archive.util.binsearch.impl.RandomAccessFileSeekableLineReaderFactory; import org.archive.util.iterator.CloseableIterator; +import org.junit.jupiter.api.Test; -import junit.framework.TestCase; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; -public class SortedTextFileTest extends TestCase { +public class SortedTextFileTest { private static String formatS(int i) { return String.format("%07d",i); } @@ -23,7 +25,7 @@ private void createFile(File target, int max) throws FileNotFoundException { pw.close(); } - + @Test public void testGetRecordIteratorStringBoolean() throws IOException { File test = File.createTempFile("test", null); int max = 1000000; diff --git a/src/test/java/org/archive/util/iterator/CachingStringFilterTest.java b/src/test/java/org/archive/util/iterator/CachingStringFilterTest.java index 5b5be272..d35413cd 100644 --- a/src/test/java/org/archive/util/iterator/CachingStringFilterTest.java +++ b/src/test/java/org/archive/util/iterator/CachingStringFilterTest.java @@ -1,8 +1,9 @@ package org.archive.util.iterator; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class CachingStringFilterTest extends TestCase { +public class CachingStringFilterTest { + @Test public void testCache() { StringFilter tf = new StringFilter() { public boolean isFiltered(String text) { diff --git 
a/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java b/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java index 0c0dce6d..20143289 100644 --- a/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java +++ b/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java @@ -5,12 +5,16 @@ import java.util.List; import java.util.TreeSet; -import junit.framework.TestCase; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; -public class FilterStringIteratorTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; - public void t2estHasNext() { - String blocks[] = {"a","ab","ba","cc"}; +public class FilterStringIteratorTest { + + @Test + public void testHasNext() { + String[] blocks = {"a","ab","ba","cc"}; List bl = Arrays.asList(blocks); TransformingPrefixStringFilter f = new TransformingPrefixStringFilter(bl); @@ -26,23 +30,20 @@ public void t2estHasNext() { assertBlocked(true,"cc",f); assertBlocked(true,"cca",f); } - + + @Test public void testTreeSet() { - String blocks[] = {"a","ab","ba","cc"}; + String[] blocks = {"a","ab","ba","cc"}; TreeSet s = TransformingPrefixStringFilter.makeTreeSet(Arrays.asList(blocks),null); assertTrue(s.contains("a")); assertFalse(s.contains("ab")); - String blocks2[] = {"ab","a","ba","cc"}; + String[] blocks2 = {"ab","a","ba","cc"}; TreeSet s2 = TransformingPrefixStringFilter.makeTreeSet(Arrays.asList(blocks2),null); assertTrue(s2.contains("a")); assertFalse(s2.contains("ab")); - - - } - - + private void assertBlocked(boolean blocked, String s, StringFilter f) { ArrayList l = new ArrayList(); l.add(s); diff --git a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java index 11ea1229..98de1416 100644 --- a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java +++ 
b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java @@ -8,10 +8,14 @@ import java.io.PrintWriter; import java.util.Comparator; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class SortedCompositeIteratorTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +public class SortedCompositeIteratorTest { + + @Test public void testHasNext() throws FileNotFoundException, IOException { File a = File.createTempFile("filea", null); diff --git a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java index 0c70263e..f53befd3 100644 --- a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java +++ b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java @@ -21,24 +21,25 @@ import java.io.ByteArrayInputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.Random; -import junit.framework.TestCase; - import org.apache.commons.io.IOUtils; import org.archive.util.ArchiveUtils; -import org.archive.util.zip.GZIPMembersInputStream; import com.google.common.io.ByteStreams; import com.google.common.primitives.Bytes; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; /** * Tests for GZIPMembersInputStream * @author gojomo * @version $ $ */ -public class GZIPMembersInputStreamTest extends TestCase { +public class GZIPMembersInputStreamTest { byte[] noise1k_gz; byte[] noise32k_gz; byte[] a_gz; @@ -54,8 +55,8 @@ public class GZIPMembersInputStreamTest extends TestCase { buf = new byte[32*1024]; rand.nextBytes(buf); noise32k_gz = ArchiveUtils.gzip(buf); - a_gz = ArchiveUtils.gzip("a".getBytes("ASCII")); - hello_gz = ArchiveUtils.gzip("hello".getBytes("ASCII")); + a_gz = ArchiveUtils.gzip("a".getBytes(StandardCharsets.US_ASCII)); + hello_gz = 
ArchiveUtils.gzip("hello".getBytes(StandardCharsets.US_ASCII)); allfour_gz = Bytes.concat(noise1k_gz,noise32k_gz,a_gz,hello_gz); sixsmall_gz = Bytes.concat(a_gz,hello_gz,a_gz,hello_gz,a_gz,hello_gz); } catch (IOException e) { @@ -63,132 +64,134 @@ public class GZIPMembersInputStreamTest extends TestCase { } } - public static void main(String [] args) { - junit.textui.TestRunner.run(GZIPMembersInputStreamTest.class); - } - + @Test public void testFullReadAllFour() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong length uncompressed data", 1024+(32*1024)+1+5, count); + assertEquals(1024+(32*1024)+1+5, count, "wrong length uncompressed data"); } - + + @Test public void testFullReadSixSmall() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz)); int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong length uncompressed data", 1+5+1+5+1+5, count); + assertEquals(1+5+1+5+1+5, count, "wrong length uncompressed data"); } - + + @Test public void testReadPerMemberAllFour() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); int count0 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong 1k member count", 1024, count0); - assertEquals("wrong member number", 0, gzin.getMemberNumber()); - assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); - assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(1024, count0, "wrong 1k member count"); + assertEquals(0, gzin.getMemberNumber(), "wrong member number"); + assertEquals(0, gzin.getCurrentMemberStart(), "wrong member0 start"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberEnd(), "wrong member0 end"); 
gzin.nextMember(); int count1 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong 32k member count", (32*1024), count1); - assertEquals("wrong member number", 1, gzin.getMemberNumber()); - assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd()); + assertEquals((32*1024), count1, "wrong 32k member count"); + assertEquals(1, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberStart(), "wrong member1 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd(), "wrong member1 end"); gzin.nextMember(); int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong 1-byte member count", 1, count2); - assertEquals("wrong member number", 2, gzin.getMemberNumber()); - assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(1, count2, "wrong 1-byte member count"); + assertEquals(2, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart(), "wrong member2 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd(), "wrong member2 end"); gzin.nextMember(); int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong 5-byte member count", 5, count3); - assertEquals("wrong member number", 3, gzin.getMemberNumber()); - assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(5, count3, "wrong 5-byte member 
count"); + assertEquals(3, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart(), "wrong member3 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd(), "wrong member3 end"); gzin.nextMember(); int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong eof count", 0, countEnd); + assertEquals(0, countEnd, "wrong eof count"); } - + + @Test public void testReadPerMemberSixSmall() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz)); gzin.setEofEachMember(true); for(int i = 0; i < 3; i++) { int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong 1-byte member count", 1, count2); + assertEquals(1, count2, "wrong 1-byte member count"); gzin.nextMember(); int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong 5-byte member count", 5, count3); + assertEquals(5, count3, "wrong 5-byte member count"); gzin.nextMember(); } int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong eof count", 0, countEnd); + assertEquals(0, countEnd, "wrong eof count"); } - + @Test public void testByteReadPerMember() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); int count0 = 0; while(gzin.read()>-1) count0++; - assertEquals("wrong 1k member count", 1024, count0); - assertEquals("wrong member number", 0, gzin.getMemberNumber()); - assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); - assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(1024, count0, "wrong 1k member count"); + assertEquals(0, gzin.getMemberNumber(), "wrong member number"); + assertEquals(0, gzin.getCurrentMemberStart(), "wrong member0 
start"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberEnd(), "wrong member0 end"); gzin.nextMember(); int count1 = 0; while(gzin.read()>-1) count1++; - assertEquals("wrong 32k member count", (32*1024), count1); - assertEquals("wrong member number", 1, gzin.getMemberNumber()); - assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd()); + assertEquals((32*1024), count1, "wrong 32k member count"); + assertEquals(1, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberStart(), "wrong member1 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd(), "wrong member1 end"); gzin.nextMember(); int count2 = 0; while(gzin.read()>-1) count2++; - assertEquals("wrong 1-byte member count", 1, count2); - assertEquals("wrong member number", 2, gzin.getMemberNumber()); - assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(1, count2, "wrong 1-byte member count"); + assertEquals(2, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart(), "wrong member2 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd(), "wrong member2 end"); gzin.nextMember(); int count3 = 0; while(gzin.read()>-1) count3++; - assertEquals("wrong 5-byte member count", 5, count3); - assertEquals("wrong member number", 3, gzin.getMemberNumber()); - assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); + 
assertEquals(5, count3, "wrong 5-byte member count"); + assertEquals(3, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart(), "wrong member3 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd(), "wrong member3 end"); gzin.nextMember(); int countEnd = 0; while(gzin.read()>-1) countEnd++; - assertEquals("wrong eof count", 0, countEnd); + assertEquals(0, countEnd, "wrong eof count"); } - + + @Test public void testMemberSeek() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); gzin.compressedSeek(noise1k_gz.length+noise32k_gz.length); int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong 1-byte member count", 1, count2); + assertEquals(1, count2, "wrong 1-byte member count"); // assertEquals("wrong Member number", 2, gzin.getMemberNumber()); - assertEquals("wrong Member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong Member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart(), "wrong Member2 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd(), "wrong Member2 end"); gzin.nextMember(); int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong 5-byte member count", 5, count3); + assertEquals(5, count3, "wrong 5-byte member count"); // assertEquals("wrong Member number", 3, gzin.getMemberNumber()); - assertEquals("wrong Member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong Member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); + 
assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart(), "wrong Member3 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd(), "wrong Member3 end"); gzin.nextMember(); int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); - assertEquals("wrong eof count", 0, countEnd); + assertEquals(0, countEnd, "wrong eof count"); } @SuppressWarnings("deprecation") + @Test public void testMemberIterator() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); @@ -196,34 +199,34 @@ public void testMemberIterator() throws IOException { assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember0 = iter.next(); int count0 = IOUtils.copy(gzMember0, ByteStreams.nullOutputStream()); - assertEquals("wrong 1k member count", 1024, count0); - assertEquals("wrong member number", 0, gzin.getMemberNumber()); - assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); - assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(1024, count0, "wrong 1k member count"); + assertEquals(0, gzin.getMemberNumber(), "wrong member number"); + assertEquals(0, gzin.getCurrentMemberStart(), "wrong member0 start"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberEnd(), "wrong member0 end"); assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember1 = iter.next(); int count1 = IOUtils.copy(gzMember1, ByteStreams.nullOutputStream()); - assertEquals("wrong 32k member count", (32*1024), count1); - assertEquals("wrong member number", 1, gzin.getMemberNumber()); - assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd()); + assertEquals((32*1024), count1, "wrong 32k member count"); + assertEquals(1, gzin.getMemberNumber(), "wrong member number"); + 
assertEquals(noise1k_gz.length, gzin.getCurrentMemberStart(), "wrong member1 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd(), "wrong member1 end"); assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember2 = iter.next(); int count2 = IOUtils.copy(gzMember2, ByteStreams.nullOutputStream()); - assertEquals("wrong 1-byte member count", 1, count2); - assertEquals("wrong member number", 2, gzin.getMemberNumber()); - assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(1, count2, "wrong 1-byte member count"); + assertEquals(2, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart(), "wrong member2 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd(), "wrong member2 end"); assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember3 = iter.next(); int count3 = IOUtils.copy(gzMember3, ByteStreams.nullOutputStream()); - assertEquals("wrong 5-byte member count", 5, count3); - assertEquals("wrong member number", 3, gzin.getMemberNumber()); - assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(5, count3, "wrong 5-byte member count"); + assertEquals(3, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart(), "wrong member3 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd(), "wrong member3 end"); assertFalse(iter.hasNext()); } From 21c81e518d8896cf50a9d0616cc77957ed3cbbc6 Mon Sep 
17 00:00:00 2001 From: Alex Osborne Date: Mon, 19 May 2025 10:03:13 +0900 Subject: [PATCH 061/169] Remove dependency on Apache Commons HttpClient 3.1 HttpClient 3 was discontinued in 2007 and frequently triggers alerts in dependency vulnerability scanners. We're also not using much of it anymore, with one big exception. The URI class is the foundation of UsableURI and central to Heritrix, which has made removing the library difficult. URIException in particular appears a lot in client code. HttpClient 4+ has switched to java.net.URI and the main reason Heritrix was built on HttpClient URI instead was because java.net.URI is not flexible and differs from how browsers behave. (Although, how browsers behave has shifted over time.) Eventually we'll probably need to rework Heritrix's URI handling to follow the WHATWG URL spec. However, to let us remove the dependency while keeping UsableURI working, this copies HttpClient 3's URI, URIException and ChunkedInputStream with some small tweaks to remove their dependency on other classes in HttpClient. The HttpClient Header class is replaced with our existing HttpHeader. URI and ChunkedInputStream are marked package private for now. This is a breaking API change and will trigger a bump of the major version number. 
--- pom.xml | 6 +- .../format/gzip/zipnum/ZipNumBlockLoader.java | 2 +- .../httpclient/HttpRecorderGetMethod.java | 134 - .../httpclient/HttpRecorderMethod.java | 109 - .../httpclient/HttpRecorderPostMethod.java | 84 - .../SingleHttpConnectionManager.java | 72 - .../ThreadLocalHttpConnectionManager.java | 293 -- .../org/archive/io/HeaderedArchiveRecord.java | 38 +- .../java/org/archive/io/arc/ARC2WCDX.java | 62 +- .../java/org/archive/io/arc/ARCRecord.java | 36 +- .../java/org/archive/io/warc/WARCRecord.java | 5 +- .../resource/html/HTMLResourceFactory.java | 7 +- src/main/java/org/archive/url/LaxURI.java | 23 +- src/main/java/org/archive/url/SURT.java | 1 - .../java/org/archive/url/SURTTokenizer.java | 1 - src/main/java/org/archive/url/URI.java | 3978 +++++++++++++++++ .../java/org/archive/url/URIException.java | 180 + src/main/java/org/archive/url/UsableURI.java | 9 +- .../org/archive/url/UsableURIFactory.java | 2 - .../org/archive/util/ChunkedInputStream.java | 324 ++ .../java/org/archive/util/LaxHttpParser.java | 44 +- src/main/java/org/archive/util/Recorder.java | 1 - src/main/java/org/archive/util/SURT.java | 2 +- .../impl/HTTPSeekableLineReaderFactory.java | 13 +- .../binsearch/impl/http/ApacheHttp31SLR.java | 235 - .../impl/http/ApacheHttp31SLRFactory.java | 192 - .../archive/io/HeaderedArchiveRecordTest.java | 6 +- .../url/BasicURLCanonicalizerTest.java | 2 - .../java/org/archive/url/URLParserTest.java | 2 - .../archive/url/URLRegexTransformerTest.java | 2 - .../org/archive/url/UsableURIFactoryTest.java | 1 - .../java/org/archive/url/UsableURITest.java | 2 - 32 files changed, 4616 insertions(+), 1252 deletions(-) delete mode 100644 src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java delete mode 100644 src/main/java/org/archive/httpclient/HttpRecorderMethod.java delete mode 100644 src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java delete mode 100644 src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java delete mode 
100644 src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java create mode 100644 src/main/java/org/archive/url/URI.java create mode 100644 src/main/java/org/archive/url/URIException.java create mode 100644 src/main/java/org/archive/util/ChunkedInputStream.java delete mode 100644 src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java delete mode 100644 src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java diff --git a/pom.xml b/pom.xml index c70a2cd7..81bd9b32 100644 --- a/pom.xml +++ b/pom.xml @@ -82,9 +82,9 @@ - commons-httpclient - commons-httpclient - 3.1 + commons-codec + commons-codec + 1.18.0 diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumBlockLoader.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumBlockLoader.java index 2247eda4..c28ee536 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumBlockLoader.java +++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumBlockLoader.java @@ -31,7 +31,7 @@ public class ZipNumBlockLoader { protected int signDurationSecs = DEFAULT_SIG_DURATION_SECS; protected boolean useNio = false; - protected String httpLib = HttpLibs.APACHE_31.name(); + protected String httpLib = HttpLibs.APACHE_43.name(); protected boolean bufferFully = true; protected boolean noKeepAlive = true; diff --git a/src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java b/src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java deleted file mode 100644 index 1a94af1f..00000000 --- a/src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.httpclient; - -import java.io.IOException; -import java.util.logging.Logger; - -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpState; -import org.apache.commons.httpclient.methods.GetMethod; -import org.archive.util.Recorder; - - -/** - * Override of GetMethod that marks the passed HttpRecorder w/ the transition - * from HTTP head to body and that forces a close on the http connection. - * - * The actions done in this subclass used to be done by copying - * org.apache.commons.HttpMethodBase, overlaying our version in place of the - * one that came w/ httpclient. Here is the patch of the difference between - * shipped httpclient code and our mods: - *
- *    -- -1338,6 +1346,12 --
- *
- *        public void releaseConnection() {
- *
- *   +        // HERITRIX always ants the streams closed.
- *   +        if (responseConnection != null)
- *   +        {
- *   +            responseConnection.close();
- *   +        }
- *   +
- *            if (responseStream != null) {
- *                try {
- *                    // FYI - this may indirectly invoke responseBodyConsumed.
- *   -- -1959,6 +1973,11 --
- *                        this.statusLine = null;
- *                    }
- *                }
- *   +            // HERITRIX mark transition from header to content.
- *   +            if (this.httpRecorder != null)
- *   +            {
- *   +                this.httpRecorder.markContentBegin();
- *   +            }
- *                readResponseBody(state, conn);
- *                processResponseBody(state, conn);
- *            } catch (IOException e) {
- * 
- * - *

We're not supposed to have access to the underlying connection object; - * am only violating contract because see cases where httpclient is skipping - * out w/o cleaning up after itself. - * - * @author stack - * @version $Revision$, $Date$ - * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 - */ -@Deprecated -public class HttpRecorderGetMethod extends GetMethod { - - protected static Logger logger = - Logger.getLogger(HttpRecorderGetMethod.class.getName()); - - /** - * Instance of http recorder method. - */ - protected HttpRecorderMethod httpRecorderMethod = null; - - - public HttpRecorderGetMethod(String uri, Recorder recorder) { - super(uri); - this.httpRecorderMethod = new HttpRecorderMethod(recorder); - } - - protected void readResponseBody(HttpState state, HttpConnection connection) - throws IOException, HttpException { - // We're about to read the body. Mark transition in http recorder. - this.httpRecorderMethod.markContentBegin(connection); - super.readResponseBody(state, connection); - } - - protected boolean shouldCloseConnection(HttpConnection conn) { - // Always close connection after each request. As best I can tell, this - // is superfluous -- we've set our client to be HTTP/1.0. Doing this - // out of paranoia. - return true; - } - - public int execute(HttpState state, HttpConnection conn) - throws HttpException, IOException { - // Save off the connection so we can close it on our way out in case - // httpclient fails to (We're not supposed to have access to the - // underlying connection object; am only violating contract because - // see cases where httpclient is skipping out w/o cleaning up - // after itself). 
- this.httpRecorderMethod.setConnection(conn); - return super.execute(state, conn); - } - - protected void addProxyConnectionHeader(HttpState state, HttpConnection conn) - throws IOException, HttpException { - super.addProxyConnectionHeader(state, conn); - this.httpRecorderMethod.handleAddProxyConnectionHeader(this); - } - - // XXX see https://webarchive.jira.com/browse/HER-2059 - // We never call this method with the implied question mark prepended, so - // adding it does the trick, since commons-httpclient will strip it later. - public void setQueryString(String queryString) { - if (queryString != null) { - super.setQueryString('?' + queryString); - } else { - super.setQueryString(queryString); - } - } - -} diff --git a/src/main/java/org/archive/httpclient/HttpRecorderMethod.java b/src/main/java/org/archive/httpclient/HttpRecorderMethod.java deleted file mode 100644 index b08bc0bd..00000000 --- a/src/main/java/org/archive/httpclient/HttpRecorderMethod.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.archive.httpclient; - -import java.util.logging.Logger; - -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.HttpMethod; -import org.archive.util.Recorder; - - -/** - * This class encapsulates the specializations supplied by the - * overrides {@link HttpRecorderGetMethod} and {@link HttpRecorderPostMethod}. - * - * It keeps instance of HttpRecorder and HttpConnection. - * - * @author stack - * @version $Revision$, $Date$ - * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 - */ -@Deprecated -public class HttpRecorderMethod { - protected static Logger logger = - Logger.getLogger(HttpRecorderMethod.class.getName()); - - /** - * Instance of http recorder we're using recording this http get. - */ - private Recorder httpRecorder = null; - - /** - * Save around so can force close. - * - * See [ 922080 ] IllegalArgumentException (size is wrong). - * https://sourceforge.net/tracker/?func=detail&aid=922080&group_id=73833&atid=539099 - */ - private HttpConnection connection = null; - - - public HttpRecorderMethod(Recorder recorder) { - this.httpRecorder = recorder; - } - - public void markContentBegin(HttpConnection c) { - if (c != this.connection) { - // We're checking that we're not being asked to work on - // a connection that is other than the one we started - // this method#execute with. - throw new IllegalArgumentException("Connections differ: " + - this.connection + " " + c + " " + - Thread.currentThread().getName()); - } - this.httpRecorder.markContentBegin(); - } - - /** - * @return Returns the connection. - */ - public HttpConnection getConnection() { - return this.connection; - } - - /** - * @param connection The connection to set. - */ - public void setConnection(HttpConnection connection) { - this.connection = connection; - } - /** - * @return Returns the httpRecorder. 
- */ - public Recorder getHttpRecorder() { - return httpRecorder; - } - - /** - * If a 'Proxy-Connection' header has been added to the request, - * it'll be of a 'keep-alive' type. Until we support 'keep-alives', - * override the Proxy-Connection setting and instead pass a 'close' - * (Otherwise every request has to timeout before we notice - * end-of-document). - * @param method Method to find proxy-connection header in. - */ - public void handleAddProxyConnectionHeader(HttpMethod method) { - Header h = method.getRequestHeader("Proxy-Connection"); - if (h != null) { - h.setValue("close"); - method.setRequestHeader(h); - } - } -} diff --git a/src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java b/src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java deleted file mode 100644 index d55d816a..00000000 --- a/src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.archive.httpclient; - -import java.io.IOException; - -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpState; -import org.apache.commons.httpclient.methods.PostMethod; -import org.archive.util.Recorder; - - -/** - * Override of PostMethod that marks the passed HttpRecorder w/ the transition - * from HTTP head to body and that forces a close on the responseConnection. - * - * This is a copy of {@link HttpRecorderGetMethod}. Only difference is the - * parent subclass. - * - * @author stack - * @version $Date$ $Revision$ - * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 - */ -@Deprecated -public class HttpRecorderPostMethod extends PostMethod { - /** - * Instance of http recorder method. - */ - protected HttpRecorderMethod httpRecorderMethod = null; - - - public HttpRecorderPostMethod(String uri, Recorder recorder) { - super(uri); - this.httpRecorderMethod = new HttpRecorderMethod(recorder); - } - - protected void readResponseBody(HttpState state, HttpConnection connection) - throws IOException, HttpException { - // We're about to read the body. Mark transition in http recorder. - this.httpRecorderMethod.markContentBegin(connection); - super.readResponseBody(state, connection); - } - - protected boolean shouldCloseConnection(HttpConnection conn) { - // Always close connection after each request. As best I can tell, this - // is superfluous -- we've set our client to be HTTP/1.0. Doing this - // out of paranoia. 
- return true; - } - - public int execute(HttpState state, HttpConnection conn) - throws HttpException, IOException { - // Save off the connection so we can close it on our way out in case - // httpclient fails to (We're not supposed to have access to the - // underlying connection object; am only violating contract because - // see cases where httpclient is skipping out w/o cleaning up - // after itself). - this.httpRecorderMethod.setConnection(conn); - return super.execute(state, conn); - } - - protected void addProxyConnectionHeader(HttpState state, HttpConnection conn) - throws IOException, HttpException { - super.addProxyConnectionHeader(state, conn); - this.httpRecorderMethod.handleAddProxyConnectionHeader(this); - } -} diff --git a/src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java b/src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java deleted file mode 100644 index d6cf27ab..00000000 --- a/src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.archive.httpclient; - -import java.io.IOException; -import java.io.InputStream; - -import org.apache.commons.httpclient.HostConfiguration; -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.SimpleHttpConnectionManager; - -/** - * An HttpClient-compatible HttpConnection "manager" that actually - * just gives out a new connection each time -- skipping the overhead - * of connection management, since we already throttle our crawler - * with external mechanisms. - * - * @author gojomo - * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 - */ -@Deprecated -public class SingleHttpConnectionManager extends SimpleHttpConnectionManager { - - public SingleHttpConnectionManager() { - super(); - } - - public HttpConnection getConnectionWithTimeout( - HostConfiguration hostConfiguration, long timeout) { - - HttpConnection conn = new HttpConnection(hostConfiguration); - conn.setHttpConnectionManager(this); - conn.getParams().setDefaults(this.getParams()); - return conn; - } - - public void releaseConnection(HttpConnection conn) { - // ensure connection is closed - conn.close(); - finishLast(conn); - } - - protected static void finishLast(HttpConnection conn) { - // copied from superclass because it wasn't made available to subclasses - InputStream lastResponse = conn.getLastResponseInputStream(); - if (lastResponse != null) { - conn.setLastResponseInputStream(null); - try { - lastResponse.close(); - } catch (IOException ioe) { - //FIXME: badness - close to force reconnect. 
- conn.close(); - } - } - } -} diff --git a/src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java b/src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java deleted file mode 100644 index 16821b36..00000000 --- a/src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java +++ /dev/null @@ -1,293 +0,0 @@ -/** - * ==================================================================== - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ==================================================================== - * - */ -package org.archive.httpclient; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.httpclient.HostConfiguration; -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.HttpConnectionManager; -import org.apache.commons.httpclient.params.HttpConnectionManagerParams; - -/** - * A simple, but thread-safe HttpClient {@link HttpConnectionManager}. - * Based on {@link org.apache.commons.httpclient.SimpleHttpConnectionManager}. - * - * Java >= 1.4 is recommended. 
- * - * @author Christian Kohlschuetter - * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 - */ -@Deprecated -public final class ThreadLocalHttpConnectionManager implements - HttpConnectionManager { - - private static final CloserThread closer = new CloserThread(); - private static final Logger logger = Logger - .getLogger(ThreadLocalHttpConnectionManager.class.getName()); - - private final ThreadLocal tl = new ThreadLocal() { - protected synchronized ConnectionInfo initialValue() { - return new ConnectionInfo(); - } - }; - - private ConnectionInfo getConnectionInfo() { - return (ConnectionInfo) tl.get(); - } - - private static final class ConnectionInfo { - /** The http connection */ - private HttpConnection conn = null; - - /** - * The time the connection was made idle. - */ - private long idleStartTime = Long.MAX_VALUE; - } - - public ThreadLocalHttpConnectionManager() { - } - - /** - * Since the same connection is about to be reused, make sure the - * previous request was completely processed, and if not - * consume it now. - * @param conn The connection - * @return true, if the connection is reusable - */ - private static boolean finishLastResponse(final HttpConnection conn) { - InputStream lastResponse = conn.getLastResponseInputStream(); - if(lastResponse != null) { - conn.setLastResponseInputStream(null); - try { - lastResponse.close(); - return true; - } catch (IOException ioe) { - // force reconnect. - return false; - } - } else { - return false; - } - } - - /** - * Collection of parameters associated with this connection manager. 
- */ - private HttpConnectionManagerParams params = new HttpConnectionManagerParams(); - - /** - * @see HttpConnectionManager#getConnection(HostConfiguration) - */ - public HttpConnection getConnection( - final HostConfiguration hostConfiguration) { - return getConnection(hostConfiguration, 0); - } - - /** - * Gets the staleCheckingEnabled value to be set on HttpConnections that are created. - * - * @return true if stale checking will be enabled on HttpConections - * - * @see HttpConnection#isStaleCheckingEnabled() - * - * @deprecated Use {@link HttpConnectionManagerParams#isStaleCheckingEnabled()}, - * {@link HttpConnectionManager#getParams()}. - */ - public boolean isConnectionStaleCheckingEnabled() { - return this.params.isStaleCheckingEnabled(); - } - - /** - * Sets the staleCheckingEnabled value to be set on HttpConnections that are created. - * - * @param connectionStaleCheckingEnabled true if stale checking will be enabled - * on HttpConections - * - * @see HttpConnection#setStaleCheckingEnabled(boolean) - * - * @deprecated Use {@link HttpConnectionManagerParams#setStaleCheckingEnabled(boolean)}, - * {@link HttpConnectionManager#getParams()}. 
- */ - public void setConnectionStaleCheckingEnabled( - final boolean connectionStaleCheckingEnabled) { - this.params.setStaleCheckingEnabled(connectionStaleCheckingEnabled); - } - - /** - * @see HttpConnectionManager#getConnectionWithTimeout(HostConfiguration, long) - * - * @since 3.0 - */ - public HttpConnection getConnectionWithTimeout( - final HostConfiguration hostConfiguration, final long timeout) { - - final ConnectionInfo ci = getConnectionInfo(); - HttpConnection httpConnection = ci.conn; - - // make sure the host and proxy are correct for this connection - // close it and set the values if they are not - if(httpConnection == null || !finishLastResponse(httpConnection) - || !hostConfiguration.hostEquals(httpConnection) - || !hostConfiguration.proxyEquals(httpConnection)) { - - if(httpConnection != null && httpConnection.isOpen()) { - closer.closeConnection(httpConnection); - } - - httpConnection = new HttpConnection(hostConfiguration); - httpConnection.setHttpConnectionManager(this); - httpConnection.getParams().setDefaults(this.params); - ci.conn = httpConnection; - - httpConnection.setHost(hostConfiguration.getHost()); - httpConnection.setPort(hostConfiguration.getPort()); - httpConnection.setProtocol(hostConfiguration.getProtocol()); - httpConnection.setLocalAddress(hostConfiguration.getLocalAddress()); - - httpConnection.setProxyHost(hostConfiguration.getProxyHost()); - httpConnection.setProxyPort(hostConfiguration.getProxyPort()); - } - - // remove the connection from the timeout handler - ci.idleStartTime = Long.MAX_VALUE; - - return httpConnection; - } - - /** - * @see HttpConnectionManager#getConnection(HostConfiguration, long) - * - * @deprecated Use #getConnectionWithTimeout(HostConfiguration, long) - */ - public HttpConnection getConnection( - final HostConfiguration hostConfiguration, final long timeout) { - return getConnectionWithTimeout(hostConfiguration, timeout); - } - - /** - * @see 
HttpConnectionManager#releaseConnection(org.apache.commons.httpclient.HttpConnection) - */ - public void releaseConnection(final HttpConnection conn) { - final ConnectionInfo ci = getConnectionInfo(); - HttpConnection httpConnection = ci.conn; - - if(conn != httpConnection) { - throw new IllegalStateException( - "Unexpected release of an unknown connection."); - } - - finishLastResponse(httpConnection); - - // track the time the connection was made idle - ci.idleStartTime = System.currentTimeMillis(); - } - - /** - * Returns {@link HttpConnectionManagerParams parameters} associated - * with this connection manager. - * - * @since 2.1 - * - * @see HttpConnectionManagerParams - */ - public HttpConnectionManagerParams getParams() { - return this.params; - } - - /** - * Assigns {@link HttpConnectionManagerParams parameters} for this - * connection manager. - * - * @since 2.1 - * - * @see HttpConnectionManagerParams - */ - public void setParams(final HttpConnectionManagerParams p) { - if(p == null) { - throw new IllegalArgumentException("Parameters may not be null"); - } - this.params = p; - } - - /** - * @since 3.0 - */ - public void closeIdleConnections(final long idleTimeout) { - long maxIdleTime = System.currentTimeMillis() - idleTimeout; - - final ConnectionInfo ci = getConnectionInfo(); - - if(ci.idleStartTime <= maxIdleTime) { - ci.conn.close(); - } - } - - private static final class CloserThread extends Thread { - private List connections - = new ArrayList(); - - private static final int SLEEP_INTERVAL = 5000; - - public CloserThread() { - super("HttpConnection closer"); - // Make this a daemon thread so it can't be responsible for the JVM - // not shutting down. 
- setDaemon(true); - start(); - } - - public void closeConnection(final HttpConnection conn) { - synchronized (connections) { - connections.add(conn); - } - } - - public void run() { - try { - while (!Thread.interrupted()) { - Thread.sleep(SLEEP_INTERVAL); - - List s; - synchronized (connections) { - s = connections; - connections = new ArrayList(); - } - logger.log(Level.INFO, "Closing " + s.size() - + " HttpConnections"); - for(final Iterator it = s.iterator(); - it.hasNext();) { - HttpConnection conn = it.next(); - conn.close(); - conn.setHttpConnectionManager(null); - it.remove(); - } - } - } catch (InterruptedException e) { - return; - } - } - } -} diff --git a/src/main/java/org/archive/io/HeaderedArchiveRecord.java b/src/main/java/org/archive/io/HeaderedArchiveRecord.java index 3cce595b..ac4b82f6 100644 --- a/src/main/java/org/archive/io/HeaderedArchiveRecord.java +++ b/src/main/java/org/archive/io/HeaderedArchiveRecord.java @@ -26,10 +26,7 @@ import java.io.OutputStream; import java.io.PrintStream; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpParser; -import org.apache.commons.httpclient.StatusLine; -import org.apache.commons.httpclient.util.EncodingUtil; +import org.archive.format.http.HttpHeader; import org.archive.io.arc.ARCConstants; import org.archive.util.LaxHttpParser; @@ -59,7 +56,7 @@ public class HeaderedArchiveRecord extends ArchiveRecord { * * Only available after the reading of headers. 
*/ - private Header [] contentHeaders = null; + private HttpHeader[] contentHeaders = null; public HeaderedArchiveRecord(final ArchiveRecord ar) throws IOException { @@ -149,13 +146,14 @@ private InputStream readContentHeaders() throws IOException { throw new IOException("Failed to read raw lie where one " + " was expected: " + new String(statusBytes)); } - String statusLine = EncodingUtil.getString(statusBytes, 0, + String statusLine = new String(statusBytes, 0, statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); if (statusLine == null) { throw new NullPointerException("Expected status line is null"); } + statusLine = statusLine.trim(); // TODO: Tighten up this test. - boolean isHttpResponse = StatusLine.startsWithHTTP(statusLine); + boolean isHttpResponse = statusLine.startsWith("HTTP"); boolean isHttpRequest = false; if (!isHttpResponse) { isHttpRequest = statusLine.toUpperCase().startsWith("GET") || @@ -165,9 +163,13 @@ private InputStream readContentHeaders() throws IOException { throw new UnexpectedStartLineIOException("Failed parse of " + "status line: " + statusLine); } - this.statusCode = isHttpResponse? - (new StatusLine(statusLine)).getStatusCode(): -1; - + + if (isHttpResponse) { + this.statusCode = parseStatusCode(statusLine); + } else { + this.statusCode = -1; + } + // Save off all bytes read. 
Keep them as bytes rather than // convert to strings so we don't have to worry about encodings // though this should never be a problem doing http headers since @@ -210,7 +212,19 @@ private InputStream readContentHeaders() throws IOException { bais.reset(); return bais; } - + + public static int parseStatusCode(String statusLine) { + int i = statusLine.indexOf(' '); + if (i < 0) return -1; + int j = statusLine.indexOf(' ', i + 1); + if (j < 0) j = statusLine.length(); + try { + return Integer.parseInt(statusLine.substring(i + 1, j)); + } catch (NumberFormatException e) { + return -1; + } + } + public static class UnexpectedStartLineIOException extends RecoverableIOException { private static final long serialVersionUID = 1L; @@ -252,7 +266,7 @@ public int getContentHeadersLength() { return this.contentHeadersLength; } - public Header[] getContentHeaders() { + public HttpHeader[] getContentHeaders() { return contentHeaders; } diff --git a/src/main/java/org/archive/io/arc/ARC2WCDX.java b/src/main/java/org/archive/io/arc/ARC2WCDX.java index 19010131..f0515694 100644 --- a/src/main/java/org/archive/io/arc/ARC2WCDX.java +++ b/src/main/java/org/archive/io/arc/ARC2WCDX.java @@ -22,14 +22,12 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; -import java.util.Date; -import java.util.Iterator; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.*; import java.util.zip.GZIPOutputStream; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HeaderGroup; -import org.apache.commons.httpclient.util.DateParseException; -import org.apache.commons.httpclient.util.DateUtil; +import org.archive.format.http.HttpHeader; import org.archive.io.ArchiveRecord; import org.archive.util.ArchiveUtils; import org.archive.util.SURT; @@ -95,12 +93,15 @@ public static Object[] createWcdx(ARCReader reader) { ARCRecord record = (ARCRecord) iter.next(); record.close(); ARCRecordMetaData h = 
(ARCRecordMetaData) record.getHeader(); - Header[] httpHeaders = record.getHttpHeaders(); + HttpHeader[] httpHeaders = record.getHttpHeaders(); if(httpHeaders==null) { - httpHeaders = new Header[0]; + httpHeaders = new HttpHeader[0]; } - HeaderGroup hg = new HeaderGroup(); - hg.setHeaders(httpHeaders); + Map headerMap = new HashMap<>(); + for (HttpHeader header : httpHeaders) { + headerMap.putIfAbsent(header.getName().toLowerCase(Locale.ROOT), header); + } + StringBuilder builder = new StringBuilder(); // SURT-form URI @@ -108,7 +109,7 @@ public static Object[] createWcdx(ARCReader reader) { // record timestamp ('b') appendField(builder,h.getDate()); // http header date - appendTimeField(builder,hg.getFirstHeader("Date")); + appendTimeField(builder, headerMap.get("date")); // response code ('s') appendField(builder,h.getStatusCode()); // media type ('m') @@ -131,17 +132,17 @@ public static Object[] createWcdx(ARCReader reader) { // uncompressed (declared in ARC headerline) record length appendField(builder,h.getLength()); // http header content-length - appendField(builder,hg.getFirstHeader("Content-Length")); + appendField(builder, headerMap.get("content-length")); // http header mod-date - appendTimeField(builder,hg.getFirstHeader("Last-Modified")); + appendTimeField(builder, headerMap.get("last-modified")); // http header expires - appendTimeField(builder,hg.getFirstHeader("Expires")); + appendTimeField(builder, headerMap.get("expires")); // http header etag - appendField(builder,hg.getFirstHeader("ETag")); + appendField(builder, headerMap.get("etag")); // http header redirect ('Location' header?) 
- appendField(builder,hg.getFirstHeader("Location")); + appendField(builder, headerMap.get("location")); // ip ('e') appendField(builder,h.getIp()); // original URI @@ -186,8 +187,8 @@ protected static void appendField(StringBuilder builder, Object obj) { // prepend with delimiter builder.append(' '); } - if(obj instanceof Header) { - obj = ((Header)obj).getValue().trim(); + if(obj instanceof HttpHeader) { + obj = ((HttpHeader)obj).getValue().trim(); } builder.append((obj==null||obj.toString().length()==0)?"-":obj); @@ -202,16 +203,16 @@ protected static void appendTimeField(StringBuilder builder, Object obj) { builder.append("-"); return; } - if(obj instanceof Header) { - String s = ((Header)obj).getValue().trim(); + if(obj instanceof HttpHeader) { + String s = ((HttpHeader)obj).getValue().trim(); try { - Date date = DateUtil.parseDate(s); + Date date = parseDate(s); String d = ArchiveUtils.get14DigitDate(date); if(d.startsWith("209")) { d = "199"+d.substring(3); } obj = d; - } catch (DateParseException e) { + } catch (ParseException e) { builder.append('e'); return; } @@ -219,6 +220,23 @@ protected static void appendTimeField(StringBuilder builder, Object obj) { } builder.append(obj); } + + private static Date parseDate(String s) throws ParseException { + SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US); + format.setTimeZone(TimeZone.getTimeZone("GMT")); + format.set2DigitYearStart(new Date(946684800)); // year 2000 + try { + return format.parse(s); + } catch (ParseException e) { + try { + format.applyPattern("EEEE, dd-MMM-yy HH:mm:ss zzz"); + return format.parse(s); + } catch (ParseException e1) { + format.applyPattern("EEE MMM d HH:mm:ss yyyy"); + return format.parse(s); + } + } + } } //'wide' CDX diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java index bacaca38..d3c036ba 100644 --- a/src/main/java/org/archive/io/arc/ARCRecord.java +++ 
b/src/main/java/org/archive/io/arc/ARCRecord.java @@ -32,12 +32,11 @@ import java.util.logging.Logger; import java.util.regex.Matcher; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.StatusLine; -import org.apache.commons.httpclient.util.EncodingUtil; import org.apache.commons.lang.StringUtils; +import org.archive.format.http.HttpHeader; import org.archive.io.ArchiveRecord; import org.archive.io.ArchiveRecordHeader; +import org.archive.io.HeaderedArchiveRecord; import org.archive.io.RecoverableIOException; import org.archive.util.InetAddressUtil; import org.archive.util.LaxHttpParser; @@ -50,11 +49,11 @@ */ public class ARCRecord extends ArchiveRecord implements ARCConstants { /** - * Http status line object. + * Http status code. * - * May be null if record is not http. + * May be -1 if record is not http. */ - private StatusLine httpStatus = null; + private int statusCode = -1; /** * Http header bytes. @@ -69,7 +68,7 @@ public class ARCRecord extends ArchiveRecord implements ARCConstants { * * Only populated after reading of headers. */ - private Header [] httpHeaders = null; + private HttpHeader[] httpHeaders = null; /** * Array of field names. @@ -589,8 +588,8 @@ private InputStream readHttpHeader() throws IOException { "Failed to read http status where one was expected: " + ((statusBytes == null) ? 
"" : new String(statusBytes))); } - - statusLine = EncodingUtil.getString(statusBytes, 0, + + statusLine = new String(statusBytes, 0, statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); // If a null or DELETED break immediately @@ -600,7 +599,7 @@ private InputStream readHttpHeader() throws IOException { // If it's actually the status line, break, otherwise continue skipping any // previous header values - if (!statusLine.contains(":") && StatusLine.startsWithHTTP(statusLine)) { + if (!statusLine.contains(":") && statusLine.trim().startsWith("HTTP")) { break; } @@ -613,7 +612,7 @@ private InputStream readHttpHeader() throws IOException { } if ((statusLine == null) || - !StatusLine.startsWithHTTP(statusLine)) { + !statusLine.trim().startsWith("HTTP")) { if (statusLine.startsWith("DELETED")) { // Some old ARCs have deleted records like following: // http://vireo.gatech.edu:80/ebt-bin/nph-dweb/dynaweb/SGI_Developer/SGITCL_PG/@Generic__BookTocView/11108%3Btd%3D2 130.207.168.42 19991010131803 text/html 29202 @@ -629,13 +628,12 @@ private InputStream readHttpHeader() throws IOException { } } - try { - this.httpStatus = new StatusLine(statusLine); - } catch(IOException e) { - logger.warning(e.getMessage() + " at offset: " + h.getOffset()); - this.errors.add(ArcRecordErrors.HTTP_STATUS_LINE_EXCEPTION); + this.statusCode = HeaderedArchiveRecord.parseStatusCode(statusLine.trim()); + if (statusCode == -1) { + logger.warning("Bad status line at offset: " + h.getOffset()); + this.errors.add(ArcRecordErrors.HTTP_STATUS_LINE_EXCEPTION); } - + // Save off all bytes read. Keep them as bytes rather than // convert to strings so we don't have to worry about encodings // though this should never be a problem doing http headers since @@ -706,7 +704,7 @@ public DeletedARCRecordIOException(final String reason) { * @return Status code. */ public int getStatusCode() { - return (this.httpStatus == null)? 
-1: this.httpStatus.getStatusCode(); + return statusCode; } /** @@ -735,7 +733,7 @@ public ARCRecordMetaData getMetaData() { /** * @return http headers (Only available after header has been read). */ - public Header [] getHttpHeaders() { + public HttpHeader[] getHttpHeaders() { return this.httpHeaders; } diff --git a/src/main/java/org/archive/io/warc/WARCRecord.java b/src/main/java/org/archive/io/warc/WARCRecord.java index 635d1c3b..cf106270 100644 --- a/src/main/java/org/archive/io/warc/WARCRecord.java +++ b/src/main/java/org/archive/io/warc/WARCRecord.java @@ -29,8 +29,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpParser; +import org.archive.format.http.HttpHeader; import org.archive.io.ArchiveRecord; import org.archive.io.ArchiveRecordHeader; import org.archive.util.LaxHttpParser; @@ -123,7 +122,7 @@ protected ArchiveRecordHeader parseHeaders(final InputStream in, // keep count of bytes read, digest and fail properly if EOR too soon... // We don't want digesting while reading Headers. 
// - Header [] h = LaxHttpParser.parseHeaders(in, WARC_HEADER_ENCODING); + HttpHeader[] h = LaxHttpParser.parseHeaders(in, WARC_HEADER_ENCODING); for (int i = 0; i < h.length; i++) { m.put(h[i].getName(), h[i].getValue()); } diff --git a/src/main/java/org/archive/resource/html/HTMLResourceFactory.java b/src/main/java/org/archive/resource/html/HTMLResourceFactory.java index afb1c850..6e95270c 100644 --- a/src/main/java/org/archive/resource/html/HTMLResourceFactory.java +++ b/src/main/java/org/archive/resource/html/HTMLResourceFactory.java @@ -4,9 +4,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; +import java.util.logging.Logger; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.archive.format.http.HttpHeaders; import org.archive.format.json.JSONUtils; import org.archive.format.text.charset.CharsetDetector; @@ -25,7 +24,7 @@ public class HTMLResourceFactory implements ResourceFactory { - public static final Log LOG = LogFactory.getLog(HTMLResourceFactory.class); + private static final Logger LOG = Logger.getLogger(HTMLResourceFactory.class.getName()); protected static final int CHARSET_GUESS_CHUNK_SIZE = 8192; protected static final String HTTP_HEADER_PATH = "Envelope.Payload-Metadata.HTTP-Response-Metadata.Headers"; @@ -58,7 +57,7 @@ public Resource getResource(InputStream is, MetaData parentMetaData, try { charset = charSetDetector.getCharset(chunk, chunkSize, httpHeaders); } catch (Exception e) { - LOG.error("Failed to guess charset: " + e.getMessage()); + LOG.severe("Failed to guess charset: " + e.getMessage()); } } diff --git a/src/main/java/org/archive/url/LaxURI.java b/src/main/java/org/archive/url/LaxURI.java index d7318dfd..57071460 100644 --- a/src/main/java/org/archive/url/LaxURI.java +++ b/src/main/java/org/archive/url/LaxURI.java @@ -18,13 +18,11 @@ */ package org.archive.url; +import java.io.UnsupportedEncodingException; +import 
java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.BitSet; -import org.apache.commons.httpclient.URI; -import org.apache.commons.httpclient.URIException; -import org.apache.commons.httpclient.util.EncodingUtil; - /** * URI subclass which allows partial/inconsistent encoding, matching * the URIs which will be relayed in requests from popular web @@ -121,13 +119,12 @@ protected static String decode(String component, String charset) "Component array of chars may not be null"); } byte[] rawdata = null; - // try { - rawdata = LaxURLCodec.decodeUrlLoose(EncodingUtil - .getAsciiBytes(component)); - // } catch (DecoderException e) { - // throw new URIException(e.getMessage()); - // } - return EncodingUtil.getString(rawdata, charset); + rawdata = LaxURLCodec.decodeUrlLoose(component.getBytes(StandardCharsets.US_ASCII)); + try { + return new String(rawdata, charset); + } catch (UnsupportedEncodingException e) { + return new String(rawdata); + } } // overidden to lax() the acceptable-char BitSet passed in @@ -183,7 +180,7 @@ protected BitSet lax(BitSet generous) { * two instances to one where possible, slimming * instances. * - * @see org.apache.commons.httpclient.URI#parseAuthority(java.lang.String, boolean) + * @see URI#parseAuthority(java.lang.String, boolean) */ protected void parseAuthority(String original, boolean escaped) throws URIException { @@ -204,7 +201,7 @@ protected void parseAuthority(String original, boolean escaped) * long-lived instance from a static field, saving 12-14 bytes * per instance. 
* - * @see org.apache.commons.httpclient.URI#setURI() + * @see URI#setURI() */ protected void setURI() { if (_scheme != null) { diff --git a/src/main/java/org/archive/url/SURT.java b/src/main/java/org/archive/url/SURT.java index 2c8e1b02..3e0bcd55 100644 --- a/src/main/java/org/archive/url/SURT.java +++ b/src/main/java/org/archive/url/SURT.java @@ -6,7 +6,6 @@ import java.util.Iterator; import java.util.logging.Logger; -import org.apache.commons.httpclient.URIException; import org.archive.util.iterator.AbstractPeekableIterator; public class SURT { diff --git a/src/main/java/org/archive/url/SURTTokenizer.java b/src/main/java/org/archive/url/SURTTokenizer.java index da8f58f2..52b80a03 100644 --- a/src/main/java/org/archive/url/SURTTokenizer.java +++ b/src/main/java/org/archive/url/SURTTokenizer.java @@ -19,7 +19,6 @@ */ package org.archive.url; -import org.apache.commons.httpclient.URIException; import org.archive.util.SURT; /** diff --git a/src/main/java/org/archive/url/URI.java b/src/main/java/org/archive/url/URI.java new file mode 100644 index 00000000..e420ca51 --- /dev/null +++ b/src/main/java/org/archive/url/URI.java @@ -0,0 +1,3978 @@ +/* + * $HeadURL: https://svn.apache.org/repos/asf/jakarta/httpcomponents/oac.hc3x/tags/HTTPCLIENT_3_1/src/java/org/apache/commons/httpclient/URI.java $ + * $Revision: 564973 $ + * $Date: 2007-08-11 22:51:47 +0200 (Sat, 11 Aug 2007) $ + * + * ==================================================================== + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + * + */ + +package org.archive.url; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.net.URLCodec; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Hashtable; +import java.util.Locale; + +/** + * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396. + * This class has the purpose of supporting the parsing of a URI reference to + * extend any specific protocols, the character encoding of the protocol to + * be transported and the charset of the document. + *

+ * A URI is always in an "escaped" form, since escaping or unescaping a + * completed URI might change its semantics. + *

+ * Implementers should be careful not to escape or unescape the same string + * more than once, since unescaping an already unescaped string might lead to + * misinterpreting a percent data character as another escaped character, + * or vice versa in the case of escaping an already escaped string. + *

+ * In order to avoid these problems, data types used as follows: + *

+ *   URI character sequence: char
+ *   octet sequence: byte
+ *   original character sequence: String
+ * 

+ * + * So, a URI is a sequence of characters as an array of a char type, which + * is not always represented as a sequence of octets as an array of byte. + *

+ * + * URI Syntactic Components + *

+ * - In general, written as follows:
+ *   Absolute URI = <scheme>:<scheme-specific-part>
+ *   Generic URI = <scheme>://<authority><path>?<query>
+ *
+ * - Syntax
+ *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
+ *   hier_part     = ( net_path | abs_path ) [ "?" query ]
+ *   net_path      = "//" authority [ abs_path ]
+ *   abs_path      = "/"  path_segments
+ * 

+ * + * The following examples illustrate URI that are in common use. + *

+ * ftp://ftp.is.co.za/rfc/rfc1808.txt
+ *    -- ftp scheme for File Transfer Protocol services
+ * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
+ *    -- gopher scheme for Gopher and Gopher+ Protocol services
+ * http://www.math.uio.no/faq/compression-faq/part1.html
+ *    -- http scheme for Hypertext Transfer Protocol services
+ * mailto:mduerst@ifi.unizh.ch
+ *    -- mailto scheme for electronic mail addresses
+ * news:comp.infosystems.www.servers.unix
+ *    -- news scheme for USENET news groups and articles
+ * telnet://melvyl.ucop.edu/
+ *    -- telnet scheme for interactive services via the TELNET Protocol
+ * 
+ * Please, notice that there are many modifications from URL(RFC 1738) and + * relative URL(RFC 1808). + *

+ * The expressions for a URI + *

+ * For escaped URI forms
+ *  - URI(char[]) // constructor
+ *  - char[] getRawXxx() // method
+ *  - String getEscapedXxx() // method
+ *  - String toString() // method
+ * 

+ * For unescaped URI forms + * - URI(String) // constructor + * - String getXXX() // method + *

+ * + * @author Sung-Gu + * @author Mike Bowler + * @version $Revision: 564973 $ $Date: 2002/03/14 15:14:01 + */ +class URI implements Cloneable, Comparable, Serializable { + + + // ----------------------------------------------------------- Constructors + + /** Create an instance as an internal use */ + protected URI() { + } + + /** + * Construct a URI from a string with the given charset. The input string can + * be either in escaped or unescaped form. + * + * @param s URI character sequence + * @param escaped true if URI character sequence is in escaped form. + * false otherwise. + * @param charset the charset string to do escape encoding, if required + * + * @throws URIException If the URI cannot be created. + * @throws NullPointerException if input string is null + * + * @see #getProtocolCharset + * + * @since 3.0 + */ + public URI(String s, boolean escaped, String charset) + throws URIException, NullPointerException { + protocolCharset = charset; + parseUriReference(s, escaped); + } + + /** + * Construct a URI from a string with the given charset. The input string can + * be either in escaped or unescaped form. + * + * @param s URI character sequence + * @param escaped true if URI character sequence is in escaped form. + * false otherwise. + * + * @throws URIException If the URI cannot be created. + * @throws NullPointerException if input string is null + * + * @see #getProtocolCharset + * + * @since 3.0 + */ + public URI(String s, boolean escaped) + throws URIException, NullPointerException { + parseUriReference(s, escaped); + } + + /** + * Construct a URI as an escaped form of a character array with the given + * charset. + * + * @param escaped the URI character sequence + * @param charset the charset string to do escape encoding + * @throws URIException If the URI cannot be created. 
+ * @throws NullPointerException if escaped is null + * @see #getProtocolCharset + * + * @deprecated Use #URI(String, boolean, String) + */ + public URI(char[] escaped, String charset) + throws URIException, NullPointerException { + protocolCharset = charset; + parseUriReference(new String(escaped), true); + } + + + /** + * Construct a URI as an escaped form of a character array. + * An URI can be placed within double-quotes or angle brackets like + * "http://test.com/" and <http://test.com/> + * + * @param escaped the URI character sequence + * @throws URIException If the URI cannot be created. + * @throws NullPointerException if escaped is null + * @see #getDefaultProtocolCharset + * + * @deprecated Use #URI(String, boolean) + */ + public URI(char[] escaped) + throws URIException, NullPointerException { + parseUriReference(new String(escaped), true); + } + + + /** + * Construct a URI from the given string with the given charset. + * + * @param original the string to be represented to URI character sequence + * It is one of absoluteURI and relativeURI. + * @param charset the charset string to do escape encoding + * @throws URIException If the URI cannot be created. + * @see #getProtocolCharset + * + * @deprecated Use #URI(String, boolean, String) + */ + public URI(String original, String charset) throws URIException { + protocolCharset = charset; + parseUriReference(original, false); + } + + + /** + * Construct a URI from the given string. + *

+     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+     * 

+ * An URI can be placed within double-quotes or angle brackets like + * "http://test.com/" and <http://test.com/> + * + * @param original the string to be represented to URI character sequence + * It is one of absoluteURI and relativeURI. + * @throws URIException If the URI cannot be created. + * @see #getDefaultProtocolCharset + * + * @deprecated Use #URI(String, boolean) + */ + public URI(String original) throws URIException { + parseUriReference(original, false); + } + + + /** + * Construct a general URI from the given components. + *

+     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+     *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
+     *   opaque_part   = uric_no_slash *uric
+     * 

+ * It's for absolute URI = <scheme>:<scheme-specific-part># + * <fragment>. + * + * @param scheme the scheme string (required; lower-cased before validation) + * @param schemeSpecificPart scheme_specific_part + * @param fragment the fragment string + * @throws URIException If the URI cannot be created. + * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String schemeSpecificPart, String fragment) + throws URIException { + + // validate and construct the URI character sequence + if (scheme == null) { + throw new URIException(URIException.PARSING, "scheme required"); + } + char[] s = scheme.toLowerCase(Locale.ROOT).toCharArray(); // locale-independent, e.g. no Turkish dotless i + if (validate(s, URI.scheme)) { + _scheme = s; // is_absoluteURI + } else { + throw new URIException(URIException.PARSING, "incorrect scheme"); + } + _opaque = encode(schemeSpecificPart, allowed_opaque_part, + getProtocolCharset()); + // Set flag + _is_opaque_part = true; + _fragment = fragment == null ? null : fragment.toCharArray(); + setURI(); + } + + + /** + * Construct a general URI from the given components. + *

+     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+     *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
+     *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
+     *   hier_part     = ( net_path | abs_path ) [ "?" query ]
+     * 

+ * It's for absolute URI = <scheme>:<path>?<query>#< + * fragment> and relative URI = <path>?<query>#<fragment + * >. + * + * @param scheme the scheme string + * @param authority the authority string + * @param path the path string + * @param query the query string + * @param fragment the fragment string + * @throws URIException If the new URI cannot be created. + * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String authority, String path, String query, + String fragment) throws URIException { + + // validate and construct the URI character sequence + StringBuffer buff = new StringBuffer(); + if (scheme != null) { + buff.append(scheme); + buff.append(':'); + } + if (authority != null) { + buff.append("//"); + buff.append(authority); + } + if (path != null) { // only null is skipped; with a scheme or authority the path must start with "/" + if ((scheme != null || authority != null) + && !path.startsWith("/")) { + throw new URIException(URIException.PARSING, + "abs_path requested"); + } + buff.append(path); + } + if (query != null) { + buff.append('?'); + buff.append(query); + } + if (fragment != null) { + buff.append('#'); + buff.append(fragment); + } + parseUriReference(buff.toString(), false); + } + + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @throws URIException If the new URI cannot be created. + * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String userinfo, String host, int port) + throws URIException { + + this(scheme, userinfo, host, port, null, null, null); + } + + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @param path the path string + * @throws URIException If the new URI cannot be created. 
+ * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String userinfo, String host, int port, + String path) throws URIException { + + this(scheme, userinfo, host, port, path, null, null); + } + + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @param path the path string + * @param query the query string + * @throws URIException If the new URI cannot be created. + * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String userinfo, String host, int port, + String path, String query) throws URIException { + + this(scheme, userinfo, host, port, path, query, null); + } + + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @param path the path string + * @param query the query string + * @param fragment the fragment string + * @throws URIException If the new URI cannot be created. + * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String userinfo, String host, int port, + String path, String query, String fragment) throws URIException { + + this(scheme, (host == null) ? null + : ((userinfo != null) ? userinfo + '@' : "") + host + + ((port != -1) ? ":" + port : ""), path, query, fragment); + } + + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param host the host string + * @param path the path string + * @param fragment the fragment string + * @throws URIException If the new URI cannot be created. 
+ * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String host, String path, String fragment) + throws URIException { + + this(scheme, host, path, null, fragment); + } + + + /** + * Construct a general URI with the given relative URI string. + * + * @param base the base URI + * @param relative the relative URI string + * @throws URIException If the new URI cannot be created. + * + * @deprecated Use #URI(URI, String, boolean) + */ + public URI(URI base, String relative) throws URIException { + this(base, new URI(relative)); + } + + + /** + * Construct a general URI with the given relative URI string. + * + * @param base the base URI + * @param relative the relative URI string + * @param escaped true if URI character sequence is in escaped form. + * false otherwise. + * + * @throws URIException If the new URI cannot be created. + * + * @since 3.0 + */ + public URI(URI base, String relative, boolean escaped) throws URIException { + this(base, new URI(relative, escaped)); + } + + + /** + * Construct a general URI with the given relative URI. + *

+     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+     *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
+     * 

+ * Resolving Relative References to Absolute Form. + * + * Examples of Resolving Relative URI References + * + * Within an object with a well-defined base URI of + *

+     *   http://a/b/c/d;p?q
+     * 

+ * the relative URI would be resolved as follows: + * + * Normal Examples + * + *

+     *   g:h           =  g:h
+     *   g             =  http://a/b/c/g
+     *   ./g           =  http://a/b/c/g
+     *   g/            =  http://a/b/c/g/
+     *   /g            =  http://a/g
+     *   //g           =  http://g
+     *   ?y            =  http://a/b/c/?y
+     *   g?y           =  http://a/b/c/g?y
+     *   #s            =  (current document)#s
+     *   g#s           =  http://a/b/c/g#s
+     *   g?y#s         =  http://a/b/c/g?y#s
+     *   ;x            =  http://a/b/c/;x
+     *   g;x           =  http://a/b/c/g;x
+     *   g;x?y#s       =  http://a/b/c/g;x?y#s
+     *   .             =  http://a/b/c/
+     *   ./            =  http://a/b/c/
+     *   ..            =  http://a/b/
+     *   ../           =  http://a/b/
+     *   ../g          =  http://a/b/g
+     *   ../..         =  http://a/
+     *   ../../        =  http://a/ 
+     *   ../../g       =  http://a/g
+     * 

+ * + * Some URI schemes do not allow a hierarchical syntax matching the + * syntax, and thus cannot use relative references. + * + * @param base the base URI + * @param relative the relative URI + * @throws URIException If the new URI cannot be created. + */ + public URI(URI base, URI relative) throws URIException { + + if (base._scheme == null) { + throw new URIException(URIException.PARSING, "base URI required"); + } + if (base._scheme != null) { + this._scheme = base._scheme; + this._authority = base._authority; + this._is_net_path = base._is_net_path; + } + if (base._is_opaque_part || relative._is_opaque_part) { + this._scheme = base._scheme; + this._is_opaque_part = base._is_opaque_part + || relative._is_opaque_part; + this._opaque = relative._opaque; + this._fragment = relative._fragment; + this.setURI(); + return; + } + boolean schemesEqual = Arrays.equals(base._scheme,relative._scheme); + if (relative._scheme != null + && (!schemesEqual || relative._authority != null)) { + this._scheme = relative._scheme; + this._is_net_path = relative._is_net_path; + this._authority = relative._authority; + if (relative._is_server) { + this._is_server = relative._is_server; + this._userinfo = relative._userinfo; + this._host = relative._host; + this._port = relative._port; + } else if (relative._is_reg_name) { + this._is_reg_name = relative._is_reg_name; + } + this._is_abs_path = relative._is_abs_path; + this._is_rel_path = relative._is_rel_path; + this._path = relative._path; + } else if (base._authority != null && relative._scheme == null) { + this._is_net_path = base._is_net_path; + this._authority = base._authority; + if (base._is_server) { + this._is_server = base._is_server; + this._userinfo = base._userinfo; + this._host = base._host; + this._port = base._port; + } else if (base._is_reg_name) { + this._is_reg_name = base._is_reg_name; + } + } + if (relative._authority != null) { + this._is_net_path = relative._is_net_path; + this._authority = relative._authority; + 
if (relative._is_server) { + this._is_server = relative._is_server; + this._userinfo = relative._userinfo; + this._host = relative._host; + this._port = relative._port; + } else if (relative._is_reg_name) { + this._is_reg_name = relative._is_reg_name; + } + this._is_abs_path = relative._is_abs_path; + this._is_rel_path = relative._is_rel_path; + this._path = relative._path; + } + // resolve the path and query if necessary + if (relative._authority == null + && (relative._scheme == null || schemesEqual)) { + if ((relative._path == null || relative._path.length == 0) + && relative._query == null) { + // handle a reference to the current document, see RFC 2396 + // section 5.2 step 2 + this._path = base._path; + this._query = base._query; + } else { + this._path = resolvePath(base._path, relative._path); + } + } + // base._query removed + if (relative._query != null) { + this._query = relative._query; + } + // base._fragment removed + if (relative._fragment != null) { + this._fragment = relative._fragment; + } + this.setURI(); + // reparse the newly built URI, this will ensure that all flags are set correctly. + // TODO there must be a better way to do this + parseUriReference(new String(_uri), true); + } + + // --------------------------------------------------- Instance Variables + + /** Version ID for serialization */ + static final long serialVersionUID = 604752400577948726L; + + + /** + * Cache the hash code for this URI. + */ + protected int hash = 0; + + + /** + * This Uniform Resource Identifier (URI). + * The URI is always in an "escaped" form, since escaping or unescaping + * a completed URI might change its semantics. + */ + protected char[] _uri = null; + + + /** + * The charset of the protocol used by this URI instance. + */ + protected String protocolCharset = null; + + + /** + * The default charset of the protocol. RFC 2277, 2396 + */ + protected static String defaultProtocolCharset = "UTF-8"; + + + /** + * The default charset of the document. 
RFC 2277, 2396 + * The charset mapped from the default locale is preferred; the platform's charset (file.encoding) is the fallback. + */ + protected static String defaultDocumentCharset = null; + protected static String defaultDocumentCharsetByLocale = null; + protected static String defaultDocumentCharsetByPlatform = null; + // Static initializer for defaultDocumentCharset + static { + Locale locale = Locale.getDefault(); + // in order to support backward compatibility + if (locale != null) { + defaultDocumentCharsetByLocale = + LocaleToCharsetMap.getCharset(locale); + // set the default document charset + defaultDocumentCharset = defaultDocumentCharsetByLocale; + } + // in order to support platform encoding + try { + defaultDocumentCharsetByPlatform = System.getProperty("file.encoding"); + } catch (SecurityException ignore) { // property not readable: the platform charset stays null + } + if (defaultDocumentCharset == null) { + // no locale-derived charset: fall back to the platform charset + defaultDocumentCharset = defaultDocumentCharsetByPlatform; + } + } + + + /** + * The scheme. + */ + protected char[] _scheme = null; + + + /** + * The opaque. + */ + protected char[] _opaque = null; + + + /** + * The authority. + */ + protected char[] _authority = null; + + + /** + * The userinfo. + */ + protected char[] _userinfo = null; + + + /** + * The host. + */ + protected char[] _host = null; + + + /** + * The port; initialized to -1. + */ + protected int _port = -1; + + + /** + * The path. + */ + protected char[] _path = null; + + + /** + * The query. + */ + protected char[] _query = null; + + + /** + * The fragment. + */ + protected char[] _fragment = null; + + + /** + * The root path. + */ + protected static final char[] rootPath = { '/' }; + + // ---------------------- Generous characters for each component validation + + /** + * The percent "%" character always has the reserved purpose of being the + * escape indicator, it must be escaped as "%25" in order to be used as + * data within a URI. 
+ */ + protected static final BitSet percent = new BitSet(256); + // Static initializer for percent + static { + percent.set('%'); + } + + + /** + * BitSet for digit. + *

+     * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
+     *            "8" | "9"
+     * 

+ */ + protected static final BitSet digit = new BitSet(256); + // Static initializer for digit + static { + for (int i = '0'; i <= '9'; i++) { + digit.set(i); + } + } + + + /** + * BitSet for alpha. + *

+     * alpha         = lowalpha | upalpha
+     * 

+ */ + protected static final BitSet alpha = new BitSet(256); + // Static initializer for alpha + static { + for (int i = 'a'; i <= 'z'; i++) { + alpha.set(i); + } + for (int i = 'A'; i <= 'Z'; i++) { + alpha.set(i); + } + } + + + /** + * BitSet for alphanum (join of alpha & digit). + *

+     *  alphanum      = alpha | digit
+     * 

+ */ + protected static final BitSet alphanum = new BitSet(256); + // Static initializer for alphanum + static { + alphanum.or(alpha); + alphanum.or(digit); + } + + + /** + * BitSet for hex. + *

+     * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
+     *                         "a" | "b" | "c" | "d" | "e" | "f"
+     * 

+ */ + protected static final BitSet hex = new BitSet(256); + // Static initializer for hex + static { + hex.or(digit); + for (int i = 'a'; i <= 'f'; i++) { + hex.set(i); + } + for (int i = 'A'; i <= 'F'; i++) { + hex.set(i); + } + } + + + /** + * BitSet for escaped. + *

+     * escaped       = "%" hex hex
+     * 

+ */ + protected static final BitSet escaped = new BitSet(256); + // Static initializer for escaped + static { + escaped.or(percent); + escaped.or(hex); + } + + + /** + * BitSet for mark. + *

+     * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
+     *                 "(" | ")"
+     * 

+ */ + protected static final BitSet mark = new BitSet(256); + // Static initializer for mark + static { + mark.set('-'); + mark.set('_'); + mark.set('.'); + mark.set('!'); + mark.set('~'); + mark.set('*'); + mark.set('\''); + mark.set('('); + mark.set(')'); + } + + + /** + * Data characters that are allowed in a URI but do not have a reserved + * purpose are called unreserved. + *

+     * unreserved    = alphanum | mark
+     * 

+ */ + protected static final BitSet unreserved = new BitSet(256); + // Static initializer for unreserved + static { + unreserved.or(alphanum); + unreserved.or(mark); + } + + + /** + * BitSet for reserved. + *

+     * reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
+     *                 "$" | ","
+     * 

+ */ + protected static final BitSet reserved = new BitSet(256); + // Static initializer for reserved + static { + reserved.set(';'); + reserved.set('/'); + reserved.set('?'); + reserved.set(':'); + reserved.set('@'); + reserved.set('&'); + reserved.set('='); + reserved.set('+'); + reserved.set('$'); + reserved.set(','); + } + + + /** + * BitSet for uric. + *

+     * uric          = reserved | unreserved | escaped
+     * 

+ */ + protected static final BitSet uric = new BitSet(256); + // Static initializer for uric + static { + uric.or(reserved); + uric.or(unreserved); + uric.or(escaped); + } + + + /** + * BitSet for fragment (alias for uric). + *

+     * fragment      = *uric
+     * 

+ */ + protected static final BitSet fragment = uric; + + + /** + * BitSet for query (alias for uric). + *

+     * query         = *uric
+     * 

+ */ + protected static final BitSet query = uric; + + + /** + * BitSet for pchar. + *

+     * pchar         = unreserved | escaped |
+     *                 ":" | "@" | "&" | "=" | "+" | "$" | ","
+     * 

+ */ + protected static final BitSet pchar = new BitSet(256); + // Static initializer for pchar + static { + pchar.or(unreserved); + pchar.or(escaped); + pchar.set(':'); + pchar.set('@'); + pchar.set('&'); + pchar.set('='); + pchar.set('+'); + pchar.set('$'); + pchar.set(','); + } + + + /** + * BitSet for param (alias for pchar). + *

+     * param         = *pchar
+     * 

+ */ + protected static final BitSet param = pchar; + + + /** + * BitSet for segment. + *

+     * segment       = *pchar *( ";" param )
+     * 

+ */ + protected static final BitSet segment = new BitSet(256); + // Static initializer for segment + static { + segment.or(pchar); + segment.set(';'); + segment.or(param); + } + + + /** + * BitSet for path segments. + *

+     * path_segments = segment *( "/" segment )
+     * 

+ */ + protected static final BitSet path_segments = new BitSet(256); + // Static initializer for path_segments + static { + path_segments.set('/'); + path_segments.or(segment); + } + + + /** + * URI absolute path. + *

+     * abs_path      = "/"  path_segments
+     * 

+ */ + protected static final BitSet abs_path = new BitSet(256); + // Static initializer for abs_path + static { + abs_path.set('/'); + abs_path.or(path_segments); + } + + + /** + * URI bitset for encoding typical non-slash characters. + *

+     * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
+     *                 "&" | "=" | "+" | "$" | ","
+     * 

+ */ + protected static final BitSet uric_no_slash = new BitSet(256); + // Static initializer for uric_no_slash: one entry per member of the grammar above, including ':' + static { + uric_no_slash.or(unreserved); + uric_no_slash.or(escaped); + uric_no_slash.set(';'); + uric_no_slash.set('?'); + uric_no_slash.set(':'); + uric_no_slash.set('@'); + uric_no_slash.set('&'); + uric_no_slash.set('='); + uric_no_slash.set('+'); + uric_no_slash.set('$'); + uric_no_slash.set(','); + } + + + /** + * URI bitset that combines uric_no_slash and uric. + *

+     * opaque_part   = uric_no_slash *uric
+     * 

+ */ + protected static final BitSet opaque_part = new BitSet(256); + // Static initializer for opaque_part + static { + // This is generous: the grammar only forbids a slash as the first + // character, but a BitSet cannot express position, so all of uric is + // accepted as well. + opaque_part.or(uric_no_slash); + opaque_part.or(uric); + } + + + /** + * URI bitset that combines absolute path and opaque part. + *

+     * path          = [ abs_path | opaque_part ]
+     * 

+ */ + protected static final BitSet path = new BitSet(256); + // Static initializer for path + static { + path.or(abs_path); + path.or(opaque_part); + } + + + /** + * Port, a logical alias for digit (the same BitSet object, not a copy). + */ + protected static final BitSet port = digit; + + + /** + * Bitset that combines digit and dot for IPv4address. + *

+     * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
+     * 

+ */ + protected static final BitSet IPv4address = new BitSet(256); + // Static initializer for IPv4address: digits and '.'; the four-group + // shape of an address is not checked by this set + static { + IPv4address.or(digit); + IPv4address.set('.'); + } + + + /** + * RFC 2373. + *

+     * IPv6address = hexpart [ ":" IPv4address ]
+     * 

+ */ + protected static final BitSet IPv6address = new BitSet(256); + // Static initializer for IPv6address + static { + IPv6address.or(hex); // characters usable in hexpart + IPv6address.set(':'); + IPv6address.or(IPv4address); + } + + + /** + * RFC 2732, 2373. + *

+     * IPv6reference   = "[" IPv6address "]"
+     * 

+ */ + protected static final BitSet IPv6reference = new BitSet(256); + // Static initializer for IPv6reference: IPv6address plus the enclosing brackets + static { + IPv6reference.set('['); + IPv6reference.or(IPv6address); + IPv6reference.set(']'); + } + + + /** + * BitSet for toplabel. + *

+     * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
+     * 

+ */ + protected static final BitSet toplabel = new BitSet(256); + // Static initializer for toplabel: alphanum and '-'; the rules for the + // first and last character are not checked by this set + static { + toplabel.or(alphanum); + toplabel.set('-'); + } + + + /** + * BitSet for domainlabel. + *

+     * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
+     * 

+ */ + protected static final BitSet domainlabel = toplabel; /* same object as toplabel, not a copy */ + + + /** + * BitSet for hostname. + *

+     * hostname      = *( domainlabel "." ) toplabel [ "." ]
+     * 

+ */ + protected static final BitSet hostname = new BitSet(256); + // Static initializer for hostname + static { + hostname.or(toplabel); + // hostname.or(domainlabel); -- not needed, domainlabel is the same object as toplabel + hostname.set('.'); + } + + + /** + * BitSet for host. + *

+     * host          = hostname | IPv4address | IPv6reference
+     * 

+ */ + protected static final BitSet host = new BitSet(256); + // Static initializer for host + static { + host.or(hostname); + // host.or(IPv4address); + host.or(IPv6reference); // already contains the IPv4address characters + } + + + /** + * BitSet for hostport. + *

+     * hostport      = host [ ":" port ]
+     * 

+ */ + protected static final BitSet hostport = new BitSet(256); + // Static initializer for hostport: host, the ':' separator and port + static { + hostport.or(host); + hostport.set(':'); + hostport.or(port); + } + + + /** + * Bitset for userinfo. + *

+     * userinfo      = *( unreserved | escaped |
+     *                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
+     * 

+ */ + protected static final BitSet userinfo = new BitSet(256); + // Static initializer for userinfo + static { + userinfo.or(unreserved); + userinfo.or(escaped); + userinfo.set(';'); + userinfo.set(':'); + userinfo.set('&'); + userinfo.set('='); + userinfo.set('+'); + userinfo.set('$'); + userinfo.set(','); + } + + + /** + * BitSet for the parts within the userinfo component, like user and + * password: userinfo with the separators ';', ':', '@', '?' and '/' cleared. + */ + public static final BitSet within_userinfo = new BitSet(256); + // Static initializer for within_userinfo + static { + within_userinfo.or(userinfo); + within_userinfo.clear(';'); // reserved within authority + within_userinfo.clear(':'); + within_userinfo.clear('@'); + within_userinfo.clear('?'); + within_userinfo.clear('/'); + } + + + /** + * Bitset for server. + *

+     * server        = [ [ userinfo "@" ] hostport ]
+     * 

+ */ + protected static final BitSet server = new BitSet(256); + // Static initializer for server: userinfo, the '@' separator and hostport + static { + server.or(userinfo); + server.set('@'); + server.or(hostport); + } + + + /** + * BitSet for reg_name. + *

+     * reg_name      = 1*( unreserved | escaped | "$" | "," |
+     *                     ";" | ":" | "@" | "&" | "=" | "+" )
+     * 

+ */ + protected static final BitSet reg_name = new BitSet(256); + // Static initializer for reg_name: bits are set in the order of the grammar above + static { + reg_name.or(unreserved); + reg_name.or(escaped); + reg_name.set('$'); + reg_name.set(','); + reg_name.set(';'); + reg_name.set(':'); + reg_name.set('@'); + reg_name.set('&'); + reg_name.set('='); + reg_name.set('+'); + } + + + /** + * BitSet for authority. + *

+     * authority     = server | reg_name
+     * 

+ */ + protected static final BitSet authority = new BitSet(256); + // Static initializer for authority: union of both grammar alternatives + static { + authority.or(server); + authority.or(reg_name); + } + + + /** + * BitSet for scheme. + *

+     * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
+     * 

+ */ + protected static final BitSet scheme = new BitSet(256); + // Static initializer for scheme; the leading-alpha rule of the grammar is + // not checked by this set + static { + scheme.or(alpha); + scheme.or(digit); + scheme.set('+'); + scheme.set('-'); + scheme.set('.'); + } + + + /** + * BitSet for rel_segment. + *

+     * rel_segment   = 1*( unreserved | escaped |
+     *                     ";" | "@" | "&" | "=" | "+" | "$" | "," )
+     * 

+ */ + protected static final BitSet rel_segment = new BitSet(256); + // Static initializer for rel_segment; there is no explicit set(':') or + // set('/') here, matching the grammar above + static { + rel_segment.or(unreserved); + rel_segment.or(escaped); + rel_segment.set(';'); + rel_segment.set('@'); + rel_segment.set('&'); + rel_segment.set('='); + rel_segment.set('+'); + rel_segment.set('$'); + rel_segment.set(','); + } + + + /** + * BitSet for rel_path. + *

+     * rel_path      = rel_segment [ abs_path ]
+     * 

+ */ + protected static final BitSet rel_path = new BitSet(256); + // Static initializer for rel_path: rel_segment plus abs_path + static { + rel_path.or(rel_segment); + rel_path.or(abs_path); + } + + + /** + * BitSet for net_path. + *

+     * net_path      = "//" authority [ abs_path ]
+     * 

+ */ + protected static final BitSet net_path = new BitSet(256); + // Static initializer for net_path + static { + net_path.set('/'); // the character of the leading "//" + net_path.or(authority); + net_path.or(abs_path); + } + + + /** + * BitSet for hier_part. + *

+     * hier_part     = ( net_path | abs_path ) [ "?" query ]
+     * 

+ */ + protected static final BitSet hier_part = new BitSet(256); + // Static initializer for hier_part + static { + hier_part.or(net_path); + hier_part.or(abs_path); + // hier_part.set('?'); already included + hier_part.or(query); + } + + + /** + * BitSet for relativeURI. + *

+     * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
+     * 

+ */ + protected static final BitSet relativeURI = new BitSet(256); + // Static initializer for relativeURI + static { + relativeURI.or(net_path); + relativeURI.or(abs_path); + relativeURI.or(rel_path); + // relativeURI.set('?'); already included + relativeURI.or(query); + } + + + /** + * BitSet for absoluteURI. + *

+     * absoluteURI   = scheme ":" ( hier_part | opaque_part )
+     * 

+ */ + protected static final BitSet absoluteURI = new BitSet(256); + // Static initializer for absoluteURI: scheme, the ':' separator and both + // alternatives that may follow it + static { + absoluteURI.or(scheme); + absoluteURI.set(':'); + absoluteURI.or(hier_part); + absoluteURI.or(opaque_part); + } + + + /** + * BitSet for URI-reference. + *

+     * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+     * 

+ */ + protected static final BitSet URI_reference = new BitSet(256); + // Static initializer for URI_reference + static { + URI_reference.or(absoluteURI); + URI_reference.or(relativeURI); + URI_reference.set('#'); + URI_reference.or(fragment); + } + + // ---------------------------- Characters disallowed within the URI syntax + // Excluded US-ASCII characters: control, space, delims and unwise + + /** + * BitSet for control: the US-ASCII codes 0x00-0x1F and 0x7F. + */ + public static final BitSet control = new BitSet(256); + // Static initializer for control + static { + for (int i = 0; i <= 0x1F; i++) { + control.set(i); + } + control.set(0x7F); + } + + /** + * BitSet for space: the single code 0x20. + */ + public static final BitSet space = new BitSet(256); + // Static initializer for space + static { + space.set(0x20); + } + + + /** + * BitSet for delims. + */ + public static final BitSet delims = new BitSet(256); + // Static initializer for delims + static { + delims.set('<'); + delims.set('>'); + delims.set('#'); + delims.set('%'); + delims.set('"'); + } + + + /** + * BitSet for unwise. + */ + public static final BitSet unwise = new BitSet(256); + // Static initializer for unwise + static { + unwise.set('{'); + unwise.set('}'); + unwise.set('|'); + unwise.set('\\'); + unwise.set('^'); + unwise.set('['); + unwise.set(']'); + unwise.set('`'); + } + + + /** + * Disallowed rel_path before escaping: the uric characters that are not + * members of rel_path. + */ + public static final BitSet disallowed_rel_path = new BitSet(256); + // Static initializer for disallowed_rel_path + static { + disallowed_rel_path.or(uric); + disallowed_rel_path.andNot(rel_path); + } + + + /** + * Disallowed opaque_part before escaping. 
+ */ + public static final BitSet disallowed_opaque_part = new BitSet(256); + // Static initializer for disallowed_opaque_part: uric minus opaque_part + static { + disallowed_opaque_part.or(uric); + disallowed_opaque_part.andNot(opaque_part); + } + + // ----------------------- Characters allowed within and for each component + // Each allowed_* set below starts from the matching grammar set and clears + // '%'; several of them are handed to encode() as its "allowed" argument. + + /** + * Those characters that are allowed for the authority component. + */ + public static final BitSet allowed_authority = new BitSet(256); + // Static initializer for allowed_authority + static { + allowed_authority.or(authority); + allowed_authority.clear('%'); + } + + + /** + * Those characters that are allowed for the opaque_part. + */ + public static final BitSet allowed_opaque_part = new BitSet(256); + // Static initializer for allowed_opaque_part + static { + allowed_opaque_part.or(opaque_part); + allowed_opaque_part.clear('%'); + } + + + /** + * Those characters that are allowed for the reg_name. + */ + public static final BitSet allowed_reg_name = new BitSet(256); + // Static initializer for allowed_reg_name + static { + allowed_reg_name.or(reg_name); + // allowed_reg_name.andNot(percent); + allowed_reg_name.clear('%'); + } + + + /** + * Those characters that are allowed for the userinfo component. + */ + public static final BitSet allowed_userinfo = new BitSet(256); + // Static initializer for allowed_userinfo + static { + allowed_userinfo.or(userinfo); + // allowed_userinfo.andNot(percent); + allowed_userinfo.clear('%'); + } + + + /** + * Those characters that are allowed within the userinfo component. + */ + public static final BitSet allowed_within_userinfo = new BitSet(256); + // Static initializer for allowed_within_userinfo + static { + allowed_within_userinfo.or(within_userinfo); + allowed_within_userinfo.clear('%'); + } + + + /** + * Those characters that are allowed for the IPv6reference component. + * The characters '[', ']' in IPv6reference should be excluded. 
+ */ + public static final BitSet allowed_IPv6reference = new BitSet(256); + // Static initializer for allowed_IPv6reference + static { + allowed_IPv6reference.or(IPv6reference); + // allowed_IPv6reference.andNot(unwise); + allowed_IPv6reference.clear('['); + allowed_IPv6reference.clear(']'); + } + + + /** + * Those characters that are allowed for the host component. + * The characters '[', ']' in IPv6reference should be excluded. + */ + public static final BitSet allowed_host = new BitSet(256); + // Static initializer for allowed_host + static { + allowed_host.or(hostname); + allowed_host.or(allowed_IPv6reference); + } + + + /** + * Those characters that are allowed within the authority component: server + * and reg_name with the separators ';', ':', '@', '?' and '/' cleared. + */ + public static final BitSet allowed_within_authority = new BitSet(256); + // Static initializer for allowed_within_authority + static { + allowed_within_authority.or(server); + allowed_within_authority.or(reg_name); + allowed_within_authority.clear(';'); + allowed_within_authority.clear(':'); + allowed_within_authority.clear('@'); + allowed_within_authority.clear('?'); + allowed_within_authority.clear('/'); + } + + + /** + * Those characters that are allowed for the abs_path. + */ + public static final BitSet allowed_abs_path = new BitSet(256); + // Static initializer for allowed_abs_path + static { + allowed_abs_path.or(abs_path); + // allowed_abs_path.set('/'); // already included + allowed_abs_path.andNot(percent); + allowed_abs_path.clear('+'); + } + + + /** + * Those characters that are allowed for the rel_path. + */ + public static final BitSet allowed_rel_path = new BitSet(256); + // Static initializer for allowed_rel_path + static { + allowed_rel_path.or(rel_path); + allowed_rel_path.clear('%'); + allowed_rel_path.clear('+'); + } + + + /** + * Those characters that are allowed within the path. 
+ */ + public static final BitSet allowed_within_path = new BitSet(256); + // Static initializer for allowed_within_path: abs_path without the + // separators '/', ';', '=' and '?' + static { + allowed_within_path.or(abs_path); + allowed_within_path.clear('/'); + allowed_within_path.clear(';'); + allowed_within_path.clear('='); + allowed_within_path.clear('?'); + } + + + /** + * Those characters that are allowed for the query component. + */ + public static final BitSet allowed_query = new BitSet(256); + // Static initializer for allowed_query + static { + allowed_query.or(uric); + allowed_query.clear('%'); + } + + + /** + * Those characters that are allowed within the query component. + */ + public static final BitSet allowed_within_query = new BitSet(256); + // Static initializer for allowed_within_query + static { + allowed_within_query.or(allowed_query); + allowed_within_query.andNot(reserved); // excluded 'reserved' + } + + + /** + * Those characters that are allowed for the fragment component. + */ + public static final BitSet allowed_fragment = new BitSet(256); + // Static initializer for allowed_fragment + static { + allowed_fragment.or(uric); + allowed_fragment.clear('%'); + } + + // ------------------------------------------- Flags for this URI-reference + + // These flags record which grammar alternatives the parsed URI matched. + // They are assigned in parseUriReference and parseAuthority and returned by + // the is*() test methods. TODO: provide javadoc for each flag. + + // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + // absoluteURI = scheme ":" ( hier_part | opaque_part ) + protected boolean _is_hier_part; + protected boolean _is_opaque_part; + // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] + // hier_part = ( net_path | abs_path ) [ "?" 
query ] + protected boolean _is_net_path; + protected boolean _is_abs_path; + protected boolean _is_rel_path; + // net_path = "//" authority [ abs_path ] + // authority = server | reg_name + protected boolean _is_reg_name; + protected boolean _is_server; // = _has_server + // server = [ [ userinfo "@" ] hostport ] + // host = hostname | IPv4address | IPv6reference + protected boolean _is_hostname; + protected boolean _is_IPv4address; + protected boolean _is_IPv6reference; + + // ------------------------------------------ Character and escape encoding + + /** + * Encodes URI string. + * + * This is a two-step mapping, one from original characters to octets, and + * subsequently a second from octets to URI characters: + *

+     *   original character sequence->octet sequence->URI character sequence
+     * 

+ * + * An escaped octet is encoded as a character triplet, consisting of the + * percent character "%" followed by the two hexadecimal digits + * representing the octet code. For example, "%20" is the escaped + * encoding for the US-ASCII space character. + *

+ * Conversion from the local filesystem character set to UTF-8 will + * normally involve a two step process. First convert the local character + * set to the UCS; then convert the UCS to UTF-8. + * The first step in the process can be performed by maintaining a mapping + * table that includes the local character set code and the corresponding + * UCS code. + * The next step is to convert the UCS character code to the UTF-8 encoding. + *

+ * Mapping between vendor codepages can be done in a very similar manner + * as described above. + *

+ * The only time escape encodings can allowedly be made is when a URI is + * being created from its component parts. The escape and validate methods + * are internally performed within this method. + * + * @param original the original character sequence + * @param allowed those characters that are allowed within a component + * @param charset the protocol charset + * @return URI character sequence + * @throws URIException null component or unsupported character encoding + */ + + protected static char[] encode(String original, BitSet allowed, + String charset) throws URIException { + /* Null arguments are rejected with the unchecked IllegalArgumentException, not with URIException. */ + if (original == null) { + throw new IllegalArgumentException("Original string may not be null"); + } + if (allowed == null) { + throw new IllegalArgumentException("Allowed bitset may not be null"); + } + /* Characters are first converted to bytes in the given charset, then passed through URLCodec.encodeUrl; the result is read back as US-ASCII. */ + byte[] rawdata = URLCodec.encodeUrl(allowed, getBytes(original, charset)); + return new String(rawdata, StandardCharsets.US_ASCII).toCharArray(); + } + + /** + * Converts the string to bytes in the named charset. When the charset name + * is not supported, no error is raised: the platform default charset is + * used instead. + */ + private static byte[] getBytes(String original, String charset) { + try { + return original.getBytes(charset); + } catch (UnsupportedEncodingException e) { + return original.getBytes(); + } + } + + /** + * Decodes URI encoded string. + * + * This is a two-step mapping, one from URI characters to octets, and + * subsequently a second from octets to original characters: + *

+     *   URI character sequence->octet sequence->original character sequence
+     * 

+ * + * A URI must be separated into its components before the escaped + * characters within those components can be safely decoded. + *

+ * Notice that there is a chance that URI characters that are non UTF-8 + * may be parsed as valid UTF-8. A recent non-scientific analysis found + * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a + * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0% + * false reading. + *

+ * The percent "%" character always has the reserved purpose of being + * the escape indicator, it must be escaped as "%25" in order to be used + * as data within a URI. + *

+ * The unescape method is internally performed within this method. + * + * @param component the URI character sequence + * @param charset the protocol charset + * @return original character sequence + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + */ + protected static String decode(char[] component, String charset) + throws URIException { + /* A null array is rejected with the unchecked IllegalArgumentException. */ + if (component == null) { + throw new IllegalArgumentException("Component array of chars may not be null"); + } + /* Delegate to the String overload. */ + return decode(new String(component), charset); + } + + /** + * Decodes URI encoded string. + * + * This is a two-step mapping, one from URI characters to octets, and + * subsequently a second from octets to original characters: + *

+     *   URI character sequence->octet sequence->original character sequence
+     * 

+ * + * A URI must be separated into its components before the escaped + * characters within those components can be safely decoded. + *

+ * Notice that there is a chance that URI characters that are non UTF-8 + * may be parsed as valid UTF-8. A recent non-scientific analysis found + * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a + * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0% + * false reading. + *

+ * The percent "%" character always has the reserved purpose of being + * the escape indicator, it must be escaped as "%25" in order to be used + * as data within a URI. + *

+ * The unescape method is internally performed within this method. + * + * @param component the URI character sequence + * @param charset the protocol charset + * @return original character sequence + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * + * @since 3.0 + */ + protected static String decode(String component, String charset) + throws URIException { + /* A null component is rejected with the unchecked IllegalArgumentException. */ + if (component == null) { + throw new IllegalArgumentException("Component array of chars may not be null"); + } + byte[] rawdata = null; + /* Step 1: the component is read as US-ASCII bytes and passed through URLCodec.decodeUrl; a DecoderException is rethrown as URIException carrying only its message. */ + try { + rawdata = URLCodec.decodeUrl(component.getBytes(StandardCharsets.US_ASCII)); + } catch (DecoderException e) { + throw new URIException(e.getMessage()); + } + /* Step 2: the octets are interpreted in the requested charset; an unsupported charset name does not throw but falls back to the platform default charset. */ + try { + return new String(rawdata, charset); + } catch (UnsupportedEncodingException e) { + return new String(rawdata); + } + } + /** + * Pre-validate the unescaped URI string within a specific component. + * Every character of the component is looked up in the disallowed set; + * the first hit ends the check. + * + * @param component the component string within the component + * @param disallowed those characters disallowed within the component + * @return if true, it doesn't have the disallowed characters + * if false, the component is undefined (null) or contains a + * disallowed character + */ + protected boolean prevalidate(String component, BitSet disallowed) { + // prevalidate the given component by disallowed characters + if (component == null) { + return false; // undefined + } + char[] target = component.toCharArray(); + for (int i = 0; i < target.length; i++) { + if (disallowed.get(target[i])) { + return false; + } + } + return true; + } + + + /** + * Validate the URI characters within a specific component. + * The component must be performed after escape encoding. Or it doesn't + * include escaped characters. 
+ * + * @param component the characters sequence within the component + * @param generous those characters that are allowed within a component + * @return if true, it's the correct URI character sequence + */ + protected boolean validate(char[] component, BitSet generous) { + // validate the whole array: start at 0, and -1 as end offset selects the last element + return validate(component, 0, -1, generous); + } + + + /** + * Validate the URI characters within a specific component. + * The component must be performed after escape encoding. Or it doesn't + * include escaped characters. + *

+ * It's not that much strict, generous. The strict validation might be + * performed before being called this method. + * + * @param component the characters sequence within the component + * @param soffset the starting offset of the given component + * @param eoffset the ending offset of the given component + * if -1, it means the length of the component + * @param generous those characters that are allowed within a component + * @return if true, it's the correct URI character sequence + */ + protected boolean validate(char[] component, int soffset, int eoffset, + BitSet generous) { + // validate each component by generous characters; eoffset is an + // inclusive index, so -1 is replaced by the last index (length - 1) + if (eoffset == -1) { + eoffset = component.length - 1; + } + for (int i = soffset; i <= eoffset; i++) { + if (!generous.get(component[i])) { + return false; + } + } + return true; + } + + + /** + * In order to avoid any possibility of conflict with non-ASCII characters, + * parse a URI reference as a String with the character + * encoding of the local system or the document. + *

+ * The following line is the regular expression for breaking-down a URI + * reference into its components. + *

+     *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+     *    12            3  4          5       6  7        8 9
+     * 

+ * For example, matching the above expression to + * http://jakarta.apache.org/ietf/uri/#Related + * results in the following subexpression matches: + *

+     *               $1 = http:
+     *  scheme    =  $2 = http
+     *               $3 = //jakarta.apache.org
+     *  authority =  $4 = jakarta.apache.org
+     *  path      =  $5 = /ietf/uri/
+     *               $6 = 
+     *  query     =  $7 = 
+     *               $8 = #Related
+     *  fragment  =  $9 = Related
+     * 

+ * + * @param original the original character sequence + * @param escaped true if original is escaped + * @throws URIException If an error occurs. + */ + protected void parseUriReference(String original, boolean escaped) + throws URIException { + + // validate and construct the URI character sequence + if (original == null) { + throw new URIException("URI-Reference required"); + } + + /* @ + * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? + */ + String tmp = original.trim(); + + /* + * The length of the string sequence of characters. + * It may not be equal to the length of the byte array. + */ + int length = tmp.length(); + + /* + * Remove the delimiters like angle brackets around an URI. + * Both the first and the last character must be members of delims; + * they need not be a matching pair. + */ + if (length > 0) { + char[] firstDelimiter = { tmp.charAt(0) }; + if (validate(firstDelimiter, delims)) { + if (length >= 2) { + char[] lastDelimiter = { tmp.charAt(length - 1) }; + if (validate(lastDelimiter, delims)) { + tmp = tmp.substring(1, length - 1); + length = length - 2; + } + } + } + } + + /* + * The starting index + */ + int from = 0; + + /* + * The test flag whether the URI is started from the path component: + * true when there is no ':' after the first character and the string + * does not begin with "//", or when a '/' comes before the first ':'. + */ + boolean isStartedFromPath = false; + int atColon = tmp.indexOf(':'); + int atSlash = tmp.indexOf('/'); + if ((atColon <= 0 && !tmp.startsWith("//")) + || (atSlash >= 0 && atSlash < atColon)) { + isStartedFromPath = true; + } + + /* + *

+         *     @@@@@@@@
+         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+         * 

+ */ + int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); + /* no delimiter found at all: continue from index 0 */ + if (at == -1) { + at = 0; + } + + /* + * Parse the scheme. + *

+         *  scheme    =  $2 = http
+         *              @
+         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+         * 

+ */ + if (at > 0 && at < length && tmp.charAt(at) == ':') { + /* the scheme is stored lower-cased */ + char[] target = tmp.substring(0, at).toLowerCase().toCharArray(); + if (validate(target, scheme)) { + _scheme = target; + } else { + throw new URIException("incorrect scheme"); + } + /* step past the ':' */ + from = ++at; + } + + /* + * Parse the authority component. + *

+         *  authority =  $4 = jakarta.apache.org
+         *                  @@
+         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+         * 

+ */ + // Reset flags + // NOTE(review): _is_opaque_part is not part of this reset - confirm that is intended + _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false; + if (0 <= at && at < length && tmp.charAt(at) == '/') { + // Set flag + _is_hier_part = true; + if (at + 2 < length && tmp.charAt(at + 1) == '/' + && !isStartedFromPath) { + // the temporary index to start the search from + int next = indexFirstOf(tmp, "/?#", at + 2); + if (next == -1) { + next = (tmp.substring(at + 2).length() == 0) ? at + 2 + : tmp.length(); + } + parseAuthority(tmp.substring(at + 2, next), escaped); + from = at = next; + // Set flag + _is_net_path = true; + } + // from == at means the path starts right at this '/' (or right + // after the authority that was just consumed) + if (from == at) { + // Set flag + _is_abs_path = true; + } + } + + /* + * Parse the path component. + *

+         *  path      =  $5 = /ietf/uri/
+         *                                @@@@@@
+         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+         * 

+ */ + if (from < length) { + // rel_path = rel_segment [ abs_path ] + int next = indexFirstOf(tmp, "?#", from); + if (next == -1) { + next = tmp.length(); + } + if (!_is_abs_path) { + // unescaped input is checked against the disallowed_* set, + // escaped input against the generous grammar set + if (!escaped + && prevalidate(tmp.substring(from, next), disallowed_rel_path) + || escaped + && validate(tmp.substring(from, next).toCharArray(), rel_path)) { + // Set flag + _is_rel_path = true; + } else if (!escaped + && prevalidate(tmp.substring(from, next), disallowed_opaque_part) + || escaped + && validate(tmp.substring(from, next).toCharArray(), opaque_part)) { + // Set flag + _is_opaque_part = true; + } else { + // the path component may be empty + _path = null; + } + } + String s = tmp.substring(from, next); + if (escaped) { + setRawPath(s.toCharArray()); + } else { + setPath(s); + } + at = next; + } + + // set the charset to do escape encoding + String charset = getProtocolCharset(); + + /* + * Parse the query component. + *

+         *  query     =  $7 = 
+         *                                        @@@@@@@@@
+         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+         * 

+ */ + /* a '?' that is the very last character (at + 1 == length) is not treated as a query */ + if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') { + int next = tmp.indexOf('#', at + 1); + if (next == -1) { + next = tmp.length(); + } + if (escaped) { + _query = tmp.substring(at + 1, next).toCharArray(); + if (!validate(_query, uric)) { + throw new URIException("Invalid query"); + } + } else { + _query = encode(tmp.substring(at + 1, next), allowed_query, charset); + } + at = next; + } + + /* + * Parse the fragment component. + *

+         *  fragment  =  $9 = Related
+         *                                                   @@@@@@@@
+         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+         * 

+ */ + if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') { + if (at + 1 == length) { // empty fragment + _fragment = "".toCharArray(); + } else { + _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() + : encode(tmp.substring(at + 1), allowed_fragment, charset); + } + } + + // set this URI. + setURI(); + } + + + /** + * Get the smallest index at which any one of the given delimiter + * characters first occurs in the string, searching from the start. + * + * @param s the string to be indexed + * @param delims the delimiters used to index + * @return the smallest such index, or -1 if s or delims is null or empty + * or no delimiter occurs + */ + protected int indexFirstOf(String s, String delims) { + return indexFirstOf(s, delims, -1); + } + + + /** + * Get the smallest index at which any one of the given delimiter + * characters first occurs in the string, searching from the offset. + * + * @param s the string to be indexed + * @param delims the delimiters used to index + * @param offset the from index; a negative value is treated as 0 + * @return the smallest such index, or -1 if s or delims is null or empty, + * the offset is beyond the end of s, or no delimiter occurs + */ + protected int indexFirstOf(String s, String delims, int offset) { + if (s == null || s.length() == 0) { + return -1; + } + if (delims == null || delims.length() == 0) { + return -1; + } + // check boundaries + if (offset < 0) { + offset = 0; + } else if (offset > s.length()) { + return -1; + } + // s is never null; s.length() doubles as the "nothing found" marker + int min = s.length(); + char[] delim = delims.toCharArray(); + for (int i = 0; i < delim.length; i++) { + int at = s.indexOf(delim[i], offset); + if (at >= 0 && at < min) { + min = at; + } + } + return (min == s.length()) ? -1 : min; + } + + + /** + * Get the earlier index that to be searched for the first occurrence in + * one of any of the given array. 
+ * + * @param s the character array to be indexed + * @param delim the delimiter used to index + * @return the ealier index if there are a delimiter + */ + protected int indexFirstOf(char[] s, char delim) { + return indexFirstOf(s, delim, 0); + } + + + /** + * Get the index of the first occurrence of the delimiter in the given + * array, searching from the offset. + * + * @param s the character array to be indexed + * @param delim the delimiter used to index + * @param offset The offset; a negative value is treated as 0. + * @return the index of the first occurrence, or -1 if s is null or empty, + * the offset is beyond the end of s, or the delimiter does not occur + */ + protected int indexFirstOf(char[] s, char delim, int offset) { + if (s == null || s.length == 0) { + return -1; + } + // check boundaries + if (offset < 0) { + offset = 0; + } else if (offset > s.length) { + return -1; + } + for (int i = offset; i < s.length; i++) { + if (s[i] == delim) { + return i; + } + } + return -1; + } + + + /** + * Parse the authority component. + * Splits off an optional userinfo, then reads the host either as a + * bracketed IPv6reference or as the text up to the first ':', and + * classifies the result through the _is_* flags. + * + * @param original the original character sequence of authority component + * @param escaped true if original is escaped + * @throws URIException If an error occurs. + */ + protected void parseAuthority(String original, boolean escaped) + throws URIException { + + // Reset flags + _is_reg_name = _is_server = + _is_hostname = _is_IPv4address = _is_IPv6reference = false; + + // set the charset to do escape encoding + String charset = getProtocolCharset(); + + boolean hasPort = true; + int from = 0; + int next = original.indexOf('@'); + if (next != -1) { // '@' present; only -1 is excluded, so next == 0 (empty userinfo) also enters here + // each protocol extended from URI supports the specific userinfo + _userinfo = (escaped) ? 
original.substring(0, next).toCharArray() + : encode(original.substring(0, next), allowed_userinfo, + charset); + from = next + 1; + } + next = original.indexOf('[', from); + if (next >= from) { + next = original.indexOf(']', from); + if (next == -1) { + throw new URIException(URIException.PARSING, "IPv6reference"); + } else { + next++; + } + // In IPv6reference, '[', ']' should be excluded + // NOTE(review): the substring taken below runs from 'from' to just + // past ']', so it still contains the brackets - confirm how encode() + // is expected to treat them + _host = (escaped) ? original.substring(from, next).toCharArray() + : encode(original.substring(from, next), allowed_IPv6reference, + charset); + // Set flag + _is_IPv6reference = true; + } else { // only for !_is_IPv6reference + next = original.indexOf(':', from); + if (next == -1) { + next = original.length(); + hasPort = false; + } + // REMINDME: it doesn't need the pre-validation + _host = original.substring(from, next).toCharArray(); + // IPv4address is tried first, then hostname; anything else is + // treated as a registry-based name + if (validate(_host, IPv4address)) { + // Set flag + _is_IPv4address = true; + } else if (validate(_host, hostname)) { + // Set flag + _is_hostname = true; + } else { + // Set flag + _is_reg_name = true; + } + } + if (_is_reg_name) { + // Reset flags for a server-based naming authority + _is_server = _is_hostname = _is_IPv4address = + _is_IPv6reference = false; + // set a registry-based naming authority + if (escaped) { + _authority = original.toCharArray(); + if (!validate(_authority, reg_name)) { + throw new URIException("Invalid authority"); + } + } else { + _authority = encode(original, allowed_reg_name, charset); + } + } else { + if (original.length() - 1 > next && hasPort + && original.charAt(next) == ':') { // not empty + from = next + 1; + try { + _port = Integer.parseInt(original.substring(from)); + } catch (NumberFormatException error) { + throw new URIException(URIException.PARSING, + "invalid port number"); + } + } + // set a server-based naming authority + StringBuffer buf = new StringBuffer(); + if (_userinfo != null) { // has_userinfo + buf.append(_userinfo); + buf.append('@'); + } + if (_host != null) { + buf.append(_host); + 
if (_port != -1) { + buf.append(':'); + buf.append(_port); + } + } + _authority = buf.toString().toCharArray(); + // Set flag + _is_server = true; + } + } + + + /** + * Once it's parsed successfully, set this URI. + * + * @see #getRawURI + */ + protected void setURI() { + // set _uri + StringBuffer buf = new StringBuffer(); + // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? + if (_scheme != null) { + buf.append(_scheme); + buf.append(':'); + } + if (_is_net_path) { + buf.append("//"); + if (_authority != null) { // has_authority + buf.append(_authority); + } + } + if (_opaque != null && _is_opaque_part) { + buf.append(_opaque); + } else if (_path != null) { + // _is_hier_part or _is_relativeURI + if (_path.length != 0) { + buf.append(_path); + } + } + if (_query != null) { // has_query + buf.append('?'); + buf.append(_query); + } + // ignore the fragment identifier + _uri = buf.toString().toCharArray(); + hash = 0; + } + + // ----------------------------------------------------------- Test methods + + + /** + * Tell whether or not this URI is absolute. + * + * @return true iif this URI is absoluteURI + */ + public boolean isAbsoluteURI() { + return (_scheme != null); + } + + + /** + * Tell whether or not this URI is relative. + * + * @return true iif this URI is relativeURI + */ + public boolean isRelativeURI() { + return (_scheme == null); + } + + + /** + * Tell whether or not the absoluteURI of this URI is hier_part. + * + * @return true iif the absoluteURI is hier_part + */ + public boolean isHierPart() { + return _is_hier_part; + } + + + /** + * Tell whether or not the absoluteURI of this URI is opaque_part. + * + * @return true iif the absoluteURI is opaque_part + */ + public boolean isOpaquePart() { + return _is_opaque_part; + } + + + /** + * Tell whether or not the relativeURI or heir_part of this URI is net_path. + * It's the same function as the has_authority() method. 
+ * + * @return true iif the relativeURI or heir_part is net_path + * @see #hasAuthority + */ + public boolean isNetPath() { + return _is_net_path || (_authority != null); + } + + + /** + * Tell whether or not the relativeURI or hier_part of this URI is abs_path. + * + * @return true iif the relativeURI or hier_part is abs_path + */ + public boolean isAbsPath() { + return _is_abs_path; + } + + + /** + * Tell whether or not the relativeURI of this URI is rel_path. + * + * @return true iif the relativeURI is rel_path + */ + public boolean isRelPath() { + return _is_rel_path; + } + + + /** + * Tell whether or not this URI has authority. + * It's the same function as the is_net_path() method. + * + * @return true iif this URI has authority + * @see #isNetPath + */ + public boolean hasAuthority() { + return (_authority != null) || _is_net_path; + } + + /** + * Tell whether or not the authority component of this URI is reg_name. + * + * @return true iif the authority component is reg_name + */ + public boolean isRegName() { + return _is_reg_name; + } + + + /** + * Tell whether or not the authority component of this URI is server. + * + * @return true iif the authority component is server + */ + public boolean isServer() { + return _is_server; + } + + + /** + * Tell whether or not this URI has userinfo. + * + * @return true iif this URI has userinfo + */ + public boolean hasUserinfo() { + return (_userinfo != null); + } + + + /** + * Tell whether or not the host part of this URI is hostname. + * + * @return true iif the host part is hostname + */ + public boolean isHostname() { + return _is_hostname; + } + + + /** + * Tell whether or not the host part of this URI is IPv4address. + * + * @return true iif the host part is IPv4address + */ + public boolean isIPv4address() { + return _is_IPv4address; + } + + + /** + * Tell whether or not the host part of this URI is IPv6reference. 
+ * + * @return true iif the host part is IPv6reference + */ + public boolean isIPv6reference() { + return _is_IPv6reference; + } + + + /** + * Tell whether or not this URI has query. + * + * @return true iif this URI has query + */ + public boolean hasQuery() { + return (_query != null); + } + + + /** + * Tell whether or not this URI has fragment. + * + * @return true iif this URI has fragment + */ + public boolean hasFragment() { + return (_fragment != null); + } + + + // ---------------------------------------------------------------- Charset + + + /** + * Set the default charset of the protocol. + *

+ * The character set used to store files SHALL remain a local decision and + * MAY depend on the capability of local operating systems. Prior to the + * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format + * and UTF-8 encoded. This approach, while allowing international exchange + * of URIs, will still allow backward compatibility with older systems + * because the code set positions for ASCII characters are identical to the + * one byte sequence in UTF-8. + *

+ * An individual URI scheme may require a single charset, define a default + * charset, or provide a way to indicate the charset used. + * + *

+     * The setter always succeeds, and it always throws a
+     * DefaultCharsetChanged exception to signal the change.
+     *
+     * So the API programmer must call it in the following way:
+     *

+     *  import org.apache.util.URI$DefaultCharsetChanged;
+     *      .
+     *      .
+     *      .
+     *  try {
+     *      URI.setDefaultProtocolCharset("UTF-8");
+     *  } catch (DefaultCharsetChanged cc) {
+     *      // CASE 1: the exception could be ignored, when it is set by user
+     *      if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
+     *      // CASE 2: let user know the default protocol charset changed
+     *      } else {
+     *      // CASE 3: let user know the default document charset changed
+     *      }
+     *  }
+     *  
+ * + * The API programmer is responsible to set the correct charset. + * And each application should remember its own charset to support. + * + * @param charset the default charset for each protocol + * @throws DefaultCharsetChanged default charset changed + */ + public static void setDefaultProtocolCharset(String charset) + throws DefaultCharsetChanged { + + defaultProtocolCharset = charset; + throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET, + "the default protocol charset changed"); + } + + + /** + * Get the default charset of the protocol. + *

+ * An individual URI scheme may require a single charset, define a default + * charset, or provide a way to indicate the charset used. + *

+ * To work globally either requires support of a number of character sets + * and to be able to convert between them, or the use of a single preferred + * character set. + * For support of global compatibility it is STRONGLY RECOMMENDED that + * clients and servers use UTF-8 encoding when exchanging URIs. + * + * @return the default charset string + */ + public static String getDefaultProtocolCharset() { + return defaultProtocolCharset; + } + + + /** + * Get the protocol charset used by this current URI instance. + * It was set by the constructor for this instance. If it was not set by + * contructor, it will return the default protocol charset. + * + * @return the protocol charset string + * @see #getDefaultProtocolCharset + */ + public String getProtocolCharset() { + return (protocolCharset != null) + ? protocolCharset + : defaultProtocolCharset; + } + + + /** + * Set the default charset of the document. + *

+ * Notice that it will be possible to contain mixed characters (e.g. + * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional + * display of these character sets, the protocol charset could be simply + * used again. Because it's not yet implemented that the insertion of BIDI + * control characters at different points during composition is extracted. + *

+     *
+     * The setter always succeeds, and it always throws a
+     * DefaultCharsetChanged exception to signal the change.
+     *
+     * So the API programmer must call it in the following way:
+     *

+     *  import org.apache.util.URI$DefaultCharsetChanged;
+     *      .
+     *      .
+     *      .
+     *  try {
+     *      URI.setDefaultDocumentCharset("EUC-KR");
+     *  } catch (DefaultCharsetChanged cc) {
+     *      // CASE 1: the exception could be ignored, when it is set by user
+     *      if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
+     *      // CASE 2: let user know the default document charset changed
+     *      } else {
+     *      // CASE 3: let user know the default protocol charset changed
+     *      }
+     *  }
+     *  
+ * + * The API programmer is responsible to set the correct charset. + * And each application should remember its own charset to support. + * + * @param charset the default charset for the document + * @throws DefaultCharsetChanged default charset changed + */ + public static void setDefaultDocumentCharset(String charset) + throws DefaultCharsetChanged { + + defaultDocumentCharset = charset; + throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET, + "the default document charset changed"); + } + + + /** + * Get the recommended default charset of the document. + * + * @return the default charset string + */ + public static String getDefaultDocumentCharset() { + return defaultDocumentCharset; + } + + + /** + * Get the default charset of the document by locale. + * + * @return the default charset string by locale + */ + public static String getDefaultDocumentCharsetByLocale() { + return defaultDocumentCharsetByLocale; + } + + + /** + * Get the default charset of the document by platform. + * + * @return the default charset string by platform + */ + public static String getDefaultDocumentCharsetByPlatform() { + return defaultDocumentCharsetByPlatform; + } + + // ------------------------------------------------------------- The scheme + + /** + * Get the scheme. + * + * @return the scheme + */ + public char[] getRawScheme() { + return _scheme; + } + + + /** + * Get the scheme. + * + * @return the scheme + * null if undefined scheme + */ + public String getScheme() { + return (_scheme == null) ? null : new String(_scheme); + } + + // ---------------------------------------------------------- The authority + + /** + * Set the authority. It can be one type of server, hostport, hostname, + * IPv4address, IPv6reference and reg_name. + *

+     *   authority     = server | reg_name
+     * 

+ * + * @param escapedAuthority the raw escaped authority + * @throws URIException If {@link + * #parseAuthority(String,boolean)} fails + * @throws NullPointerException null authority + */ + public void setRawAuthority(char[] escapedAuthority) + throws URIException, NullPointerException { + + parseAuthority(new String(escapedAuthority), true); + setURI(); + } + + + /** + * Set the authority. It can be one type of server, hostport, hostname, + * IPv4address, IPv6reference and reg_name. + * Note that there is no setAuthority method by the escape encoding reason. + * + * @param escapedAuthority the escaped authority string + * @throws URIException If {@link + * #parseAuthority(String,boolean)} fails + */ + public void setEscapedAuthority(String escapedAuthority) + throws URIException { + + parseAuthority(escapedAuthority, true); + setURI(); + } + + + /** + * Get the raw-escaped authority. + * + * @return the raw-escaped authority + */ + public char[] getRawAuthority() { + return _authority; + } + + + /** + * Get the escaped authority. + * + * @return the escaped authority + */ + public String getEscapedAuthority() { + return (_authority == null) ? null : new String(_authority); + } + + + /** + * Get the authority. + * + * @return the authority + * @throws URIException If {@link #decode} fails + */ + public String getAuthority() throws URIException { + return (_authority == null) ? null : decode(_authority, + getProtocolCharset()); + } + + // ----------------------------------------------------------- The userinfo + + /** + * Get the raw-escaped userinfo. + * + * @return the raw-escaped userinfo + * @see #getAuthority + */ + public char[] getRawUserinfo() { + return _userinfo; + } + + + /** + * Get the escaped userinfo. + * + * @return the escaped userinfo + * @see #getAuthority + */ + public String getEscapedUserinfo() { + return (_userinfo == null) ? null : new String(_userinfo); + } + + + /** + * Get the userinfo. 
+ * + * @return the userinfo + * @throws URIException If {@link #decode} fails + * @see #getAuthority + */ + public String getUserinfo() throws URIException { + return (_userinfo == null) ? null : decode(_userinfo, + getProtocolCharset()); + } + + // --------------------------------------------------------------- The host + + /** + * Get the host. + *

+     *   host          = hostname | IPv4address | IPv6reference
+     * 

+     *
+     * @return the host as the internal character array, possibly null
+     * @see #getAuthority
+     */
+    public char[] getRawHost() {
+        return _host;
+    }
+
+
+    /**
+     * Get the host.
+     *

+     *   host          = hostname | IPv4address | IPv6reference
+     * 

+ * + * @return the host + * @throws URIException If {@link #decode} fails + * @see #getAuthority + */ + public String getHost() throws URIException { + if (_host != null) { + return decode(_host, getProtocolCharset()); + } else { + return null; + } + } + + // --------------------------------------------------------------- The port + + /** + * Get the port. In order to get the specfic default port, the specific + * protocol-supported class extended from the URI class should be used. + * It has the server-based naming authority. + * + * @return the port + * if -1, it has the default port for the scheme or the server-based + * naming authority is not supported in the specific URI. + */ + public int getPort() { + return _port; + } + + // --------------------------------------------------------------- The path + + /** + * Set the raw-escaped path. + * + * @param escapedPath the path character sequence + * @throws URIException encoding error or not proper for initial instance + * @see #encode + */ + public void setRawPath(char[] escapedPath) throws URIException { + if (escapedPath == null || escapedPath.length == 0) { + _path = _opaque = escapedPath; + setURI(); + return; + } + // remove the fragment identifier + escapedPath = removeFragmentIdentifier(escapedPath); + if (_is_net_path || _is_abs_path) { + if (escapedPath[0] != '/') { + throw new URIException(URIException.PARSING, + "not absolute path"); + } + if (!validate(escapedPath, abs_path)) { + throw new URIException(URIException.ESCAPING, + "escaped absolute path not valid"); + } + _path = escapedPath; + } else if (_is_rel_path) { + int at = indexFirstOf(escapedPath, '/'); + if (at == 0) { + throw new URIException(URIException.PARSING, "incorrect path"); + } + if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) + && !validate(escapedPath, at, -1, abs_path) + || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) { + + throw new URIException(URIException.ESCAPING, + "escaped relative path not valid"); + 
} + _path = escapedPath; + } else if (_is_opaque_part) { + if (!uric_no_slash.get(escapedPath[0]) + && !validate(escapedPath, 1, -1, uric)) { + throw new URIException(URIException.ESCAPING, + "escaped opaque part not valid"); + } + _opaque = escapedPath; + } else { + throw new URIException(URIException.PARSING, "incorrect path"); + } + setURI(); + } + + + /** + * Set the escaped path. + * + * @param escapedPath the escaped path string + * @throws URIException encoding error or not proper for initial instance + * @see #encode + */ + public void setEscapedPath(String escapedPath) throws URIException { + if (escapedPath == null) { + _path = _opaque = null; + setURI(); + return; + } + setRawPath(escapedPath.toCharArray()); + } + + + /** + * Set the path. + * + * @param path the path string + * @throws URIException set incorrectly or fragment only + * @see #encode + */ + public void setPath(String path) throws URIException { + + if (path == null || path.length() == 0) { + _path = _opaque = (path == null) ? 
null : path.toCharArray(); + setURI(); + return; + } + // set the charset to do escape encoding + String charset = getProtocolCharset(); + + if (_is_net_path || _is_abs_path) { + _path = encode(path, allowed_abs_path, charset); + } else if (_is_rel_path) { + StringBuffer buff = new StringBuffer(path.length()); + int at = path.indexOf('/'); + if (at == 0) { // never 0 + throw new URIException(URIException.PARSING, + "incorrect relative path"); + } + if (at > 0) { + buff.append(encode(path.substring(0, at), allowed_rel_path, + charset)); + buff.append(encode(path.substring(at), allowed_abs_path, + charset)); + } else { + buff.append(encode(path, allowed_rel_path, charset)); + } + _path = buff.toString().toCharArray(); + } else if (_is_opaque_part) { + StringBuffer buf = new StringBuffer(); + buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset)); + buf.insert(1, encode(path.substring(1), uric, charset)); + _opaque = buf.toString().toCharArray(); + } else { + throw new URIException(URIException.PARSING, "incorrect path"); + } + setURI(); + } + + + /** + * Resolve the base and relative path. + * + * @param basePath a character array of the basePath + * @param relPath a character array of the relPath + * @return the resolved path + * @throws URIException no more higher path level to be resolved + */ + protected char[] resolvePath(char[] basePath, char[] relPath) + throws URIException { + + // REMINDME: paths are never null + String base = (basePath == null) ? "" : new String(basePath); + + // _path could be empty + if (relPath == null || relPath.length == 0) { + return normalize(basePath); + } else if (relPath[0] == '/') { + return normalize(relPath); + } else { + int at = base.lastIndexOf('/'); + if (at != -1) { + basePath = base.substring(0, at + 1).toCharArray(); + } + StringBuffer buff = new StringBuffer(base.length() + + relPath.length); + buff.append((at != -1) ? 
base.substring(0, at + 1) : "/"); + buff.append(relPath); + return normalize(buff.toString().toCharArray()); + } + } + + + /** + * Get the raw-escaped current hierarchy level in the given path. + * If the last namespace is a collection, the slash mark ('/') should be + * ended with at the last character of the path string. + * + * @param path the path + * @return the current hierarchy level + * @throws URIException no hierarchy level + */ + protected char[] getRawCurrentHierPath(char[] path) throws URIException { + + if (_is_opaque_part) { + throw new URIException(URIException.PARSING, "no hierarchy level"); + } + if (path == null) { + throw new URIException(URIException.PARSING, "empty path"); + } + String buff = new String(path); + int first = buff.indexOf('/'); + int last = buff.lastIndexOf('/'); + if (last == 0) { + return rootPath; + } else if (first != last && last != -1) { + return buff.substring(0, last).toCharArray(); + } + // FIXME: it could be a document on the server side + return path; + } + + + /** + * Get the raw-escaped current hierarchy level. + * + * @return the raw-escaped current hierarchy level + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. + */ + public char[] getRawCurrentHierPath() throws URIException { + return (_path == null) ? null : getRawCurrentHierPath(_path); + } + + + /** + * Get the escaped current hierarchy level. + * + * @return the escaped current hierarchy level + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. + */ + public String getEscapedCurrentHierPath() throws URIException { + char[] path = getRawCurrentHierPath(); + return (path == null) ? null : new String(path); + } + + + /** + * Get the current hierarchy level. + * + * @return the current hierarchy level + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 
+ * @see #decode + */ + public String getCurrentHierPath() throws URIException { + char[] path = getRawCurrentHierPath(); + return (path == null) ? null : decode(path, getProtocolCharset()); + } + + + /** + * Get the level above the this hierarchy level. + * + * @return the raw above hierarchy level + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. + */ + public char[] getRawAboveHierPath() throws URIException { + char[] path = getRawCurrentHierPath(); + return (path == null) ? null : getRawCurrentHierPath(path); + } + + + /** + * Get the level above the this hierarchy level. + * + * @return the raw above hierarchy level + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. + */ + public String getEscapedAboveHierPath() throws URIException { + char[] path = getRawAboveHierPath(); + return (path == null) ? null : new String(path); + } + + + /** + * Get the level above the this hierarchy level. + * + * @return the above hierarchy level + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. + * @see #decode + */ + public String getAboveHierPath() throws URIException { + char[] path = getRawAboveHierPath(); + return (path == null) ? null : decode(path, getProtocolCharset()); + } + + + /** + * Get the raw-escaped path. + *

+     *   path          = [ abs_path | opaque_part ]
+     * 

+     *
+     * @return the opaque part when this URI is flagged as opaque_part,
+     * otherwise the raw-escaped path
+     */
+    public char[] getRawPath() {
+        if (_is_opaque_part) {
+            return _opaque;
+        }
+        return _path;
+    }
+
+
+    /**
+     * Get the escaped path.
+     *

+     *   path          = [ abs_path | opaque_part ]
+     *   abs_path      = "/"  path_segments 
+     *   opaque_part   = uric_no_slash *uric
+     * 

+     *
+     * @return the escaped path string, or null if there is no path
+     */
+    public String getEscapedPath() {
+        char[] raw = getRawPath();
+        if (raw == null) {
+            return null;
+        }
+        return new String(raw);
+    }
+
+
+    /**
+     * Get the path.
+     *

+     *   path          = [ abs_path | opaque_part ]
+     * 

+ * @return the path string + * @throws URIException If {@link #decode} fails. + * @see #decode + */ + public String getPath() throws URIException { + char[] path = getRawPath(); + return (path == null) ? null : decode(path, getProtocolCharset()); + } + + + /** + * Get the raw-escaped basename of the path. + * + * @return the raw-escaped basename + */ + public char[] getRawName() { + if (_path == null) { + return null; + } + + int at = 0; + for (int i = _path.length - 1; i >= 0; i--) { + if (_path[i] == '/') { + at = i + 1; + break; + } + } + int len = _path.length - at; + char[] basename = new char[len]; + System.arraycopy(_path, at, basename, 0, len); + return basename; + } + + + /** + * Get the escaped basename of the path. + * + * @return the escaped basename string + */ + public String getEscapedName() { + char[] basename = getRawName(); + return (basename == null) ? null : new String(basename); + } + + + /** + * Get the basename of the path. + * + * @return the basename string + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #decode + */ + public String getName() throws URIException { + char[] basename = getRawName(); + return (basename == null) ? null : decode(getRawName(), + getProtocolCharset()); + } + + // ----------------------------------------------------- The path and query + + /** + * Get the raw-escaped path and query. + * + * @return the raw-escaped path and query + */ + public char[] getRawPathQuery() { + + if (_path == null && _query == null) { + return null; + } + StringBuffer buff = new StringBuffer(); + if (_path != null) { + buff.append(_path); + } + if (_query != null) { + buff.append('?'); + buff.append(_query); + } + return buff.toString().toCharArray(); + } + + + /** + * Get the escaped query. + * + * @return the escaped path and query string + */ + public String getEscapedPathQuery() { + char[] rawPathQuery = getRawPathQuery(); + return (rawPathQuery == null) ? 
null : new String(rawPathQuery); + } + + + /** + * Get the path and query. + * + * @return the path and query string. + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #decode + */ + public String getPathQuery() throws URIException { + char[] rawPathQuery = getRawPathQuery(); + return (rawPathQuery == null) ? null : decode(rawPathQuery, + getProtocolCharset()); + } + + // -------------------------------------------------------------- The query + + /** + * Set the raw-escaped query. + * + * @param escapedQuery the raw-escaped query + * @throws URIException escaped query not valid + */ + public void setRawQuery(char[] escapedQuery) throws URIException { + if (escapedQuery == null || escapedQuery.length == 0) { + _query = escapedQuery; + setURI(); + return; + } + // remove the fragment identifier + escapedQuery = removeFragmentIdentifier(escapedQuery); + if (!validate(escapedQuery, query)) { + throw new URIException(URIException.ESCAPING, + "escaped query not valid"); + } + _query = escapedQuery; + setURI(); + } + + + /** + * Set the escaped query string. + * + * @param escapedQuery the escaped query string + * @throws URIException escaped query not valid + */ + public void setEscapedQuery(String escapedQuery) throws URIException { + if (escapedQuery == null) { + _query = null; + setURI(); + return; + } + setRawQuery(escapedQuery.toCharArray()); + } + + + /** + * Set the query. + *

+ * When a query string is not misunderstood the reserved special characters + * ("&", "=", "+", ",", and "$") within a query component, it is + * recommended to use in encoding the whole query with this method. + *

+ * The additional APIs for the special purpose using by the reserved + * special characters used in each protocol are implemented in each protocol + * classes inherited from URI. So refer to the same-named APIs + * implemented in each specific protocol instance. + * + * @param query the query string. + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #encode + */ + public void setQuery(String query) throws URIException { + if (query == null || query.length() == 0) { + _query = (query == null) ? null : query.toCharArray(); + setURI(); + return; + } + setRawQuery(encode(query, allowed_query, getProtocolCharset())); + } + + + /** + * Get the raw-escaped query. + * + * @return the raw-escaped query + */ + public char[] getRawQuery() { + return _query; + } + + + /** + * Get the escaped query. + * + * @return the escaped query string + */ + public String getEscapedQuery() { + return (_query == null) ? null : new String(_query); + } + + + /** + * Get the query. + * + * @return the query string. + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #decode + */ + public String getQuery() throws URIException { + return (_query == null) ? null : decode(_query, getProtocolCharset()); + } + + // ----------------------------------------------------------- The fragment + + /** + * Set the raw-escaped fragment. + * + * @param escapedFragment the raw-escaped fragment + * @throws URIException escaped fragment not valid + */ + public void setRawFragment(char[] escapedFragment) throws URIException { + if (escapedFragment == null || escapedFragment.length == 0) { + _fragment = escapedFragment; + hash = 0; + return; + } + if (!validate(escapedFragment, fragment)) { + throw new URIException(URIException.ESCAPING, + "escaped fragment not valid"); + } + _fragment = escapedFragment; + hash = 0; + } + + + /** + * Set the escaped fragment string. 
+ * + * @param escapedFragment the escaped fragment string + * @throws URIException escaped fragment not valid + */ + public void setEscapedFragment(String escapedFragment) throws URIException { + if (escapedFragment == null) { + _fragment = null; + hash = 0; + return; + } + setRawFragment(escapedFragment.toCharArray()); + } + + + /** + * Set the fragment. + * + * @param fragment the fragment string. + * @throws URIException If an error occurs. + */ + public void setFragment(String fragment) throws URIException { + if (fragment == null || fragment.length() == 0) { + _fragment = (fragment == null) ? null : fragment.toCharArray(); + hash = 0; + return; + } + _fragment = encode(fragment, allowed_fragment, getProtocolCharset()); + hash = 0; + } + + + /** + * Get the raw-escaped fragment. + *

+ * The optional fragment identifier is not part of a URI, but is often used + * in conjunction with a URI. + *

+ * The format and interpretation of fragment identifiers is dependent on + * the media type [RFC2046] of the retrieval result. + *

+ * A fragment identifier is only meaningful when a URI reference is + * intended for retrieval and the result of that retrieval is a document + * for which the identified fragment is consistently defined. + * + * @return the raw-escaped fragment + */ + public char[] getRawFragment() { + return _fragment; + } + + + /** + * Get the escaped fragment. + * + * @return the escaped fragment string + */ + public String getEscapedFragment() { + return (_fragment == null) ? null : new String(_fragment); + } + + + /** + * Get the fragment. + * + * @return the fragment string + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #decode + */ + public String getFragment() throws URIException { + return (_fragment == null) ? null : decode(_fragment, + getProtocolCharset()); + } + + // ------------------------------------------------------------- Utilities + + /** + * Remove the fragment identifier of the given component. + * + * @param component the component that a fragment may be included + * @return the component that the fragment identifier is removed + */ + protected char[] removeFragmentIdentifier(char[] component) { + if (component == null) { + return null; + } + int lastIndex = new String(component).indexOf('#'); + if (lastIndex != -1) { + component = new String(component).substring(0, + lastIndex).toCharArray(); + } + return component; + } + + + /** + * Normalize the given hier path part. + * + *

Algorithm taken from URI reference parser at + * http://www.apache.org/~fielding/uri/rev-2002/issues.html. + * + * @param path the path to normalize + * @return the normalized path + * @throws URIException no more higher path level to be normalized + */ + protected char[] normalize(char[] path) throws URIException { + + if (path == null) { + return null; + } + + String normalized = new String(path); + + // If the buffer begins with "./" or "../", the "." or ".." is removed. + if (normalized.startsWith("./")) { + normalized = normalized.substring(1); + } else if (normalized.startsWith("../")) { + normalized = normalized.substring(2); + } else if (normalized.startsWith("..")) { + normalized = normalized.substring(2); + } + + // All occurrences of "/./" in the buffer are replaced with "/" + int index = -1; + while ((index = normalized.indexOf("/./")) != -1) { + normalized = normalized.substring(0, index) + normalized.substring(index + 2); + } + + // If the buffer ends with "/.", the "." is removed. + if (normalized.endsWith("/.")) { + normalized = normalized.substring(0, normalized.length() - 1); + } + + int startIndex = 0; + + // All occurrences of "//../" in the buffer, where ".." + // and are complete path segments, are iteratively replaced + // with "/" in order from left to right until no matching pattern remains. + // If the buffer ends with "//..", that is also replaced + // with "/". Note that may be empty. + while ((index = normalized.indexOf("/../", startIndex)) != -1) { + int slashIndex = normalized.lastIndexOf('/', index - 1); + if (slashIndex >= 0) { + normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3); + } else { + startIndex = index + 3; + } + } + if (normalized.endsWith("/..")) { + int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4); + if (slashIndex >= 0) { + normalized = normalized.substring(0, slashIndex + 1); + } + } + + // All prefixes of "/../" in the buffer, where ".." 
+ // and are complete path segments, are iteratively replaced + // with "/" in order from left to right until no matching pattern remains. + // If the buffer ends with "/..", that is also replaced + // with "/". Note that may be empty. + while ((index = normalized.indexOf("/../")) != -1) { + int slashIndex = normalized.lastIndexOf('/', index - 1); + if (slashIndex >= 0) { + break; + } else { + normalized = normalized.substring(index + 3); + } + } + if (normalized.endsWith("/..")) { + int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4); + if (slashIndex < 0) { + normalized = "/"; + } + } + + return normalized.toCharArray(); + } + + + /** + * Normalizes the path part of this URI. Normalization is only meant to be performed on + * URIs with an absolute path. Calling this method on a relative path URI will have no + * effect. + * + * @throws URIException no more higher path level to be normalized + * + * @see #isAbsPath() + */ + public void normalize() throws URIException { + if (isAbsPath()) { + _path = normalize(_path); + setURI(); + } + } + + + /** + * Test if the first array is equal to the second array. + * + * @param first the first character array + * @param second the second character array + * @return true if they're equal + */ + protected boolean equals(char[] first, char[] second) { + + if (first == null && second == null) { + return true; + } + if (first == null || second == null) { + return false; + } + if (first.length != second.length) { + return false; + } + for (int i = 0; i < first.length; i++) { + if (first[i] != second[i]) { + return false; + } + } + return true; + } + + + /** + * Test an object if this URI is equal to another. 
+ * + * @param obj an object to compare + * @return true if two URI objects are equal + */ + public boolean equals(Object obj) { + + // normalize and test each components + if (obj == this) { + return true; + } + if (!(obj instanceof URI)) { + return false; + } + URI another = (URI) obj; + // scheme + if (!equals(_scheme, another._scheme)) { + return false; + } + // is_opaque_part or is_hier_part? and opaque + if (!equals(_opaque, another._opaque)) { + return false; + } + // is_hier_part + // has_authority + if (!equals(_authority, another._authority)) { + return false; + } + // path + if (!equals(_path, another._path)) { + return false; + } + // has_query + if (!equals(_query, another._query)) { + return false; + } + // has_fragment? should be careful of the only fragment case. + if (!equals(_fragment, another._fragment)) { + return false; + } + return true; + } + + // ---------------------------------------------------------- Serialization + + /** + * Write the content of this URI. + * + * @param oos the object-output stream + * @throws IOException If an IO problem occurs. + */ + private void writeObject(ObjectOutputStream oos) + throws IOException { + + oos.defaultWriteObject(); + } + + + /** + * Read a URI. + * + * @param ois the object-input stream + * @throws ClassNotFoundException If one of the classes specified in the + * input stream cannot be found. + * @throws IOException If an IO problem occurs. + */ + private void readObject(ObjectInputStream ois) + throws ClassNotFoundException, IOException { + + ois.defaultReadObject(); + } + + // -------------------------------------------------------------- Hash code + + /** + * Return a hash code for this URI. 
+ * + * @return a has code value for this URI + */ + public int hashCode() { + if (hash == 0) { + char[] c = _uri; + if (c != null) { + for (int i = 0, len = c.length; i < len; i++) { + hash = 31 * hash + c[i]; + } + } + c = _fragment; + if (c != null) { + for (int i = 0, len = c.length; i < len; i++) { + hash = 31 * hash + c[i]; + } + } + } + return hash; + } + + // ------------------------------------------------------------- Comparison + + /** + * Compare this URI to another object. + * + * @param obj the object to be compared. + * @return 0, if it's same, + * -1, if failed, first being compared with in the authority component + * @throws ClassCastException not URI argument + */ + public int compareTo(Object obj) throws ClassCastException { + + URI another = (URI) obj; + if (!equals(_authority, another.getRawAuthority())) { + return -1; + } + return toString().compareTo(another.toString()); + } + + // ------------------------------------------------------------------ Clone + + /** + * Create and return a copy of this object, the URI-reference containing + * the userinfo component. Notice that the whole URI-reference including + * the userinfo component counld not be gotten as a String. + *

+ * To copy the identical URI object including the userinfo + * component, it should be used. + * + * @return a clone of this instance + */ + public synchronized Object clone() throws CloneNotSupportedException { + + URI instance = (URI) super.clone(); + + instance._uri = _uri; + instance._scheme = _scheme; + instance._opaque = _opaque; + instance._authority = _authority; + instance._userinfo = _userinfo; + instance._host = _host; + instance._port = _port; + instance._path = _path; + instance._query = _query; + instance._fragment = _fragment; + // the charset to do escape encoding for this instance + instance.protocolCharset = protocolCharset; + // flags + instance._is_hier_part = _is_hier_part; + instance._is_opaque_part = _is_opaque_part; + instance._is_net_path = _is_net_path; + instance._is_abs_path = _is_abs_path; + instance._is_rel_path = _is_rel_path; + instance._is_reg_name = _is_reg_name; + instance._is_server = _is_server; + instance._is_hostname = _is_hostname; + instance._is_IPv4address = _is_IPv4address; + instance._is_IPv6reference = _is_IPv6reference; + + return instance; + } + + // ------------------------------------------------------------ Get the URI + + /** + * It can be gotten the URI character sequence. It's raw-escaped. + * For the purpose of the protocol to be transported, it will be useful. + *

+ * It is clearly unwise to use a URL that contains a password which is + * intended to be secret. In particular, the use of a password within + * the 'userinfo' component of a URL is strongly disrecommended except + * in those rare cases where the 'password' parameter is intended to be + * public. + *

+ * When you want to get each part of the userinfo, you need to use the + * specific methods in the specific URL. It depends on the specific URL. + * + * @return the URI character sequence + */ + public char[] getRawURI() { + return _uri; + } + + + /** + * It can be gotten the URI character sequence. It's escaped. + * For the purpose of the protocol to be transported, it will be useful. + * + * @return the escaped URI string + */ + public String getEscapedURI() { + return (_uri == null) ? null : new String(_uri); + } + + + /** + * It can be gotten the URI character sequence. + * + * @return the original URI string + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #decode + */ + public String getURI() throws URIException { + return (_uri == null) ? null : decode(_uri, getProtocolCharset()); + } + + + /** + * Get the URI reference character sequence. + * + * @return the URI reference character sequence + */ + public char[] getRawURIReference() { + if (_fragment == null) { + return _uri; + } + if (_uri == null) { + return _fragment; + } + // if _uri != null && _fragment != null + String uriReference = new String(_uri) + "#" + new String(_fragment); + return uriReference.toCharArray(); + } + + + /** + * Get the escaped URI reference string. + * + * @return the escaped URI reference string + */ + public String getEscapedURIReference() { + char[] uriReference = getRawURIReference(); + return (uriReference == null) ? null : new String(uriReference); + } + + + /** + * Get the original URI reference string. + * + * @return the original URI reference string + * @throws URIException If {@link #decode} fails. + */ + public String getURIReference() throws URIException { + char[] uriReference = getRawURIReference(); + return (uriReference == null) ? null : decode(uriReference, + getProtocolCharset()); + } + + + /** + * Get the escaped URI string. + *

In documents, the URI-reference form is used only without the userinfo + * component, as in http://jakarta.apache.org/, for security reasons. + * A URI-reference that includes the userinfo component can still be parsed, however. + *

In other words, this URI and any of its subclasses must not expose the + * URI-reference expression with the userinfo component, such as + * http://user:password@hostport/restricted_zone.
+ * It means that the API client programmer should extract each user and + * password to access manually. Probably it will be supported in the each + * subclass, however, not a whole URI-reference expression. + * + * @return the escaped URI string + * @see #clone() + */ + public String toString() { + return getEscapedURI(); + } + + + // ------------------------------------------------------------ Inner class + + /** + * The charset-changed normal operation to represent to be required to + * alert to user the fact the default charset is changed. + */ + public static class DefaultCharsetChanged extends RuntimeException { + + // ------------------------------------------------------- constructors + + /** + * The constructor with a reason string and its code arguments. + * + * @param reasonCode the reason code + * @param reason the reason + */ + public DefaultCharsetChanged(int reasonCode, String reason) { + super(reason); + this.reason = reason; + this.reasonCode = reasonCode; + } + + // ---------------------------------------------------------- constants + + /** No specified reason code. */ + public static final int UNKNOWN = 0; + + /** Protocol charset changed. */ + public static final int PROTOCOL_CHARSET = 1; + + /** Document charset changed. */ + public static final int DOCUMENT_CHARSET = 2; + + // ------------------------------------------------- instance variables + + /** The reason code. */ + private int reasonCode; + + /** The reason message. */ + private String reason; + + // ------------------------------------------------------------ methods + + /** + * Get the reason code. + * + * @return the reason code + */ + public int getReasonCode() { + return reasonCode; + } + + /** + * Get the reason message. + * + * @return the reason message + */ + public String getReason() { + return reason; + } + + } + + + /** + * A mapping to determine the (somewhat arbitrarily) preferred charset for a + * given locale. Supports all locales recognized in JDK 1.1. + *

+ * The distribution of this class is Servlets.com. It was originally + * written by Jason Hunter [jhunter at acm.org] and used by with permission. + */ + public static class LocaleToCharsetMap { + + /** A mapping of language code to charset */ + private static final Hashtable LOCALE_TO_CHARSET_MAP; + static { + LOCALE_TO_CHARSET_MAP = new Hashtable(); + LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6"); + LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7"); + LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8"); + LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS"); + LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR"); + LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2"); + 
LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9"); + LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("zh", "GB2312"); + LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5"); + } + + /** + * Get the preferred charset for the given locale. + * + * @param locale the locale + * @return the preferred charset or null if the locale is not + * recognized. + */ + public static String getCharset(Locale locale) { + // try for an full name match (may include country) + String charset = + (String) LOCALE_TO_CHARSET_MAP.get(locale.toString()); + if (charset != null) { + return charset; + } + + // if a full name didn't match, try just the language + charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage()); + return charset; // may be null + } + + } + +} + diff --git a/src/main/java/org/archive/url/URIException.java b/src/main/java/org/archive/url/URIException.java new file mode 100644 index 00000000..b32c68cf --- /dev/null +++ b/src/main/java/org/archive/url/URIException.java @@ -0,0 +1,180 @@ +/* + * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/URIException.java,v 1.12 2004/09/30 18:53:20 olegk Exp $ + * $Revision: 480424 $ + * $Date: 2006-11-29 06:56:49 +0100 (Wed, 29 Nov 2006) $ + * + * ==================================================================== + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.archive.url; + +import java.io.IOException; + +/** + * The URI parsing and escape encoding exception. + * + * @author Sung-Gu + * @author Oleg Kalnichevski + * @version $Revision: 480424 $ $Date: 2002/03/14 15:14:01 + */ +public class URIException extends IOException { + + // ----------------------------------------------------------- constructors + + /** + * Default constructor. + */ + public URIException() { + } + + + /** + * The constructor with a reason code argument. + * + * @param reasonCode the reason code + */ + public URIException(int reasonCode) { + this.reasonCode = reasonCode; + } + + + /** + * The constructor with a reason string and its code arguments. + * + * @param reasonCode the reason code + * @param reason the reason + */ + public URIException(int reasonCode, String reason) { + super(reason); // for backward compatibility of Throwable + this.reason = reason; + this.reasonCode = reasonCode; + } + + + /** + * The constructor with a reason string argument. 
+ * + * @param reason the reason + */ + public URIException(String reason) { + super(reason); // for backward compatibility of Throwable + this.reason = reason; + this.reasonCode = UNKNOWN; + } + + // -------------------------------------------------------------- constants + + /** + * No specified reason code. + */ + public static final int UNKNOWN = 0; + + + /** + * The URI parsing error. + */ + public static final int PARSING = 1; + + + /** + * The unsupported character encoding. + */ + public static final int UNSUPPORTED_ENCODING = 2; + + + /** + * The URI escape encoding and decoding error. + */ + public static final int ESCAPING = 3; + + + /** + * The DNS punycode encoding or decoding error. + */ + public static final int PUNYCODE = 4; + + // ------------------------------------------------------------- properties + + /** + * The reason code. + */ + protected int reasonCode; + + + /** + * The reason message. + */ + protected String reason; + + // ---------------------------------------------------------------- methods + + /** + * Get the reason code. + * + * @return the reason code + */ + public int getReasonCode() { + return reasonCode; + } + + /** + * Set the reason code. + * + * @param reasonCode the reason code + * + * @deprecated Callers should set the reason code as a parameter to the + * constructor. + */ + public void setReasonCode(int reasonCode) { + this.reasonCode = reasonCode; + } + + + /** + * Get the reason message. + * + * @return the reason message + * + * @deprecated You should instead call {@link #getMessage()}. + */ + public String getReason() { + return reason; + } + + + /** + * Set the reason message. + * + * @param reason the reason message + * + * @deprecated Callers should instead set this via a parameter to the constructor. 
+ */ + public void setReason(String reason) { + this.reason = reason; + } + + +} + diff --git a/src/main/java/org/archive/url/UsableURI.java b/src/main/java/org/archive/url/UsableURI.java index ed40f41a..b7d0cf71 100644 --- a/src/main/java/org/archive/url/UsableURI.java +++ b/src/main/java/org/archive/url/UsableURI.java @@ -26,14 +26,13 @@ import java.net.URI; import java.net.URISyntaxException; -import org.apache.commons.httpclient.URIException; import org.archive.util.SURT; import org.archive.util.TextUtils; /** * Usable URI. * - * This class wraps {@link org.apache.commons.httpclient.URI} adding caching + * This class wraps {@link org.archive.url.URI} adding caching * and methods. It cannot be instantiated directly. Go via UURIFactory. * *

We used to use {@link java.net.URI} for parsing URIs but ran across @@ -50,7 +49,7 @@ * @author gojomo * @author stack * - * @see org.apache.commons.httpclient.URI + * @see org.archive.url.URI */ public class UsableURI extends LaxURI implements CharSequence, Serializable { @@ -121,7 +120,6 @@ protected UsableURI() { * @param uri String representation of an absolute URI. * @param escaped If escaped. * @param charset Charset to use. - * @throws org.apache.commons.httpclient.URIException */ protected UsableURI(String uri, boolean escaped, String charset) throws URIException { @@ -132,7 +130,6 @@ protected UsableURI(String uri, boolean escaped, String charset) /** * @param relative String representation of URI. * @param base Parent UURI to use derelativizing. - * @throws org.apache.commons.httpclient.URIException */ protected UsableURI(UsableURI base, UsableURI relative) throws URIException { super(base, relative); @@ -275,7 +272,7 @@ public String toString() { /** * In the case of a puny encoded IDN, this method returns the decoded Unicode version. *

- * Most of this implementation is copied from {@link org.apache.commons.httpclient.URI#setURI()}. + * Most of this implementation is copied from {@link org.archive.url.URI#setURI()}. * * @return decoded IDN version of URI */ diff --git a/src/main/java/org/archive/url/UsableURIFactory.java b/src/main/java/org/archive/url/UsableURIFactory.java index 3dfc33a7..08f18999 100644 --- a/src/main/java/org/archive/url/UsableURIFactory.java +++ b/src/main/java/org/archive/url/UsableURIFactory.java @@ -28,8 +28,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.httpclient.URI; -import org.apache.commons.httpclient.URIException; import org.archive.util.TextUtils; /** diff --git a/src/main/java/org/archive/util/ChunkedInputStream.java b/src/main/java/org/archive/util/ChunkedInputStream.java new file mode 100644 index 00000000..69b23047 --- /dev/null +++ b/src/main/java/org/archive/util/ChunkedInputStream.java @@ -0,0 +1,324 @@ +/* + * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/ChunkedInputStream.java,v 1.24 2004/10/10 15:18:55 olegk Exp $ + * $Revision: 480424 $ + * $Date: 2006-11-29 06:56:49 +0100 (Wed, 29 Nov 2006) $ + * + * ==================================================================== + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.archive.util; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; + + +/** + *

Transparently coalesces the chunks of an HTTP stream that uses + * Transfer-Encoding: chunked.

+ * + *

Note that this class NEVER closes the underlying stream, even when close + * gets called. Instead, it will read until the "end" of its chunking on close, + * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while + * not requiring the client to remember to read the entire contents of the + * response.

+ * + * @author Ortwin Glueck + * @author Sean C. Sullivan + * @author Martin Elwin + * @author Eric Johnson + * @author Mike Bowler + * @author Michael Becke + * @author Oleg Kalnichevski + * + * @since 2.0 + * + */ +class ChunkedInputStream extends InputStream { + /** The inputstream that we're wrapping */ + private InputStream in; + + /** The chunk size */ + private int chunkSize; + + /** The current position within the current chunk */ + private int pos; + + /** True if we'are at the beginning of stream */ + private boolean bof = true; + + /** True if we've reached the end of stream */ + private boolean eof = false; + + /** True if this stream is closed */ + private boolean closed = false; + + /** + * ChunkedInputStream constructor + * + * @param in the raw input stream + * + */ + public ChunkedInputStream(final InputStream in) { + + if (in == null) { + throw new IllegalArgumentException("InputStream parameter may not be null"); + } + this.in = in; + this.pos = 0; + } + + /** + *

Returns all the data in a chunked stream in coalesced form. A chunk + * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0 + * is detected.

+ * + *

Trailer headers are read automatically at the end of the stream and + * are then discarded; this class provides no accessor for them.

+ * + * @return -1 of the end of the stream has been reached or the next data + * byte + * @throws IOException If an IO problem occurs + */ + public int read() throws IOException { + + if (closed) { + throw new IOException("Attempted read from closed stream."); + } + if (eof) { + return -1; + } + if (pos >= chunkSize) { + nextChunk(); + if (eof) { + return -1; + } + } + pos++; + return in.read(); + } + + /** + * Read some bytes from the stream. + * @param b The byte array that will hold the contents from the stream. + * @param off The offset into the byte array at which bytes will start to be + * placed. + * @param len the maximum number of bytes that can be returned. + * @return The number of bytes returned or -1 if the end of stream has been + * reached. + * @see InputStream#read(byte[], int, int) + * @throws IOException if an IO problem occurs. + */ + public int read (byte[] b, int off, int len) throws IOException { + + if (closed) { + throw new IOException("Attempted read from closed stream."); + } + + if (eof) { + return -1; + } + if (pos >= chunkSize) { + nextChunk(); + if (eof) { + return -1; + } + } + len = Math.min(len, chunkSize - pos); + int count = in.read(b, off, len); + pos += count; + return count; + } + + /** + * Read some bytes from the stream. + * @param b The byte array that will hold the contents from the stream. + * @return The number of bytes returned or -1 if the end of stream has been + * reached. + * @see InputStream#read(byte[]) + * @throws IOException if an IO problem occurs. + */ + public int read (byte[] b) throws IOException { + return read(b, 0, b.length); + } + + /** + * Read the CRLF terminator. + * @throws IOException If an IO error occurs. + */ + private void readCRLF() throws IOException { + int cr = in.read(); + int lf = in.read(); + if ((cr != '\r') || (lf != '\n')) { + throw new IOException( + "CRLF expected at end of chunk: " + cr + "/" + lf); + } + } + + + /** + * Read the next chunk. 
+ * @throws IOException If an IO error occurs. + */ + private void nextChunk() throws IOException { + if (!bof) { + readCRLF(); + } + chunkSize = getChunkSizeFromInputStream(in); + bof = false; + pos = 0; + if (chunkSize == 0) { + eof = true; + parseTrailerHeaders(); + } + } + + /** + * Expects the stream to start with a chunksize in hex with optional + * comments after a semicolon. The line must end with a CRLF: "a3; some + * comment\r\n" Positions the stream at the start of the next line. + * + * @param in The new input stream. + * + * @return the chunk size as integer + * + * @throws IOException when the chunk size could not be parsed + */ + private static int getChunkSizeFromInputStream(final InputStream in) + throws IOException { + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + // States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end + int state = 0; + while (state != -1) { + int b = in.read(); + if (b == -1) { + throw new IOException("chunked stream ended unexpectedly"); + } + switch (state) { + case 0: + switch (b) { + case '\r': + state = 1; + break; + case '\"': + state = 2; + /* fall through */ + default: + baos.write(b); + } + break; + + case 1: + if (b == '\n') { + state = -1; + } else { + // this was not CRLF + throw new IOException("Protocol violation: Unexpected" + + " single newline character in chunk size"); + } + break; + + case 2: + switch (b) { + case '\\': + b = in.read(); + baos.write(b); + break; + case '\"': + state = 0; + /* fall through */ + default: + baos.write(b); + } + break; + default: throw new RuntimeException("assertion failed"); + } + } + + //parse data + String dataString = baos.toString(StandardCharsets.US_ASCII.name()); + int separator = dataString.indexOf(';'); + dataString = (separator > 0) + ? 
dataString.substring(0, separator).trim() + : dataString.trim(); + + int result; + try { + result = Integer.parseInt(dataString.trim(), 16); + } catch (NumberFormatException e) { + throw new IOException ("Bad chunk size: " + dataString); + } + return result; + } + + /** + * Reads and stores the Trailer headers. + * @throws IOException If an IO problem occurs + */ + private void parseTrailerHeaders() throws IOException { + String charset = "US-ASCII"; + LaxHttpParser.parseHeaders(in, charset); + } + + /** + * Upon close, this reads the remainder of the chunked message, + * leaving the underlying socket at a position to start reading the + * next response without scanning. + * @throws IOException If an IO problem occurs. + */ + public void close() throws IOException { + if (!closed) { + try { + if (!eof) { + exhaustInputStream(this); + } + } finally { + eof = true; + closed = true; + } + } + } + + /** + * Exhaust an input stream, reading until EOF has been encountered. + * + *

Note that this function is intended as a non-public utility. + * This is a little weird, but it seemed silly to make a utility + * class for this one function, so instead it is just static and + * shared that way.

+ * + * @param inStream The {@link InputStream} to exhaust. + * @throws IOException If an IO problem occurs + */ + static void exhaustInputStream(InputStream inStream) throws IOException { + // read and discard the remainder of the message + byte buffer[] = new byte[1024]; + while (inStream.read(buffer) >= 0) { + ; + } + } +} diff --git a/src/main/java/org/archive/util/LaxHttpParser.java b/src/main/java/org/archive/util/LaxHttpParser.java index 9e38669b..0545fd95 100644 --- a/src/main/java/org/archive/util/LaxHttpParser.java +++ b/src/main/java/org/archive/util/LaxHttpParser.java @@ -35,13 +35,11 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.UnsupportedEncodingException; import java.util.ArrayList; +import java.util.logging.Logger; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.util.EncodingUtil; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.archive.format.http.HttpHeader; /** * A Modified version of HttpParser which doesn't throw exceptions on bad header lines @@ -57,7 +55,7 @@ public class LaxHttpParser { /** Log object for this class. */ - private static final Log LOG = LogFactory.getLog(LaxHttpParser.class); + private static final Logger LOG = Logger.getLogger(LaxHttpParser.class.getName()); /** * Constructor for LaxHttpParser. 
@@ -77,7 +75,7 @@ protected LaxHttpParser() { } * @return a byte array from the stream */ public static byte[] readRawLine(InputStream inputStream) throws IOException { - LOG.trace("enter LaxHttpParser.readRawLine()"); + LOG.finest("enter LaxHttpParser.readRawLine()"); ByteArrayOutputStream buf = new ByteArrayOutputStream(); int ch; @@ -108,7 +106,7 @@ public static byte[] readRawLine(InputStream inputStream) throws IOException { * @since 3.0 */ public static String readLine(InputStream inputStream, String charset) throws IOException { - LOG.trace("enter LaxHttpParser.readLine(InputStream, String)"); + LOG.finest("enter LaxHttpParser.readLine(InputStream, String)"); byte[] rawdata = readRawLine(inputStream); if (rawdata == null) { return null; @@ -126,7 +124,11 @@ public static String readLine(InputStream inputStream, String charset) throws IO } } } - return EncodingUtil.getString(rawdata, 0, len - offset, charset); + try { + return new String(rawdata, 0, len - offset, charset); + } catch (UnsupportedEncodingException e) { + return new String(rawdata, 0, len - offset); + } } /** @@ -144,7 +146,7 @@ public static String readLine(InputStream inputStream, String charset) throws IO */ public static String readLine(InputStream inputStream) throws IOException { - LOG.trace("enter LaxHttpParser.readLine(InputStream)"); + LOG.finest("enter LaxHttpParser.readLine(InputStream)"); return readLine(inputStream, "US-ASCII"); } @@ -158,14 +160,13 @@ public static String readLine(InputStream inputStream) throws IOException { * @return an array of headers in the order in which they were parsed * * @throws IOException if an IO error occurs while reading from the stream - * @throws HttpException if there is an error parsing a header value - * + * * @since 3.0 */ - public static Header[] parseHeaders(InputStream is, String charset) throws IOException, HttpException { - LOG.trace("enter HeaderParser.parseHeaders(InputStream, String)"); + public static HttpHeader[] 
parseHeaders(InputStream is, String charset) throws IOException { + LOG.finest("enter HeaderParser.parseHeaders(InputStream, String)"); - ArrayList
headers = new ArrayList
(); + ArrayList headers = new ArrayList<>(); String name = null; StringBuffer value = null; for (; ;) { @@ -188,7 +189,7 @@ public static Header[] parseHeaders(InputStream is, String charset) throws IOExc } else { // make sure we save the previous name,value pair if present if (name != null) { - headers.add(new Header(name, value.toString())); + headers.add(new HttpHeader(name, value.toString())); } // Otherwise we should have normal HTTP header line @@ -216,10 +217,10 @@ public static Header[] parseHeaders(InputStream is, String charset) throws IOExc // make sure we save the last name,value pair if present if (name != null) { - headers.add(new Header(name, value.toString())); + headers.add(new HttpHeader(name, value.toString())); } - return (Header[]) headers.toArray(new Header[headers.size()]); + return headers.toArray(new HttpHeader[0]); } /** @@ -231,12 +232,11 @@ public static Header[] parseHeaders(InputStream is, String charset) throws IOExc * @return an array of headers in the order in which they were parsed * * @throws IOException if an IO error occurs while reading from the stream - * @throws HttpException if there is an error parsing a header value - * + * * @deprecated use #parseHeaders(InputStream, String) */ - public static Header[] parseHeaders(InputStream is) throws IOException, HttpException { - LOG.trace("enter HeaderParser.parseHeaders(InputStream, String)"); + public static HttpHeader[] parseHeaders(InputStream is) throws IOException { + LOG.finest("enter HeaderParser.parseHeaders(InputStream, String)"); return parseHeaders(is, "US-ASCII"); } } diff --git a/src/main/java/org/archive/util/Recorder.java b/src/main/java/org/archive/util/Recorder.java index dff02bff..61cbf871 100644 --- a/src/main/java/org/archive/util/Recorder.java +++ b/src/main/java/org/archive/util/Recorder.java @@ -32,7 +32,6 @@ import java.util.zip.DeflaterInputStream; import java.util.zip.GZIPInputStream; -import org.apache.commons.httpclient.ChunkedInputStream; import 
org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; diff --git a/src/main/java/org/archive/util/SURT.java b/src/main/java/org/archive/util/SURT.java index 69daf247..059b2ec6 100644 --- a/src/main/java/org/archive/util/SURT.java +++ b/src/main/java/org/archive/util/SURT.java @@ -29,7 +29,7 @@ import java.io.PrintStream; import java.util.regex.Matcher; -import org.apache.commons.httpclient.URIException; +import org.archive.url.URIException; import org.archive.url.UsableURIFactory; /** diff --git a/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java b/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java index 68ee6551..69189862 100644 --- a/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java +++ b/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java @@ -3,7 +3,6 @@ import java.io.IOException; import org.archive.util.binsearch.SeekableLineReaderFactory; -import org.archive.util.binsearch.impl.http.ApacheHttp31SLRFactory; import org.archive.util.binsearch.impl.http.ApacheHttp43SLRFactory; import org.archive.util.binsearch.impl.http.HTTPURLConnSLRFactory; @@ -20,15 +19,13 @@ protected HTTPSeekableLineReaderFactory() public enum HttpLibs { - @Deprecated - APACHE_31, APACHE_43, URLCONN, } public static HTTPSeekableLineReaderFactory getHttpFactory() { - return getHttpFactory(HttpLibs.APACHE_31); + return getHttpFactory(HttpLibs.APACHE_43); } public static HTTPSeekableLineReaderFactory getHttpFactory(HttpLibs type) @@ -38,7 +35,7 @@ public static HTTPSeekableLineReaderFactory getHttpFactory(HttpLibs type) public static HTTPSeekableLineReaderFactory getHttpFactory(String defaultURL) { - return getHttpFactory(HttpLibs.APACHE_31, defaultURL); + return getHttpFactory(HttpLibs.APACHE_43, defaultURL); } public static HTTPSeekableLineReaderFactory getHttpFactory(HttpLibs type, String defaultURL) @@ -46,10 
+43,6 @@ public static HTTPSeekableLineReaderFactory getHttpFactory(HttpLibs type, String HTTPSeekableLineReaderFactory factory = null; switch (type) { - case APACHE_31: - factory = new ApacheHttp31SLRFactory(); - break; - case URLCONN: factory = new HTTPURLConnSLRFactory(); break; @@ -60,7 +53,7 @@ public static HTTPSeekableLineReaderFactory getHttpFactory(HttpLibs type, String } if (factory == null) { - factory = new ApacheHttp31SLRFactory(); + factory = new ApacheHttp43SLRFactory(); } factory.defaultURL = defaultURL; diff --git a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java deleted file mode 100644 index 124d3d03..00000000 --- a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java +++ /dev/null @@ -1,235 +0,0 @@ -package org.archive.util.binsearch.impl.http; - -import java.io.IOException; -import java.io.InputStream; -import java.net.URISyntaxException; - -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpMethod; -import org.apache.commons.httpclient.cookie.CookiePolicy; -import org.apache.commons.httpclient.methods.GetMethod; -import org.apache.commons.httpclient.methods.HeadMethod; -import org.apache.commons.io.input.CountingInputStream; -import org.archive.util.binsearch.impl.HTTPSeekableLineReader; - -/** - * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 - */ -@Deprecated -public class ApacheHttp31SLR extends HTTPSeekableLineReader { - - private HttpClient http; - private String url; - private long length = -1; - - protected CountingInputStream cin; - - private GetMethod activeMethod; - - public ApacheHttp31SLR(HttpClient http, String url) { - this.http = http; - this.url = url; - } - - private void acquireLength() throws URISyntaxException, 
HttpException, IOException { - HttpMethod head = new HeadMethod(url); - int code = http.executeMethod(head); - if(code != 200) { - throw new IOException("Unable to retrieve from " + url); - } - Header lengthHeader = head.getResponseHeader(CONTENT_LENGTH); - if(lengthHeader == null) { - throw new IOException("No Content-Length header for " + url); - } - String val = lengthHeader.getValue(); - try { - length = Long.parseLong(val); - } catch(NumberFormatException e) { - throw new IOException("Bad Content-Length value " +url+ ": " + val); - } - } - - protected String getHeader(String header) throws URISyntaxException, HttpException, IOException { - HttpMethod head = new HeadMethod(url); - int code = http.executeMethod(head); - if(code != 200) { - throw new IOException("Unable to retrieve from " + url); - } - Header theHeader = head.getResponseHeader(header); - if(theHeader == null) { - throw new IOException("No " + header + " header for " + url); - } - String val = theHeader.getValue(); - return val; - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReader#getUrl() - */ - @Override - public String getUrl() - { - return url; - } - -// public void seek(long offset, boolean gzip) throws IOException { -// is = doSeekLoad(offset, -1); -// -// if (gzip) { -// is = new GZIPMembersInputStream(is, blockSize); -// } -// } - -// public void seekWithMaxRead(long offset, boolean gzip, int maxLength) throws IOException { -// is = doSeekLoad(offset, maxLength); -// -// if (bufferFully && (maxLength > 0) && (maxLength < 1e10)) { -// try { -// byte[] buffer = new byte[maxLength]; -// ByteStreams.readFully(is, buffer); -// is.close(); -// -// // Create new stream -// is = new ByteArrayInputStream(buffer); -// } finally { -// activeMethod.releaseConnection(); -// activeMethod = null; -// } -// } -// -// if (gzip) { -// is = new GZIPMembersInputStream(is, blockSize); -// } -// } - - protected InputStream doSeekLoad(long offset, int maxLength) throws 
IOException { - if (activeMethod != null) { - doClose(); - } - - br = null; - - try { - - activeMethod = new GetMethod(url); - - String rangeHeader = makeRangeHeader(offset, maxLength); - - if (rangeHeader != null) { - activeMethod.setRequestHeader("Range", rangeHeader); - } - - if (this.isNoKeepAlive()) { - activeMethod.setRequestHeader("Connection", "close"); - } - - if (this.getCookie() != null) { - activeMethod.getParams().setCookiePolicy(CookiePolicy.IGNORE_COOKIES); - activeMethod.setRequestHeader("Cookie", this.getCookie()); - } - - int code = http.executeMethod(activeMethod); - - connectedUrl = activeMethod.getURI().toString(); - - if ((code != 206) && (code != 200)) { - throw new BadHttpStatusException(code, connectedUrl + " " + rangeHeader); - } - - InputStream is = activeMethod.getResponseBodyAsStream(); - cin = new CountingInputStream(is); - return cin; - - } catch (IOException io) { - if (saveErrHeader != null) { - errHeader = getHeaderValue(saveErrHeader); - } - - connectedUrl = activeMethod.getURI().toString(); - doClose(); - throw io; - } - } - - public GetMethod getHttpMethod() - { - return activeMethod; - } - - public void doClose() throws IOException { - - if (activeMethod == null) { - return; - } - - try { - long contentLength = activeMethod.getResponseContentLength(); - - long bytesRead = (cin != null ? 
cin.getByteCount() : 0); - - // If fully read, close gracefully, otherwise abort - if ((contentLength > 0) && (contentLength == bytesRead)) { -// try { -// cin.close(); -// } catch (IOException e) { -// activeMethod.abort(); -// } - } else { - activeMethod.abort(); - } - - activeMethod.releaseConnection(); - activeMethod = null; - - } finally { - if (activeMethod != null) { - activeMethod.abort(); - activeMethod.releaseConnection(); - activeMethod = null; - } - } - - cin = null; - is = null; - br = null; - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReader#getSize() - */ - @Override - public long getSize() throws IOException { - if (length < 0) { - try { - if (activeMethod != null) { - length = activeMethod.getResponseContentLength(); - } else { - acquireLength(); - } - } catch (URISyntaxException e) { - throw new IOException(e); - } - } - return length; - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReader#getHeaderValue(java.lang.String) - */ - @Override - public String getHeaderValue(String headerName) { - if (activeMethod == null) { - return null; - } - - Header header = activeMethod.getResponseHeader(headerName); - - if (header == null) { - return null; - } - - return header.getValue(); - } -} diff --git a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java deleted file mode 100644 index 2af03dab..00000000 --- a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java +++ /dev/null @@ -1,192 +0,0 @@ -package org.archive.util.binsearch.impl.http; - -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Locale; -import java.util.logging.Logger; - -import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; -import org.apache.commons.httpclient.HostConfiguration; -import 
org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpConnectionManager; -import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; -import org.apache.commons.httpclient.params.HttpClientParams; -import org.archive.util.binsearch.impl.HTTPSeekableLineReader; -import org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory; - -/** - * - * @deprecated Commons HttpClient 3 is end of life, this will be removed in webarchive-commons 2.0 - */ -@Deprecated -public class ApacheHttp31SLRFactory extends HTTPSeekableLineReaderFactory { - private final static Logger LOGGER = Logger.getLogger(ApacheHttp31SLRFactory.class.getName()); - - private HttpConnectionManager connectionManager = null; - private HostConfiguration hostConfiguration = null; - private HttpClient http = null; - - public ApacheHttp31SLRFactory(String uriString) { - this(); - } - - public ApacheHttp31SLRFactory() { - connectionManager = new MultiThreadedHttpConnectionManager(); - //connectionManager = new ThreadLocalHttpConnectionManager(); - hostConfiguration = new HostConfiguration(); - HttpClientParams params = new HttpClientParams(); - http = new HttpClient(params,connectionManager); - http.setHostConfiguration(hostConfiguration); - } - - public void close() throws IOException - { - //connectionManager.deleteClosedConnections(); - connectionManager.closeIdleConnections(0); - } - - @Override - public ApacheHttp31SLR get(String url) throws IOException { - -// if (LOGGER.isLoggable(Level.FINEST)) { -// LOGGER.finest("Connections: " + connectionManager.getConnectionsInPool(hostConfiguration)); -// } - - return new ApacheHttp31SLR(http, url); - } - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setProxyHostPort(java.lang.String) - */ - @Override - public void setProxyHostPort(String hostPort) { - int colonIdx = hostPort.indexOf(':'); - if(colonIdx > 0) { - String host = hostPort.substring(0,colonIdx); - int port = 
Integer.valueOf(hostPort.substring(colonIdx+1)); - -// http.getHostConfiguration().setProxy(host, port); - hostConfiguration.setProxy(host, port); - } - } - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setMaxTotalConnections(int) - */ - @Override - public void setMaxTotalConnections(int maxTotalConnections) { - connectionManager.getParams(). - setMaxTotalConnections(maxTotalConnections); - } - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#getMaxTotalConnections() - */ - @Override - public int getMaxTotalConnections() { - return connectionManager.getParams().getMaxTotalConnections(); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setMaxHostConnections(int) - */ - @Override - public void setMaxHostConnections(int maxHostConnections) { - connectionManager.getParams().setDefaultMaxConnectionsPerHost(maxHostConnections); - connectionManager.getParams().setMaxConnectionsPerHost(hostConfiguration, maxHostConnections); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#getMaxHostConnections() - */ - @Override - public int getMaxHostConnections() { - return connectionManager.getParams(). 
- getMaxConnectionsPerHost(hostConfiguration); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#getConnectionTimeoutMS() - */ - @Override - public int getConnectionTimeoutMS() { - return connectionManager.getParams().getConnectionTimeout(); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setConnectionTimeoutMS(int) - */ - @Override - public void setConnectionTimeoutMS(int connectionTimeoutMS) { - connectionManager.getParams().setConnectionTimeout(connectionTimeoutMS); - http.getParams().setConnectionManagerTimeout(connectionTimeoutMS); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#getSocketTimeoutMS() - */ - @Override - public int getSocketTimeoutMS() { - return connectionManager.getParams().getSoTimeout(); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setSocketTimeoutMS(int) - */ - @Override - public void setSocketTimeoutMS(int socketTimeoutMS) { - connectionManager.getParams().setSoTimeout(socketTimeoutMS); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setStaleChecking(boolean) - */ - @Override - public void setStaleChecking(boolean enabled) - { - connectionManager.getParams().setStaleCheckingEnabled(enabled); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#isStaleChecking() - */ - @Override - public boolean isStaleChecking() - { - return connectionManager.getParams().isStaleCheckingEnabled(); - } - - // Experimental - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#getModTime() - */ - @Override - public long getModTime() - { - HTTPSeekableLineReader reader = null; - SimpleDateFormat lastModFormat = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.ENGLISH); - - try { - reader = get(); - String result = 
reader.getHeaderValue(HTTPSeekableLineReader.LAST_MODIFIED); - Date date = lastModFormat.parse(result); - return date.getTime(); - - } catch (Exception e) { - e.printStackTrace(); - } finally { - if (reader != null) { - try { - reader.close(); - } catch (IOException e) { - - } - } - } - - return 0; - } - - @Override - public void setNumRetries(int numRetries) { - http.getParams().setParameter(HttpClientParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(numRetries, true)); - } -} diff --git a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java index 7988cb2b..005e2c49 100644 --- a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java +++ b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java @@ -26,7 +26,7 @@ import java.util.Map; import java.util.Set; -import org.apache.commons.httpclient.Header; +import org.archive.format.http.HttpHeader; import org.archive.io.arc.ARCRecord; import org.archive.io.warc.WARCRecord; import org.junit.jupiter.api.Test; @@ -188,12 +188,12 @@ public void testEasierParseHttpHeadersInARC() throws IOException { assertEquals(har.getHeader().getUrl(), url, "failed to retrieve Url from metadata"); } - private void assertHeaderCorrectlyParsed(Header[] headers) { + private void assertHeaderCorrectlyParsed(HttpHeader[] headers) { final List orgHeaders = Arrays.asList(HTTPHEADER.split("\r\n")); assertEquals(orgHeaders.size(), headers.length + 1, "not all HTTP header entries have been retrieved"); - for (Header header : headers) { + for (HttpHeader header : headers) { assertTrue(orgHeaders.contains(header.getName() + ": " + header.getValue())); } diff --git a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java index dc000265..19b1984f 100644 --- a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java @@ -2,8 +2,6 @@ 
import java.net.URISyntaxException; -import org.apache.commons.httpclient.URIException; - import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; diff --git a/src/test/java/org/archive/url/URLParserTest.java b/src/test/java/org/archive/url/URLParserTest.java index ff99fe38..bc8fc3a5 100644 --- a/src/test/java/org/archive/url/URLParserTest.java +++ b/src/test/java/org/archive/url/URLParserTest.java @@ -4,8 +4,6 @@ import java.net.URISyntaxException; import java.net.URLDecoder; -import org.apache.commons.httpclient.URIException; - import com.google.common.net.InetAddresses; import org.junit.jupiter.api.Test; diff --git a/src/test/java/org/archive/url/URLRegexTransformerTest.java b/src/test/java/org/archive/url/URLRegexTransformerTest.java index 01e97aac..73c43f96 100644 --- a/src/test/java/org/archive/url/URLRegexTransformerTest.java +++ b/src/test/java/org/archive/url/URLRegexTransformerTest.java @@ -1,8 +1,6 @@ package org.archive.url; -import org.apache.commons.httpclient.URIException; - import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; diff --git a/src/test/java/org/archive/url/UsableURIFactoryTest.java b/src/test/java/org/archive/url/UsableURIFactoryTest.java index 368cc93d..85d423c0 100644 --- a/src/test/java/org/archive/url/UsableURIFactoryTest.java +++ b/src/test/java/org/archive/url/UsableURIFactoryTest.java @@ -21,7 +21,6 @@ import java.util.TreeMap; -import org.apache.commons.httpclient.URIException; import org.apache.commons.lang.SerializationUtils; import org.junit.jupiter.api.Test; diff --git a/src/test/java/org/archive/url/UsableURITest.java b/src/test/java/org/archive/url/UsableURITest.java index 9a4c1860..161e215a 100644 --- a/src/test/java/org/archive/url/UsableURITest.java +++ b/src/test/java/org/archive/url/UsableURITest.java @@ -20,8 +20,6 @@ import java.net.URISyntaxException; -import org.apache.commons.httpclient.URIException; - import 
org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*; From 83648323d55f9e9df3e8ce3e83e0f26aff2ddd21 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 11:47:24 +0900 Subject: [PATCH 062/169] Remove deprecated class org.archive.io.ArchiveFileConstants --- CHANGES.md | 6 +++++ .../org/archive/io/ArchiveFileConstants.java | 24 ------------------- .../java/org/archive/io/ArchiveReader.java | 9 +++---- .../org/archive/io/ArchiveReaderFactory.java | 3 ++- .../java/org/archive/io/ArchiveRecord.java | 1 + src/main/java/org/archive/io/WriterPool.java | 3 ++- .../java/org/archive/io/WriterPoolMember.java | 3 ++- .../java/org/archive/io/warc/WARCWriter.java | 6 ++--- 8 files changed, 21 insertions(+), 34 deletions(-) delete mode 100644 src/main/java/org/archive/io/ArchiveFileConstants.java diff --git a/CHANGES.md b/CHANGES.md index 478238bf..61597616 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,12 @@ Unreleased ---------- +#### Removals + +* Removed `org.archive.io.ArchiveFileConstants` (use `org.archive.format.ArchiveFileConstants` instead) +* `ArchiveReader`, `ArchiveReaderFactory`, `WARCWriter`, `WriterPool` and `WriterPoolMember` no longer implement + `ArchiveFileConstants`. Use static imports instead. + #### Dependency upgrades - **junit**: 4.13.2 → 5.12.2 diff --git a/src/main/java/org/archive/io/ArchiveFileConstants.java b/src/main/java/org/archive/io/ArchiveFileConstants.java deleted file mode 100644 index b1a39194..00000000 --- a/src/main/java/org/archive/io/ArchiveFileConstants.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.io; - -@Deprecated -public interface ArchiveFileConstants extends org.archive.format.ArchiveFileConstants { -} diff --git a/src/main/java/org/archive/io/ArchiveReader.java b/src/main/java/org/archive/io/ArchiveReader.java index 0038cccf..449cdc24 100644 --- a/src/main/java/org/archive/io/ArchiveReader.java +++ b/src/main/java/org/archive/io/ArchiveReader.java @@ -42,13 +42,15 @@ import com.google.common.io.CountingInputStream; +import static org.archive.format.ArchiveFileConstants.*; + /** * Reader for an Archive file of Archive {@link ArchiveRecord}s. * @author stack * @version $Date$ $Version$ */ -public abstract class ArchiveReader implements ArchiveFileConstants, Iterable, Closeable { +public abstract class ArchiveReader implements Iterable, Closeable { /** * Is this Archive file compressed? */ @@ -601,8 +603,7 @@ public String getStrippedFileName() { */ public static String getStrippedFileName(String name, final String dotFileExtension) { - name = stripExtension(name, - ArchiveFileConstants.DOT_COMPRESSED_FILE_EXTENSION); + name = stripExtension(name, DOT_COMPRESSED_FILE_EXTENSION); return stripExtension(name, dotFileExtension); } @@ -699,7 +700,7 @@ public boolean outputRecord(final String format) boolean result = true; if (format.equals(CDX)) { System.out.println(get().outputCdx(getStrippedFileName())); - } else if(format.equals(ArchiveFileConstants.DUMP)) { + } else if(format.equals(DUMP)) { // No point digesting if dumping content. 
setDigest(false); get().dump(); diff --git a/src/main/java/org/archive/io/ArchiveReaderFactory.java b/src/main/java/org/archive/io/ArchiveReaderFactory.java index 17f14d3a..bc316893 100644 --- a/src/main/java/org/archive/io/ArchiveReaderFactory.java +++ b/src/main/java/org/archive/io/ArchiveReaderFactory.java @@ -33,6 +33,7 @@ import org.archive.url.UsableURI; import org.archive.util.FileUtils; +import static org.archive.format.ArchiveFileConstants.*; /** * Factory that returns an Archive file Reader. @@ -40,7 +41,7 @@ * @author stack * @version $Date$ $Revision$ */ -public class ArchiveReaderFactory implements ArchiveFileConstants { +public class ArchiveReaderFactory { // Static block to enable S3 URLs static { if (System.getProperty("java.protocol.handler.pkgs") != null) { diff --git a/src/main/java/org/archive/io/ArchiveRecord.java b/src/main/java/org/archive/io/ArchiveRecord.java index 63bfe628..4bd1fa02 100644 --- a/src/main/java/org/archive/io/ArchiveRecord.java +++ b/src/main/java/org/archive/io/ArchiveRecord.java @@ -25,6 +25,7 @@ import java.security.NoSuchAlgorithmException; import java.util.logging.Level; +import org.archive.format.ArchiveFileConstants; import org.archive.util.Base32; /** diff --git a/src/main/java/org/archive/io/WriterPool.java b/src/main/java/org/archive/io/WriterPool.java index db184c5f..79da16c0 100644 --- a/src/main/java/org/archive/io/WriterPool.java +++ b/src/main/java/org/archive/io/WriterPool.java @@ -30,6 +30,7 @@ import java.util.logging.Level; import java.util.logging.Logger; +import org.archive.format.ArchiveFileConstants; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -215,7 +216,7 @@ public synchronized void invalidateFile(WriterPoolMember f) // gets attention. 
File file = f.getFile(); file.renameTo(new File(file.getAbsoluteFile() + - WriterPoolMember.INVALID_SUFFIX)); + ArchiveFileConstants.INVALID_SUFFIX)); } /** diff --git a/src/main/java/org/archive/io/WriterPoolMember.java b/src/main/java/org/archive/io/WriterPoolMember.java index e10d443b..a488354a 100644 --- a/src/main/java/org/archive/io/WriterPoolMember.java +++ b/src/main/java/org/archive/io/WriterPoolMember.java @@ -38,6 +38,7 @@ import org.archive.util.FileUtils; import org.archive.util.PropertyUtils; +import static org.archive.format.ArchiveFileConstants.*; /** @@ -48,7 +49,7 @@ * @author stack * @version $Date$ $Revision$ */ -public abstract class WriterPoolMember implements ArchiveFileConstants { +public abstract class WriterPoolMember { private final Logger logger = Logger.getLogger(this.getClass().getName()); public static final String UTF8 = "UTF-8"; diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java index 982b8bc4..f1816597 100644 --- a/src/main/java/org/archive/io/warc/WARCWriter.java +++ b/src/main/java/org/archive/io/warc/WARCWriter.java @@ -37,7 +37,7 @@ import java.util.logging.Logger; import org.apache.commons.lang.StringUtils; -import org.archive.io.ArchiveFileConstants; +import org.archive.format.ArchiveFileConstants; import org.archive.io.UTF8Bytes; import org.archive.io.WriterPoolMember; import org.archive.util.ArchiveUtils; @@ -343,9 +343,9 @@ public URI writeWarcinfoRecord(String filename, final String description) recordInfo.setMimetype("application/warc-fields"); // Strip .open suffix if present. 
- if (filename.endsWith(WriterPoolMember.OCCUPIED_SUFFIX)) { + if (filename.endsWith(ArchiveFileConstants.OCCUPIED_SUFFIX)) { filename = filename.substring(0, - filename.length() - WriterPoolMember.OCCUPIED_SUFFIX.length()); + filename.length() - ArchiveFileConstants.OCCUPIED_SUFFIX.length()); } recordInfo.addExtraHeader(HEADER_KEY_FILENAME, filename); if (description != null && description.length() > 0) { From a85193746ae7a61ce1228202b6ebf8727218b95b Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 12:11:24 +0900 Subject: [PATCH 063/169] Remove deprecated class org.archive.io.warc.WARCConstants --- CHANGES.md | 36 ++++++++++++++---- .../archive/io/GZIPMembersInputStream.java | 38 ------------------- src/main/java/org/archive/io/GzipHeader.java | 26 ------------- .../org/archive/io/NoGzipMagicException.java | 26 ------------- .../org/archive/io/warc/WARCConstants.java | 24 ------------ .../java/org/archive/io/warc/WARCReader.java | 4 +- .../archive/io/warc/WARCReaderFactory.java | 6 +-- .../java/org/archive/io/warc/WARCRecord.java | 6 ++- .../java/org/archive/io/warc/WARCWriter.java | 5 ++- .../org/archive/io/warc/WARCWriterTest.java | 6 ++- 10 files changed, 47 insertions(+), 130 deletions(-) delete mode 100644 src/main/java/org/archive/io/GZIPMembersInputStream.java delete mode 100644 src/main/java/org/archive/io/GzipHeader.java delete mode 100644 src/main/java/org/archive/io/NoGzipMagicException.java delete mode 100644 src/main/java/org/archive/io/warc/WARCConstants.java diff --git a/CHANGES.md b/CHANGES.md index 61597616..85f7e937 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,13 +1,35 @@ Unreleased ---------- -#### Removals - -* Removed `org.archive.io.ArchiveFileConstants` (use `org.archive.format.ArchiveFileConstants` instead) -* `ArchiveReader`, `ArchiveReaderFactory`, `WARCWriter`, `WriterPool` and `WriterPoolMember` no longer implement - `ArchiveFileConstants`. Use static imports instead. 
- -#### Dependency upgrades +### Removals + +#### Removed compatibility versions of moved classes + +| Removed | Replacement | +|-----------------------------------------|-----------------------------------------------| +| `org.archive.io.ArchiveFileConstants` | `org.archive.format.ArchiveFileConstants` | +| `org.archive.io.GzipHeader` | `org.archive.util.zip.GzipHeader` | +| `org.archive.io.GZIPMembersInputStream` | `org.archive.util.zip.GZIPMembersInputStream` | +| `org.archive.io.NoGzipMagicException` | `org.archive.util.zip.NoGzipMagicException` | +| `org.archive.io.warc.WARCConstants` | `org.archive.format.warc.WARCConstants` | + +##### Removed usages of constant interfaces + +Static imports should be used instead. + +* `ArchiveFileConstants` is no longer implemented by: + * `ArchiveReader` + * `ArchiveReaderFactory` + * `WARCWriter` + * `WriterPool` + * `WriterPoolMember` +* `WARCConstants` is no longer implemented by: + * `WARCReader` + * `WARCReaderFactory` + * `WARCRecord` + * `WARCWriter` + +### Dependency upgrades - **junit**: 4.13.2 → 5.12.2 diff --git a/src/main/java/org/archive/io/GZIPMembersInputStream.java b/src/main/java/org/archive/io/GZIPMembersInputStream.java deleted file mode 100644 index 35fb9e90..00000000 --- a/src/main/java/org/archive/io/GZIPMembersInputStream.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.io; - -import java.io.IOException; -import java.io.InputStream; - -/** - * @deprecated use {@link org.archive.util.zip.GZIPMembersInputStream} - */ -@Deprecated -public class GZIPMembersInputStream extends org.archive.util.zip.GZIPMembersInputStream { - - public GZIPMembersInputStream(InputStream in) throws IOException { - super(in); - } - - public GZIPMembersInputStream(InputStream in, int size) throws IOException { - super(in, size); - } - -} \ No newline at end of file diff --git a/src/main/java/org/archive/io/GzipHeader.java b/src/main/java/org/archive/io/GzipHeader.java deleted file mode 100644 index 6b8263bc..00000000 --- a/src/main/java/org/archive/io/GzipHeader.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.archive.io; - -/** - * @deprecated use {@link org.archive.util.zip.GzipHeader} - */ -@Deprecated -public class GzipHeader extends org.archive.util.zip.GzipHeader { -} diff --git a/src/main/java/org/archive/io/NoGzipMagicException.java b/src/main/java/org/archive/io/NoGzipMagicException.java deleted file mode 100644 index 27d1058a..00000000 --- a/src/main/java/org/archive/io/NoGzipMagicException.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.io; - -/** - * @deprecated use {@link org.archive.util.zip.NoGzipMagicException} - */ -@Deprecated -public class NoGzipMagicException extends org.archive.util.zip.NoGzipMagicException { -} diff --git a/src/main/java/org/archive/io/warc/WARCConstants.java b/src/main/java/org/archive/io/warc/WARCConstants.java deleted file mode 100644 index 83cc8a6d..00000000 --- a/src/main/java/org/archive/io/warc/WARCConstants.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. 
- * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.io.warc; - -@Deprecated -public interface WARCConstants extends org.archive.format.warc.WARCConstants { -} diff --git a/src/main/java/org/archive/io/warc/WARCReader.java b/src/main/java/org/archive/io/warc/WARCReader.java index a34854ef..f9b41af7 100644 --- a/src/main/java/org/archive/io/warc/WARCReader.java +++ b/src/main/java/org/archive/io/warc/WARCReader.java @@ -35,13 +35,15 @@ import org.archive.io.ArchiveReader; import org.archive.io.ArchiveRecord; +import static org.archive.format.warc.WARCConstants.*; + /** * WARCReader. * Go via {@link WARCReaderFactory} to get instance. 
* @author stack * @version $Date: 2006-11-27 18:03:03 -0800 (Mon, 27 Nov 2006) $ $Version$ */ -public class WARCReader extends ArchiveReader implements WARCConstants { +public class WARCReader extends ArchiveReader { protected WARCReader() { super(); } diff --git a/src/main/java/org/archive/io/warc/WARCReaderFactory.java b/src/main/java/org/archive/io/warc/WARCReaderFactory.java index c3e5baa0..881da869 100644 --- a/src/main/java/org/archive/io/warc/WARCReaderFactory.java +++ b/src/main/java/org/archive/io/warc/WARCReaderFactory.java @@ -30,13 +30,14 @@ import org.archive.io.ArchiveReader; import org.archive.io.ArchiveReaderFactory; import org.archive.io.ArchiveRecord; -import org.archive.io.warc.WARCConstants; import org.archive.util.ArchiveUtils; import org.archive.util.FileUtils; import org.archive.util.zip.GZIPMembersInputStream; import com.google.common.io.CountingInputStream; +import static org.archive.format.warc.WARCConstants.*; + /** * Factory for WARC Readers. * Figures whether to give out a compressed file Reader or an uncompressed @@ -44,8 +45,7 @@ * @author stack * @version $Date: 2006-08-23 17:59:04 -0700 (Wed, 23 Aug 2006) $ $Version$ */ -public class WARCReaderFactory extends ArchiveReaderFactory -implements WARCConstants { +public class WARCReaderFactory extends ArchiveReaderFactory { private static final WARCReaderFactory factory = new WARCReaderFactory(); /** diff --git a/src/main/java/org/archive/io/warc/WARCRecord.java b/src/main/java/org/archive/io/warc/WARCRecord.java index cf106270..21f662ea 100644 --- a/src/main/java/org/archive/io/warc/WARCRecord.java +++ b/src/main/java/org/archive/io/warc/WARCRecord.java @@ -34,13 +34,17 @@ import org.archive.io.ArchiveRecordHeader; import org.archive.util.LaxHttpParser; +import static org.archive.format.ArchiveFileConstants.ABSOLUTE_OFFSET_KEY; +import static org.archive.format.ArchiveFileConstants.READER_IDENTIFIER_FIELD_KEY; +import static org.archive.format.warc.WARCConstants.*; + /** * A WARC file 
Record. * * @author stack */ -public class WARCRecord extends ArchiveRecord implements WARCConstants { +public class WARCRecord extends ArchiveRecord { private Pattern WHITESPACE = Pattern.compile("\\s"); /** diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java index f1816597..1e6135c8 100644 --- a/src/main/java/org/archive/io/warc/WARCWriter.java +++ b/src/main/java/org/archive/io/warc/WARCWriter.java @@ -43,6 +43,8 @@ import org.archive.util.ArchiveUtils; import org.archive.util.anvl.Element; +import static org.archive.format.warc.WARCConstants.*; + /** * WARC implementation. @@ -56,8 +58,7 @@ * @author stack * @version $Revision: 4604 $ $Date: 2006-09-05 22:38:18 -0700 (Tue, 05 Sep 2006) $ */ -public class WARCWriter extends WriterPoolMember -implements WARCConstants { +public class WARCWriter extends WriterPoolMember { public static final String TOTALS = "totals"; public static final String SIZE_ON_DISK = "sizeOnDisk"; public static final String TOTAL_BYTES = "totalBytes"; diff --git a/src/test/java/org/archive/io/warc/WARCWriterTest.java b/src/test/java/org/archive/io/warc/WARCWriterTest.java index 1039119e..c0ace5f0 100644 --- a/src/test/java/org/archive/io/warc/WARCWriterTest.java +++ b/src/test/java/org/archive/io/warc/WARCWriterTest.java @@ -44,12 +44,14 @@ import static org.junit.jupiter.api.Assertions.*; +import static org.archive.format.warc.WARCConstants.*; + /** * Test Writer and Reader. 
* @author stack * @version $Date: 2006-08-29 19:35:48 -0700 (Tue, 29 Aug 2006) $ $Version$ */ -public class WARCWriterTest implements WARCConstants { +public class WARCWriterTest { private static final AtomicInteger SERIAL_NO = new AtomicInteger(); @@ -153,7 +155,7 @@ private void writeWarcinfoRecord(WARCWriter writer) recordInfo.setContentStream(new ByteArrayInputStream(bytes)); recordInfo.setContentLength((long) bytes.length); - final URI recordid = writer.generateRecordId(WARCWriter.TYPE, WARCRecordType.warcinfo.toString()); + final URI recordid = writer.generateRecordId(TYPE, WARCRecordType.warcinfo.toString()); recordInfo.setRecordId(recordid); writer.writeRecord(recordInfo); From 9ebbfa9c2b6a18f852854d961082a959199e57d0 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 12:26:25 +0900 Subject: [PATCH 064/169] Remove deprecated methods --- CHANGES.md | 11 ++++++++++- src/main/java/org/archive/util/DevUtils.java | 9 --------- src/main/java/org/archive/util/Recorder.java | 10 ---------- src/main/java/org/archive/util/Reporter.java | 9 --------- src/main/java/org/archive/util/anvl/ANVLRecord.java | 5 ----- 5 files changed, 10 insertions(+), 34 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 85f7e937..9858d615 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,7 +3,7 @@ Unreleased ### Removals -#### Removed compatibility versions of moved classes +#### Removed deprecated versions of moved classes | Removed | Replacement | |-----------------------------------------|-----------------------------------------------| @@ -13,6 +13,15 @@ Unreleased | `org.archive.io.NoGzipMagicException` | `org.archive.util.zip.NoGzipMagicException` | | `org.archive.io.warc.WARCConstants` | `org.archive.format.warc.WARCConstants` | +#### Removed deprecated methods + +| Removed | Replacement | +|-----------------------------------------------|-------------------------------------------| +| `ANVLRecord(int)` | `ANVLRecord()` | +| 
`DevUtils.betterPrintStack(RuntimeException)` | `Throwable.printStackTrace()` | +| `Recorder.getReplayCharSequence()` | `Recorder.getContentReplayCharSequence()` | +| `Reporter.shortReportLineTo(PrintWriter)` | `Reporter.reportTo(PrintWriter)` | + ##### Removed usages of constant interfaces Static imports should be used instead. diff --git a/src/main/java/org/archive/util/DevUtils.java b/src/main/java/org/archive/util/DevUtils.java index d630a0b1..f2a1d044 100644 --- a/src/main/java/org/archive/util/DevUtils.java +++ b/src/main/java/org/archive/util/DevUtils.java @@ -78,15 +78,6 @@ public static String extraInfo() { return sw.toString(); } - /** - * Nothing to see here, move along. - * @deprecated This method was never used. - */ - @Deprecated - public static void betterPrintStack(RuntimeException re) { - re.printStackTrace(System.err); - } - /** * Send this JVM process a SIGQUIT; giving a thread dump and possibly * a heap histogram (if using -XX:+PrintClassHistogram). diff --git a/src/main/java/org/archive/util/Recorder.java b/src/main/java/org/archive/util/Recorder.java index 61cbf871..e67cfb48 100644 --- a/src/main/java/org/archive/util/Recorder.java +++ b/src/main/java/org/archive/util/Recorder.java @@ -351,16 +351,6 @@ public void setContentEncoding(String contentEncoding) { public String getContentEncoding() { return this.contentEncoding; } - - - /** - * @return - * @throws IOException - * @deprecated use getContentReplayCharSequence - */ - public ReplayCharSequence getReplayCharSequence() throws IOException { - return getContentReplayCharSequence(); - } /** * @return A ReplayCharSequence. 
Caller may call diff --git a/src/main/java/org/archive/util/Reporter.java b/src/main/java/org/archive/util/Reporter.java index 3f4ea5e5..dd21b53d 100644 --- a/src/main/java/org/archive/util/Reporter.java +++ b/src/main/java/org/archive/util/Reporter.java @@ -32,15 +32,6 @@ public interface Reporter { */ public void reportTo(PrintWriter writer) throws IOException; - /** - * Write a short single-line summary report - * - * @param pw writer to receive report - */ - @Deprecated - public void shortReportLineTo(PrintWriter pw) throws IOException; - - /** * @return Same data that's in the single line report, as key-value pairs */ diff --git a/src/main/java/org/archive/util/anvl/ANVLRecord.java b/src/main/java/org/archive/util/anvl/ANVLRecord.java index 06603914..e548f432 100644 --- a/src/main/java/org/archive/util/anvl/ANVLRecord.java +++ b/src/main/java/org/archive/util/anvl/ANVLRecord.java @@ -72,11 +72,6 @@ public ANVLRecord(Collection c) { super(c); } - /** @deprecated */ - public ANVLRecord(int initialCapacity) { - super(); - } - public boolean addLabel(final String l) { return super.add(new Element(new Label(l))); } From 21a91008501c28f24cb4644d8f0bb673c8fc695b Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 12:35:04 +0900 Subject: [PATCH 065/169] Remove deprecated class org.archive.io.arc.ARCConstants --- CHANGES.md | 8 +++++ .../org/archive/io/HeaderedArchiveRecord.java | 2 +- .../java/org/archive/io/arc/ARCConstants.java | 29 ------------------- .../java/org/archive/io/arc/ARCReader.java | 3 +- .../org/archive/io/arc/ARCReaderFactory.java | 4 +-- .../java/org/archive/io/arc/ARCRecord.java | 9 +++--- .../org/archive/io/arc/ARCRecordMetaData.java | 3 +- .../java/org/archive/io/arc/ARCUtils.java | 4 ++- .../java/org/archive/io/arc/ARCWriter.java | 3 +- .../org/archive/io/arc/ARCWriterPoolTest.java | 4 +-- .../org/archive/io/arc/ARCWriterTest.java | 3 +- 11 files changed, 29 insertions(+), 43 deletions(-) delete mode 100644 
src/main/java/org/archive/io/arc/ARCConstants.java diff --git a/CHANGES.md b/CHANGES.md index 9858d615..d43a75e0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,7 @@ Unreleased | `org.archive.io.GzipHeader` | `org.archive.util.zip.GzipHeader` | | `org.archive.io.GZIPMembersInputStream` | `org.archive.util.zip.GZIPMembersInputStream` | | `org.archive.io.NoGzipMagicException` | `org.archive.util.zip.NoGzipMagicException` | +| `org.archive.io.arc.ARCConstants` | `org.archive.format.arc.ARCConstants` | | `org.archive.io.warc.WARCConstants` | `org.archive.format.warc.WARCConstants` | #### Removed deprecated methods @@ -32,6 +33,13 @@ Static imports should be used instead. * `WARCWriter` * `WriterPool` * `WriterPoolMember` +* `ARCConstants` is no longer implemented by: + * `ARCReader` + * `ARCReaderFactory` + * `ARCRecord` + * `ARCRecordMetaData` + * `ARCUtils` + * `ARCWriter` * `WARCConstants` is no longer implemented by: * `WARCReader` * `WARCReaderFactory` diff --git a/src/main/java/org/archive/io/HeaderedArchiveRecord.java b/src/main/java/org/archive/io/HeaderedArchiveRecord.java index ac4b82f6..809a9e54 100644 --- a/src/main/java/org/archive/io/HeaderedArchiveRecord.java +++ b/src/main/java/org/archive/io/HeaderedArchiveRecord.java @@ -27,7 +27,7 @@ import java.io.PrintStream; import org.archive.format.http.HttpHeader; -import org.archive.io.arc.ARCConstants; +import org.archive.format.arc.ARCConstants; import org.archive.util.LaxHttpParser; /** diff --git a/src/main/java/org/archive/io/arc/ARCConstants.java b/src/main/java/org/archive/io/arc/ARCConstants.java deleted file mode 100644 index c44cfef7..00000000 --- a/src/main/java/org/archive/io/arc/ARCConstants.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. 
- * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.io.arc; - - -/** - * Constants used by ARC files and in ARC file processing. - * - * @author stack - * @deprecated - */ -public interface ARCConstants extends org.archive.format.arc.ARCConstants { -} diff --git a/src/main/java/org/archive/io/arc/ARCReader.java b/src/main/java/org/archive/io/arc/ARCReader.java index 7f85cc2a..c9a88415 100644 --- a/src/main/java/org/archive/io/arc/ARCReader.java +++ b/src/main/java/org/archive/io/arc/ARCReader.java @@ -43,6 +43,7 @@ import org.archive.io.WriterPoolMember; import org.archive.util.ArchiveUtils; +import static org.archive.format.arc.ARCConstants.*; /** * Get an iterator on an ARC file or get a record by absolute position. @@ -66,7 +67,7 @@ * @version $Date$ $Revision$ */ public abstract class ARCReader extends ArchiveReader -implements ARCConstants, Closeable { +implements Closeable { private final Logger logger = Logger.getLogger(ARCReader.class.getName()); /** diff --git a/src/main/java/org/archive/io/arc/ARCReaderFactory.java b/src/main/java/org/archive/io/arc/ARCReaderFactory.java index 44437ed7..d2f10842 100644 --- a/src/main/java/org/archive/io/arc/ARCReaderFactory.java +++ b/src/main/java/org/archive/io/arc/ARCReaderFactory.java @@ -40,6 +40,7 @@ import com.google.common.io.CountingInputStream; +import static org.archive.format.arc.ARCConstants.*; /** * Factory that returns an ARCReader. 
@@ -48,8 +49,7 @@ * * @author stack */ -public class ARCReaderFactory extends ArchiveReaderFactory -implements ARCConstants { +public class ARCReaderFactory extends ArchiveReaderFactory { /** * This factory instance. */ diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java index d3c036ba..dafc63b6 100644 --- a/src/main/java/org/archive/io/arc/ARCRecord.java +++ b/src/main/java/org/archive/io/arc/ARCRecord.java @@ -42,12 +42,14 @@ import org.archive.util.LaxHttpParser; import org.archive.util.TextUtils; +import static org.archive.format.arc.ARCConstants.*; + /** * An ARC file record. * Does not compass the ARCRecord metadata line, just the record content. * @author stack */ -public class ARCRecord extends ArchiveRecord implements ARCConstants { +public class ARCRecord extends ArchiveRecord { /** * Http status code. * @@ -590,7 +592,7 @@ private InputStream readHttpHeader() throws IOException { } statusLine = new String(statusBytes, 0, - statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); + statusBytes.length - eolCharCount, DEFAULT_ENCODING); // If a null or DELETED break immediately if ((statusLine == null) || statusLine.startsWith("DELETED")) { @@ -681,8 +683,7 @@ private InputStream readHttpHeader() throws IOException { // Read the status line. Don't let it into the parseHeaders function. // It doesn't know what to do with it. 
bais.read(statusBytes, 0, statusBytes.length); - this.httpHeaders = LaxHttpParser.parseHeaders(bais, - ARCConstants.DEFAULT_ENCODING); + this.httpHeaders = LaxHttpParser.parseHeaders(bais, DEFAULT_ENCODING); this.getMetaData().setStatusCode(Integer.toString(getStatusCode())); bais.reset(); return bais; diff --git a/src/main/java/org/archive/io/arc/ARCRecordMetaData.java b/src/main/java/org/archive/io/arc/ARCRecordMetaData.java index 02b368e4..2a187477 100644 --- a/src/main/java/org/archive/io/arc/ARCRecordMetaData.java +++ b/src/main/java/org/archive/io/arc/ARCRecordMetaData.java @@ -27,13 +27,14 @@ import org.archive.io.ArchiveRecordHeader; +import static org.archive.format.arc.ARCConstants.*; /** * An immutable class to hold an ARC record meta data. * * @author stack */ -public class ARCRecordMetaData implements ArchiveRecordHeader, ARCConstants { +public class ARCRecordMetaData implements ArchiveRecordHeader { /** * Map of record header fields. * diff --git a/src/main/java/org/archive/io/arc/ARCUtils.java b/src/main/java/org/archive/io/arc/ARCUtils.java index 985457e2..5bcb4cc3 100644 --- a/src/main/java/org/archive/io/arc/ARCUtils.java +++ b/src/main/java/org/archive/io/arc/ARCUtils.java @@ -32,7 +32,9 @@ import org.archive.util.zip.GzipHeader; import org.archive.util.zip.NoGzipMagicException; -public class ARCUtils implements ARCConstants { +import static org.archive.format.arc.ARCConstants.*; + +public class ARCUtils { /** * @param pathOrUri Path or URI to extract arc filename from. * @return Extracted arc file name. diff --git a/src/main/java/org/archive/io/arc/ARCWriter.java b/src/main/java/org/archive/io/arc/ARCWriter.java index c7042943..82d13e9f 100644 --- a/src/main/java/org/archive/io/arc/ARCWriter.java +++ b/src/main/java/org/archive/io/arc/ARCWriter.java @@ -42,6 +42,7 @@ import org.archive.util.DevUtils; import org.archive.util.MimetypeUtils; +import static org.archive.format.arc.ARCConstants.*; /** * Write ARC files. 
@@ -110,7 +111,7 @@ * * @author stack */ -public class ARCWriter extends WriterPoolMember implements ARCConstants, Closeable { +public class ARCWriter extends WriterPoolMember implements Closeable { private static final Logger logger = Logger.getLogger(ARCWriter.class.getName()); diff --git a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java index 07548b4c..954da636 100644 --- a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java +++ b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java @@ -31,7 +31,7 @@ import org.junit.jupiter.api.io.TempDir; import static org.junit.jupiter.api.Assertions.assertEquals; - +import static org.archive.format.arc.ARCConstants.*; /** * Test ARCWriterPool @@ -119,7 +119,7 @@ private WriterPoolSettings getSettings(final boolean isCompressed) { return new WriterPoolSettingsData( "TEST", "${prefix}-${timestamp17}-${serialno}-${heritrix.hostname}", - ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE, + DEFAULT_MAX_ARC_FILE_SIZE, isCompressed, Arrays.asList(files), null); diff --git a/src/test/java/org/archive/io/arc/ARCWriterTest.java b/src/test/java/org/archive/io/arc/ARCWriterTest.java index 84539391..ca300697 100644 --- a/src/test/java/org/archive/io/arc/ARCWriterTest.java +++ b/src/test/java/org/archive/io/arc/ARCWriterTest.java @@ -49,6 +49,7 @@ import static org.junit.jupiter.api.Assertions.*; +import static org.archive.format.arc.ARCConstants.*; /** * Test ARCWriter class. 
@@ -58,7 +59,7 @@ * * @author stack */ -public class ARCWriterTest implements ARCConstants { +public class ARCWriterTest { /** * Utility class for writing bad ARCs (with trailing junk) */ From b44924ca29de25e42c5eb9f944811fee02037255 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 13:02:04 +0900 Subject: [PATCH 066/169] Document HttpClient 3 removal in CHANGES.md --- CHANGES.md | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d43a75e0..20314902 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,18 +1,32 @@ -Unreleased ----------- +2.0.0 +----- ### Removals +#### Removed Apache HttpClient 3.1 + +`HTTPSeekableLineReaderFactory` and `ZipNumBlockLoader` now default to HttpClient 4.3. + +| Removed | Replacement | +|-----------------------------------------------------------|--------------------------------------| +| `org.apache.commons.httpclient.URIException` | `org.archive.url.URIException` | +| `org.apache.commons.httpclient.Header` | `org.archive.format.http.HttpHeader` | +| `org.archive.httpclient.HttpRecorderGetMethod` | | +| `org.archive.httpclient.HttpRecorderMethod` | | +| `org.archive.httpclient.HttpRecorderPostMethod` | | +| `org.archive.httpclient.SingleHttpConnectionManager` | | +| `org.archive.httpclient.ThreadLocalHttpConnectionManager` | | + #### Removed deprecated versions of moved classes -| Removed | Replacement | -|-----------------------------------------|-----------------------------------------------| -| `org.archive.io.ArchiveFileConstants` | `org.archive.format.ArchiveFileConstants` | -| `org.archive.io.GzipHeader` | `org.archive.util.zip.GzipHeader` | -| `org.archive.io.GZIPMembersInputStream` | `org.archive.util.zip.GZIPMembersInputStream` | -| `org.archive.io.NoGzipMagicException` | `org.archive.util.zip.NoGzipMagicException` | -| `org.archive.io.arc.ARCConstants` | `org.archive.format.arc.ARCConstants` | -| `org.archive.io.warc.WARCConstants` | 
`org.archive.format.warc.WARCConstants` | +| Removed | Replacement | +|----------------------------------------------|-----------------------------------------------| +| `org.archive.io.ArchiveFileConstants` | `org.archive.format.ArchiveFileConstants` | +| `org.archive.io.GzipHeader` | `org.archive.util.zip.GzipHeader` | +| `org.archive.io.GZIPMembersInputStream` | `org.archive.util.zip.GZIPMembersInputStream` | +| `org.archive.io.NoGzipMagicException` | `org.archive.util.zip.NoGzipMagicException` | +| `org.archive.io.arc.ARCConstants` | `org.archive.format.arc.ARCConstants` | +| `org.archive.io.warc.WARCConstants` | `org.archive.format.warc.WARCConstants` | #### Removed deprecated methods From ba22f96e4349dcacde1775f76b6665067f07e96f Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 13:04:53 +0900 Subject: [PATCH 067/169] Upgrade dependencies for 2.0.0 --- CHANGES.md | 3 +++ pom.xml | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 20314902..d508ce06 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -62,6 +62,9 @@ Static imports should be used instead. 
### Dependency upgrades +- **commons-io**: 2.18.0 → 2.19.0 +- **guava**: 33.3.1-jre → 33.4.8-jre +- **json**: 20240303 → 20250517 - **junit**: 4.13.2 → 5.12.2 1.3.0 diff --git a/pom.xml b/pom.xml index 81bd9b32..60068767 100644 --- a/pom.xml +++ b/pom.xml @@ -61,13 +61,13 @@ com.google.guava guava - 33.3.1-jre + 33.4.8-jre org.json json - 20240303 + 20250517 org.htmlparser @@ -141,7 +141,7 @@ commons-io commons-io - 2.18.0 + 2.19.0 From c3299bb0442dd27aab79ce53da6e73ff9f08a107 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 13:23:37 +0900 Subject: [PATCH 068/169] Bump maven-compiler-plugin to 3.14.0 --- pom.xml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 60068767..22f83428 100644 --- a/pom.xml +++ b/pom.xml @@ -162,7 +162,7 @@ org.apache.maven.plugins maven-compiler-plugin - 2.3.2 + 3.14.0 8 8 @@ -265,6 +265,15 @@ + + jdk9-plus + + [9,) + + + 8 + + From 53f700903a04f2421613dbdc2a295cf9a35178a7 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 13:29:08 +0900 Subject: [PATCH 069/169] Remove deprecated URL canonicalizer classes --- CHANGES.md | 23 +++++++++++-------- .../url/DefaultIACanonicalizerRules.java | 7 ------ .../url/DefaultIAURLCanonicalizer.java | 7 ------ .../archive/url/GoogleURLCanonicalizer.java | 7 ------ .../url/NonMassagingIAURLCanonicalizer.java | 8 +++---- .../org/archive/url/WaybackURLKeyMaker.java | 2 +- .../archive/url/IAURLCanonicalizerTest.java | 4 ++-- 7 files changed, 20 insertions(+), 38 deletions(-) delete mode 100644 src/main/java/org/archive/url/DefaultIACanonicalizerRules.java delete mode 100644 src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java delete mode 100644 src/main/java/org/archive/url/GoogleURLCanonicalizer.java diff --git a/CHANGES.md b/CHANGES.md index d508ce06..19c26b2f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,16 +17,19 @@ | `org.archive.httpclient.SingleHttpConnectionManager` | | | 
`org.archive.httpclient.ThreadLocalHttpConnectionManager` | | -#### Removed deprecated versions of moved classes - -| Removed | Replacement | -|----------------------------------------------|-----------------------------------------------| -| `org.archive.io.ArchiveFileConstants` | `org.archive.format.ArchiveFileConstants` | -| `org.archive.io.GzipHeader` | `org.archive.util.zip.GzipHeader` | -| `org.archive.io.GZIPMembersInputStream` | `org.archive.util.zip.GZIPMembersInputStream` | -| `org.archive.io.NoGzipMagicException` | `org.archive.util.zip.NoGzipMagicException` | -| `org.archive.io.arc.ARCConstants` | `org.archive.format.arc.ARCConstants` | -| `org.archive.io.warc.WARCConstants` | `org.archive.format.warc.WARCConstants` | +#### Removed deprecated versions of renamed classes + +| Removed | Replacement | +|-----------------------------------------------|--------------------------------------------------| +| `org.archive.io.ArchiveFileConstants` | `org.archive.format.ArchiveFileConstants` | +| `org.archive.io.GzipHeader` | `org.archive.util.zip.GzipHeader` | +| `org.archive.io.GZIPMembersInputStream` | `org.archive.util.zip.GZIPMembersInputStream` | +| `org.archive.io.NoGzipMagicException` | `org.archive.util.zip.NoGzipMagicException` | +| `org.archive.io.arc.ARCConstants` | `org.archive.format.arc.ARCConstants` | +| `org.archive.io.warc.WARCConstants` | `org.archive.format.warc.WARCConstants` | +| `org.archive.url.DefaultIACanonicalizerRules` | `org.archive.url.AggressiveIACanonicalizerRules` | +| `org.archive.url.DefaultIAURLCanonicalizer` | `org.archive.url.AggressiveIAURLCanonicalizer` | +| `org.archive.url.GoogleURLCanonicalizer` | `org.archive.url.BasicURLCanonicalizer` | #### Removed deprecated methods diff --git a/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java b/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java deleted file mode 100644 index 3d4d8581..00000000 --- 
a/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java +++ /dev/null @@ -1,7 +0,0 @@ -package org.archive.url; - -/** - * @deprecated use AggressiveIACanonicalizerRules - */ -public class DefaultIACanonicalizerRules extends AggressiveIACanonicalizerRules { -} diff --git a/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java b/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java deleted file mode 100644 index 3d1f985d..00000000 --- a/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java +++ /dev/null @@ -1,7 +0,0 @@ -package org.archive.url; - -/** - * @deprecated use AggressiveIAURLCanonicalizer - */ -public class DefaultIAURLCanonicalizer extends AggressiveIAURLCanonicalizer { -} diff --git a/src/main/java/org/archive/url/GoogleURLCanonicalizer.java b/src/main/java/org/archive/url/GoogleURLCanonicalizer.java deleted file mode 100644 index 388db8aa..00000000 --- a/src/main/java/org/archive/url/GoogleURLCanonicalizer.java +++ /dev/null @@ -1,7 +0,0 @@ -package org.archive.url; - -/** - * @deprecated use {@link BasicURLCanonicalizer} - */ -public class GoogleURLCanonicalizer extends BasicURLCanonicalizer { -} diff --git a/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java b/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java index cd579eb0..830b7b92 100644 --- a/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java +++ b/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java @@ -1,10 +1,10 @@ package org.archive.url; public class NonMassagingIAURLCanonicalizer implements URLCanonicalizer { - private static final GoogleURLCanonicalizer google = - new GoogleURLCanonicalizer(); + private static final BasicURLCanonicalizer basic = + new BasicURLCanonicalizer(); private static CanonicalizeRules nonMassagingRules = - new DefaultIACanonicalizerRules(); + new AggressiveIACanonicalizerRules(); static { nonMassagingRules.setRule(CanonicalizeRules.HOST_SETTINGS, 
CanonicalizeRules.HOST_LOWERCASE); @@ -14,7 +14,7 @@ public class NonMassagingIAURLCanonicalizer implements URLCanonicalizer { public void canonicalize(HandyURL url) { // just google's stuff, followed by the IA default stuff: - google.canonicalize(url); + basic.canonicalize(url); ia.canonicalize(url); } } diff --git a/src/main/java/org/archive/url/WaybackURLKeyMaker.java b/src/main/java/org/archive/url/WaybackURLKeyMaker.java index 99fb92e9..56f51b49 100644 --- a/src/main/java/org/archive/url/WaybackURLKeyMaker.java +++ b/src/main/java/org/archive/url/WaybackURLKeyMaker.java @@ -5,7 +5,7 @@ public class WaybackURLKeyMaker implements URLKeyMaker { // URLCanonicalizer canonicalizer = new NonMassagingIAURLCanonicalizer(); - URLCanonicalizer canonicalizer = new DefaultIAURLCanonicalizer(); + URLCanonicalizer canonicalizer = new AggressiveIAURLCanonicalizer(); public URLCanonicalizer getCanonicalizer() { return canonicalizer; diff --git a/src/test/java/org/archive/url/IAURLCanonicalizerTest.java b/src/test/java/org/archive/url/IAURLCanonicalizerTest.java index 974bdd22..aecddb3b 100644 --- a/src/test/java/org/archive/url/IAURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/IAURLCanonicalizerTest.java @@ -10,7 +10,7 @@ public class IAURLCanonicalizerTest { @Test public void testFull() throws URISyntaxException { - IAURLCanonicalizer iaC = new IAURLCanonicalizer(new DefaultIACanonicalizerRules()); + IAURLCanonicalizer iaC = new IAURLCanonicalizer(new AggressiveIACanonicalizerRules()); compCan(iaC,"http://www.archive.org:80/","http://archive.org/"); compCan(iaC,"https://www.archive.org:80/","https://archive.org:80/"); compCan(iaC,"http://www.archive.org:443/","http://archive.org:443/"); @@ -63,7 +63,7 @@ public void testGetDefaultPort() { @Test public void testStripSessionId() throws URISyntaxException { - IAURLCanonicalizer iaC = new IAURLCanonicalizer(new DefaultIACanonicalizerRules()); + IAURLCanonicalizer iaC = new IAURLCanonicalizer(new 
AggressiveIACanonicalizerRules()); compCan(iaC, "http://www.nsf.gov/statistics/sed/2009/SED_2009.zip?CFID=14387305&CFTOKEN=72942008&jsessionid=f030eacc7e49c4ca0b077922347418418766", "http://nsf.gov/statistics/sed/2009/sed_2009.zip?jsessionid=f030eacc7e49c4ca0b077922347418418766"); From cc85f0520caf5ab5759cca94f40de9f763231c81 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 20 May 2025 15:52:02 +0900 Subject: [PATCH 070/169] Add RecordingInputStream.asOutputStream() Using RecordingInputStream requires an awkward workaround when the API being recorded is not in the form of an InputStream, for example, if it's asynchronous. This adds a method to access the underlying RecordingOutputStream so you can write to it directly when that would be easier. --- CHANGES.md | 4 ++++ .../java/org/archive/io/RecordingInputStream.java | 13 +++++++++++-- .../org/archive/io/RecordingInputStreamTest.java | 13 ++++++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 19c26b2f..6d9c5880 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,10 @@ 2.0.0 ----- +### New features + +- Added `RecordingInputStream.asOutputStream()` for direct writing of recorded data without an input stream. 
[#108](https://github.com/iipc/webarchive-commons/pull/108) + ### Removals #### Removed Apache HttpClient 3.1 diff --git a/src/main/java/org/archive/io/RecordingInputStream.java b/src/main/java/org/archive/io/RecordingInputStream.java index 95419280..3c9db61f 100644 --- a/src/main/java/org/archive/io/RecordingInputStream.java +++ b/src/main/java/org/archive/io/RecordingInputStream.java @@ -383,12 +383,12 @@ public synchronized void mark(int readlimit) { @Override public boolean markSupported() { - return this.in.markSupported(); + return in != null && this.in.markSupported(); } @Override public synchronized void reset() throws IOException { - this.in.reset(); + if (in != null) this.in.reset(); this.recordingOutputStream.reset(); } @@ -418,4 +418,13 @@ public void chopAtMessageBodyBegin() { public void clearForReuse() throws IOException { recordingOutputStream.clearForReuse(); } + + /** + * Returns an OutputStream that can be used for recording input data. This is useful if the input comes in some + * form other than an InputStream. For example, if the input is provided by a callback periodically called with + * a chunk of data. + */ + public RecordingOutputStream asOutputStream() { + return this.recordingOutputStream; + } } diff --git a/src/test/java/org/archive/io/RecordingInputStreamTest.java b/src/test/java/org/archive/io/RecordingInputStreamTest.java index 9ddc7457..49160aa3 100644 --- a/src/test/java/org/archive/io/RecordingInputStreamTest.java +++ b/src/test/java/org/archive/io/RecordingInputStreamTest.java @@ -41,7 +41,6 @@ public class RecordingInputStreamTest { @TempDir File tempDir; - /** * Test readFullyOrUntil soft (no exception) and hard (exception) * length cutoffs, timeout, and rate-throttling. 
@@ -128,4 +127,16 @@ public void run() { }.start(); } + + @Test + public void testAsOutputStream() throws IOException { + RecordingInputStream ris = new RecordingInputStream(16384, (new File( + tempDir, "testAsOutputStream").getAbsolutePath())); + ris.open(null); + ris.asOutputStream().write("hello".getBytes()); + ris.close(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ris.getReplayInputStream().readFullyTo(baos); + assertEquals("hello", baos.toString()); + } } From 76fb20f071728f43286957dcb8abe3f2b5f26134 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 13:57:53 +0900 Subject: [PATCH 071/169] Fix javadoc errors --- src/main/java/org/archive/url/URI.java | 16 ++++++++-------- src/main/java/org/archive/url/URIException.java | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/archive/url/URI.java b/src/main/java/org/archive/url/URI.java index e420ca51..374e0574 100644 --- a/src/main/java/org/archive/url/URI.java +++ b/src/main/java/org/archive/url/URI.java @@ -68,7 +68,7 @@ * URI Syntactic Components *

  * - In general, written as follows:
- *   Absolute URI = &lt;scheme&gt:&lt;scheme-specific-part&gt;
+ *   Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
  *   Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
  *
  * - Syntax
@@ -103,7 +103,7 @@
  *  - char[] getRawXxx() // method
  *  - String getEscapedXxx() // method
  *  - String toString() // method
- * 

+ * * For unescaped URI forms * - URI(String) // constructor * - String getXXX() // method @@ -1873,8 +1873,8 @@ protected boolean validate(char[] component, int soffset, int eoffset, * $3 = //jakarta.apache.org * authority = $4 = jakarta.apache.org * path = $5 = /ietf/uri/ - * $6 = - * query = $7 = + * $6 = [undefined] + * query = $7 = [undefined] * $8 = #Related * fragment = $9 = Related *

@@ -2502,7 +2502,7 @@ public boolean hasFragment() { * DefaultCharsetChanged exception. * * So API programmer must follow the following way: - *

+     * 
      *  import org.apache.util.URI$DefaultCharsetChanged;
      *      .
      *      .
@@ -2517,7 +2517,7 @@ public boolean hasFragment() {
      *      // CASE 2: let user know the default document charset changed
      *      }
      *  }
-     *  
+ *
* * The API programmer is responsible to set the correct charset. * And each application should remember its own charset to support. @@ -2582,7 +2582,7 @@ public String getProtocolCharset() { * DefaultCharsetChanged exception. * * So API programmer must follow the following way: - *
+     * 
      *  import org.apache.util.URI$DefaultCharsetChanged;
      *      .
      *      .
@@ -2597,7 +2597,7 @@ public String getProtocolCharset() {
      *      // CASE 2: let user know the default protocol charset changed
      *      }
      *  }
-     *  
+ *
* * The API programmer is responsible to set the correct charset. * And each application should remember its own charset to support. diff --git a/src/main/java/org/archive/url/URIException.java b/src/main/java/org/archive/url/URIException.java index b32c68cf..49fa2cb5 100644 --- a/src/main/java/org/archive/url/URIException.java +++ b/src/main/java/org/archive/url/URIException.java @@ -35,7 +35,7 @@ /** * The URI parsing and escape encoding exception. * - * @author Sung-Gu + * @author Sung-Gu * @author Oleg Kalnichevski * @version $Revision: 480424 $ $Date: 2002/03/14 15:14:01 */ From 5c42251609b51692ff5ea6634269cf8df852d499 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 14:35:04 +0900 Subject: [PATCH 072/169] [maven-release-plugin] prepare release webarchive-commons-2.0.0 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 22f83428..87270b56 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 1.3.1-SNAPSHOT + 2.0.0 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - HEAD + webarchive-commons-2.0.0 From aafab5050c4a90e8ee6cfe11d64f8bea65a72bc0 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 14:35:10 +0900 Subject: [PATCH 073/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 87270b56..dcd6dd52 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 2.0.0 + 2.0.1-SNAPSHOT jar webarchive-commons From 0e659735397d9dbfa396ffda04da50335d1c3c04 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 14:40:11 +0900 Subject: [PATCH 074/169] Update plugin versions --- pom.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff 
--git a/pom.xml b/pom.xml index dcd6dd52..4cb75f14 100644 --- a/pom.xml +++ b/pom.xml @@ -214,7 +214,7 @@ org.sonatype.plugins nexus-staging-maven-plugin - 1.6.7 + 1.7.0 true ossrh @@ -225,7 +225,7 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 + 3.3.1 attach-sources @@ -238,7 +238,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 2.9.1 + 3.11.2 attach-javadocs @@ -251,7 +251,7 @@ org.apache.maven.plugins maven-gpg-plugin - 1.5 + 3.2.7 sign-artifacts From cf21eb2be94d143c0e367aafe0e11a31cb6a5035 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 15:06:13 +0900 Subject: [PATCH 075/169] Limit permissions on CI action --- .github/workflows/maven.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 60fac096..d5dc5ea5 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -1,5 +1,8 @@ name: Java CI with Maven +permissions: + contents: read + on: push: branches: [ "master" ] From e3f06824297ed6c4a86dc7474d5690baa4d86997 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 16:16:53 +0900 Subject: [PATCH 076/169] CI: Remove dependency graph step We'll do this in settings instead. 
--- .github/workflows/maven.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index d5dc5ea5..2caac444 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -34,9 +34,4 @@ jobs: restore-keys: | ${{ runner.os }}-maven- - name: Build with Maven - run: mvn -B package --file pom.xml - - # Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive - - name: Update dependency graph - if: ${{ github.event_name == 'push' }} - uses: advanced-security/maven-dependency-submission-action@v4.1.1 \ No newline at end of file + run: mvn -B package --file pom.xml \ No newline at end of file From 840ae37426d08946dfaca4aeaa431f2b36ae3a6a Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 16:38:21 +0900 Subject: [PATCH 077/169] Re-add and undeprecate Reporter.shortReportLineTo(PrintWriter) Turns out this is used quite a bit. 
--- src/main/java/org/archive/util/Reporter.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/archive/util/Reporter.java b/src/main/java/org/archive/util/Reporter.java index dd21b53d..8da38afa 100644 --- a/src/main/java/org/archive/util/Reporter.java +++ b/src/main/java/org/archive/util/Reporter.java @@ -31,7 +31,14 @@ public interface Reporter { * @param writer to receive report */ public void reportTo(PrintWriter writer) throws IOException; - + + /** + * Write a short single-line summary report + * + * @param pw writer to receive report + */ + public void shortReportLineTo(PrintWriter pw) throws IOException; + /** * @return Same data that's in the single line report, as key-value pairs */ From 511a9da6d9077ffd437be46615aa1fe68a20a3b6 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 16:51:33 +0900 Subject: [PATCH 078/169] Update CHANGES.md for 2.0.1 --- CHANGES.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 6d9c5880..a6c131a5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,11 @@ +2.0.1 +----- + +### Changes + +* Re-added `Reporter.shortReportLineTo(PrintWriter)` as it turned out to be important to Heritrix. 
+ + 2.0.0 ----- From 37dee9683f521d3ff3b7704edd762dbab8c81504 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 16:52:04 +0900 Subject: [PATCH 079/169] [maven-release-plugin] prepare release webarchive-commons-2.0.1 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 4cb75f14..4fde2720 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 2.0.1-SNAPSHOT + 2.0.1 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-2.0.0 + webarchive-commons-2.0.1 From 66883375417d8a3705ff450135a22ab305790ae0 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Wed, 21 May 2025 16:52:09 +0900 Subject: [PATCH 080/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 4fde2720..e03dd34d 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 2.0.1 + 2.0.2-SNAPSHOT jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-2.0.1 + webarchive-commons-2.0.0 From c28cb73c9be695e95e4d06bcb002230c92adb5ee Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Mon, 14 Jul 2025 17:34:02 -0700 Subject: [PATCH 081/169] feat: handle unicode, handle unsorted input edge cases, namespace public suffixes file to avoid collisions with other jars --- .../java/org/archive/net/PublicSuffixes.java | 30 ++-- .../{ => org/archive}/effective_tld_names.dat | 0 .../org/archive/net/PublicSuffixesTest.java | 136 ++++++++++++++---- 3 files changed, 127 insertions(+), 39 deletions(-) rename src/main/resources/{ => org/archive}/effective_tld_names.dat (100%) 
diff --git a/src/main/java/org/archive/net/PublicSuffixes.java b/src/main/java/org/archive/net/PublicSuffixes.java index af024949..e436b8dc 100644 --- a/src/main/java/org/archive/net/PublicSuffixes.java +++ b/src/main/java/org/archive/net/PublicSuffixes.java @@ -121,6 +121,7 @@ public boolean add(CharSequence s) { i++; // zero-length match holds only when both cs and s are empty. if (i == 0) return cs.length() == 0 && s.length() == 0; + // cs is longer than s, so we need to replace cs with a prefix, and add a branch if (i < cs.length()) { CharSequence cs0 = cs.subSequence(0, i); CharSequence cs1 = cs.subSequence(i, cs.length()); @@ -128,10 +129,21 @@ public boolean add(CharSequence s) { cs = cs0; Node alt1 = new Node(cs1, branches); (branches = new ArrayList()).add(alt1); - addBranch(cs2); + if(cs2.length() == 0) { + // if cs2 is empty, we have a terminal node. + branches.add(new Node("", null)); + } else { + // otherwise, we have a new branch. + addBranch(cs2); + } + } else { - assert i == cs.length(); - addBranch(s.subSequence(i, s.length())); + // s is longer than cs, so we need to add a branch + if(i != s.length()) { + // but not if they are equal. 
+ assert i == cs.length(); + addBranch(s.subSequence(i, s.length())); + } } return true; } @@ -172,8 +184,8 @@ public static void main(String args[]) throws IOException { InputStream is; if (args.length == 0 || "=".equals(args[0])) { // use bundled list - is = PublicSuffixes.class.getClassLoader().getResourceAsStream( - "effective_tld_names.dat"); + is = PublicSuffixes.class.getResourceAsStream( + "/org/archive/effective_tld_names.dat"); } else { is = new FileInputStream(args[0]); } @@ -265,7 +277,7 @@ protected static void buildRegex(Node alt, StringBuilder sb) { sb.append("(?="); close = ")"; } else if (c == '*') { - sb.append("[-\\w]+"); + sb.append("[-\\w\\u00C0-\\u017F]+"); } else { sb.append(c); } @@ -304,7 +316,7 @@ private static String surtPrefixRegexFromTrie(Node trie) { regex.append("(?ix)^\n"); trie.addBranch("*,"); // for new/unknown TLDs buildRegex(trie, regex); - regex.append("\n([-\\w]+,)"); + regex.append("\n([-\\w\\u00C0-\\u017F]+,)"); return regex.toString(); } @@ -321,8 +333,8 @@ public static synchronized String getTopmostAssignedSurtPrefixRegex() { // use bundled list try { BufferedReader reader = new BufferedReader(new InputStreamReader( - PublicSuffixes.class.getClassLoader().getResourceAsStream( - "effective_tld_names.dat"), "UTF-8")); + PublicSuffixes.class.getResourceAsStream( + "/org/archive/effective_tld_names.dat"), "UTF-8")); topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader); IOUtils.closeQuietly(reader); } catch (UnsupportedEncodingException ex) { diff --git a/src/main/resources/effective_tld_names.dat b/src/main/resources/org/archive/effective_tld_names.dat similarity index 100% rename from src/main/resources/effective_tld_names.dat rename to src/main/resources/org/archive/effective_tld_names.dat diff --git a/src/test/java/org/archive/net/PublicSuffixesTest.java b/src/test/java/org/archive/net/PublicSuffixesTest.java index 758d7f46..96b6772c 100644 --- a/src/test/java/org/archive/net/PublicSuffixesTest.java 
+++ b/src/test/java/org/archive/net/PublicSuffixesTest.java @@ -19,10 +19,10 @@ package org.archive.net; -import java.io.PrintWriter; -import java.io.StringWriter; +import java.io.*; import java.util.ArrayList; import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.archive.net.PublicSuffixes.Node; import org.junit.jupiter.api.Test; @@ -48,7 +48,7 @@ public void testCompare() { assertEquals(-1, n.compareTo(new Node("*,"))); assertEquals(-1, n.compareTo(new Node("!muga,"))); assertEquals(-1, n.compareTo(new Node(""))); - + n = new Node("*,"); assertEquals(1, n.compareTo('a')); assertEquals(0, n.compareTo('*')); @@ -56,7 +56,7 @@ public void testCompare() { assertEquals(0, n.compareTo(new Node("*,"))); assertEquals(1, n.compareTo(new Node("!muga,"))); assertEquals(-1, n.compareTo(new Node(""))); - + n = new Node("!hoge"); assertEquals(1, n.compareTo('a')); assertEquals(-1, n.compareTo('*')); @@ -64,14 +64,14 @@ public void testCompare() { assertEquals(-1, n.compareTo(new Node("*,"))); assertEquals(0, n.compareTo(new Node("!muga,"))); assertEquals(-1, n.compareTo(new Node(""))); - + n = new Node(""); assertEquals(1, n.compareTo('a')); assertEquals(1, n.compareTo('*')); assertEquals(1, n.compareTo('!')); assertEquals(0, n.compareTo(new Node(""))); } - + protected String dump(Node alt) { StringWriter w = new StringWriter(); PublicSuffixes.dump(alt, 0, new PrintWriter(w)); @@ -121,38 +121,114 @@ public void testTrie3() { " \"\"" + NL, dump(alt)); } + @Test + public void testTrie4() { + StringBuilder sb = new StringBuilder(); + sb.append("us-east-1.amazonaws.com\n"); + sb.append("execute-api.us-east-1.amazonaws.com\n"); + // Test regex build ordering of branches. 
Second entry is a superset of the first + + StringReader reader = new StringReader(sb.toString()); + String regex = PublicSuffixes.getTopmostAssignedSurtPrefixRegex(new BufferedReader(reader)); + assertEquals("(?ix)^\n" + + "(?:com,amazonaws,us-east-1,(?:execute-api,|)|[-\\w\\u00C0-\\u017F]+,)\n" + + "([-\\w\\u00C0-\\u017F]+,)", regex); + } + + @Test + public void testTrie5() { + StringBuilder sb = new StringBuilder(); + sb.append("execute-api.us-east-1.amazonaws.com\n"); + sb.append("us-east-1.amazonaws.com\n"); + // Test regex build ordering of branches. Second entry is a proper subset of the first + + StringReader reader = new StringReader(sb.toString()); + String regex = PublicSuffixes.getTopmostAssignedSurtPrefixRegex(new BufferedReader(reader)); + assertEquals("(?ix)^\n" + + "(?:com,amazonaws,us-east-1,(?:execute-api,|)|[-\\w\\u00C0-\\u017F]+,)\n" + + "([-\\w\\u00C0-\\u017F]+,)", regex); + } + @Test + public void testTrie6() { + StringBuilder sb = new StringBuilder(); + sb.append("va.it\n"); + sb.append("val-daosta.it\n"); + sb.append("vald-aosta.it\n"); + sb.append("valled-aosta.it\n"); + sb.append("vallée-aoste.it\n"); + // Test input that breaks without proper unicode handling. 
+ + StringReader reader = new StringReader(sb.toString()); + String regex = PublicSuffixes.getTopmostAssignedSurtPrefixRegex(new BufferedReader(reader)); + assertEquals("(?ix)^\n" + + "(?:it,va(?:,|l(?:-daosta,|d-aosta,|l(?:ed-aosta,|ée-aoste,)))|[-\\w\\u00C0-\\u017F]+,)\n" + + "([-\\w\\u00C0-\\u017F]+,)", regex); + + Matcher m = Pattern.compile(regex).matcher(""); + matchPrefix("it,va,example","it,va,", m); + matchPrefix("it,va,","it,va,", m); + matchPrefix("it,val-daosta,www","it,val-daosta,", m); + matchPrefix("it,val-daosta,","it,val-daosta,", m); + matchPrefix("it,vald-aosta,www","it,vald-aosta,", m); + matchPrefix("it,vald-aosta,","it,vald-aosta,", m); + matchPrefix("it,valled-aosta,www","it,valled-aosta,", m); + matchPrefix("it,valled-aosta,","it,valled-aosta,", m); + matchPrefix("it,vallze-aoste,","it,vallze-aoste,", m); + matchPrefix("it,vallze-aoste,www,222","it,vallze-aoste,", m); + } + @Test + public void testTrie7() { + StringBuilder sb = new StringBuilder(); + sb.append("*.fk\n"); + sb.append("com.fm\n"); + sb.append("edu.fm\n"); + sb.append("fm\n"); + // Test condition that generates duplicate branches f -> m, + + StringReader reader = new StringReader(sb.toString()); + String regex = PublicSuffixes.getTopmostAssignedSurtPrefixRegex(new BufferedReader(reader)); + assertEquals("(?ix)^\n" + + "(?:f(?:k,[-\\w\\u00C0-\\u017F]+,|m,(?:com,|edu,))|[-\\w\\u00C0-\\u017F]+,)\n" + + "([-\\w\\u00C0-\\u017F]+,)", regex); + + Matcher m = Pattern.compile(regex).matcher(""); + matchPrefix("fm,edu,www","fm,edu,", m); + matchPrefix("fm,edu,","fm,edu,", m); + matchPrefix("fm,example,www","fm,example,", m); + matchPrefix("fm,example,","fm,example,", m); + } + // test of higher-level functionality - Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() .matcher(""); @Test public void testBasics() { - matchPrefix("com,example,www,", "com,example,"); - matchPrefix("com,example,", "com,example,"); - matchPrefix("org,archive,www,", "org,archive,"); - 
matchPrefix("org,archive,", "org,archive,"); - matchPrefix("fr,yahoo,www,", "fr,yahoo,"); - matchPrefix("fr,yahoo,", "fr,yahoo,"); - matchPrefix("au,com,foobar,www,", "au,com,foobar,"); - matchPrefix("au,com,foobar,", "au,com,foobar,"); - matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); - matchPrefix("uk,co,virgin,", "uk,co,virgin,"); - matchPrefix("au,com,example,www,", "au,com,example,"); - matchPrefix("au,com,example,", "au,com,example,"); + matchPrefix("com,example,www,", "com,example,", m); + matchPrefix("com,example,", "com,example,", m); + matchPrefix("org,archive,www,", "org,archive,", m); + matchPrefix("org,archive,", "org,archive,", m); + matchPrefix("fr,yahoo,www,", "fr,yahoo,", m); + matchPrefix("fr,yahoo,", "fr,yahoo,", m); + matchPrefix("au,com,foobar,www,", "au,com,foobar,", m); + matchPrefix("au,com,foobar,", "au,com,foobar,", m); + matchPrefix("uk,co,virgin,www,", "uk,co,virgin,", m); + matchPrefix("uk,co,virgin,", "uk,co,virgin,", m); + matchPrefix("au,com,example,www,", "au,com,example,", m); + matchPrefix("au,com,example,", "au,com,example,", m); matchPrefix("jp,yokohama,public,assigned,www,", - "jp,yokohama,public,assigned,"); - matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); + "jp,yokohama,public,assigned,", m); + matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,", m); } @Test public void testDomainWithDash() { - matchPrefix("de,bad-site,www", "de,bad-site,"); + matchPrefix("de,bad-site,www", "de,bad-site,", m); } @Test public void testDomainWithNumbers() { - matchPrefix("de,archive4u,www", "de,archive4u,"); + matchPrefix("de,archive4u,www", "de,archive4u,", m); } @Test @@ -172,10 +248,10 @@ public void testIPV6() { @Test public void testExceptions() { - matchPrefix("uk,bl,www,", "uk,bl,"); - matchPrefix("uk,bl,", "uk,bl,"); - matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); - matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); + matchPrefix("uk,bl,www,", "uk,bl,", m); + 
matchPrefix("uk,bl,", "uk,bl,", m); + matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,", m); + matchPrefix("jp,tokyo,city,", "jp,tokyo,city,", m); } @Test @@ -183,7 +259,7 @@ public void testFakeTLD() { // we assume any new/unknonwn TLD should be assumed as 2-level; // this is preferable for our grouping purpose but might not be // for a cookie-assigning browser (original purpose of publicsuffixlist) - matchPrefix("zzz,example,www,", "zzz,example,"); + matchPrefix("zzz,example,www,", "zzz,example,", m); } @Test @@ -197,10 +273,10 @@ public void testTopmostAssignedCaching() { assertSame(PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),"topmostAssignedSurtPrefixPattern not cached"); assertSame(PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),"topmostAssignedSurtPrefixRegex not cached"); } - + // TODO: test UTF domains? - protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { + protected void matchPrefix(String surtDomain, String expectedAssignedPrefix, Matcher m) { m.reset(surtDomain); assertTrue(m.find(), "expected match not found in '" + surtDomain); assertEquals(expectedAssignedPrefix, m.group(), "expected match not found"); From d8d850a648a7965a5e58225f79df3702784806fa Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Mon, 14 Jul 2025 17:35:11 -0700 Subject: [PATCH 082/169] chore: update to latest public suffixes effective_tld_names.dat --- .../org/archive/effective_tld_names.dat | 13455 +++++++++++++--- 1 file changed, 11147 insertions(+), 2308 deletions(-) diff --git a/src/main/resources/org/archive/effective_tld_names.dat b/src/main/resources/org/archive/effective_tld_names.dat index 7c4a0860..91bf51b0 100644 --- a/src/main/resources/org/archive/effective_tld_names.dat +++ b/src/main/resources/org/archive/effective_tld_names.dat @@ -1,49 +1,64 @@ // This Source Code Form is subject to the terms of the Mozilla Public // 
License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Please pull this list from, and only from https://publicsuffix.org/list/public_suffix_list.dat, +// rather than any other VCS sites. Pulling from any other URL is not guaranteed to be supported. + +// VERSION: 2025-07-09_15-23-09_UTC +// COMMIT: c38a2f8e8862ad65d91af25dee90002c61329953 + +// Instructions on pulling and using this list can be found at https://publicsuffix.org/list/. // ===BEGIN ICANN DOMAINS=== -// ac : http://en.wikipedia.org/wiki/.ac +// ac : http://nic.ac/rules.htm ac com.ac edu.ac gov.ac -net.ac mil.ac +net.ac org.ac -// ad : http://en.wikipedia.org/wiki/.ad +// ad : https://www.iana.org/domains/root/db/ad.html +// Confirmed by Amadeu Abril i Abril (CORE) 2024-11-17 ad -nom.ad -// ae : http://en.wikipedia.org/wiki/.ae -// see also: "Domain Name Eligibility Policy" at http://www.aeda.ae/eng/aepolicy.php +// ae : https://www.iana.org/domains/root/db/ae.html ae +ac.ae co.ae +gov.ae +mil.ae net.ae org.ae sch.ae -ac.ae -gov.ae -mil.ae -// aero : see http://www.information.aero/index.php?id=66 +// aero : https://information.aero/registration/policies/dmp aero +// 2LDs +airline.aero +airport.aero +// 2LDs (currently not accepting registration, seemingly never have) +// As of 2024-07, these are marked as reserved for potential 3LD +// registrations (clause 11 "allocated subdomains" in the 2006 TLD +// policy), but the relevant industry partners have not opened them up +// for registration. Current status can be determined from the TLD's +// policy document: 2LDs that are open for registration must list +// their policy in the TLD's policy. Any 2LD without such a policy is +// not open for registrations. 
accident-investigation.aero accident-prevention.aero aerobatic.aero aeroclub.aero aerodrome.aero agents.aero -aircraft.aero -airline.aero -airport.aero air-surveillance.aero -airtraffic.aero air-traffic-control.aero +aircraft.aero +airtraffic.aero ambulance.aero -amusement.aero association.aero author.aero ballooning.aero @@ -120,27 +135,27 @@ union.aero workinggroup.aero works.aero -// af : http://www.nic.af/help.jsp +// af : https://www.nic.af/domain-price af -gov.af com.af -org.af -net.af edu.af +gov.af +net.af +org.af // ag : http://www.nic.ag/prices.htm ag +co.ag com.ag -org.ag net.ag -co.ag nom.ag +org.ag // ai : http://nic.com.ai/ ai -off.ai com.ai net.ai +off.ai org.ai // al : http://www.ert.gov.al/ert_alb/faq_det.html?Id=31 @@ -152,81 +167,92 @@ mil.al net.al org.al -// am : http://en.wikipedia.org/wiki/.am +// am : https://www.amnic.net/policy/en/Policy_EN.pdf +// Confirmed by ISOC AM 2024-11-18 am - -// an : http://www.una.an/an_domreg/default.asp -an -com.an -net.an -org.an -edu.an - -// ao : http://en.wikipedia.org/wiki/.ao -// http://www.dns.ao/REGISTR.DOC +co.am +com.am +commune.am +net.am +org.am + +// ao : https://www.iana.org/domains/root/db/ao.html +// https://www.dns.ao/ao/ ao +co.ao ed.ao +edu.ao +gov.ao gv.ao +it.ao og.ao -co.ao +org.ao pb.ao -it.ao -// aq : http://en.wikipedia.org/wiki/.aq +// aq : https://www.iana.org/domains/root/db/aq.html aq -// ar : http://en.wikipedia.org/wiki/.ar -*.ar -!congresodelalengua3.ar -!educ.ar -!gobiernoelectronico.ar -!mecon.ar -!nacion.ar -!nic.ar -!promocion.ar -!retina.ar -!uba.ar - -// arpa : http://en.wikipedia.org/wiki/.arpa +// ar : https://nic.ar/es/nic-argentina/normativa +ar +bet.ar +com.ar +coop.ar +edu.ar +gob.ar +gov.ar +int.ar +mil.ar +musica.ar +mutual.ar +net.ar +org.ar +seg.ar +senasa.ar +tur.ar + +// arpa : https://www.iana.org/domains/root/db/arpa.html // Confirmed by registry 2008-06-18 +arpa e164.arpa +home.arpa in-addr.arpa ip6.arpa iris.arpa uri.arpa urn.arpa -// as : 
http://en.wikipedia.org/wiki/.as +// as : https://www.iana.org/domains/root/db/as.html as gov.as -// asia : http://en.wikipedia.org/wiki/.asia +// asia : https://www.iana.org/domains/root/db/asia.html asia -// at : http://en.wikipedia.org/wiki/.at +// at : https://www.iana.org/domains/root/db/at.html // Confirmed by registry 2008-06-17 at ac.at +sth.ac.at co.at gv.at or.at -// au : http://en.wikipedia.org/wiki/.au -// http://www.auda.org.au/ +// au : https://www.iana.org/domains/root/db/au.html +// https://www.auda.org.au/ +// Confirmed by registry 2024-11-17 +au // 2LDs +asn.au com.au -net.au -org.au edu.au gov.au -asn.au id.au -csiro.au +net.au +org.au // Historic 2LDs (closed to new registration, but sites still exist) -info.au conf.au oz.au -// CGDNs - http://www.cgdn.org.au/ +// CGDNs : https://www.auda.org.au/au-domain-names/the-different-au-domain-names/state-and-territory-domain-names/ act.au nsw.au nt.au @@ -237,6 +263,8 @@ vic.au wa.au // 3LDs act.edu.au +catholic.edu.au +// eq.edu.au - Removed at the request of the Queensland Department of Education nsw.edu.au nt.edu.au qld.edu.au @@ -244,54 +272,57 @@ sa.edu.au tas.edu.au vic.edu.au wa.edu.au -act.gov.au -// Removed at request of Shae.Donelan@services.nsw.gov.au, 2010-03-04 -// nsw.gov.au -nt.gov.au +// act.gov.au - Bug 984824 - Removed at request of Greg Tankard +// nsw.gov.au - Bug 547985 - Removed at request of +// nt.gov.au - Bug 940478 - Removed at request of Greg Connors qld.gov.au sa.gov.au tas.gov.au vic.gov.au wa.gov.au +// 4LDs +// education.tas.edu.au - Removed at the request of the Department of Education Tasmania +schools.nsw.edu.au -// aw : http://en.wikipedia.org/wiki/.aw +// aw : https://www.iana.org/domains/root/db/aw.html aw com.aw -// ax : http://en.wikipedia.org/wiki/.ax +// ax : https://www.iana.org/domains/root/db/ax.html ax -// az : http://en.wikipedia.org/wiki/.az +// az : https://www.iana.org/domains/root/db/az.html +// Confirmed via https://whois.az/?page_id=10 2024-12-11 az 
+biz.az +co.az com.az -net.az -int.az -gov.az -org.az edu.az +gov.az info.az -pp.az +int.az mil.az name.az +net.az +org.az +pp.az +// No longer available for registration, however domains exist as of 2024-12-11 +// see https://whois.az/?page_id=783 pro.az -biz.az -// ba : http://en.wikipedia.org/wiki/.ba +// ba : https://www.iana.org/domains/root/db/ba.html ba -org.ba -net.ba +com.ba edu.ba gov.ba mil.ba -unsa.ba -unbi.ba -co.ba -com.ba -rs.ba +net.ba +org.ba -// bb : http://en.wikipedia.org/wiki/.bb +// bb : https://www.iana.org/domains/root/db/bb.html bb biz.bb +co.bb com.bb edu.bb gov.bb @@ -299,22 +330,33 @@ info.bb net.bb org.bb store.bb +tv.bb -// bd : http://en.wikipedia.org/wiki/.bd +// bd : https://www.iana.org/domains/root/db/bd.html *.bd -// be : http://en.wikipedia.org/wiki/.be +// be : https://www.iana.org/domains/root/db/be.html // Confirmed by registry 2008-06-08 be ac.be -// bf : http://en.wikipedia.org/wiki/.bf +// bf : https://www.iana.org/domains/root/db/bf.html bf gov.bf -// bg : http://en.wikipedia.org/wiki/.bg +// bg : https://www.iana.org/domains/root/db/bg.html // https://www.register.bg/user/static/rules/en/index.html bg +0.bg +1.bg +2.bg +3.bg +4.bg +5.bg +6.bg +7.bg +8.bg +9.bg a.bg b.bg c.bg @@ -341,26 +383,16 @@ w.bg x.bg y.bg z.bg -0.bg -1.bg -2.bg -3.bg -4.bg -5.bg -6.bg -7.bg -8.bg -9.bg -// bh : http://en.wikipedia.org/wiki/.bh +// bh : https://www.iana.org/domains/root/db/bh.html bh com.bh edu.bh +gov.bh net.bh org.bh -gov.bh -// bi : http://en.wikipedia.org/wiki/.bi +// bi : https://www.iana.org/domains/root/db/bi.html // http://whois.nic.bi/ bi co.bi @@ -369,16 +401,34 @@ edu.bi or.bi org.bi -// biz : http://en.wikipedia.org/wiki/.biz +// biz : https://www.iana.org/domains/root/db/biz.html biz -// bj : http://en.wikipedia.org/wiki/.bj +// bj : https://nic.bj/bj-suffixes.txt +// Submitted by registry bj -asso.bj -barreau.bj -gouv.bj - -// bm : http://www.bermudanic.bm/dnr-text.txt +africa.bj +agro.bj +architectes.bj +assur.bj 
+avocats.bj +co.bj +com.bj +eco.bj +econo.bj +edu.bj +info.bj +loisirs.bj +money.bj +net.bj +org.bj +ote.bj +restaurant.bj +resto.bj +tourism.bj +univ.bj + +// bm : https://www.bermudanic.bm/domain-registration/index.php bm com.bm edu.bm @@ -386,90 +436,231 @@ gov.bm net.bm org.bm -// bn : http://en.wikipedia.org/wiki/.bn -*.bn +// bn : http://www.bnnic.bn/faqs +bn +com.bn +edu.bn +gov.bn +net.bn +org.bn -// bo : http://www.nic.bo/ +// bo : https://nic.bo +// Confirmed by registry 2024-11-19 bo com.bo edu.bo -gov.bo gob.bo int.bo -org.bo -net.bo mil.bo +net.bo +org.bo tv.bo - -// br : http://registro.br/dominio/dpn.html -// Updated by registry 2011-03-01 +web.bo +// Social Domains +academia.bo +agro.bo +arte.bo +blog.bo +bolivia.bo +ciencia.bo +cooperativa.bo +democracia.bo +deporte.bo +ecologia.bo +economia.bo +empresa.bo +indigena.bo +industria.bo +info.bo +medicina.bo +movimiento.bo +musica.bo +natural.bo +nombre.bo +noticias.bo +patria.bo +plurinacional.bo +politica.bo +profesional.bo +pueblo.bo +revista.bo +salud.bo +tecnologia.bo +tksat.bo +transporte.bo +wiki.bo + +// br : http://registro.br/dominio/categoria.html +// Submitted by registry br +9guacu.br +abc.br adm.br adv.br agr.br +aju.br am.br +anani.br +aparecida.br +app.br arq.br art.br ato.br b.br +barueri.br +belem.br +bet.br +bhz.br +bib.br bio.br blog.br bmd.br +boavista.br +bsb.br +campinagrande.br +campinas.br +caxias.br cim.br cng.br cnt.br com.br +contagem.br coop.br +coz.br +cri.br +cuiaba.br +curitiba.br +def.br +des.br +det.br +dev.br ecn.br eco.br edu.br emp.br +enf.br eng.br esp.br etc.br eti.br far.br +feira.br flog.br +floripa.br fm.br fnd.br +fortal.br fot.br +foz.br fst.br g12.br +geo.br ggf.br +goiania.br gov.br +// gov.br 26 states + df https://en.wikipedia.org/wiki/States_of_Brazil +ac.gov.br +al.gov.br +am.gov.br +ap.gov.br +ba.gov.br +ce.gov.br +df.gov.br +es.gov.br +go.gov.br +ma.gov.br +mg.gov.br +ms.gov.br +mt.gov.br +pa.gov.br +pb.gov.br +pe.gov.br +pi.gov.br +pr.gov.br 
+rj.gov.br +rn.gov.br +ro.gov.br +rr.gov.br +rs.gov.br +sc.gov.br +se.gov.br +sp.gov.br +to.gov.br +gru.br imb.br ind.br inf.br +jab.br +jampa.br +jdf.br +joinville.br jor.br jus.br leg.br +leilao.br lel.br +log.br +londrina.br +macapa.br +maceio.br +manaus.br +maringa.br mat.br med.br mil.br +morena.br +mp.br mus.br +natal.br net.br -nom.br +niteroi.br +*.nom.br not.br ntr.br odo.br +ong.br org.br +osasco.br +palmas.br +poa.br ppg.br pro.br psc.br psi.br +pvh.br qsl.br radio.br rec.br +recife.br +rep.br +ribeirao.br +rio.br +riobranco.br +riopreto.br +salvador.br +sampa.br +santamaria.br +santoandre.br +saobernardo.br +saogonca.br +seg.br +sjc.br slg.br +slz.br +sorocaba.br srv.br taxi.br +tc.br +tec.br teo.br +the.br tmp.br trd.br tur.br tv.br +udi.br vet.br +vix.br vlog.br wiki.br zlg.br @@ -477,12 +668,12 @@ zlg.br // bs : http://www.nic.bs/rules.html bs com.bs -net.bs -org.bs edu.bs gov.bs +net.bs +org.bs -// bt : http://en.wikipedia.org/wiki/.bt +// bt : https://www.iana.org/domains/root/db/bt.html bt com.bt edu.bt @@ -491,16 +682,19 @@ net.bt org.bt // bv : No registrations at this time. -// Submitted by registry 2006-06-16 +// Submitted by registry +bv -// bw : http://en.wikipedia.org/wiki/.bw -// http://www.gobin.info/domainname/bw.doc -// list of other 2nd level tlds ? +// bw : https://www.iana.org/domains/root/db/bw.html +// https://nic.net.bw/bw-name-structure bw +ac.bw co.bw +gov.bw +net.bw org.bw -// by : http://en.wikipedia.org/wiki/.by +// by : https://www.iana.org/domains/root/db/by.html // http://tld.by/rules_2006_en.html // list of other 2nd level tlds ? by @@ -510,20 +704,20 @@ mil.by // second-level domain, but it's being used as one (see www.google.com.by and // www.yahoo.com.by, for example), so we list it here for safety's sake. 
com.by - // http://hoster.by/ of.by -// bz : http://en.wikipedia.org/wiki/.bz +// bz : https://www.iana.org/domains/root/db/bz.html // http://www.belizenic.bz/ bz +co.bz com.bz -net.bz -org.bz edu.bz gov.bz +net.bz +org.bz -// ca : http://en.wikipedia.org/wiki/.ca +// ca : https://www.iana.org/domains/root/db/ca.html ca // ca geographical names ab.ca @@ -540,77 +734,78 @@ pe.ca qc.ca sk.ca yk.ca -// gc.ca: http://en.wikipedia.org/wiki/.gc.ca +// gc.ca: https://en.wikipedia.org/wiki/.gc.ca // see also: http://registry.gc.ca/en/SubdomainFAQ gc.ca -// cat : http://en.wikipedia.org/wiki/.cat +// cat : https://www.iana.org/domains/root/db/cat.html cat -// cc : http://en.wikipedia.org/wiki/.cc +// cc : https://www.iana.org/domains/root/db/cc.html cc -// cd : http://en.wikipedia.org/wiki/.cd -// see also: https://www.nic.cd/domain/insertDomain_2.jsp?act=1 +// cd : https://www.iana.org/domains/root/db/cd.html +// https://www.nic.cd cd gov.cd -// cf : http://en.wikipedia.org/wiki/.cf +// cf : https://www.iana.org/domains/root/db/cf.html cf -// cg : http://en.wikipedia.org/wiki/.cg +// cg : https://www.iana.org/domains/root/db/cg.html cg -// ch : http://en.wikipedia.org/wiki/.ch +// ch : https://www.iana.org/domains/root/db/ch.html ch -// ci : http://en.wikipedia.org/wiki/.ci -// http://www.nic.ci/index.php?page=charte +// ci : https://www.iana.org/domains/root/db/ci.html ci -org.ci -or.ci -com.ci +ac.ci +aéroport.ci +asso.ci co.ci -edu.ci +com.ci ed.ci -ac.ci -net.ci +edu.ci go.ci -asso.ci -aéroport.ci -int.ci -presse.ci -md.ci gouv.ci +int.ci +net.ci +or.ci +org.ci -// ck : http://en.wikipedia.org/wiki/.ck +// ck : https://www.iana.org/domains/root/db/ck.html *.ck !www.ck -// cl : http://en.wikipedia.org/wiki/.cl +// cl : https://www.nic.cl +// Confirmed by .CL registry cl -gov.cl -gob.cl co.cl +gob.cl +gov.cl mil.cl -// cm : http://en.wikipedia.org/wiki/.cm +// cm : https://www.iana.org/domains/root/db/cm.html plus bug 981927 cm +co.cm +com.cm gov.cm +net.cm -// cn : 
http://en.wikipedia.org/wiki/.cn -// Submitted by registry 2008-06-11 +// cn : https://www.iana.org/domains/root/db/cn.html +// Submitted by registry cn ac.cn com.cn edu.cn gov.cn +mil.cn net.cn org.cn -mil.cn 公司.cn -网络.cn 網絡.cn +网络.cn // cn geographic names ah.cn bj.cn @@ -618,18 +813,20 @@ cq.cn fj.cn gd.cn gs.cn -gz.cn gx.cn +gz.cn ha.cn hb.cn he.cn hi.cn +hk.cn hl.cn hn.cn jl.cn js.cn jx.cn ln.cn +mo.cn nm.cn nx.cn qh.cn @@ -639,38 +836,31 @@ sh.cn sn.cn sx.cn tj.cn +tw.cn xj.cn xz.cn yn.cn zj.cn -hk.cn -mo.cn -tw.cn -// co : http://en.wikipedia.org/wiki/.co -// Submitted by registry 2008-06-11 +// co : https://www.iana.org/domains/root/db/co.html +// https://www.cointernet.com.co/como-funciona-un-dominio-restringido +// Confirmed by registry 2024-11-18 co -arts.co com.co edu.co -firm.co gov.co -info.co -int.co mil.co net.co nom.co org.co -rec.co -web.co -// com : http://en.wikipedia.org/wiki/.com +// com : https://www.iana.org/domains/root/db/com.html com -// coop : http://en.wikipedia.org/wiki/.coop +// coop : https://www.iana.org/domains/root/db/coop.html coop -// cr : http://www.nic.cr/niccr_publico/showRegistroDominiosScreen.do +// cr : https://nic.cr/capitulo-1-registro-de-un-nombre-de-dominio/ cr ac.cr co.cr @@ -680,58 +870,86 @@ go.cr or.cr sa.cr -// cu : http://en.wikipedia.org/wiki/.cu +// cu : https://www.iana.org/domains/root/db/cu.html cu com.cu edu.cu -org.cu -net.cu -gov.cu +gob.cu inf.cu +nat.cu +net.cu +org.cu -// cv : http://en.wikipedia.org/wiki/.cv +// cv : https://www.iana.org/domains/root/db/cv.html +// https://ola.cv/domain-extensions-under-cv/ +// Confirmed by registry 2024-11-26 cv - -// cw : http://www.una.cw/cw_registry/ -// Confirmed by registry 2013-03-26 +com.cv +edu.cv +id.cv +int.cv +net.cv +nome.cv +org.cv +publ.cv + +// cw : https://www.uoc.cw/cw-registry +// Confirmed by registry 2024-11-19 cw com.cw edu.cw net.cw org.cw -// cx : http://en.wikipedia.org/wiki/.cx +// cx : https://www.iana.org/domains/root/db/cx.html // list of 
other 2nd level tlds ? cx gov.cx -// cy : http://en.wikipedia.org/wiki/.cy -*.cy - -// cz : http://en.wikipedia.org/wiki/.cz +// cy : http://www.nic.cy/ +// Submitted by Panayiotou Fotia +// https://nic.cy/wp-content/uploads/2024/01/Create-Request-for-domain-name-registration-1.pdf +cy +ac.cy +biz.cy +com.cy +ekloges.cy +gov.cy +ltd.cy +mil.cy +net.cy +org.cy +press.cy +pro.cy +tm.cy + +// cz : https://www.iana.org/domains/root/db/cz.html cz -// de : http://en.wikipedia.org/wiki/.de +// de : https://www.iana.org/domains/root/db/de.html // Confirmed by registry (with technical // reservations) 2008-07-01 de -// dj : http://en.wikipedia.org/wiki/.dj +// dj : https://www.iana.org/domains/root/db/dj.html dj -// dk : http://en.wikipedia.org/wiki/.dk +// dk : https://www.iana.org/domains/root/db/dk.html // Confirmed by registry 2008-06-17 dk -// dm : http://en.wikipedia.org/wiki/.dm +// dm : https://www.iana.org/domains/root/db/dm.html +// https://nic.dm/policies/pdf/DMRulesandGuidelines2024v1.pdf +// Confirmed by registry 2024-11-19 dm +co.dm com.dm -net.dm -org.dm edu.dm gov.dm +net.dm +org.dm -// do : http://en.wikipedia.org/wiki/.do +// do : https://www.iana.org/domains/root/db/do.html do art.do com.do @@ -744,221 +962,287 @@ org.do sld.do web.do -// dz : http://en.wikipedia.org/wiki/.dz +// dz : http://www.nic.dz/images/pdf_nic/charte.pdf dz +art.dz +asso.dz com.dz -org.dz -net.dz -gov.dz edu.dz -asso.dz +gov.dz +net.dz +org.dz pol.dz -art.dz +soc.dz +tm.dz -// ec : http://www.nic.ec/reg/paso1.asp -// Submitted by registry 2008-07-04 +// ec : https://www.nic.ec/ +// Submitted by registry ec +abg.ec +adm.ec +agron.ec +arqt.ec +art.ec +bar.ec +chef.ec com.ec -info.ec -net.ec +cont.ec +cpa.ec +cue.ec +dent.ec +dgn.ec +disco.ec +doc.ec +edu.ec +eng.ec +esm.ec fin.ec +fot.ec +gal.ec +gob.ec +gov.ec +gye.ec +ibr.ec +info.ec k12.ec +lat.ec +loj.ec med.ec -pro.ec -org.ec -edu.ec -gov.ec -gob.ec mil.ec - -// edu : http://en.wikipedia.org/wiki/.edu +mktg.ec +mon.ec +net.ec 
+ntr.ec +odont.ec +org.ec +pro.ec +prof.ec +psic.ec +psiq.ec +pub.ec +rio.ec +rrpp.ec +sal.ec +tech.ec +tul.ec +tur.ec +uio.ec +vet.ec +xxx.ec + +// edu : https://www.iana.org/domains/root/db/edu.html edu -// ee : http://www.eenet.ee/EENet/dom_reeglid.html#lisa_B +// ee : https://www.internet.ee/domains/general-domains-and-procedure-for-registration-of-sub-domains-under-general-domains ee +aip.ee +com.ee edu.ee +fie.ee gov.ee -riik.ee lib.ee med.ee -com.ee -pri.ee -aip.ee org.ee -fie.ee +pri.ee +riik.ee -// eg : http://en.wikipedia.org/wiki/.eg -eg +// eg : https://www.iana.org/domains/root/db/eg.html +// https://domain.eg/en/domain-rules/subdomain-names-types/ +eg +ac.eg com.eg edu.eg eun.eg gov.eg +info.eg +me.eg mil.eg name.eg net.eg org.eg sci.eg +sport.eg +tv.eg -// er : http://en.wikipedia.org/wiki/.er +// er : https://www.iana.org/domains/root/db/er.html *.er -// es : https://www.nic.es/site_ingles/ingles/dominios/index.html +// es : https://www.dominios.es/en es com.es +edu.es +gob.es nom.es org.es -gob.es -edu.es -// et : http://en.wikipedia.org/wiki/.et -*.et - -// eu : http://en.wikipedia.org/wiki/.eu +// et : https://www.iana.org/domains/root/db/et.html +et +biz.et +com.et +edu.et +gov.et +info.et +name.et +net.et +org.et + +// eu : https://www.iana.org/domains/root/db/eu.html eu -// fi : http://en.wikipedia.org/wiki/.fi +// fi : https://www.iana.org/domains/root/db/fi.html fi -// aland.fi : http://en.wikipedia.org/wiki/.ax +// aland.fi : https://www.iana.org/domains/root/db/ax.html // This domain is being phased out in favor of .ax. As there are still many // domains under aland.fi, we still keep it on the list until aland.fi is // completely removed. 
-// TODO: Check for updates (expected to be phased out around Q1/2009) aland.fi -// fj : http://en.wikipedia.org/wiki/.fj -*.fj - -// fk : http://en.wikipedia.org/wiki/.fk +// fj : http://domains.fj/ +// Submitted by registry 2020-02-11 +fj +ac.fj +biz.fj +com.fj +gov.fj +info.fj +mil.fj +name.fj +net.fj +org.fj +pro.fj + +// fk : https://www.iana.org/domains/root/db/fk.html *.fk -// fm : http://en.wikipedia.org/wiki/.fm +// fm : https://www.iana.org/domains/root/db/fm.html fm +com.fm +edu.fm +net.fm +org.fm -// fo : http://en.wikipedia.org/wiki/.fo +// fo : https://www.iana.org/domains/root/db/fo.html fo -// fr : http://www.afnic.fr/ -// domaines descriptifs : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-descriptifs +// fr : https://www.afnic.fr/ https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf fr -com.fr asso.fr +com.fr +gouv.fr nom.fr prd.fr -presse.fr tm.fr -// domaines sectoriels : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-sectoriels -aeroport.fr -assedic.fr -avocat.fr +// Other SLDs now selfmanaged out of AFNIC range. 
Former "domaines sectoriels", still registration suffixes avoues.fr cci.fr -chambagri.fr -chirurgiens-dentistes.fr -experts-comptables.fr -geometre-expert.fr -gouv.fr greta.fr huissier-justice.fr -medecin.fr -notaires.fr -pharmacien.fr -port.fr -veterinaire.fr -// ga : http://en.wikipedia.org/wiki/.ga +// ga : https://www.iana.org/domains/root/db/ga.html ga // gb : This registry is effectively dormant -// Submitted by registry 2008-06-12 +// Submitted by registry +gb -// gd : http://en.wikipedia.org/wiki/.gd +// gd : https://www.iana.org/domains/root/db/gd.html gd +edu.gd +gov.gd -// ge : http://www.nic.net.ge/policy_en.pdf +// ge : https://nic.ge/en/administrator/the-ge-domain-regulations +// Confirmed by registry 2024-11-20 ge com.ge edu.ge gov.ge -org.ge -mil.ge net.ge +org.ge pvt.ge +school.ge -// gf : http://en.wikipedia.org/wiki/.gf +// gf : https://www.iana.org/domains/root/db/gf.html gf -// gg : http://www.channelisles.net/applic/avextn.shtml +// gg : https://www.channelisles.net/register-1/register-direct +// Confirmed by registry 2013-11-28 gg co.gg -org.gg net.gg -sch.gg -gov.gg +org.gg -// gh : http://en.wikipedia.org/wiki/.gh -// see also: http://www.nic.gh/reg_now.php +// gh : https://www.iana.org/domains/root/db/gh.html +// https://www.nic.gh/ // Although domains directly at second level are not possible at the moment, // they have been possible for some time and may come back. 
gh +biz.gh com.gh edu.gh gov.gh -org.gh mil.gh +net.gh +org.gh // gi : http://www.nic.gi/rules.html gi com.gi -ltd.gi +edu.gi gov.gi +ltd.gi mod.gi -edu.gi org.gi -// gl : http://en.wikipedia.org/wiki/.gl +// gl : https://www.iana.org/domains/root/db/gl.html // http://nic.gl gl +co.gl +com.gl +edu.gl +net.gl +org.gl // gm : http://www.nic.gm/htmlpages%5Cgm-policy.htm gm // gn : http://psg.com/dns/gn/gn.txt -// Submitted by registry 2008-06-17 +// Submitted by registry +gn ac.gn com.gn edu.gn gov.gn -org.gn net.gn +org.gn -// gov : http://en.wikipedia.org/wiki/.gov +// gov : https://www.iana.org/domains/root/db/gov.html gov // gp : http://www.nic.gp/index.php?lang=en gp +asso.gp com.gp -net.gp -mobi.gp edu.gp +mobi.gp +net.gp org.gp -asso.gp -// gq : http://en.wikipedia.org/wiki/.gq +// gq : https://www.iana.org/domains/root/db/gq.html gq -// gr : https://grweb.ics.forth.gr/english/1617-B-2005.html -// Submitted by registry 2008-06-09 +// gr : https://www.iana.org/domains/root/db/gr.html +// Submitted by registry gr com.gr edu.gr +gov.gr net.gr org.gr -gov.gr -// gs : http://en.wikipedia.org/wiki/.gs +// gs : https://www.iana.org/domains/root/db/gs.html gs -// gt : http://www.gt/politicas_de_registro.html +// gt : https://www.gt/sitio/registration_policy.php?lang=en gt com.gt edu.gt @@ -968,21 +1252,35 @@ mil.gt net.gt org.gt -// gu : http://gadao.gov.gu/registration.txt -*.gu - -// gw : http://en.wikipedia.org/wiki/.gw +// gu : http://gadao.gov.gu/register.html +// University of Guam : https://www.uog.edu +// Submitted by uognoc@triton.uog.edu +gu +com.gu +edu.gu +gov.gu +guam.gu +info.gu +net.gu +org.gu +web.gu + +// gw : https://www.iana.org/domains/root/db/gw.html +// gw : https://nic.gw/regras/ gw -// gy : http://en.wikipedia.org/wiki/.gy +// gy : https://www.iana.org/domains/root/db/gy.html // http://registry.gy/ gy co.gy com.gy +edu.gy +gov.gy net.gy +org.gy -// hk : https://www.hkdnr.hk -// Submitted by registry 2008-06-11 +// hk : https://www.hkirc.hk +// 
Submitted by registry hk com.hk edu.hk @@ -990,166 +1288,226 @@ gov.hk idv.hk net.hk org.hk +个人.hk +個人.hk 公司.hk -教育.hk -敎育.hk 政府.hk -個人.hk -个人.hk +敎育.hk +教育.hk 箇人.hk +組織.hk +組织.hk +網絡.hk 網络.hk -网络.hk 组織.hk -網絡.hk -网絡.hk 组织.hk -組織.hk -組织.hk +网絡.hk +网络.hk -// hm : http://en.wikipedia.org/wiki/.hm +// hm : https://www.iana.org/domains/root/db/hm.html hm -// hn : http://www.nic.hn/politicas/ps02,,05.html +// hn : https://www.iana.org/domains/root/db/hn.html hn com.hn edu.hn -org.hn -net.hn -mil.hn gob.hn +mil.hn +net.hn +org.hn // hr : http://www.dns.hr/documents/pdf/HRTLD-regulations.pdf hr -iz.hr +com.hr from.hr +iz.hr name.hr -com.hr // ht : http://www.nic.ht/info/charte.cfm ht +adult.ht +art.ht +asso.ht com.ht -shop.ht +coop.ht +edu.ht firm.ht +gouv.ht info.ht -adult.ht +med.ht net.ht -pro.ht org.ht -med.ht -art.ht -coop.ht +perso.ht pol.ht -asso.ht -edu.ht +pro.ht rel.ht -gouv.ht -perso.ht +shop.ht -// hu : http://www.domain.hu/domain/English/sld.html +// hu : https://www.iana.org/domains/root/db/hu.html // Confirmed by registry 2008-06-12 hu -co.hu -info.hu -org.hu -priv.hu -sport.hu -tm.hu 2000.hu agrar.hu bolt.hu casino.hu city.hu +co.hu erotica.hu erotika.hu film.hu forum.hu games.hu hotel.hu +info.hu ingatlan.hu jogasz.hu konyvelo.hu lakas.hu media.hu news.hu +org.hu +priv.hu reklam.hu sex.hu shop.hu +sport.hu suli.hu szex.hu +tm.hu tozsde.hu utazas.hu video.hu -// id : https://register.pandi.or.id/ +// id : https://www.iana.org/domains/root/db/id.html id ac.id biz.id co.id +desa.id go.id +kop.id mil.id my.id net.id or.id +ponpes.id sch.id web.id -// ie : http://en.wikipedia.org/wiki/.ie +// ie : https://www.iana.org/domains/root/db/ie.html ie gov.ie -// il : http://en.wikipedia.org/wiki/.il -*.il - -// im : https://www.nic.im/pdfs/imfaqs.pdf +// il : http://www.isoc.org.il/domains/ +// see also: https://en.isoc.org.il/il-cctld/registration-rules +// ISOC-IL (operated by .il Registry) +il +ac.il +co.il +gov.il +idf.il +k12.il +muni.il +net.il +org.il +// 
xn--4dbrk0ce ("Israel", Hebrew) : IL +ישראל +// xn--4dbgdty6c.xn--4dbrk0ce. +אקדמיה.ישראל +// xn--5dbhl8d.xn--4dbrk0ce. +ישוב.ישראל +// xn--8dbq2a.xn--4dbrk0ce. +צהל.ישראל +// xn--hebda8b.xn--4dbrk0ce. +ממשל.ישראל + +// im : https://www.nic.im/ +// Submitted by registry im +ac.im co.im ltd.co.im plc.co.im +com.im net.im -gov.im org.im -nic.im -ac.im +tt.im +tv.im -// in : http://en.wikipedia.org/wiki/.in -// see also: http://www.inregistry.in/policies/ -// Please note, that nic.in is not an offical eTLD, but used by most +// in : https://www.iana.org/domains/root/db/in.html +// see also: https://registry.in/policies +// Please note, that nic.in is not an official eTLD, but used by most // government institutions. in +5g.in +6g.in +ac.in +ai.in +am.in +bihar.in +biz.in +business.in +ca.in +cn.in co.in +com.in +coop.in +cs.in +delhi.in +dr.in +edu.in +er.in firm.in -net.in -org.in gen.in +gov.in +gujarat.in ind.in +info.in +int.in +internet.in +io.in +me.in +mil.in +net.in nic.in -ac.in -edu.in +org.in +pg.in +post.in +pro.in res.in -gov.in -mil.in +travel.in +tv.in +uk.in +up.in +us.in -// info : http://en.wikipedia.org/wiki/.info +// info : https://www.iana.org/domains/root/db/info.html info -// int : http://en.wikipedia.org/wiki/.int +// int : https://www.iana.org/domains/root/db/int.html // Confirmed by registry 2008-06-18 int eu.int -// io : http://www.nic.io/rules.html -// list of other 2nd level tlds ? 
+// io : http://www.nic.io/rules.htm io +co.io com.io +edu.io +gov.io +mil.io +net.io +nom.io +org.io // iq : http://www.cmc.iq/english/iq/iqregister1.htm iq -gov.iq +com.iq edu.iq +gov.iq mil.iq -com.iq -org.iq net.iq +org.iq // ir : http://www.nic.ir/Terms_and_Conditions_ir,_Appendix_1_Domain_Rules // Also see http://www.nic.ir/Internationalized_Domain_Names @@ -1168,328 +1526,456 @@ sch.ir ايران.ir // is : http://www.isnic.is/domain/rules.php -// Confirmed by registry 2008-12-06 +// Confirmed by registry 2024-11-17 is -net.is -com.is -edu.is -gov.is -org.is -int.is - -// it : http://en.wikipedia.org/wiki/.it + +// it : https://www.iana.org/domains/root/db/it.html +// https://www.nic.it/ it -gov.it edu.it -// list of reserved geo-names : -// http://www.nic.it/documenti/regolamenti-e-linee-guida/regolamento-assegnazione-versione-6.0.pdf -// (There is also a list of reserved geo-names corresponding to Italian -// municipalities : http://www.nic.it/documenti/appendice-c.pdf , but it is -// not included here.) 
-agrigento.it +gov.it +// Regions (3.3.1) +// https://www.nic.it/en/manage-your-it/forms-and-docs -> "Assignment and Management of domain names" +abr.it +abruzzo.it +aosta-valley.it +aostavalley.it +bas.it +basilicata.it +cal.it +calabria.it +cam.it +campania.it +emilia-romagna.it +emiliaromagna.it +emr.it +friuli-v-giulia.it +friuli-ve-giulia.it +friuli-vegiulia.it +friuli-venezia-giulia.it +friuli-veneziagiulia.it +friuli-vgiulia.it +friuliv-giulia.it +friulive-giulia.it +friulivegiulia.it +friulivenezia-giulia.it +friuliveneziagiulia.it +friulivgiulia.it +fvg.it +laz.it +lazio.it +lig.it +liguria.it +lom.it +lombardia.it +lombardy.it +lucania.it +mar.it +marche.it +mol.it +molise.it +piedmont.it +piemonte.it +pmn.it +pug.it +puglia.it +sar.it +sardegna.it +sardinia.it +sic.it +sicilia.it +sicily.it +taa.it +tos.it +toscana.it +trentin-sud-tirol.it +trentin-süd-tirol.it +trentin-sudtirol.it +trentin-südtirol.it +trentin-sued-tirol.it +trentin-suedtirol.it +trentino.it +trentino-a-adige.it +trentino-aadige.it +trentino-alto-adige.it +trentino-altoadige.it +trentino-s-tirol.it +trentino-stirol.it +trentino-sud-tirol.it +trentino-süd-tirol.it +trentino-sudtirol.it +trentino-südtirol.it +trentino-sued-tirol.it +trentino-suedtirol.it +trentinoa-adige.it +trentinoaadige.it +trentinoalto-adige.it +trentinoaltoadige.it +trentinos-tirol.it +trentinostirol.it +trentinosud-tirol.it +trentinosüd-tirol.it +trentinosudtirol.it +trentinosüdtirol.it +trentinosued-tirol.it +trentinosuedtirol.it +trentinsud-tirol.it +trentinsüd-tirol.it +trentinsudtirol.it +trentinsüdtirol.it +trentinsued-tirol.it +trentinsuedtirol.it +tuscany.it +umb.it +umbria.it +val-d-aosta.it +val-daosta.it +vald-aosta.it +valdaosta.it +valle-aosta.it +valle-d-aosta.it +valle-daosta.it +valleaosta.it +valled-aosta.it +valledaosta.it +vallee-aoste.it +vallée-aoste.it +vallee-d-aoste.it +vallée-d-aoste.it +valleeaoste.it +valléeaoste.it +valleedaoste.it +valléedaoste.it +vao.it +vda.it +ven.it +veneto.it +// 
Provinces (3.3.2) ag.it -alessandria.it +agrigento.it al.it -ancona.it +alessandria.it +alto-adige.it +altoadige.it an.it +ancona.it +andria-barletta-trani.it +andria-trani-barletta.it +andriabarlettatrani.it +andriatranibarletta.it +ao.it aosta.it aoste.it -ao.it -arezzo.it +ap.it +aq.it +aquila.it ar.it +arezzo.it ascoli-piceno.it ascolipiceno.it -ap.it asti.it at.it -avellino.it av.it -bari.it +avellino.it ba.it -andria-barletta-trani.it -andriabarlettatrani.it -trani-barletta-andria.it -tranibarlettaandria.it +balsan.it +balsan-sudtirol.it +balsan-südtirol.it +balsan-suedtirol.it +bari.it barletta-trani-andria.it barlettatraniandria.it -andria-trani-barletta.it -andriatranibarletta.it -trani-andria-barletta.it -traniandriabarletta.it -bt.it belluno.it -bl.it benevento.it -bn.it bergamo.it bg.it -biella.it bi.it -bologna.it +biella.it +bl.it +bn.it bo.it +bologna.it bolzano.it +bolzano-altoadige.it bozen.it -balsan.it -alto-adige.it -altoadige.it -suedtirol.it -bz.it +bozen-sudtirol.it +bozen-südtirol.it +bozen-suedtirol.it +br.it brescia.it -bs.it brindisi.it -br.it -cagliari.it +bs.it +bt.it +bulsan.it +bulsan-sudtirol.it +bulsan-südtirol.it +bulsan-suedtirol.it +bz.it ca.it +cagliari.it caltanissetta.it -cl.it +campidano-medio.it +campidanomedio.it campobasso.it -cb.it -carboniaiglesias.it carbonia-iglesias.it -iglesias-carbonia.it -iglesiascarbonia.it -ci.it +carboniaiglesias.it +carrara-massa.it +carraramassa.it caserta.it -ce.it catania.it -ct.it catanzaro.it -cz.it -chieti.it +cb.it +ce.it +cesena-forli.it +cesena-forlì.it +cesenaforli.it +cesenaforlì.it ch.it -como.it +chieti.it +ci.it +cl.it +cn.it co.it +como.it cosenza.it -cs.it -cremona.it cr.it +cremona.it crotone.it -kr.it +cs.it +ct.it cuneo.it -cn.it +cz.it dell-ogliastra.it dellogliastra.it -ogliastra.it -og.it -enna.it en.it -ferrara.it +enna.it +fc.it fe.it fermo.it -fm.it +ferrara.it +fg.it +fi.it firenze.it florence.it -fi.it +fm.it foggia.it -fg.it forli-cesena.it +forlì-cesena.it 
forlicesena.it -cesena-forli.it -cesenaforli.it -fc.it -frosinone.it +forlìcesena.it fr.it -genova.it -genoa.it +frosinone.it ge.it -gorizia.it +genoa.it +genova.it go.it -grosseto.it +gorizia.it gr.it -imperia.it +grosseto.it +iglesias-carbonia.it +iglesiascarbonia.it im.it -isernia.it +imperia.it is.it -laquila.it -aquila.it -aq.it +isernia.it +kr.it la-spezia.it +laquila.it laspezia.it -sp.it latina.it -lt.it -lecce.it +lc.it le.it +lecce.it lecco.it -lc.it -livorno.it li.it -lodi.it +livorno.it lo.it -lucca.it +lodi.it +lt.it lu.it +lucca.it macerata.it -mc.it mantova.it -mn.it massa-carrara.it massacarrara.it -carrara-massa.it -carraramassa.it -ms.it matera.it -mt.it +mb.it +mc.it +me.it medio-campidano.it mediocampidano.it -campidano-medio.it -campidanomedio.it -vs.it messina.it -me.it -milano.it -milan.it mi.it -modena.it +milan.it +milano.it +mn.it mo.it +modena.it monza.it monza-brianza.it +monza-e-della-brianza.it monzabrianza.it monzaebrianza.it monzaedellabrianza.it -monza-e-della-brianza.it -mb.it -napoli.it -naples.it +ms.it +mt.it na.it -novara.it +naples.it +napoli.it no.it -nuoro.it +novara.it nu.it -oristano.it +nuoro.it +og.it +ogliastra.it +olbia-tempio.it +olbiatempio.it or.it +oristano.it +ot.it +pa.it padova.it padua.it -pd.it palermo.it -pa.it parma.it -pr.it pavia.it -pv.it -perugia.it -pg.it -pescara.it +pc.it +pd.it pe.it +perugia.it pesaro-urbino.it pesarourbino.it -urbino-pesaro.it -urbinopesaro.it -pu.it +pescara.it +pg.it +pi.it piacenza.it -pc.it pisa.it -pi.it pistoia.it -pt.it -pordenone.it pn.it +po.it +pordenone.it potenza.it -pz.it +pr.it prato.it -po.it +pt.it +pu.it +pv.it +pz.it +ra.it ragusa.it -rg.it ravenna.it -ra.it -reggio-calabria.it -reggiocalabria.it rc.it +re.it +reggio-calabria.it reggio-emilia.it +reggiocalabria.it reggioemilia.it -re.it -rieti.it +rg.it ri.it +rieti.it rimini.it +rm.it rn.it +ro.it roma.it rome.it -rm.it rovigo.it -ro.it -salerno.it sa.it +salerno.it sassari.it -ss.it savona.it -sv.it -siena.it 
si.it +siena.it siracusa.it -sr.it -sondrio.it so.it -taranto.it +sondrio.it +sp.it +sr.it +ss.it +südtirol.it +suedtirol.it +sv.it ta.it +taranto.it +te.it tempio-olbia.it tempioolbia.it -olbia-tempio.it -olbiatempio.it -ot.it teramo.it -te.it terni.it -tr.it -torino.it -turin.it +tn.it to.it -trapani.it +torino.it tp.it +tr.it +trani-andria-barletta.it +trani-barletta-andria.it +traniandriabarletta.it +tranibarlettaandria.it +trapani.it trento.it -trentino.it -tn.it treviso.it -tv.it trieste.it ts.it -udine.it +turin.it +tv.it ud.it -varese.it +udine.it +urbino-pesaro.it +urbinopesaro.it va.it +varese.it +vb.it +vc.it +ve.it venezia.it venice.it -ve.it verbania.it -vb.it vercelli.it -vc.it verona.it -vr.it +vi.it vibo-valentia.it vibovalentia.it -vv.it vicenza.it -vi.it viterbo.it +vr.it +vs.it vt.it +vv.it -// je : http://www.channelisles.net/applic/avextn.shtml +// je : https://www.iana.org/domains/root/db/je.html +// Confirmed by registry 2013-11-28 je co.je -org.je net.je -sch.je -gov.je +org.je // jm : http://www.com.jm/register.html *.jm -// jo : http://www.dns.jo/Registration_policy.aspx +// jo : https://www.dns.jo/JoFamily.aspx +// Confirmed by registry 2024-11-17 jo +agri.jo +ai.jo com.jo -org.jo -net.jo edu.jo -sch.jo +eng.jo +fm.jo gov.jo mil.jo -name.jo +net.jo +org.jo +per.jo +phd.jo +sch.jo +tv.jo -// jobs : http://en.wikipedia.org/wiki/.jobs +// jobs : https://www.iana.org/domains/root/db/jobs.html jobs -// jp : http://en.wikipedia.org/wiki/.jp +// jp : https://www.iana.org/domains/root/db/jp.html // http://jprs.co.jp/en/jpdomain.html -// Updated by registry 2012-05-28 +// Confirmed by registry 2024-11-22 jp // jp organizational type names ac.jp @@ -1501,7 +1987,7 @@ gr.jp lg.jp ne.jp or.jp -// jp preficture type names +// jp prefecture type names aichi.jp akita.jp aomori.jp @@ -1549,21 +2035,71 @@ wakayama.jp yamagata.jp yamaguchi.jp yamanashi.jp +三重.jp +京都.jp +佐賀.jp +兵庫.jp +北海道.jp +千葉.jp +和歌山.jp +埼玉.jp +大分.jp +大阪.jp +奈良.jp +宮城.jp +宮崎.jp +富山.jp 
+山口.jp +山形.jp +山梨.jp +岐阜.jp +岡山.jp +岩手.jp +島根.jp +広島.jp +徳島.jp +愛媛.jp +愛知.jp +新潟.jp +東京.jp +栃木.jp +沖縄.jp +滋賀.jp +熊本.jp +石川.jp +神奈川.jp +福井.jp +福岡.jp +福島.jp +秋田.jp +群馬.jp +茨城.jp +長崎.jp +長野.jp +青森.jp +静岡.jp +香川.jp +高知.jp +鳥取.jp +鹿児島.jp // jp geographic type names // http://jprs.jp/doc/rule/saisoku-1.html +// 2024-11-22: JPRS confirmed that jp geographic type names no longer accept new registrations. +// Once all existing registrations expire (marking full discontinuation), these suffixes +// will be removed from the PSL. *.kawasaki.jp -*.kitakyushu.jp -*.kobe.jp -*.nagoya.jp -*.sapporo.jp -*.sendai.jp -*.yokohama.jp !city.kawasaki.jp +*.kitakyushu.jp !city.kitakyushu.jp +*.kobe.jp !city.kobe.jp +*.nagoya.jp !city.nagoya.jp +*.sapporo.jp !city.sapporo.jp +*.sendai.jp !city.sendai.jp +*.yokohama.jp !city.yokohama.jp // 4th level registration aisai.aichi.jp @@ -1593,7 +2129,6 @@ konan.aichi.jp kota.aichi.jp mihama.aichi.jp miyoshi.aichi.jp -nagakute.aichi.jp nishio.aichi.jp nisshin.aichi.jp obu.aichi.jp @@ -2265,7 +2800,6 @@ rikuzentakata.iwate.jp shiwa.iwate.jp shizukuishi.iwate.jp sumita.iwate.jp -takizawa.iwate.jp tanohata.iwate.jp tono.iwate.jp yahaba.iwate.jp @@ -2371,11 +2905,8 @@ arao.kumamoto.jp aso.kumamoto.jp choyo.kumamoto.jp gyokuto.kumamoto.jp -hitoyoshi.kumamoto.jp kamiamakusa.kumamoto.jp -kashima.kumamoto.jp kikuchi.kumamoto.jp -kosa.kumamoto.jp kumamoto.kumamoto.jp mashiki.kumamoto.jp mifune.kumamoto.jp @@ -2460,7 +2991,6 @@ iwanuma.miyagi.jp kakuda.miyagi.jp kami.miyagi.jp kawasaki.miyagi.jp -kesennuma.miyagi.jp marumori.miyagi.jp matsushima.miyagi.jp minamisanriku.miyagi.jp @@ -3246,63 +3776,73 @@ uenohara.yamanashi.jp yamanakako.yamanashi.jp yamanashi.yamanashi.jp -// ke : http://www.kenic.or.ke/index.php?option=com_content&task=view&id=117&Itemid=145 -*.ke +// ke : http://www.kenic.or.ke/index.php/en/ke-domains/ke-domains +ke +ac.ke +co.ke +go.ke +info.ke +me.ke +mobi.ke +ne.ke +or.ke +sc.ke // kg : http://www.domain.kg/dmn_n.html kg -org.kg -net.kg 
com.kg edu.kg gov.kg mil.kg +net.kg +org.kg // kh : http://www.mptc.gov.kh/dns_registration.htm *.kh -// ki : http://www.ki/dns/index.html +// ki : https://www.iana.org/domains/root/db/ki.html ki -edu.ki biz.ki -net.ki -org.ki +com.ki +edu.ki gov.ki info.ki -com.ki +net.ki +org.ki -// km : http://en.wikipedia.org/wiki/.km +// km : https://www.iana.org/domains/root/db/km.html // http://www.domaine.km/documents/charte.doc km -org.km -nom.km +ass.km +com.km +edu.km gov.km +mil.km +nom.km +org.km prd.km tm.km -edu.km -mil.km -ass.km -com.km // These are only mentioned as proposed suggestions at domaine.km, but -// http://en.wikipedia.org/wiki/.km says they're available for registration: -coop.km +// https://www.iana.org/domains/root/db/km.html says they're available for registration: asso.km -presse.km +coop.km +gouv.km medecin.km notaires.km pharmaciens.km +presse.km veterinaire.km -gouv.km -// kn : http://en.wikipedia.org/wiki/.kn +// kn : https://www.iana.org/domains/root/db/kn.html // http://www.dot.kn/domainRules.html kn -net.kn -org.kn edu.kn gov.kn +net.kn +org.kn // kp : http://www.kcce.kp/en_index.php +kp com.kp edu.kp gov.kp @@ -3310,15 +3850,19 @@ org.kp rep.kp tra.kp -// kr : http://en.wikipedia.org/wiki/.kr -// see also: http://domain.nida.or.kr/eng/registration.jsp +// kr : https://www.iana.org/domains/root/db/kr.html +// see also: https://krnic.kisa.or.kr/jsp/infoboard/law/domBylawsReg.jsp kr ac.kr +ai.kr co.kr es.kr go.kr hs.kr +io.kr +it.kr kg.kr +me.kr mil.kr ms.kr ne.kr @@ -3344,92 +3888,110 @@ jeonnam.kr seoul.kr ulsan.kr -// kw : http://en.wikipedia.org/wiki/.kw -*.kw +// kw : https://www.nic.kw/policies/ +// Confirmed by registry +kw +com.kw +edu.kw +emb.kw +gov.kw +ind.kw +net.kw +org.kw // ky : http://www.icta.ky/da_ky_reg_dom.php // Confirmed by registry 2008-06-17 ky -edu.ky -gov.ky com.ky -org.ky +edu.ky net.ky +org.ky -// kz : http://en.wikipedia.org/wiki/.kz +// kz : https://www.iana.org/domains/root/db/kz.html // see also: 
http://www.nic.kz/rules/index.jsp kz -org.kz +com.kz edu.kz -net.kz gov.kz mil.kz -com.kz +net.kz +org.kz -// la : http://en.wikipedia.org/wiki/.la -// Submitted by registry 2008-06-10 +// la : https://www.iana.org/domains/root/db/la.html +// Submitted by registry la -int.la -net.la -info.la +com.la edu.la gov.la -per.la -com.la +info.la +int.la +net.la org.la +per.la -// lb : http://en.wikipedia.org/wiki/.lb -// Submitted by registry 2008-06-17 +// lb : https://www.iana.org/domains/root/db/lb.html +// Submitted by registry +lb com.lb edu.lb gov.lb net.lb org.lb -// lc : http://en.wikipedia.org/wiki/.lc +// lc : https://www.iana.org/domains/root/db/lc.html // see also: http://www.nic.lc/rules.htm lc -com.lc -net.lc co.lc -org.lc +com.lc edu.lc gov.lc +net.lc +org.lc -// li : http://en.wikipedia.org/wiki/.li +// li : https://www.iana.org/domains/root/db/li.html li -// lk : http://www.nic.lk/seclevpr.html +// lk : https://www.iana.org/domains/root/db/lk.html lk -gov.lk -sch.lk -net.lk -int.lk +ac.lk +assn.lk com.lk -org.lk edu.lk +gov.lk +grp.lk +hotel.lk +int.lk +ltd.lk +net.lk ngo.lk +org.lk +sch.lk soc.lk web.lk -ltd.lk -assn.lk -grp.lk -hotel.lk // lr : http://psg.com/dns/lr/lr.txt -// Submitted by registry 2008-06-17 +// Submitted by registry +lr com.lr edu.lr gov.lr -org.lr net.lr +org.lr -// ls : http://en.wikipedia.org/wiki/.ls +// ls : http://www.nic.ls/ +// Confirmed by registry ls +ac.ls +biz.ls co.ls +edu.ls +gov.ls +info.ls +net.ls org.ls +sc.ls -// lt : http://en.wikipedia.org/wiki/.lt +// lt : https://www.iana.org/domains/root/db/lt.html lt // gov.lt : http://www.gov.lt/index_en.php gov.lt @@ -3437,698 +3999,165 @@ gov.lt // lu : http://www.dns.lu/en/ lu -// lv : http://www.nic.lv/DNS/En/generic.php +// lv : https://www.iana.org/domains/root/db/lv.html lv +asn.lv com.lv +conf.lv edu.lv gov.lv -org.lv -mil.lv id.lv +mil.lv net.lv -asn.lv -conf.lv +org.lv // ly : http://www.nic.ly/regulations.php ly com.ly -net.ly -gov.ly -plc.ly edu.ly -sch.ly +gov.ly 
+id.ly med.ly +net.ly org.ly -id.ly +plc.ly +sch.ly -// ma : http://en.wikipedia.org/wiki/.ma +// ma : https://www.iana.org/domains/root/db/ma.html // http://www.anrt.ma/fr/admin/download/upload/file_fr782.pdf ma +ac.ma co.ma -net.ma gov.ma +net.ma org.ma -ac.ma press.ma // mc : http://www.nic.mc/ mc -tm.mc asso.mc +tm.mc -// md : http://en.wikipedia.org/wiki/.md +// md : https://www.iana.org/domains/root/db/md.html md -// me : http://en.wikipedia.org/wiki/.me +// me : https://www.iana.org/domains/root/db/me.html me +ac.me co.me -net.me -org.me edu.me -ac.me gov.me its.me +net.me +org.me priv.me -// mg : http://www.nic.mg/tarif.htm +// mg : https://nic.mg mg -org.mg -nom.mg -gov.mg -prd.mg -tm.mg +co.mg +com.mg edu.mg +gov.mg mil.mg -com.mg +nom.mg +org.mg +prd.mg -// mh : http://en.wikipedia.org/wiki/.mh +// mh : https://www.iana.org/domains/root/db/mh.html mh -// mil : http://en.wikipedia.org/wiki/.mil +// mil : https://www.iana.org/domains/root/db/mil.html mil -// mk : http://en.wikipedia.org/wiki/.mk +// mk : https://www.iana.org/domains/root/db/mk.html // see also: http://dns.marnet.net.mk/postapka.php mk com.mk -org.mk -net.mk edu.mk gov.mk inf.mk name.mk +net.mk +org.mk -// ml : http://www.gobin.info/domainname/ml-template.doc -// see also: http://en.wikipedia.org/wiki/.ml +// ml : https://www.iana.org/domains/root/db/ml.html +// Confirmed by Boubacar NDIAYE 2024-12-31 ml +ac.ml +art.ml +asso.ml com.ml edu.ml gouv.ml gov.ml +info.ml +inst.ml net.ml org.ml +pr.ml presse.ml -// mm : http://en.wikipedia.org/wiki/.mm +// mm : https://www.iana.org/domains/root/db/mm.html *.mm -// mn : http://en.wikipedia.org/wiki/.mn +// mn : https://www.iana.org/domains/root/db/mn.html mn -gov.mn edu.mn +gov.mn org.mn // mo : http://www.monic.net.mo/ mo com.mo -net.mo -org.mo edu.mo gov.mo +net.mo +org.mo -// mobi : http://en.wikipedia.org/wiki/.mobi +// mobi : https://www.iana.org/domains/root/db/mobi.html mobi // mp : http://www.dot.mp/ // Confirmed by registry 2008-06-17 mp 
-// mq : http://en.wikipedia.org/wiki/.mq +// mq : https://www.iana.org/domains/root/db/mq.html mq -// mr : http://en.wikipedia.org/wiki/.mr +// mr : https://www.iana.org/domains/root/db/mr.html mr gov.mr -// ms : http://en.wikipedia.org/wiki/.ms +// ms : https://www.iana.org/domains/root/db/ms.html ms - -// mt : https://www.nic.org.mt/dotmt/ -*.mt - -// mu : http://en.wikipedia.org/wiki/.mu +com.ms +edu.ms +gov.ms +net.ms +org.ms + +// mt : https://www.nic.org.mt/go/policy +// Submitted by registry +mt +com.mt +edu.mt +net.mt +org.mt + +// mu : https://www.iana.org/domains/root/db/mu.html mu -com.mu -net.mu -org.mu -gov.mu ac.mu co.mu +com.mu +gov.mu +net.mu or.mu +org.mu -// museum : http://about.museum/naming/ -// http://index.museum/ +// museum : https://welcome.museum/wp-content/uploads/2018/05/20180525-Registration-Policy-MUSEUM-EN_VF-2.pdf https://welcome.museum/buy-your-dot-museum-2/ museum -academy.museum -agriculture.museum -air.museum -airguard.museum -alabama.museum -alaska.museum -amber.museum -ambulance.museum -american.museum -americana.museum -americanantiques.museum -americanart.museum -amsterdam.museum -and.museum -annefrank.museum -anthro.museum -anthropology.museum -antiques.museum -aquarium.museum -arboretum.museum -archaeological.museum -archaeology.museum -architecture.museum -art.museum -artanddesign.museum -artcenter.museum -artdeco.museum -arteducation.museum -artgallery.museum -arts.museum -artsandcrafts.museum -asmatart.museum -assassination.museum -assisi.museum -association.museum -astronomy.museum -atlanta.museum -austin.museum -australia.museum -automotive.museum -aviation.museum -axis.museum -badajoz.museum -baghdad.museum -bahn.museum -bale.museum -baltimore.museum -barcelona.museum -baseball.museum -basel.museum -baths.museum -bauern.museum -beauxarts.museum -beeldengeluid.museum -bellevue.museum -bergbau.museum -berkeley.museum -berlin.museum -bern.museum -bible.museum -bilbao.museum -bill.museum -birdart.museum 
-birthplace.museum -bonn.museum -boston.museum -botanical.museum -botanicalgarden.museum -botanicgarden.museum -botany.museum -brandywinevalley.museum -brasil.museum -bristol.museum -british.museum -britishcolumbia.museum -broadcast.museum -brunel.museum -brussel.museum -brussels.museum -bruxelles.museum -building.museum -burghof.museum -bus.museum -bushey.museum -cadaques.museum -california.museum -cambridge.museum -can.museum -canada.museum -capebreton.museum -carrier.museum -cartoonart.museum -casadelamoneda.museum -castle.museum -castres.museum -celtic.museum -center.museum -chattanooga.museum -cheltenham.museum -chesapeakebay.museum -chicago.museum -children.museum -childrens.museum -childrensgarden.museum -chiropractic.museum -chocolate.museum -christiansburg.museum -cincinnati.museum -cinema.museum -circus.museum -civilisation.museum -civilization.museum -civilwar.museum -clinton.museum -clock.museum -coal.museum -coastaldefence.museum -cody.museum -coldwar.museum -collection.museum -colonialwilliamsburg.museum -coloradoplateau.museum -columbia.museum -columbus.museum -communication.museum -communications.museum -community.museum -computer.museum -computerhistory.museum -comunicações.museum -contemporary.museum -contemporaryart.museum -convent.museum -copenhagen.museum -corporation.museum -correios-e-telecomunicações.museum -corvette.museum -costume.museum -countryestate.museum -county.museum -crafts.museum -cranbrook.museum -creation.museum -cultural.museum -culturalcenter.museum -culture.museum -cyber.museum -cymru.museum -dali.museum -dallas.museum -database.museum -ddr.museum -decorativearts.museum -delaware.museum -delmenhorst.museum -denmark.museum -depot.museum -design.museum -detroit.museum -dinosaur.museum -discovery.museum -dolls.museum -donostia.museum -durham.museum -eastafrica.museum -eastcoast.museum -education.museum -educational.museum -egyptian.museum -eisenbahn.museum -elburg.museum -elvendrell.museum -embroidery.museum -encyclopedic.museum 
-england.museum -entomology.museum -environment.museum -environmentalconservation.museum -epilepsy.museum -essex.museum -estate.museum -ethnology.museum -exeter.museum -exhibition.museum -family.museum -farm.museum -farmequipment.museum -farmers.museum -farmstead.museum -field.museum -figueres.museum -filatelia.museum -film.museum -fineart.museum -finearts.museum -finland.museum -flanders.museum -florida.museum -force.museum -fortmissoula.museum -fortworth.museum -foundation.museum -francaise.museum -frankfurt.museum -franziskaner.museum -freemasonry.museum -freiburg.museum -fribourg.museum -frog.museum -fundacio.museum -furniture.museum -gallery.museum -garden.museum -gateway.museum -geelvinck.museum -gemological.museum -geology.museum -georgia.museum -giessen.museum -glas.museum -glass.museum -gorge.museum -grandrapids.museum -graz.museum -guernsey.museum -halloffame.museum -hamburg.museum -handson.museum -harvestcelebration.museum -hawaii.museum -health.museum -heimatunduhren.museum -hellas.museum -helsinki.museum -hembygdsforbund.museum -heritage.museum -histoire.museum -historical.museum -historicalsociety.museum -historichouses.museum -historisch.museum -historisches.museum -history.museum -historyofscience.museum -horology.museum -house.museum -humanities.museum -illustration.museum -imageandsound.museum -indian.museum -indiana.museum -indianapolis.museum -indianmarket.museum -intelligence.museum -interactive.museum -iraq.museum -iron.museum -isleofman.museum -jamison.museum -jefferson.museum -jerusalem.museum -jewelry.museum -jewish.museum -jewishart.museum -jfk.museum -journalism.museum -judaica.museum -judygarland.museum -juedisches.museum -juif.museum -karate.museum -karikatur.museum -kids.museum -koebenhavn.museum -koeln.museum -kunst.museum -kunstsammlung.museum -kunstunddesign.museum -labor.museum -labour.museum -lajolla.museum -lancashire.museum -landes.museum -lans.museum -läns.museum -larsson.museum -lewismiller.museum -lincoln.museum -linz.museum 
-living.museum -livinghistory.museum -localhistory.museum -london.museum -losangeles.museum -louvre.museum -loyalist.museum -lucerne.museum -luxembourg.museum -luzern.museum -mad.museum -madrid.museum -mallorca.museum -manchester.museum -mansion.museum -mansions.museum -manx.museum -marburg.museum -maritime.museum -maritimo.museum -maryland.museum -marylhurst.museum -media.museum -medical.museum -medizinhistorisches.museum -meeres.museum -memorial.museum -mesaverde.museum -michigan.museum -midatlantic.museum -military.museum -mill.museum -miners.museum -mining.museum -minnesota.museum -missile.museum -missoula.museum -modern.museum -moma.museum -money.museum -monmouth.museum -monticello.museum -montreal.museum -moscow.museum -motorcycle.museum -muenchen.museum -muenster.museum -mulhouse.museum -muncie.museum -museet.museum -museumcenter.museum -museumvereniging.museum -music.museum -national.museum -nationalfirearms.museum -nationalheritage.museum -nativeamerican.museum -naturalhistory.museum -naturalhistorymuseum.museum -naturalsciences.museum -nature.museum -naturhistorisches.museum -natuurwetenschappen.museum -naumburg.museum -naval.museum -nebraska.museum -neues.museum -newhampshire.museum -newjersey.museum -newmexico.museum -newport.museum -newspaper.museum -newyork.museum -niepce.museum -norfolk.museum -north.museum -nrw.museum -nuernberg.museum -nuremberg.museum -nyc.museum -nyny.museum -oceanographic.museum -oceanographique.museum -omaha.museum -online.museum -ontario.museum -openair.museum -oregon.museum -oregontrail.museum -otago.museum -oxford.museum -pacific.museum -paderborn.museum -palace.museum -paleo.museum -palmsprings.museum -panama.museum -paris.museum -pasadena.museum -pharmacy.museum -philadelphia.museum -philadelphiaarea.museum -philately.museum -phoenix.museum -photography.museum -pilots.museum -pittsburgh.museum -planetarium.museum -plantation.museum -plants.museum -plaza.museum -portal.museum -portland.museum -portlligat.museum 
-posts-and-telecommunications.museum -preservation.museum -presidio.museum -press.museum -project.museum -public.museum -pubol.museum -quebec.museum -railroad.museum -railway.museum -research.museum -resistance.museum -riodejaneiro.museum -rochester.museum -rockart.museum -roma.museum -russia.museum -saintlouis.museum -salem.museum -salvadordali.museum -salzburg.museum -sandiego.museum -sanfrancisco.museum -santabarbara.museum -santacruz.museum -santafe.museum -saskatchewan.museum -satx.museum -savannahga.museum -schlesisches.museum -schoenbrunn.museum -schokoladen.museum -school.museum -schweiz.museum -science.museum -scienceandhistory.museum -scienceandindustry.museum -sciencecenter.museum -sciencecenters.museum -science-fiction.museum -sciencehistory.museum -sciences.museum -sciencesnaturelles.museum -scotland.museum -seaport.museum -settlement.museum -settlers.museum -shell.museum -sherbrooke.museum -sibenik.museum -silk.museum -ski.museum -skole.museum -society.museum -sologne.museum -soundandvision.museum -southcarolina.museum -southwest.museum -space.museum -spy.museum -square.museum -stadt.museum -stalbans.museum -starnberg.museum -state.museum -stateofdelaware.museum -station.museum -steam.museum -steiermark.museum -stjohn.museum -stockholm.museum -stpetersburg.museum -stuttgart.museum -suisse.museum -surgeonshall.museum -surrey.museum -svizzera.museum -sweden.museum -sydney.museum -tank.museum -tcm.museum -technology.museum -telekommunikation.museum -television.museum -texas.museum -textile.museum -theater.museum -time.museum -timekeeping.museum -topology.museum -torino.museum -touch.museum -town.museum -transport.museum -tree.museum -trolley.museum -trust.museum -trustee.museum -uhren.museum -ulm.museum -undersea.museum -university.museum -usa.museum -usantiques.museum -usarts.museum -uscountryestate.museum -usculture.museum -usdecorativearts.museum -usgarden.museum -ushistory.museum -ushuaia.museum -uslivinghistory.museum -utah.museum -uvic.museum 
-valley.museum -vantaa.museum -versailles.museum -viking.museum -village.museum -virginia.museum -virtual.museum -virtuel.museum -vlaanderen.museum -volkenkunde.museum -wales.museum -wallonie.museum -war.museum -washingtondc.museum -watchandclock.museum -watch-and-clock.museum -western.museum -westfalen.museum -whaling.museum -wildlife.museum -williamsburg.museum -windmill.museum -workshop.museum -york.museum -yorkshire.museum -yosemite.museum -youth.museum -zoological.museum -zoology.museum -ירושלים.museum -иком.museum - -// mv : http://en.wikipedia.org/wiki/.mv + +// mv : https://www.iana.org/domains/root/db/mv.html // "mv" included because, contra Wikipedia, google.mv exists. mv aero.mv @@ -4156,121 +4185,134 @@ coop.mw edu.mw gov.mw int.mw -museum.mw net.mw org.mw // mx : http://www.nic.mx/ -// Submitted by registry 2008-06-19 +// Submitted by registry mx com.mx -org.mx -gob.mx edu.mx +gob.mx net.mx +org.mx -// my : http://www.mynic.net.my/ +// my : http://www.mynic.my/ +// Available strings: https://mynic.my/resources/domains/buying-a-domain/ my +biz.my com.my -net.my -org.my -gov.my edu.my +gov.my mil.my name.my +net.my +org.my -// mz : http://www.gobin.info/domainname/mz-template.doc -*.mz -!teledata.mz +// mz : http://www.uem.mz/ +// Submitted by registry +mz +ac.mz +adv.mz +co.mz +edu.mz +gov.mz +mil.mz +net.mz +org.mz // na : http://www.na-nic.com.na/ -// http://www.info.na/domain/ na -info.na -pro.na -name.na -school.na -or.na -dr.na -us.na -mx.na -ca.na -in.na -cc.na -tv.na -ws.na -mobi.na +alt.na co.na com.na +gov.na +net.na org.na -// name : has 2nd-level tlds, but there's no list of them +// name : http://www.nic.name/ +// Regarding 2LDs: https://github.com/publicsuffix/list/issues/2306 name // nc : http://www.cctld.nc/ nc asso.nc +nom.nc -// ne : http://en.wikipedia.org/wiki/.ne +// ne : https://www.iana.org/domains/root/db/ne.html ne -// net : http://en.wikipedia.org/wiki/.net +// net : https://www.iana.org/domains/root/db/net.html net -// nf : 
http://en.wikipedia.org/wiki/.nf +// nf : https://www.iana.org/domains/root/db/nf.html nf -com.nf -net.nf -per.nf -rec.nf -web.nf arts.nf +com.nf firm.nf info.nf +net.nf other.nf +per.nf +rec.nf store.nf +web.nf -// ng : http://psg.com/dns/ng/ -// Submitted by registry 2008-06-17 -ac.ng +// ng : http://www.nira.org.ng/index.php/join-us/register-ng-domain/189-nira-slds +ng com.ng edu.ng gov.ng +i.ng +mil.ng +mobi.ng +name.ng net.ng org.ng - -// ni : http://www.nic.ni/dominios.htm -*.ni - -// nl : http://www.domain-registry.nl/ace.php/c,728,122,,,,Home.html -// Confirmed by registry (with technical -// reservations) 2008-06-08 +sch.ng + +// ni : http://www.nic.ni/ +ni +ac.ni +biz.ni +co.ni +com.ni +edu.ni +gob.ni +in.ni +info.ni +int.ni +mil.ni +net.ni +nom.ni +org.ni +web.ni + +// nl : https://www.iana.org/domains/root/db/nl.html +// https://www.sidn.nl/ nl -// BV.nl will be a registry for dutch BV's (besloten vennootschap) -bv.nl - -// no : http://www.norid.no/regelverk/index.en.html -// The Norwegian registry has declined to notify us of updates. The web pages -// referenced below are the official source of the data. 
There is also an -// announce mailing list: -// https://postlister.uninett.no/sympa/info/norid-diskusjon +// no : https://www.norid.no/en/om-domenenavn/regelverk-for-no/ +// Norid geographical second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-b/ +// Norid category second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-c/ +// Norid category second-level domains managed by parties other than Norid : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-d/ +// RSS feed: https://teknisk.norid.no/en/feed/ no -// Norid generic domains : http://www.norid.no/regelverk/vedlegg-c.en.html +// Norid category second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-c/ fhs.no -vgs.no -fylkesbibl.no folkebibl.no -museum.no +fylkesbibl.no idrett.no +museum.no priv.no -// Non-Norid generic domains : http://www.norid.no/regelverk/vedlegg-d.en.html -mil.no -stat.no +vgs.no +// Norid category second-level domains managed by parties other than Norid : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-d/ dep.no -kommune.no herad.no -// no geographical names : http://www.norid.no/regelverk/vedlegg-b.en.html +kommune.no +mil.no +stat.no +// Norid geographical second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-b/ // counties aa.no ah.no @@ -4321,10 +4363,10 @@ akrehamn.no algard.no ålgård.no arna.no -brumunddal.no -bryne.no bronnoysund.no brønnøysund.no +brumunddal.no +bryne.no drobak.no drøbak.no egersund.no @@ -4368,27 +4410,32 @@ tananger.no tranby.no vossevangen.no // communities +aarborte.no +aejrie.no afjord.no åfjord.no agdenes.no +nes.akershus.no +aknoluokta.no +ákŋoluokta.no al.no ål.no +alaheadju.no +álaheadju.no alesund.no ålesund.no alstahaug.no alta.no áltá.no -alaheadju.no -álaheadju.no alvdal.no amli.no åmli.no amot.no åmot.no +andasuolo.no andebu.no andoy.no andøy.no -andasuolo.no ardal.no årdal.no aremark.no @@ -4398,9 
+4445,9 @@ aseral.no åseral.no asker.no askim.no -askvoll.no askoy.no askøy.no +askvoll.no asnes.no åsnes.no audnedaln.no @@ -4413,27 +4460,37 @@ austevoll.no austrheim.no averoy.no averøy.no -balestrand.no -ballangen.no +badaddja.no +bådåddjå.no +bærum.no +bahcavuotna.no +báhcavuotna.no +bahccavuotna.no +báhccavuotna.no +baidar.no +báidár.no +bajddar.no +bájddar.no balat.no bálát.no +balestrand.no +ballangen.no balsfjord.no -bahccavuotna.no -báhccavuotna.no bamble.no bardu.no +barum.no +batsfjord.no +båtsfjord.no +bearalvahki.no +bearalváhki.no beardu.no beiarn.no -bajddar.no -bájddar.no -baidar.no -báidár.no berg.no bergen.no berlevag.no berlevåg.no -bearalvahki.no -bearalváhki.no +bievat.no +bievát.no bindal.no birkenes.no bjarkoy.no @@ -4442,36 +4499,32 @@ bjerkreim.no bjugn.no bodo.no bodø.no -badaddja.no -bådåddjå.no -budejju.no bokn.no +bomlo.no +bømlo.no bremanger.no bronnoy.no brønnøy.no +budejju.no +nes.buskerud.no bygland.no bykle.no -barum.no -bærum.no -bo.telemark.no -bø.telemark.no -bo.nordland.no -bø.nordland.no -bievat.no -bievát.no -bomlo.no -bømlo.no -batsfjord.no -båtsfjord.no -bahcavuotna.no -báhcavuotna.no +cahcesuolo.no +čáhcesuolo.no +davvenjarga.no +davvenjárga.no +davvesiida.no +deatnu.no +dielddanuorri.no +divtasvuodna.no +divttasvuotna.no +donna.no +dønna.no dovre.no drammen.no drangedal.no dyroy.no dyrøy.no -donna.no -dønna.no eid.no eidfjord.no eidsberg.no @@ -4483,14 +4536,12 @@ enebakk.no engerdal.no etne.no etnedal.no -evenes.no evenassi.no evenášši.no +evenes.no evje-og-hornnes.no farsund.no fauske.no -fuossko.no -fuoisku.no fedje.no fet.no finnoy.no @@ -4498,33 +4549,40 @@ finnøy.no fitjar.no fjaler.no fjell.no +fla.no +flå.no flakstad.no flatanger.no flekkefjord.no flesberg.no flora.no -fla.no -flå.no folldal.no +forde.no +førde.no forsand.no fosnes.no +fræna.no +frana.no frei.no frogn.no froland.no frosta.no -frana.no -fræna.no froya.no frøya.no +fuoisku.no +fuossko.no fusa.no fyresdal.no -forde.no -førde.no +gaivuotna.no 
+gáivuotna.no +galsa.no +gálsá.no gamvik.no gangaviika.no gáŋgaviika.no gaular.no gausdal.no +giehtavuoatna.no gildeskal.no gildeskål.no giske.no @@ -4542,38 +4600,37 @@ granvin.no gratangen.no grimstad.no grong.no -kraanghke.no -kråanghke.no grue.no gulen.no +guovdageaidnu.no +ha.no +hå.no +habmer.no +hábmer.no hadsel.no +hægebostad.no +hagebostad.no halden.no halsa.no hamar.no hamaroy.no -habmer.no -hábmer.no -hapmir.no -hápmir.no -hammerfest.no hammarfeasta.no hámmárfeasta.no +hammerfest.no +hapmir.no +hápmir.no haram.no hareid.no harstad.no hasvik.no -aknoluokta.no -ákŋoluokta.no hattfjelldal.no -aarborte.no haugesund.no +os.hedmark.no +valer.hedmark.no +våler.hedmark.no hemne.no hemnes.no hemsedal.no -heroy.more-og-romsdal.no -herøy.møre-og-romsdal.no -heroy.nordland.no -herøy.nordland.no hitra.no hjartdal.no hjelmeland.no @@ -4585,96 +4642,95 @@ hole.no holmestrand.no holtalen.no holtålen.no +os.hordaland.no hornindal.no horten.no -hurdal.no -hurum.no -hvaler.no -hyllestad.no -hagebostad.no -hægebostad.no hoyanger.no høyanger.no hoylandet.no høylandet.no -ha.no -hå.no +hurdal.no +hurum.no +hvaler.no +hyllestad.no ibestad.no inderoy.no inderøy.no iveland.no +ivgu.no jevnaker.no -jondal.no jolster.no jølster.no -karasjok.no +jondal.no +kafjord.no +kåfjord.no karasjohka.no kárášjohka.no +karasjok.no karlsoy.no -galsa.no -gálsá.no karmoy.no karmøy.no kautokeino.no -guovdageaidnu.no -klepp.no klabu.no klæbu.no +klepp.no kongsberg.no kongsvinger.no +kraanghke.no +kråanghke.no kragero.no kragerø.no kristiansand.no kristiansund.no krodsherad.no krødsherad.no +kvæfjord.no +kvænangen.no +kvafjord.no kvalsund.no -rahkkeravju.no -ráhkkerávju.no kvam.no +kvanangen.no kvinesdal.no kvinnherad.no kviteseid.no kvitsoy.no kvitsøy.no -kvafjord.no -kvæfjord.no -giehtavuoatna.no -kvanangen.no -kvænangen.no -navuotna.no -návuotna.no -kafjord.no -kåfjord.no -gaivuotna.no -gáivuotna.no +laakesvuemie.no +lærdal.no +lahppi.no +láhppi.no +lardal.no larvik.no -lavangen.no lavagis.no 
-loabat.no -loabát.no +lavangen.no +leangaviika.no +leaŋgaviika.no lebesby.no -davvesiida.no leikanger.no leirfjord.no leka.no leksvik.no lenvik.no -leangaviika.no -leaŋgaviika.no +lerdal.no lesja.no levanger.no lier.no lierne.no lillehammer.no lillesand.no -lindesnes.no lindas.no lindås.no +lindesnes.no +loabat.no +loabát.no +lodingen.no +lødingen.no lom.no loppa.no -lahppi.no -láhppi.no +lorenskog.no +lørenskog.no +loten.no +løten.no lund.no lunner.no luroy.no @@ -4682,25 +4738,19 @@ lurøy.no luster.no lyngdal.no lyngen.no -ivgu.no -lardal.no -lerdal.no -lærdal.no -lodingen.no -lødingen.no -lorenskog.no -lørenskog.no -loten.no -løten.no +malatvuopmi.no +málatvuopmi.no +malselv.no +målselv.no malvik.no -masoy.no -måsøy.no -muosat.no -muosát.no mandal.no marker.no marnardal.no masfjorden.no +masoy.no +måsøy.no +matta-varjjat.no +mátta-várjjat.no meland.no meldal.no melhus.no @@ -4708,39 +4758,39 @@ meloy.no meløy.no meraker.no meråker.no -moareke.no -moåreke.no midsund.no midtre-gauldal.no +moareke.no +moåreke.no modalen.no modum.no molde.no +heroy.more-og-romsdal.no +sande.more-og-romsdal.no +herøy.møre-og-romsdal.no +sande.møre-og-romsdal.no moskenes.no moss.no mosvik.no -malselv.no -målselv.no -malatvuopmi.no -málatvuopmi.no +muosat.no +muosát.no +naamesjevuemie.no +nååmesjevuemie.no +nærøy.no namdalseid.no -aejrie.no namsos.no namsskogan.no -naamesjevuemie.no -nååmesjevuemie.no -laakesvuemie.no nannestad.no -narvik.no +naroy.no narviika.no +narvik.no naustdal.no +navuotna.no +návuotna.no nedre-eiker.no -nes.akershus.no -nes.buskerud.no nesna.no nesodden.no nesseby.no -unjarga.no -unjárga.no nesset.no nissedal.no nittedal.no @@ -4749,21 +4799,20 @@ nord-fron.no nord-odal.no norddal.no nordkapp.no -davvenjarga.no -davvenjárga.no +bo.nordland.no +bø.nordland.no +heroy.nordland.no +herøy.nordland.no nordre-land.no nordreisa.no -raisa.no -ráisa.no nore-og-uvdal.no notodden.no -naroy.no -nærøy.no notteroy.no nøtterøy.no odda.no oksnes.no øksnes.no +omasvuotna.no 
oppdal.no oppegard.no oppegård.no @@ -4774,11 +4823,11 @@ orskog.no ørskog.no orsta.no ørsta.no -os.hedmark.no -os.hordaland.no osen.no osteroy.no osterøy.no +valer.ostfold.no +våler.østfold.no ostre-toten.no østre-toten.no overhalla.no @@ -4794,11 +4843,18 @@ porsanger.no porsangu.no porsáŋgu.no porsgrunn.no +rade.no +råde.no radoy.no radøy.no +rælingen.no +rahkkeravju.no +ráhkkerávju.no +raisa.no +ráisa.no rakkestad.no +ralingen.no rana.no -ruovat.no randaberg.no rauma.no rendalen.no @@ -4809,16 +4865,14 @@ rindal.no ringebu.no ringerike.no ringsaker.no -rissa.no risor.no risør.no +rissa.no roan.no -rollag.no -rygge.no -ralingen.no -rælingen.no rodoy.no rødøy.no +rollag.no +romsa.no romskog.no rømskog.no roros.no @@ -4829,18 +4883,14 @@ royken.no røyken.no royrvik.no røyrvik.no -rade.no -råde.no +ruovat.no +rygge.no salangen.no -siellak.no -saltdal.no salat.no -sálát.no sálat.no +sálát.no +saltdal.no samnanger.no -sande.more-og-romsdal.no -sande.møre-og-romsdal.no -sande.vestfold.no sandefjord.no sandnes.no sandoy.no @@ -4852,39 +4902,60 @@ sel.no selbu.no selje.no seljord.no +siellak.no sigdal.no siljan.no sirdal.no +skanit.no +skánit.no +skanland.no +skånland.no skaun.no skedsmo.no ski.no skien.no -skiptvet.no -skjervoy.no -skjervøy.no skierva.no skiervá.no +skiptvet.no skjak.no skjåk.no +skjervoy.no +skjervøy.no skodje.no -skanland.no -skånland.no -skanit.no -skánit.no smola.no smøla.no -snillfjord.no +snaase.no +snåase.no snasa.no snåsa.no +snillfjord.no snoasa.no -snaase.no -snåase.no sogndal.no +sogne.no +søgne.no sokndal.no sola.no solund.no +somna.no +sømna.no +sondre-land.no +søndre-land.no songdalen.no +sor-aurdal.no +sør-aurdal.no +sor-fron.no +sør-fron.no +sor-odal.no +sør-odal.no +sor-varanger.no +sør-varanger.no +sorfold.no +sørfold.no +sorreisa.no +sørreisa.no sortland.no +sorum.no +sørum.no spydeberg.no stange.no stavanger.no @@ -4897,7 +4968,6 @@ stor-elvdal.no stord.no stordal.no storfjord.no -omasvuotna.no strand.no stranda.no stryn.no @@ 
-4909,72 +4979,55 @@ surnadal.no sveio.no svelvik.no sykkylven.no -sogne.no -søgne.no -somna.no -sømna.no -sondre-land.no -søndre-land.no -sor-aurdal.no -sør-aurdal.no -sor-fron.no -sør-fron.no -sor-odal.no -sør-odal.no -sor-varanger.no -sør-varanger.no -matta-varjjat.no -mátta-várjjat.no -sorfold.no -sørfold.no -sorreisa.no -sørreisa.no -sorum.no -sørum.no tana.no -deatnu.no +bo.telemark.no +bø.telemark.no time.no tingvoll.no tinn.no tjeldsund.no -dielddanuorri.no tjome.no tjøme.no tokke.no tolga.no +tonsberg.no +tønsberg.no torsken.no +træna.no +trana.no tranoy.no tranøy.no +troandin.no +trogstad.no +trøgstad.no +tromsa.no tromso.no tromsø.no -tromsa.no -romsa.no trondheim.no -troandin.no trysil.no -trana.no -træna.no -trogstad.no -trøgstad.no tvedestrand.no tydal.no tynset.no tysfjord.no -divtasvuodna.no -divttasvuotna.no tysnes.no -tysvar.no tysvær.no -tonsberg.no -tønsberg.no +tysvar.no ullensaker.no ullensvang.no ulvik.no +unjarga.no +unjárga.no utsira.no +vaapste.no vadso.no vadsø.no -cahcesuolo.no -čáhcesuolo.no +værøy.no +vaga.no +vågå.no +vagan.no +vågan.no +vagsoy.no +vågsøy.no vaksdal.no valle.no vang.no @@ -4983,8 +5036,8 @@ vardo.no vardø.no varggat.no várggát.no +varoy.no vefsn.no -vaapste.no vega.no vegarshei.no vegårshei.no @@ -4992,6 +5045,7 @@ vennesla.no verdal.no verran.no vestby.no +sande.vestfold.no vestnes.no vestre-slidre.no vestre-toten.no @@ -5001,143 +5055,153 @@ vevelstad.no vik.no vikna.no vindafjord.no +voagat.no volda.no voss.no -varoy.no -værøy.no -vagan.no -vågan.no -voagat.no -vagsoy.no -vågsøy.no -vaga.no -vågå.no -valer.ostfold.no -våler.østfold.no -valer.hedmark.no -våler.hedmark.no // np : http://www.mos.com.np/register.html *.np // nr : http://cenpac.net.nr/dns/index.html -// Confirmed by registry 2008-06-17 +// Submitted by registry nr biz.nr -info.nr -gov.nr +com.nr edu.nr -org.nr +gov.nr +info.nr net.nr -com.nr +org.nr -// nu : http://en.wikipedia.org/wiki/.nu +// nu : https://www.iana.org/domains/root/db/nu.html nu -// nz 
: http://en.wikipedia.org/wiki/.nz -*.nz - -// om : http://en.wikipedia.org/wiki/.om -*.om -!mediaphone.om -!nawrastelecom.om -!nawras.om -!omanmobile.om -!omanpost.om -!omantel.om -!rakpetroleum.om -!siemens.om -!songfest.om -!statecouncil.om - -// org : http://en.wikipedia.org/wiki/.org +// nz : https://www.iana.org/domains/root/db/nz.html +// Submitted by registry +nz +ac.nz +co.nz +cri.nz +geek.nz +gen.nz +govt.nz +health.nz +iwi.nz +kiwi.nz +maori.nz +māori.nz +mil.nz +net.nz +org.nz +parliament.nz +school.nz + +// om : https://www.iana.org/domains/root/db/om.html +om +co.om +com.om +edu.om +gov.om +med.om +museum.om +net.om +org.om +pro.om + +// onion : https://tools.ietf.org/html/rfc7686 +onion + +// org : https://www.iana.org/domains/root/db/org.html org // pa : http://www.nic.pa/ // Some additional second level "domains" resolve directly as hostnames, such as // pannet.pa, so we add a rule for "pa". pa +abo.pa ac.pa -gob.pa com.pa -org.pa -sld.pa edu.pa -net.pa +gob.pa ing.pa -abo.pa med.pa +net.pa nom.pa +org.pa +sld.pa // pe : https://www.nic.pe/InformeFinalComision.pdf pe +com.pe edu.pe gob.pe -nom.pe mil.pe -org.pe -com.pe net.pe +nom.pe +org.pe // pf : http://www.gobin.info/domainname/formulaire-pf.pdf pf com.pf -org.pf edu.pf +org.pf -// pg : http://en.wikipedia.org/wiki/.pg +// pg : https://www.iana.org/domains/root/db/pg.html *.pg -// ph : http://www.domains.ph/FAQ2.asp -// Submitted by registry 2008-06-13 +// ph : https://www.iana.org/domains/root/db/ph.html +// Submitted by registry ph com.ph -net.ph -org.ph -gov.ph edu.ph -ngo.ph -mil.ph +gov.ph i.ph +mil.ph +net.ph +ngo.ph +org.ph -// pk : http://pk5.pknic.net.pk/pk5/msgNamepk.PK +// pk : https://pk5.pknic.net.pk/pk5/msgNamepk.PK +// Contact Email: staff@pknic.net.pk pk +ac.pk +biz.pk com.pk -net.pk edu.pk -org.pk fam.pk -biz.pk -web.pk -gov.pk +gkp.pk gob.pk +gog.pk gok.pk -gon.pk gop.pk gos.pk -info.pk +gov.pk +net.pk +org.pk +web.pk -// pl : http://www.dns.pl/english/ +// pl : 
https://www.dns.pl/en/ +// Confirmed by registry 2024-11-18 pl -// NASK functional domains (nask.pl / dns.pl) : http://www.dns.pl/english/dns-funk.html -aid.pl +com.pl +net.pl +org.pl +// pl functional domains : https://www.dns.pl/en/list_of_functional_domain_names agro.pl +aid.pl atm.pl auto.pl biz.pl -com.pl edu.pl gmina.pl gsm.pl info.pl mail.pl -miasta.pl media.pl +miasta.pl mil.pl -net.pl nieruchomosci.pl nom.pl -org.pl pc.pl powiat.pl priv.pl @@ -5153,26 +5217,65 @@ tm.pl tourism.pl travel.pl turystyka.pl -// ICM functional domains (icm.edu.pl) -6bone.pl -art.pl -mbone.pl -// Government domains (administred by ippt.gov.pl) +// Government domains : https://www.dns.pl/informacje_o_rejestracji_domen_gov_pl +// In accordance with the .gov.pl Domain Name Regulations : https://www.dns.pl/regulamin_gov_pl gov.pl -uw.gov.pl -um.gov.pl -ug.gov.pl -upow.gov.pl -starostwo.gov.pl +ap.gov.pl +griw.gov.pl +ic.gov.pl +is.gov.pl +kmpsp.gov.pl +konsulat.gov.pl +kppsp.gov.pl +kwp.gov.pl +kwpsp.gov.pl +mup.gov.pl +mw.gov.pl +oia.gov.pl +oirm.gov.pl +oke.gov.pl +oow.gov.pl +oschr.gov.pl +oum.gov.pl +pa.gov.pl +pinb.gov.pl +piw.gov.pl +po.gov.pl +pr.gov.pl +psp.gov.pl +psse.gov.pl +pup.gov.pl +rzgw.gov.pl +sa.gov.pl +sdn.gov.pl +sko.gov.pl so.gov.pl sr.gov.pl -po.gov.pl -pa.gov.pl -// other functional domains -ngo.pl -irc.pl -usenet.pl -// NASK geographical domains : http://www.dns.pl/english/dns-regiony.html +starostwo.gov.pl +ug.gov.pl +ugim.gov.pl +um.gov.pl +umig.gov.pl +upow.gov.pl +uppo.gov.pl +us.gov.pl +uw.gov.pl +uzs.gov.pl +wif.gov.pl +wiih.gov.pl +winb.gov.pl +wios.gov.pl +witd.gov.pl +wiw.gov.pl +wkz.gov.pl +wsa.gov.pl +wskr.gov.pl +wsse.gov.pl +wuoz.gov.pl +wzmiuw.gov.pl +zp.gov.pl +zpisdn.gov.pl +// pl regional domains : https://www.dns.pl/en/list_of_regional_domain_names augustow.pl babia-gora.pl bedzin.pl @@ -5199,11 +5302,11 @@ jaworzno.pl jelenia-gora.pl jgora.pl kalisz.pl -kazimierz-dolny.pl karpacz.pl kartuzy.pl kaszuby.pl katowice.pl +kazimierz-dolny.pl 
kepno.pl ketrzyn.pl klodzko.pl @@ -5246,8 +5349,8 @@ pisz.pl podhale.pl podlasie.pl polkowice.pl -pomorze.pl pomorskie.pl +pomorze.pl prochowice.pl pruszkow.pl przeworsk.pl @@ -5258,12 +5361,11 @@ rybnik.pl rzeszow.pl sanok.pl sejny.pl -siedlce.pl +skoczow.pl slask.pl slupsk.pl sosnowiec.pl stalowa-wola.pl -skoczow.pl starachowice.pl stargard.pl suwalki.pl @@ -5293,93 +5395,81 @@ zagan.pl zarow.pl zgora.pl zgorzelec.pl -// TASK geographical domains (www.task.gda.pl/uslugi/dns) -gda.pl -gdansk.pl -gdynia.pl -med.pl -sopot.pl -// other geographical domains -gliwice.pl -krakow.pl -poznan.pl -wroc.pl -zakopane.pl -// pm : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf +// pm : https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf pm -// pn : http://www.government.pn/PnRegistry/policies.htm +// pn : https://www.iana.org/domains/root/db/pn.html pn -gov.pn co.pn -org.pn edu.pn +gov.pn net.pn +org.pn -// post : http://en.wikipedia.org/wiki/.post +// post : https://www.iana.org/domains/root/db/post.html post // pr : http://www.nic.pr/index.asp?f=1 pr +biz.pr com.pr -net.pr -org.pr -gov.pr edu.pr -isla.pr -pro.pr -biz.pr +gov.pr info.pr +isla.pr name.pr -// these aren't mentioned on nic.pr, but on http://en.wikipedia.org/wiki/.pr +net.pr +org.pr +pro.pr +// these aren't mentioned on nic.pr, but on https://www.iana.org/domains/root/db/pr.html +ac.pr est.pr prof.pr -ac.pr -// pro : http://www.nic.pro/support_faq.htm +// pro : http://registry.pro/get-pro pro +aaa.pro aca.pro +acct.pro +avocat.pro bar.pro cpa.pro +eng.pro jur.pro law.pro med.pro -eng.pro +recht.pro -// ps : http://en.wikipedia.org/wiki/.ps +// ps : https://www.iana.org/domains/root/db/ps.html // http://www.nic.ps/registration/policy.html#reg ps +com.ps edu.ps gov.ps -sec.ps -plo.ps -com.ps -org.ps net.ps +org.ps +plo.ps +sec.ps -// pt : http://online.dns.pt/dns/start_dns +// pt : https://www.dns.pt/en/domain/pt-terms-and-conditions-registration-rules/ pt -net.pt 
-gov.pt -org.pt +com.pt edu.pt +gov.pt int.pt -publ.pt -com.pt +net.pt nome.pt +org.pt +publ.pt -// pw : http://en.wikipedia.org/wiki/.pw +// pw : https://www.iana.org/domains/root/db/pw.html +// Confirmed by registry in private correspondence with @dnsguru 2024-12-09 pw -co.pw -ne.pw -or.pw -ed.pw -go.pw -belau.pw - -// py : http://www.nic.py/pautas.html#seccion_9 -// Confirmed by registry 2012-10-03 +gov.pw + +// py : https://www.iana.org/domains/root/db/py.html +// Submitted by registry py com.py coop.py @@ -5400,201 +5490,63 @@ net.qa org.qa sch.qa -// re : http://www.afnic.re/obtenir/chartes/nommage-re/annexe-descriptifs +// re : https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf +// Confirmed by registry 2024-11-18 re -com.re +// Closed for registration on 2013-03-15 but domains are still maintained asso.re -nom.re +com.re // ro : http://www.rotld.ro/ ro +arts.ro com.ro -org.ro -tm.ro -nt.ro -nom.ro +firm.ro info.ro +nom.ro +nt.ro +org.ro rec.ro -arts.ro -firm.ro store.ro +tm.ro www.ro -// rs : http://en.wikipedia.org/wiki/.rs +// rs : https://www.rnids.rs/en/domains/national-domains rs +ac.rs co.rs -org.rs edu.rs -ac.rs gov.rs in.rs +org.rs -// ru : http://www.cctld.ru/ru/docs/aktiv_8.php -// Industry domains +// ru : https://cctld.ru/files/pdf/docs/en/rules_ru-rf.pdf +// Submitted by George Georgievsky ru -ac.ru -com.ru -edu.ru -int.ru -net.ru -org.ru -pp.ru -// Geographical domains -adygeya.ru -altai.ru -amur.ru -arkhangelsk.ru -astrakhan.ru -bashkiria.ru -belgorod.ru -bir.ru -bryansk.ru -buryatia.ru -cbg.ru -chel.ru -chelyabinsk.ru -chita.ru -chukotka.ru -chuvashia.ru -dagestan.ru -dudinka.ru -e-burg.ru -grozny.ru -irkutsk.ru -ivanovo.ru -izhevsk.ru -jar.ru -joshkar-ola.ru -kalmykia.ru -kaluga.ru -kamchatka.ru -karelia.ru -kazan.ru -kchr.ru -kemerovo.ru -khabarovsk.ru -khakassia.ru -khv.ru -kirov.ru -koenig.ru -komi.ru -kostroma.ru -krasnoyarsk.ru -kuban.ru -kurgan.ru -kursk.ru -lipetsk.ru -magadan.ru -mari.ru -mari-el.ru 
-marine.ru -mordovia.ru -mosreg.ru -msk.ru -murmansk.ru -nalchik.ru -nnov.ru -nov.ru -novosibirsk.ru -nsk.ru -omsk.ru -orenburg.ru -oryol.ru -palana.ru -penza.ru -perm.ru -pskov.ru -ptz.ru -rnd.ru -ryazan.ru -sakhalin.ru -samara.ru -saratov.ru -simbirsk.ru -smolensk.ru -spb.ru -stavropol.ru -stv.ru -surgut.ru -tambov.ru -tatarstan.ru -tom.ru -tomsk.ru -tsaritsyn.ru -tsk.ru -tula.ru -tuva.ru -tver.ru -tyumen.ru -udm.ru -udmurtia.ru -ulan-ude.ru -vladikavkaz.ru -vladimir.ru -vladivostok.ru -volgograd.ru -vologda.ru -voronezh.ru -vrn.ru -vyatka.ru -yakutia.ru -yamal.ru -yaroslavl.ru -yekaterinburg.ru -yuzhno-sakhalinsk.ru -// More geographical domains -amursk.ru -baikal.ru -cmw.ru -fareast.ru -jamal.ru -kms.ru -k-uralsk.ru -kustanai.ru -kuzbass.ru -magnitka.ru -mytis.ru -nakhodka.ru -nkz.ru -norilsk.ru -oskol.ru -pyatigorsk.ru -rubtsovsk.ru -snz.ru -syzran.ru -vdonsk.ru -zgrad.ru -// State domains -gov.ru -mil.ru -// Technical domains -test.ru -// rw : http://www.nic.rw/cgi-bin/policy.pl +// rw : https://www.iana.org/domains/root/db/rw.html rw -gov.rw -net.rw -edu.rw ac.rw -com.rw co.rw -int.rw +coop.rw +gov.rw mil.rw -gouv.rw +net.rw +org.rw // sa : http://www.nic.net.sa/ sa com.sa -net.sa -org.sa +edu.sa gov.sa med.sa +net.sa +org.sa pub.sa -edu.sa sch.sa // sb : http://www.sbnic.net.sb/ -// Submitted by registry 2008-06-08 +// Submitted by registry sb com.sb edu.sb @@ -5605,25 +5557,26 @@ org.sb // sc : http://www.nic.sc/ sc com.sc +edu.sc gov.sc net.sc org.sc -edu.sc -// sd : http://www.isoc.sd/sudanic.isoc.sd/billing_pricing.htm -// Submitted by registry 2008-06-17 +// sd : https://www.iana.org/domains/root/db/sd.html +// Submitted by registry sd com.sd -net.sd -org.sd edu.sd -med.sd -tv.sd gov.sd info.sd +med.sd +net.sd +org.sd +tv.sd -// se : http://en.wikipedia.org/wiki/.se -// Submitted by registry 2008-06-24 +// se : https://www.iana.org/domains/root/db/se.html +// https://data.internetstiftelsen.se/barred_domains_list.txt -> Second level domains & 
Sub-domains +// Confirmed by Registry Services 2024-11-20 se a.se ac.se @@ -5657,7 +5610,6 @@ pp.se press.se r.se s.se -sshn.se t.se tm.se u.se @@ -5666,46 +5618,46 @@ x.se y.se z.se -// sg : http://www.nic.net.sg/page/registration-policies-procedures-and-guidelines +// sg : https://www.sgnic.sg/domain-registration/sg-categories-rules +// Confirmed by registry 2024-11-19 sg com.sg +edu.sg +gov.sg net.sg org.sg -gov.sg -edu.sg -per.sg -// sh : http://www.nic.sh/registrar.html +// sh : http://nic.sh/rules.htm sh com.sh -net.sh gov.sh -org.sh mil.sh +net.sh +org.sh -// si : http://en.wikipedia.org/wiki/.si +// si : https://www.iana.org/domains/root/db/si.html si // sj : No registrations at this time. -// Submitted by registry 2008-06-16 +// Submitted by registry +sj -// sk : http://en.wikipedia.org/wiki/.sk -// list of 2nd level domains ? +// sk : https://www.iana.org/domains/root/db/sk.html sk // sl : http://www.nic.sl -// Submitted by registry 2008-06-12 +// Submitted by registry sl com.sl -net.sl edu.sl gov.sl +net.sl org.sl -// sm : http://en.wikipedia.org/wiki/.sm +// sm : https://www.iana.org/domains/root/db/sm.html sm -// sn : http://en.wikipedia.org/wiki/.sn +// sn : https://www.iana.org/domains/root/db/sn.html sn art.sn com.sn @@ -5715,15 +5667,31 @@ org.sn perso.sn univ.sn -// so : http://www.soregistry.com/ +// so : http://sonic.so/policies/ so com.so +edu.so +gov.so +me.so net.so org.so -// sr : http://en.wikipedia.org/wiki/.sr +// sr : https://www.iana.org/domains/root/db/sr.html sr +// ss : https://registry.nic.ss/ +// Submitted by registry +ss +biz.ss +co.ss +com.ss +edu.ss +gov.ss +me.ss +net.ss +org.ss +sch.ss + // st : http://www.nic.st/html/policyrules/ st co.st @@ -5731,7 +5699,6 @@ com.st consulado.st edu.st embaixada.st -gov.st mil.st net.st org.st @@ -5739,53 +5706,57 @@ principe.st saotome.st store.st -// su : http://en.wikipedia.org/wiki/.su +// su : https://www.iana.org/domains/root/db/su.html su -// sv : http://www.svnet.org.sv/svpolicy.html 
-*.sv +// sv : https://www.iana.org/domains/root/db/sv.html +sv +com.sv +edu.sv +gob.sv +org.sv +red.sv -// sx : http://en.wikipedia.org/wiki/.sx -// Confirmed by registry 2012-05-31 +// sx : https://www.iana.org/domains/root/db/sx.html +// Submitted by registry sx gov.sx -// sy : http://en.wikipedia.org/wiki/.sy -// see also: http://www.gobin.info/domainname/sy.doc +// sy : https://www.iana.org/domains/root/db/sy.html sy +com.sy edu.sy gov.sy -net.sy mil.sy -com.sy +net.sy org.sy -// sz : http://en.wikipedia.org/wiki/.sz +// sz : https://www.iana.org/domains/root/db/sz.html // http://www.sispa.org.sz/ sz -co.sz ac.sz +co.sz org.sz -// tc : http://en.wikipedia.org/wiki/.tc +// tc : https://www.iana.org/domains/root/db/tc.html tc -// td : http://en.wikipedia.org/wiki/.td +// td : https://www.iana.org/domains/root/db/td.html td -// tel: http://en.wikipedia.org/wiki/.tel +// tel : https://www.iana.org/domains/root/db/tel.html // http://www.telnic.org/ tel -// tf : http://en.wikipedia.org/wiki/.tf +// tf : https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf tf -// tg : http://en.wikipedia.org/wiki/.tg +// tg : https://www.iana.org/domains/root/db/tg.html // http://www.nic.tg/ tg -// th : http://en.wikipedia.org/wiki/.th -// Submitted by registry 2008-06-17 +// th : https://www.iana.org/domains/root/db/th.html +// Submitted by registry th ac.th co.th @@ -5813,111 +5784,120 @@ org.tj test.tj web.tj -// tk : http://en.wikipedia.org/wiki/.tk +// tk : https://www.iana.org/domains/root/db/tk.html tk -// tl : http://en.wikipedia.org/wiki/.tl +// tl : https://www.iana.org/domains/root/db/tl.html tl gov.tl -// tm : http://www.nic.tm/local.html +// tm : https://www.nic.tm/local.html +// Confirmed by registry 2024-11-19 tm -com.tm co.tm -org.tm -net.tm -nom.tm +com.tm +edu.tm gov.tm mil.tm -edu.tm +net.tm +nom.tm +org.tm -// tn : http://en.wikipedia.org/wiki/.tn -// http://whois.ati.tn/ +// tn : http://www.registre.tn/fr/ +// https://whois.ati.tn/ tn 
com.tn ens.tn fin.tn gov.tn ind.tn +info.tn intl.tn +mincom.tn nat.tn net.tn org.tn -info.tn perso.tn tourism.tn -edunet.tn -rnrt.tn -rns.tn -rnu.tn -mincom.tn -agrinet.tn -defense.tn -turen.tn -// to : http://en.wikipedia.org/wiki/.to -// Submitted by registry 2008-06-17 +// to : https://www.iana.org/domains/root/db/to.html +// Submitted by registry to com.to +edu.to gov.to +mil.to net.to org.to -edu.to -mil.to -// tr : http://en.wikipedia.org/wiki/.tr -*.tr -!nic.tr -// Used by government in the TRNC -// http://en.wikipedia.org/wiki/.nc.tr +// tr : https://nic.tr/ +// https://nic.tr/forms/eng/policies.pdf +// https://nic.tr/index.php?USRACTN=PRICELST +tr +av.tr +bbs.tr +bel.tr +biz.tr +com.tr +dr.tr +edu.tr +gen.tr +gov.tr +info.tr +k12.tr +kep.tr +mil.tr +name.tr +net.tr +org.tr +pol.tr +tel.tr +tsk.tr +tv.tr +web.tr +// Used by Northern Cyprus +nc.tr +// Used by government agencies of Northern Cyprus gov.nc.tr -// travel : http://en.wikipedia.org/wiki/.travel -travel - -// tt : http://www.nic.tt/ +// tt : https://www.nic.tt/ +// Confirmed by registry 2024-11-19 tt +biz.tt co.tt com.tt -org.tt -net.tt -biz.tt +edu.tt +gov.tt info.tt -pro.tt -int.tt -coop.tt -jobs.tt -mobi.tt -travel.tt -museum.tt -aero.tt +mil.tt name.tt -gov.tt -edu.tt +net.tt +org.tt +pro.tt -// tv : http://en.wikipedia.org/wiki/.tv +// tv : https://www.iana.org/domains/root/db/tv.html // Not listing any 2LDs as reserved since none seem to exist in practice, // Wikipedia notwithstanding. 
tv -// tw : http://en.wikipedia.org/wiki/.tw +// tw : https://www.iana.org/domains/root/db/tw.html +// https://twnic.tw/dnservice_catag.php +// Confirmed by registry 2024-11-26 tw +club.tw +com.tw +ebiz.tw edu.tw +game.tw gov.tw +idv.tw mil.tw -com.tw net.tw org.tw -idv.tw -game.tw -ebiz.tw -club.tw -網路.tw -組織.tw -商業.tw // tz : http://www.tznic.or.tz/index.php/domains -// Confirmed by registry 2013-01-22 +// Submitted by registry +tz ac.tz co.tz go.tz @@ -5932,7 +5912,7 @@ sc.tz tv.tz // ua : https://hostmaster.ua/policy/?ua -// Submitted by registry 2012-04-27 +// Submitted by registry ua // ua 2LD com.ua @@ -5957,7 +5937,6 @@ cv.ua dn.ua dnepropetrovsk.ua dnipropetrovsk.ua -dominic.ua donetsk.ua dp.ua if.ua @@ -5972,6 +5951,7 @@ kiev.ua kirovograd.ua km.ua kr.ua +kropyvnytskyi.ua krym.ua ks.ua kv.ua @@ -5979,6 +5959,7 @@ kyiv.ua lg.ua lt.ua lugansk.ua +luhansk.ua lutsk.ua lv.ua lviv.ua @@ -6002,11 +5983,13 @@ te.ua ternopil.ua uz.ua uzhgorod.ua +uzhhorod.ua vinnica.ua vinnytsia.ua vn.ua volyn.ua yalta.ua +zakarpattia.ua zaporizhzhe.ua zaporizhzhia.ua zhitomir.ua @@ -6014,44 +5997,45 @@ zhytomyr.ua zp.ua zt.ua -// Private registries in .ua -co.ua -pp.ua - // ug : https://www.registry.co.ug/ +// https://www.registry.co.ug, https://whois.co.ug +// Confirmed by registry 2025-01-20 ug -co.ug -or.ug ac.ug -sc.ug +co.ug +com.ug +edu.ug go.ug +gov.ug +mil.ug ne.ug -com.ug +or.ug org.ug - -// uk : http://en.wikipedia.org/wiki/.uk -// Submitted by registry 2012-10-02 -// and tweaked by us pending further consultation. 
-*.uk +sc.ug +us.ug + +// uk : https://www.iana.org/domains/root/db/uk.html +// Submitted by registry +uk +ac.uk +co.uk +gov.uk +ltd.uk +me.uk +net.uk +nhs.uk +org.uk +plc.uk +police.uk *.sch.uk -!bl.uk -!british-library.uk -!jet.uk -!mod.uk -!national-library-scotland.uk -!nel.uk -!nic.uk -!nls.uk -!parliament.uk - -// us : http://en.wikipedia.org/wiki/.us + +// us : https://www.iana.org/domains/root/db/us.html +// Confirmed via the .us zone file by William Harrison 2024-12-10 us dni.us -fed.us isa.us -kids.us nsn.us -// us geographic names +// Geographic Names ak.us al.us ar.us @@ -6100,9 +6084,9 @@ sd.us tn.us tx.us ut.us +va.us vi.us vt.us -va.us wa.us wi.us wv.us @@ -6122,11 +6106,10 @@ k12.ca.us k12.co.us k12.ct.us k12.dc.us -k12.de.us k12.fl.us k12.ga.us k12.gu.us -// k12.hi.us Hawaii has a state-wide DOE login: bug 614565 +// k12.hi.us - Bug 614565 - Hawaii has a state-wide DOE login k12.ia.us k12.id.us k12.il.us @@ -6143,7 +6126,7 @@ k12.mo.us k12.ms.us k12.mt.us k12.nc.us -k12.nd.us +// k12.nd.us - Bug 1028347 - Removed at request of Travis Rosso k12.ne.us k12.nh.us k12.nj.us @@ -6155,29 +6138,36 @@ k12.ok.us k12.or.us k12.pa.us k12.pr.us -k12.ri.us +// k12.ri.us - Removed at request of Kim Cournoyer k12.sc.us -k12.sd.us +// k12.sd.us - Bug 934131 - Removed at request of James Booze k12.tn.us k12.tx.us k12.ut.us +k12.va.us k12.vi.us k12.vt.us -k12.va.us k12.wa.us k12.wi.us -k12.wv.us -k12.wy.us - +// k12.wv.us - Bug 947705 - Removed at request of Verne Britton cc.ak.us +lib.ak.us cc.al.us +lib.al.us cc.ar.us +lib.ar.us cc.as.us +lib.as.us cc.az.us +lib.az.us cc.ca.us +lib.ca.us cc.co.us +lib.co.us cc.ct.us +lib.ct.us cc.dc.us +lib.dc.us cc.de.us cc.fl.us cc.ga.us @@ -6217,24 +6207,15 @@ cc.sd.us cc.tn.us cc.tx.us cc.ut.us +cc.va.us cc.vi.us cc.vt.us -cc.va.us cc.wa.us cc.wi.us cc.wv.us cc.wy.us - -lib.ak.us -lib.al.us -lib.ar.us -lib.as.us -lib.az.us -lib.ca.us -lib.co.us -lib.ct.us -lib.dc.us -lib.de.us +k12.wy.us +// lib.de.us - Issue #243 - Moved to 
Private section at request of Ed Moore lib.fl.us lib.ga.us lib.gu.us @@ -6273,21 +6254,31 @@ lib.sd.us lib.tn.us lib.tx.us lib.ut.us +lib.va.us lib.vi.us lib.vt.us -lib.va.us lib.wa.us lib.wi.us -lib.wv.us +// lib.wv.us - Bug 941670 - Removed at request of Larry W Arnold lib.wy.us - -// k12.ma.us contains school districts in Massachusetts. The 4LDs are -// managed indepedently except for private (PVT), charter (CHTR) and -// parochial (PAROCH) schools. Those are delegated dorectly to the -// 5LD operators. -pvt.k12.ma.us +// k12.ma.us contains school districts in Massachusetts. The 4LDs are +// managed independently except for private (PVT), charter (CHTR) and +// parochial (PAROCH) schools. Those are delegated directly to the +// 5LD operators. chtr.k12.ma.us paroch.k12.ma.us +pvt.k12.ma.us +// Merit Network, Inc. maintains the registry for =~ /(k12|cc|lib).mi.us/ and the following +// see also: https://domreg.merit.edu : domreg@merit.edu +// see also: whois -h whois.domreg.merit.edu help +ann-arbor.mi.us +cog.mi.us +dst.mi.us +eaton.mi.us +gen.mi.us +mus.mi.us +tec.mi.us +washtenaw.mi.us // uy : http://www.nic.org.uy/ uy @@ -6305,40 +6296,50 @@ com.uz net.uz org.uz -// va : http://en.wikipedia.org/wiki/.va +// va : https://www.iana.org/domains/root/db/va.html va -// vc : http://en.wikipedia.org/wiki/.vc -// Submitted by registry 2008-06-13 +// vc : https://www.iana.org/domains/root/db/vc.html +// Submitted by registry vc com.vc -net.vc -org.vc +edu.vc gov.vc mil.vc -edu.vc +net.vc +org.vc // ve : https://registro.nic.ve/ -// Confirmed by registry 2012-10-04 +// Submitted by registry nic@nic.ve and nicve@conatel.gob.ve ve +arts.ve +bib.ve co.ve com.ve e12.ve edu.ve +emprende.ve +firm.ve +gob.ve gov.ve info.ve +int.ve mil.ve net.ve +nom.ve org.ve +rar.ve +rec.ve +store.ve +tec.ve web.ve -// vg : http://en.wikipedia.org/wiki/.vg +// vg : https://www.iana.org/domains/root/db/vg.html +// Confirmed by registry 2025-01-10 vg +edu.vg -// vi : 
http://www.nic.vi/newdomainform.htm -// http://www.nic.vi/Domain_Rules/body_domain_rules.html indicates some other -// TLDs are "reserved", such as edu.vi and gov.vi, but doesn't actually say they -// are available for registration (which they do not seem to be). +// vi : https://www.iana.org/domains/root/db/vi.html vi co.vi com.vi @@ -6346,322 +6347,6468 @@ k12.vi net.vi org.vi -// vn : https://www.dot.vn/vnnic/vnnic/domainregistration.jsp +// vn : https://www.vnnic.vn/en/domain/cctld-vn +// https://vnnic.vn/sites/default/files/tailieu/vn.cctld.domains.txt vn +ac.vn +ai.vn +biz.vn com.vn -net.vn -org.vn edu.vn gov.vn -int.vn -ac.vn -biz.vn +health.vn +id.vn info.vn +int.vn +io.vn name.vn +net.vn +org.vn pro.vn -health.vn -// vu : http://en.wikipedia.org/wiki/.vu -// list of 2nd level tlds ? +// vn geographical names +angiang.vn +bacgiang.vn +backan.vn +baclieu.vn +bacninh.vn +baria-vungtau.vn +bentre.vn +binhdinh.vn +binhduong.vn +binhphuoc.vn +binhthuan.vn +camau.vn +cantho.vn +caobang.vn +daklak.vn +daknong.vn +danang.vn +dienbien.vn +dongnai.vn +dongthap.vn +gialai.vn +hagiang.vn +haiduong.vn +haiphong.vn +hanam.vn +hanoi.vn +hatinh.vn +haugiang.vn +hoabinh.vn +hungyen.vn +khanhhoa.vn +kiengiang.vn +kontum.vn +laichau.vn +lamdong.vn +langson.vn +laocai.vn +longan.vn +namdinh.vn +nghean.vn +ninhbinh.vn +ninhthuan.vn +phutho.vn +phuyen.vn +quangbinh.vn +quangnam.vn +quangngai.vn +quangninh.vn +quangtri.vn +soctrang.vn +sonla.vn +tayninh.vn +thaibinh.vn +thainguyen.vn +thanhhoa.vn +thanhphohochiminh.vn +thuathienhue.vn +tiengiang.vn +travinh.vn +tuyenquang.vn +vinhlong.vn +vinhphuc.vn +yenbai.vn + +// vu : https://www.iana.org/domains/root/db/vu.html +// http://www.vunic.vu/ vu +com.vu +edu.vu +net.vu +org.vu -// wf : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf +// wf : https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf wf -// ws : http://en.wikipedia.org/wiki/.ws +// ws : 
https://www.iana.org/domains/root/db/ws.html // http://samoanic.ws/index.dhtml ws com.ws +edu.ws +gov.ws net.ws org.ws -gov.ws -edu.ws -// yt : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf +// yt : https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf yt // IDN ccTLDs -// Please sort by ISO 3166 ccTLD, then punicode string -// when submitting patches and follow this format: -// ("" ) : -// [optional sponsoring org] -// +// When submitting patches, please maintain a sort by ISO 3166 ccTLD, then +// U-label, and follow this format: +// // A-Label ("", [, variant info]) : +// // [sponsoring org] +// U-Label -// xn--mgbaam7a8h ("Emerat" Arabic) : AE +// xn--mgbaam7a8h ("Emerat", Arabic) : AE // http://nic.ae/english/arabicdomain/rules.jsp امارات -// xn--54b7fta0cc ("Bangla" Bangla) : BD +// xn--y9a3aq ("hye", Armenian) : AM +// ISOC AM (operated by .am Registry) +հայ + +// xn--54b7fta0cc ("Bangla", Bangla) : BD বাংলা -// xn--fiqs8s ("China" Chinese-Han-Simplified <.Zhonggou>) : CN +// xn--90ae ("bg", Bulgarian) : BG +бг + +// xn--mgbcpq6gpa1a ("albahrain", Arabic) : BH +البحرين + +// xn--90ais ("bel", Belarusian/Russian Cyrillic) : BY +// Operated by .by registry +бел + +// xn--fiqs8s ("Zhongguo/China", Chinese, Simplified) : CN // CNNIC -// http://cnnic.cn/html/Dir/2005/10/11/3218.htm +// https://www.cnnic.cn/11/192/index.html 中国 -// xn--fiqz9s ("China" Chinese-Han-Traditional <.Zhonggou>) : CN +// xn--fiqz9s ("Zhongguo/China", Chinese, Traditional) : CN // CNNIC -// http://cnnic.cn/html/Dir/2005/10/11/3218.htm +// https://www.cnnic.com.cn/AU/MediaC/Announcement/201609/t20160905_54470.htm 中國 -// xn--lgbbat1ad8j ("Algeria / Al Jazair" Arabic) : DZ +// xn--lgbbat1ad8j ("Algeria/Al Jazair", Arabic) : DZ الجزائر -// xn--wgbh1c ("Egypt" Arabic .masr) : EG +// xn--wgbh1c ("Egypt/Masr", Arabic) : EG // http://www.dotmasr.eg/ مصر -// xn--node ("ge" Georgian (Mkhedruli)) : GE +// xn--e1a4c ("eu", Cyrillic) : EU +// 
https://eurid.eu +ею + +// xn--qxa6a ("eu", Greek) : EU +// https://eurid.eu +ευ + +// xn--mgbah1a3hjkrd ("Mauritania", Arabic) : MR +موريتانيا + +// xn--node ("ge", Georgian Mkhedruli) : GE გე -// xn--j6w193g ("Hong Kong" Chinese-Han) : HK -// https://www2.hkirc.hk/register/rules.jsp -香港 +// xn--qxam ("el", Greek) : GR +// Hellenic Ministry of Infrastructure, Transport, and Networks +ελ -// xn--h2brj9c ("Bharat" Devanagari) : IN +// xn--j6w193g ("Hong Kong", Chinese) : HK +// https://www.hkirc.hk +// Submitted by registry +// https://www.hkirc.hk/content.jsp?id=30#!/34 +香港 +個人.香港 +公司.香港 +政府.香港 +教育.香港 +組織.香港 +網絡.香港 + +// xn--2scrj9c ("Bharat", Kannada) : IN // India -भारत +ಭಾರತ -// xn--mgbbh1a71e ("Bharat" Arabic) : IN +// xn--3hcrj9c ("Bharat", Oriya) : IN // India -بھارت +ଭାରତ -// xn--fpcrj9c3d ("Bharat" Telugu) : IN +// xn--45br5cyl ("Bharatam", Assamese) : IN // India -భారత్ +ভাৰত -// xn--gecrj9c ("Bharat" Gujarati) : IN +// xn--h2breg3eve ("Bharatam", Sanskrit) : IN // India -ભારત +भारतम् -// xn--s9brj9c ("Bharat" Gurmukhi) : IN +// xn--h2brj9c8c ("Bharot", Santali) : IN // India -ਭਾਰਤ +भारोत -// xn--45brj9c ("Bharat" Bengali) : IN +// xn--mgbgu82a ("Bharat", Sindhi) : IN // India -ভারত +ڀارت -// xn--xkc2dl3a5ee0h ("India" Tamil) : IN +// xn--rvc1e0am3e ("Bharatam", Malayalam) : IN // India -இந்தியா +ഭാരതം -// xn--mgba3a4f16a ("Iran" Persian) : IR -ایران +// xn--h2brj9c ("Bharat", Devanagari) : IN +// India +भारत -// xn--mgba3a4fra ("Iran" Arabic) : IR -ايران +// xn--mgbbh1a ("Bharat", Kashmiri) : IN +// India +بارت -// xn--mgbayh7gpa ("al-Ordon" Arabic) : JO -// National Information Technology Center (NITC) +// xn--mgbbh1a71e ("Bharat", Arabic) : IN +// India +بھارت + +// xn--fpcrj9c3d ("Bharat", Telugu) : IN +// India +భారత్ + +// xn--gecrj9c ("Bharat", Gujarati) : IN +// India +ભારત + +// xn--s9brj9c ("Bharat", Gurmukhi) : IN +// India +ਭਾਰਤ + +// xn--45brj9c ("Bharat", Bengali) : IN +// India +ভারত + +// xn--xkc2dl3a5ee0h ("India", Tamil) : IN +// India 
+இந்தியா + +// xn--mgba3a4f16a ("Iran", Persian) : IR +ایران + +// xn--mgba3a4fra ("Iran", Arabic) : IR +ايران + +// xn--mgbtx2b ("Iraq", Arabic) : IQ +// Communications and Media Commission +عراق + +// xn--mgbayh7gpa ("al-Ordon", Arabic) : JO +// National Information Technology Center (NITC) // Royal Scientific Society, Al-Jubeiha الاردن -// xn--3e0b707e ("Republic of Korea" Hangul) : KR -한국 +// xn--3e0b707e ("Republic of Korea", Hangul) : KR +한국 + +// xn--80ao21a ("Kaz", Kazakh) : KZ +қаз + +// xn--q7ce6a ("Lao", Lao) : LA +ລາວ + +// xn--fzc2c9e2c ("Lanka", Sinhalese-Sinhala) : LK +// https://nic.lk +ලංකා + +// xn--xkc2al3hye2a ("Ilangai", Tamil) : LK +// https://nic.lk +இலங்கை + +// xn--mgbc0a9azcg ("Morocco/al-Maghrib", Arabic) : MA +المغرب + +// xn--d1alf ("mkd", Macedonian) : MK +// MARnet +мкд + +// xn--l1acc ("mon", Mongolian) : MN +мон + +// xn--mix891f ("Macao", Chinese, Traditional) : MO +// MONIC / HNET Asia (Registry Operator for .mo) +澳門 + +// xn--mix082f ("Macao", Chinese, Simplified) : MO +澳门 + +// xn--mgbx4cd0ab ("Malaysia", Malay) : MY +مليسيا + +// xn--mgb9awbf ("Oman", Arabic) : OM +عمان + +// xn--mgbai9azgqp6j ("Pakistan", Urdu/Arabic) : PK +پاکستان + +// xn--mgbai9a5eva00b ("Pakistan", Urdu/Arabic, variant) : PK +پاكستان + +// xn--ygbi2ammx ("Falasteen", Arabic) : PS +// The Palestinian National Internet Naming Authority (PNINA) +// http://www.pnina.ps +فلسطين + +// xn--90a3ac ("srb", Cyrillic) : RS +// https://www.rnids.rs/en/domains/national-domains +срб +ак.срб +обр.срб +од.срб +орг.срб +пр.срб +упр.срб + +// xn--p1ai ("rf", Russian-Cyrillic) : RU +// https://cctld.ru/files/pdf/docs/en/rules_ru-rf.pdf +// Submitted by George Georgievsky +рф + +// xn--wgbl6a ("Qatar", Arabic) : QA +// http://www.ict.gov.qa/ +قطر + +// xn--mgberp4a5d4ar ("AlSaudiah", Arabic) : SA +// http://www.nic.net.sa/ +السعودية + +// xn--mgberp4a5d4a87g ("AlSaudiah", Arabic, variant): SA +السعودیة + +// xn--mgbqly7c0a67fbc ("AlSaudiah", Arabic, variant) : SA +السعودیۃ + 
+// xn--mgbqly7cvafr ("AlSaudiah", Arabic, variant) : SA +السعوديه + +// xn--mgbpl2fh ("sudan", Arabic) : SD +// Operated by .sd registry +سودان + +// xn--yfro4i67o Singapore ("Singapore", Chinese) : SG +新加坡 + +// xn--clchc0ea0b2g2a9gcd ("Singapore", Tamil) : SG +சிங்கப்பூர் + +// xn--ogbpf8fl ("Syria", Arabic) : SY +سورية + +// xn--mgbtf8fl ("Syria", Arabic, variant) : SY +سوريا + +// xn--o3cw4h ("Thai", Thai) : TH +// http://www.thnic.co.th +ไทย +ทหาร.ไทย +ธุรกิจ.ไทย +เน็ต.ไทย +รัฐบาล.ไทย +ศึกษา.ไทย +องค์กร.ไทย + +// xn--pgbs0dh ("Tunisia", Arabic) : TN +// http://nic.tn +تونس + +// xn--kpry57d ("Taiwan", Chinese, Traditional) : TW +// https://twnic.tw/dnservice_catag.php +台灣 + +// xn--kprw13d ("Taiwan", Chinese, Simplified) : TW +// http://www.twnic.net/english/dn/dn_07a.htm +台湾 + +// xn--nnx388a ("Taiwan", Chinese, variant) : TW +臺灣 + +// xn--j1amh ("ukr", Cyrillic) : UA +укр + +// xn--mgb2ddes ("AlYemen", Arabic) : YE +اليمن + +// xxx : http://icmregistry.com +xxx + +// ye : http://www.y.net.ye/services/domain_name.htm +ye +com.ye +edu.ye +gov.ye +mil.ye +net.ye +org.ye + +// za : https://www.iana.org/domains/root/db/za.html +ac.za +agric.za +alt.za +co.za +edu.za +gov.za +grondar.za +law.za +mil.za +net.za +ngo.za +nic.za +nis.za +nom.za +org.za +school.za +tm.za +web.za + +// zm : https://zicta.zm/ +// Submitted by registry +zm +ac.zm +biz.zm +co.zm +com.zm +edu.zm +gov.zm +info.zm +mil.zm +net.zm +org.zm +sch.zm + +// zw : https://www.potraz.gov.zw/ +// Confirmed by registry 2017-01-25 +zw +ac.zw +co.zw +gov.zw +mil.zw +org.zw + +// newGTLDs + +// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2025-07-09T15:20:49Z +// This list is auto-generated, don't edit it manually. +// aaa : American Automobile Association, Inc. 
+// https://www.iana.org/domains/root/db/aaa.html +aaa + +// aarp : AARP +// https://www.iana.org/domains/root/db/aarp.html +aarp + +// abb : ABB Ltd +// https://www.iana.org/domains/root/db/abb.html +abb + +// abbott : Abbott Laboratories, Inc. +// https://www.iana.org/domains/root/db/abbott.html +abbott + +// abbvie : AbbVie Inc. +// https://www.iana.org/domains/root/db/abbvie.html +abbvie + +// abc : Disney Enterprises, Inc. +// https://www.iana.org/domains/root/db/abc.html +abc + +// able : Able Inc. +// https://www.iana.org/domains/root/db/able.html +able + +// abogado : Registry Services, LLC +// https://www.iana.org/domains/root/db/abogado.html +abogado + +// abudhabi : Abu Dhabi Systems and Information Centre +// https://www.iana.org/domains/root/db/abudhabi.html +abudhabi + +// academy : Binky Moon, LLC +// https://www.iana.org/domains/root/db/academy.html +academy + +// accenture : Accenture plc +// https://www.iana.org/domains/root/db/accenture.html +accenture + +// accountant : dot Accountant Limited +// https://www.iana.org/domains/root/db/accountant.html +accountant + +// accountants : Binky Moon, LLC +// https://www.iana.org/domains/root/db/accountants.html +accountants + +// aco : ACO Severin Ahlmann GmbH & Co. KG +// https://www.iana.org/domains/root/db/aco.html +aco + +// actor : Dog Beach, LLC +// https://www.iana.org/domains/root/db/actor.html +actor + +// ads : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/ads.html +ads + +// adult : ICM Registry AD LLC +// https://www.iana.org/domains/root/db/adult.html +adult + +// aeg : Aktiebolaget Electrolux +// https://www.iana.org/domains/root/db/aeg.html +aeg + +// aetna : Aetna Life Insurance Company +// https://www.iana.org/domains/root/db/aetna.html +aetna + +// afl : Australian Football League +// https://www.iana.org/domains/root/db/afl.html +afl + +// africa : ZA Central Registry NPC trading as Registry.Africa +// https://www.iana.org/domains/root/db/africa.html +africa + +// agakhan : Fondation Aga Khan (Aga Khan Foundation) +// https://www.iana.org/domains/root/db/agakhan.html +agakhan + +// agency : Binky Moon, LLC +// https://www.iana.org/domains/root/db/agency.html +agency + +// aig : American International Group, Inc. +// https://www.iana.org/domains/root/db/aig.html +aig + +// airbus : Airbus S.A.S. +// https://www.iana.org/domains/root/db/airbus.html +airbus + +// airforce : Dog Beach, LLC +// https://www.iana.org/domains/root/db/airforce.html +airforce + +// airtel : Bharti Airtel Limited +// https://www.iana.org/domains/root/db/airtel.html +airtel + +// akdn : Fondation Aga Khan (Aga Khan Foundation) +// https://www.iana.org/domains/root/db/akdn.html +akdn + +// alibaba : Alibaba Group Holding Limited +// https://www.iana.org/domains/root/db/alibaba.html +alibaba + +// alipay : Alibaba Group Holding Limited +// https://www.iana.org/domains/root/db/alipay.html +alipay + +// allfinanz : Allfinanz Deutsche Vermögensberatung Aktiengesellschaft +// https://www.iana.org/domains/root/db/allfinanz.html +allfinanz + +// allstate : Allstate Fire and Casualty Insurance Company +// https://www.iana.org/domains/root/db/allstate.html +allstate + +// ally : Ally Financial Inc. 
+// https://www.iana.org/domains/root/db/ally.html +ally + +// alsace : Region Grand Est +// https://www.iana.org/domains/root/db/alsace.html +alsace + +// alstom : ALSTOM +// https://www.iana.org/domains/root/db/alstom.html +alstom + +// amazon : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/amazon.html +amazon + +// americanexpress : American Express Travel Related Services Company, Inc. +// https://www.iana.org/domains/root/db/americanexpress.html +americanexpress + +// americanfamily : AmFam, Inc. +// https://www.iana.org/domains/root/db/americanfamily.html +americanfamily + +// amex : American Express Travel Related Services Company, Inc. +// https://www.iana.org/domains/root/db/amex.html +amex + +// amfam : AmFam, Inc. +// https://www.iana.org/domains/root/db/amfam.html +amfam + +// amica : Amica Mutual Insurance Company +// https://www.iana.org/domains/root/db/amica.html +amica + +// amsterdam : Gemeente Amsterdam +// https://www.iana.org/domains/root/db/amsterdam.html +amsterdam + +// analytics : Campus IP LLC +// https://www.iana.org/domains/root/db/analytics.html +analytics + +// android : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/android.html +android + +// anquan : Beijing Qihu Keji Co., Ltd. +// https://www.iana.org/domains/root/db/anquan.html +anquan + +// anz : Australia and New Zealand Banking Group Limited +// https://www.iana.org/domains/root/db/anz.html +anz + +// aol : Yahoo Inc. +// https://www.iana.org/domains/root/db/aol.html +aol + +// apartments : Binky Moon, LLC +// https://www.iana.org/domains/root/db/apartments.html +apartments + +// app : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/app.html +app + +// apple : Apple Inc. 
+// https://www.iana.org/domains/root/db/apple.html +apple + +// aquarelle : Aquarelle.com +// https://www.iana.org/domains/root/db/aquarelle.html +aquarelle + +// arab : League of Arab States +// https://www.iana.org/domains/root/db/arab.html +arab + +// aramco : Aramco Services Company +// https://www.iana.org/domains/root/db/aramco.html +aramco + +// archi : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/archi.html +archi + +// army : Dog Beach, LLC +// https://www.iana.org/domains/root/db/army.html +army + +// art : UK Creative Ideas Limited +// https://www.iana.org/domains/root/db/art.html +art + +// arte : Association Relative à la Télévision Européenne G.E.I.E. +// https://www.iana.org/domains/root/db/arte.html +arte + +// asda : Asda Stores Limited +// https://www.iana.org/domains/root/db/asda.html +asda + +// associates : Binky Moon, LLC +// https://www.iana.org/domains/root/db/associates.html +associates + +// athleta : The Gap, Inc. +// https://www.iana.org/domains/root/db/athleta.html +athleta + +// attorney : Dog Beach, LLC +// https://www.iana.org/domains/root/db/attorney.html +attorney + +// auction : Dog Beach, LLC +// https://www.iana.org/domains/root/db/auction.html +auction + +// audi : AUDI Aktiengesellschaft +// https://www.iana.org/domains/root/db/audi.html +audi + +// audible : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/audible.html +audible + +// audio : XYZ.COM LLC +// https://www.iana.org/domains/root/db/audio.html +audio + +// auspost : Australian Postal Corporation +// https://www.iana.org/domains/root/db/auspost.html +auspost + +// author : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/author.html +author + +// auto : XYZ.COM LLC +// https://www.iana.org/domains/root/db/auto.html +auto + +// autos : XYZ.COM LLC +// https://www.iana.org/domains/root/db/autos.html +autos + +// aws : AWS Registry LLC +// https://www.iana.org/domains/root/db/aws.html +aws + +// axa : AXA Group Operations SAS +// https://www.iana.org/domains/root/db/axa.html +axa + +// azure : Microsoft Corporation +// https://www.iana.org/domains/root/db/azure.html +azure + +// baby : XYZ.COM LLC +// https://www.iana.org/domains/root/db/baby.html +baby + +// baidu : Baidu, Inc. +// https://www.iana.org/domains/root/db/baidu.html +baidu + +// banamex : Citigroup Inc. +// https://www.iana.org/domains/root/db/banamex.html +banamex + +// band : Dog Beach, LLC +// https://www.iana.org/domains/root/db/band.html +band + +// bank : fTLD Registry Services LLC +// https://www.iana.org/domains/root/db/bank.html +bank + +// bar : Punto 2012 Sociedad Anonima Promotora de Inversion de Capital Variable +// https://www.iana.org/domains/root/db/bar.html +bar + +// barcelona : Municipi de Barcelona +// https://www.iana.org/domains/root/db/barcelona.html +barcelona + +// barclaycard : Barclays Bank PLC +// https://www.iana.org/domains/root/db/barclaycard.html +barclaycard + +// barclays : Barclays Bank PLC +// https://www.iana.org/domains/root/db/barclays.html +barclays + +// barefoot : Gallo Vineyards, Inc. 
+// https://www.iana.org/domains/root/db/barefoot.html +barefoot + +// bargains : Binky Moon, LLC +// https://www.iana.org/domains/root/db/bargains.html +bargains + +// baseball : MLB Advanced Media DH, LLC +// https://www.iana.org/domains/root/db/baseball.html +baseball + +// basketball : Fédération Internationale de Basketball (FIBA) +// https://www.iana.org/domains/root/db/basketball.html +basketball + +// bauhaus : Werkhaus GmbH +// https://www.iana.org/domains/root/db/bauhaus.html +bauhaus + +// bayern : Bayern Connect GmbH +// https://www.iana.org/domains/root/db/bayern.html +bayern + +// bbc : British Broadcasting Corporation +// https://www.iana.org/domains/root/db/bbc.html +bbc + +// bbt : BB&T Corporation +// https://www.iana.org/domains/root/db/bbt.html +bbt + +// bbva : BANCO BILBAO VIZCAYA ARGENTARIA, S.A. +// https://www.iana.org/domains/root/db/bbva.html +bbva + +// bcg : The Boston Consulting Group, Inc. +// https://www.iana.org/domains/root/db/bcg.html +bcg + +// bcn : Municipi de Barcelona +// https://www.iana.org/domains/root/db/bcn.html +bcn + +// beats : Beats Electronics, LLC +// https://www.iana.org/domains/root/db/beats.html +beats + +// beauty : XYZ.COM LLC +// https://www.iana.org/domains/root/db/beauty.html +beauty + +// beer : Registry Services, LLC +// https://www.iana.org/domains/root/db/beer.html +beer + +// berlin : dotBERLIN GmbH & Co. KG +// https://www.iana.org/domains/root/db/berlin.html +berlin + +// best : BestTLD Pty Ltd +// https://www.iana.org/domains/root/db/best.html +best + +// bestbuy : BBY Solutions, Inc. 
+// https://www.iana.org/domains/root/db/bestbuy.html +bestbuy + +// bet : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/bet.html +bet + +// bharti : Bharti Enterprises (Holding) Private Limited +// https://www.iana.org/domains/root/db/bharti.html +bharti + +// bible : American Bible Society +// https://www.iana.org/domains/root/db/bible.html +bible + +// bid : dot Bid Limited +// https://www.iana.org/domains/root/db/bid.html +bid + +// bike : Binky Moon, LLC +// https://www.iana.org/domains/root/db/bike.html +bike + +// bing : Microsoft Corporation +// https://www.iana.org/domains/root/db/bing.html +bing + +// bingo : Binky Moon, LLC +// https://www.iana.org/domains/root/db/bingo.html +bingo + +// bio : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/bio.html +bio + +// black : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/black.html +black + +// blackfriday : Registry Services, LLC +// https://www.iana.org/domains/root/db/blackfriday.html +blackfriday + +// blockbuster : Dish DBS Corporation +// https://www.iana.org/domains/root/db/blockbuster.html +blockbuster + +// blog : Knock Knock WHOIS There, LLC +// https://www.iana.org/domains/root/db/blog.html +blog + +// bloomberg : Bloomberg IP Holdings LLC +// https://www.iana.org/domains/root/db/bloomberg.html +bloomberg + +// blue : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/blue.html +blue + +// bms : Bristol-Myers Squibb Company +// https://www.iana.org/domains/root/db/bms.html +bms + +// bmw : Bayerische Motoren Werke Aktiengesellschaft +// https://www.iana.org/domains/root/db/bmw.html +bmw + +// bnpparibas : BNP Paribas +// https://www.iana.org/domains/root/db/bnpparibas.html +bnpparibas + +// boats : XYZ.COM LLC +// https://www.iana.org/domains/root/db/boats.html +boats + +// boehringer : Boehringer Ingelheim International GmbH +// https://www.iana.org/domains/root/db/boehringer.html +boehringer + 
+// bofa : Bank of America Corporation +// https://www.iana.org/domains/root/db/bofa.html +bofa + +// bom : Núcleo de Informação e Coordenação do Ponto BR - NIC.br +// https://www.iana.org/domains/root/db/bom.html +bom + +// bond : ShortDot SA +// https://www.iana.org/domains/root/db/bond.html +bond + +// boo : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/boo.html +boo + +// book : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/book.html +book + +// booking : Booking.com B.V. +// https://www.iana.org/domains/root/db/booking.html +booking + +// bosch : Robert Bosch GMBH +// https://www.iana.org/domains/root/db/bosch.html +bosch + +// bostik : Bostik SA +// https://www.iana.org/domains/root/db/bostik.html +bostik + +// boston : Registry Services, LLC +// https://www.iana.org/domains/root/db/boston.html +boston + +// bot : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/bot.html +bot + +// boutique : Binky Moon, LLC +// https://www.iana.org/domains/root/db/boutique.html +boutique + +// box : Intercap Registry Inc. +// https://www.iana.org/domains/root/db/box.html +box + +// bradesco : Banco Bradesco S.A. +// https://www.iana.org/domains/root/db/bradesco.html +bradesco + +// bridgestone : Bridgestone Corporation +// https://www.iana.org/domains/root/db/bridgestone.html +bridgestone + +// broadway : Celebrate Broadway, Inc. +// https://www.iana.org/domains/root/db/broadway.html +broadway + +// broker : Dog Beach, LLC +// https://www.iana.org/domains/root/db/broker.html +broker + +// brother : Brother Industries, Ltd. 
+// https://www.iana.org/domains/root/db/brother.html +brother + +// brussels : DNS.be vzw +// https://www.iana.org/domains/root/db/brussels.html +brussels + +// build : Plan Bee LLC +// https://www.iana.org/domains/root/db/build.html +build + +// builders : Binky Moon, LLC +// https://www.iana.org/domains/root/db/builders.html +builders + +// business : Binky Moon, LLC +// https://www.iana.org/domains/root/db/business.html +business + +// buy : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/buy.html +buy + +// buzz : DOTSTRATEGY CO. +// https://www.iana.org/domains/root/db/buzz.html +buzz + +// bzh : Association www.bzh +// https://www.iana.org/domains/root/db/bzh.html +bzh + +// cab : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cab.html +cab + +// cafe : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cafe.html +cafe + +// cal : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/cal.html +cal + +// call : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/call.html +call + +// calvinklein : PVH gTLD Holdings LLC +// https://www.iana.org/domains/root/db/calvinklein.html +calvinklein + +// cam : Cam Connecting SARL +// https://www.iana.org/domains/root/db/cam.html +cam + +// camera : Binky Moon, LLC +// https://www.iana.org/domains/root/db/camera.html +camera + +// camp : Binky Moon, LLC +// https://www.iana.org/domains/root/db/camp.html +camp + +// canon : Canon Inc. 
+// https://www.iana.org/domains/root/db/canon.html +canon + +// capetown : ZA Central Registry NPC trading as ZA Central Registry +// https://www.iana.org/domains/root/db/capetown.html +capetown + +// capital : Binky Moon, LLC +// https://www.iana.org/domains/root/db/capital.html +capital + +// capitalone : Capital One Financial Corporation +// https://www.iana.org/domains/root/db/capitalone.html +capitalone + +// car : XYZ.COM LLC +// https://www.iana.org/domains/root/db/car.html +car + +// caravan : Caravan International, Inc. +// https://www.iana.org/domains/root/db/caravan.html +caravan + +// cards : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cards.html +cards + +// care : Binky Moon, LLC +// https://www.iana.org/domains/root/db/care.html +care + +// career : dotCareer LLC +// https://www.iana.org/domains/root/db/career.html +career + +// careers : Binky Moon, LLC +// https://www.iana.org/domains/root/db/careers.html +careers + +// cars : XYZ.COM LLC +// https://www.iana.org/domains/root/db/cars.html +cars + +// casa : Registry Services, LLC +// https://www.iana.org/domains/root/db/casa.html +casa + +// case : Digity, LLC +// https://www.iana.org/domains/root/db/case.html +case + +// cash : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cash.html +cash + +// casino : Binky Moon, LLC +// https://www.iana.org/domains/root/db/casino.html +casino + +// catering : Binky Moon, LLC +// https://www.iana.org/domains/root/db/catering.html +catering + +// catholic : Pontificium Consilium de Comunicationibus Socialibus (PCCS) (Pontifical Council for Social Communication) +// https://www.iana.org/domains/root/db/catholic.html +catholic + +// cba : COMMONWEALTH BANK OF AUSTRALIA +// https://www.iana.org/domains/root/db/cba.html +cba + +// cbn : The Christian Broadcasting Network, Inc. +// https://www.iana.org/domains/root/db/cbn.html +cbn + +// cbre : CBRE, Inc. 
+// https://www.iana.org/domains/root/db/cbre.html +cbre + +// center : Binky Moon, LLC +// https://www.iana.org/domains/root/db/center.html +center + +// ceo : XYZ.COM LLC +// https://www.iana.org/domains/root/db/ceo.html +ceo + +// cern : European Organization for Nuclear Research ("CERN") +// https://www.iana.org/domains/root/db/cern.html +cern + +// cfa : CFA Institute +// https://www.iana.org/domains/root/db/cfa.html +cfa + +// cfd : ShortDot SA +// https://www.iana.org/domains/root/db/cfd.html +cfd + +// chanel : Chanel International B.V. +// https://www.iana.org/domains/root/db/chanel.html +chanel + +// channel : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/channel.html +channel + +// charity : Public Interest Registry +// https://www.iana.org/domains/root/db/charity.html +charity + +// chase : JPMorgan Chase Bank, National Association +// https://www.iana.org/domains/root/db/chase.html +chase + +// chat : Binky Moon, LLC +// https://www.iana.org/domains/root/db/chat.html +chat + +// cheap : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cheap.html +cheap + +// chintai : CHINTAI Corporation +// https://www.iana.org/domains/root/db/chintai.html +chintai + +// christmas : XYZ.COM LLC +// https://www.iana.org/domains/root/db/christmas.html +christmas + +// chrome : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/chrome.html +chrome + +// church : Binky Moon, LLC +// https://www.iana.org/domains/root/db/church.html +church + +// cipriani : Hotel Cipriani Srl +// https://www.iana.org/domains/root/db/cipriani.html +cipriani + +// circle : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/circle.html +circle + +// cisco : Cisco Technology, Inc. +// https://www.iana.org/domains/root/db/cisco.html +cisco + +// citadel : Citadel Domain LLC +// https://www.iana.org/domains/root/db/citadel.html +citadel + +// citi : Citigroup Inc. 
+// https://www.iana.org/domains/root/db/citi.html +citi + +// citic : CITIC Group Corporation +// https://www.iana.org/domains/root/db/citic.html +citic + +// city : Binky Moon, LLC +// https://www.iana.org/domains/root/db/city.html +city + +// claims : Binky Moon, LLC +// https://www.iana.org/domains/root/db/claims.html +claims + +// cleaning : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cleaning.html +cleaning + +// click : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/click.html +click + +// clinic : Binky Moon, LLC +// https://www.iana.org/domains/root/db/clinic.html +clinic + +// clinique : The Estée Lauder Companies Inc. +// https://www.iana.org/domains/root/db/clinique.html +clinique + +// clothing : Binky Moon, LLC +// https://www.iana.org/domains/root/db/clothing.html +clothing + +// cloud : Aruba PEC S.p.A. +// https://www.iana.org/domains/root/db/cloud.html +cloud + +// club : Registry Services, LLC +// https://www.iana.org/domains/root/db/club.html +club + +// clubmed : Club Méditerranée S.A. 
+// https://www.iana.org/domains/root/db/clubmed.html +clubmed + +// coach : Binky Moon, LLC +// https://www.iana.org/domains/root/db/coach.html +coach + +// codes : Binky Moon, LLC +// https://www.iana.org/domains/root/db/codes.html +codes + +// coffee : Binky Moon, LLC +// https://www.iana.org/domains/root/db/coffee.html +coffee + +// college : XYZ.COM LLC +// https://www.iana.org/domains/root/db/college.html +college + +// cologne : dotKoeln GmbH +// https://www.iana.org/domains/root/db/cologne.html +cologne + +// commbank : COMMONWEALTH BANK OF AUSTRALIA +// https://www.iana.org/domains/root/db/commbank.html +commbank + +// community : Binky Moon, LLC +// https://www.iana.org/domains/root/db/community.html +community + +// company : Binky Moon, LLC +// https://www.iana.org/domains/root/db/company.html +company + +// compare : Registry Services, LLC +// https://www.iana.org/domains/root/db/compare.html +compare + +// computer : Binky Moon, LLC +// https://www.iana.org/domains/root/db/computer.html +computer + +// comsec : VeriSign, Inc. 
+// https://www.iana.org/domains/root/db/comsec.html +comsec + +// condos : Binky Moon, LLC +// https://www.iana.org/domains/root/db/condos.html +condos + +// construction : Binky Moon, LLC +// https://www.iana.org/domains/root/db/construction.html +construction + +// consulting : Dog Beach, LLC +// https://www.iana.org/domains/root/db/consulting.html +consulting + +// contact : Dog Beach, LLC +// https://www.iana.org/domains/root/db/contact.html +contact + +// contractors : Binky Moon, LLC +// https://www.iana.org/domains/root/db/contractors.html +contractors + +// cooking : Registry Services, LLC +// https://www.iana.org/domains/root/db/cooking.html +cooking + +// cool : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cool.html +cool + +// corsica : Collectivité de Corse +// https://www.iana.org/domains/root/db/corsica.html +corsica + +// country : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/country.html +country + +// coupon : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/coupon.html +coupon + +// coupons : Binky Moon, LLC +// https://www.iana.org/domains/root/db/coupons.html +coupons + +// courses : Registry Services, LLC +// https://www.iana.org/domains/root/db/courses.html +courses + +// cpa : American Institute of Certified Public Accountants +// https://www.iana.org/domains/root/db/cpa.html +cpa + +// credit : Binky Moon, LLC +// https://www.iana.org/domains/root/db/credit.html +credit + +// creditcard : Binky Moon, LLC +// https://www.iana.org/domains/root/db/creditcard.html +creditcard + +// creditunion : DotCooperation LLC +// https://www.iana.org/domains/root/db/creditunion.html +creditunion + +// cricket : dot Cricket Limited +// https://www.iana.org/domains/root/db/cricket.html +cricket + +// crown : Crown Equipment Corporation +// https://www.iana.org/domains/root/db/crown.html +crown + +// crs : Federated Co-operatives Limited +// https://www.iana.org/domains/root/db/crs.html +crs + +// cruise : Viking River Cruises (Bermuda) Ltd. +// https://www.iana.org/domains/root/db/cruise.html +cruise + +// cruises : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cruises.html +cruises + +// cuisinella : SCHMIDT GROUPE S.A.S. +// https://www.iana.org/domains/root/db/cuisinella.html +cuisinella + +// cymru : Nominet UK +// https://www.iana.org/domains/root/db/cymru.html +cymru + +// cyou : ShortDot SA +// https://www.iana.org/domains/root/db/cyou.html +cyou + +// dad : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/dad.html +dad + +// dance : Dog Beach, LLC +// https://www.iana.org/domains/root/db/dance.html +dance + +// data : Dish DBS Corporation +// https://www.iana.org/domains/root/db/data.html +data + +// date : dot Date Limited +// https://www.iana.org/domains/root/db/date.html +date + +// dating : Binky Moon, LLC +// https://www.iana.org/domains/root/db/dating.html +dating + +// datsun : NISSAN MOTOR CO., LTD. 
+// https://www.iana.org/domains/root/db/datsun.html +datsun + +// day : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/day.html +day + +// dclk : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/dclk.html +dclk + +// dds : Registry Services, LLC +// https://www.iana.org/domains/root/db/dds.html +dds + +// deal : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/deal.html +deal + +// dealer : Intercap Registry Inc. +// https://www.iana.org/domains/root/db/dealer.html +dealer + +// deals : Binky Moon, LLC +// https://www.iana.org/domains/root/db/deals.html +deals + +// degree : Dog Beach, LLC +// https://www.iana.org/domains/root/db/degree.html +degree + +// delivery : Binky Moon, LLC +// https://www.iana.org/domains/root/db/delivery.html +delivery + +// dell : Dell Inc. +// https://www.iana.org/domains/root/db/dell.html +dell + +// deloitte : Deloitte Touche Tohmatsu +// https://www.iana.org/domains/root/db/deloitte.html +deloitte + +// delta : Delta Air Lines, Inc. +// https://www.iana.org/domains/root/db/delta.html +delta + +// democrat : Dog Beach, LLC +// https://www.iana.org/domains/root/db/democrat.html +democrat + +// dental : Binky Moon, LLC +// https://www.iana.org/domains/root/db/dental.html +dental + +// dentist : Dog Beach, LLC +// https://www.iana.org/domains/root/db/dentist.html +dentist + +// desi +// https://www.iana.org/domains/root/db/desi.html +desi + +// design : Registry Services, LLC +// https://www.iana.org/domains/root/db/design.html +design + +// dev : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/dev.html +dev + +// dhl : Deutsche Post AG +// https://www.iana.org/domains/root/db/dhl.html +dhl + +// diamonds : Binky Moon, LLC +// https://www.iana.org/domains/root/db/diamonds.html +diamonds + +// diet : XYZ.COM LLC +// https://www.iana.org/domains/root/db/diet.html +diet + +// digital : Binky Moon, LLC +// https://www.iana.org/domains/root/db/digital.html +digital + +// direct : Binky Moon, LLC +// https://www.iana.org/domains/root/db/direct.html +direct + +// directory : Binky Moon, LLC +// https://www.iana.org/domains/root/db/directory.html +directory + +// discount : Binky Moon, LLC +// https://www.iana.org/domains/root/db/discount.html +discount + +// discover : Discover Financial Services +// https://www.iana.org/domains/root/db/discover.html +discover + +// dish : Dish DBS Corporation +// https://www.iana.org/domains/root/db/dish.html +dish + +// diy : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/diy.html +diy + +// dnp : Dai Nippon Printing Co., Ltd. +// https://www.iana.org/domains/root/db/dnp.html +dnp + +// docs : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/docs.html +docs + +// doctor : Binky Moon, LLC +// https://www.iana.org/domains/root/db/doctor.html +doctor + +// dog : Binky Moon, LLC +// https://www.iana.org/domains/root/db/dog.html +dog + +// domains : Binky Moon, LLC +// https://www.iana.org/domains/root/db/domains.html +domains + +// dot : Dish DBS Corporation +// https://www.iana.org/domains/root/db/dot.html +dot + +// download : dot Support Limited +// https://www.iana.org/domains/root/db/download.html +download + +// drive : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/drive.html +drive + +// dtv : Dish DBS Corporation +// https://www.iana.org/domains/root/db/dtv.html +dtv + +// dubai : Dubai Smart Government Department +// https://www.iana.org/domains/root/db/dubai.html +dubai + +// dunlop : The Goodyear Tire & Rubber Company +// https://www.iana.org/domains/root/db/dunlop.html +dunlop + +// dupont : DuPont Specialty Products USA, LLC +// https://www.iana.org/domains/root/db/dupont.html +dupont + +// durban : ZA Central Registry NPC trading as ZA Central Registry +// https://www.iana.org/domains/root/db/durban.html +durban + +// dvag : Deutsche Vermögensberatung Aktiengesellschaft DVAG +// https://www.iana.org/domains/root/db/dvag.html +dvag + +// dvr : DISH Technologies L.L.C. +// https://www.iana.org/domains/root/db/dvr.html +dvr + +// earth : Interlink Systems Innovation Institute K.K. +// https://www.iana.org/domains/root/db/earth.html +earth + +// eat : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/eat.html +eat + +// eco : Big Room Inc. +// https://www.iana.org/domains/root/db/eco.html +eco + +// edeka : EDEKA Verband kaufmännischer Genossenschaften e.V. 
+// https://www.iana.org/domains/root/db/edeka.html +edeka + +// education : Binky Moon, LLC +// https://www.iana.org/domains/root/db/education.html +education + +// email : Binky Moon, LLC +// https://www.iana.org/domains/root/db/email.html +email + +// emerck : Merck KGaA +// https://www.iana.org/domains/root/db/emerck.html +emerck + +// energy : Binky Moon, LLC +// https://www.iana.org/domains/root/db/energy.html +energy + +// engineer : Dog Beach, LLC +// https://www.iana.org/domains/root/db/engineer.html +engineer + +// engineering : Binky Moon, LLC +// https://www.iana.org/domains/root/db/engineering.html +engineering + +// enterprises : Binky Moon, LLC +// https://www.iana.org/domains/root/db/enterprises.html +enterprises + +// epson : Seiko Epson Corporation +// https://www.iana.org/domains/root/db/epson.html +epson + +// equipment : Binky Moon, LLC +// https://www.iana.org/domains/root/db/equipment.html +equipment + +// ericsson : Telefonaktiebolaget L M Ericsson +// https://www.iana.org/domains/root/db/ericsson.html +ericsson + +// erni : ERNI Group Holding AG +// https://www.iana.org/domains/root/db/erni.html +erni + +// esq : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/esq.html +esq + +// estate : Binky Moon, LLC +// https://www.iana.org/domains/root/db/estate.html +estate + +// eurovision : European Broadcasting Union (EBU) +// https://www.iana.org/domains/root/db/eurovision.html +eurovision + +// eus : Puntueus Fundazioa +// https://www.iana.org/domains/root/db/eus.html +eus + +// events : Binky Moon, LLC +// https://www.iana.org/domains/root/db/events.html +events + +// exchange : Binky Moon, LLC +// https://www.iana.org/domains/root/db/exchange.html +exchange + +// expert : Binky Moon, LLC +// https://www.iana.org/domains/root/db/expert.html +expert + +// exposed : Binky Moon, LLC +// https://www.iana.org/domains/root/db/exposed.html +exposed + +// express : Binky Moon, LLC +// https://www.iana.org/domains/root/db/express.html +express + +// extraspace : Extra Space Storage LLC +// https://www.iana.org/domains/root/db/extraspace.html +extraspace + +// fage : Fage International S.A. +// https://www.iana.org/domains/root/db/fage.html +fage + +// fail : Binky Moon, LLC +// https://www.iana.org/domains/root/db/fail.html +fail + +// fairwinds : FairWinds Partners, LLC +// https://www.iana.org/domains/root/db/fairwinds.html +fairwinds + +// faith : dot Faith Limited +// https://www.iana.org/domains/root/db/faith.html +faith + +// family : Dog Beach, LLC +// https://www.iana.org/domains/root/db/family.html +family + +// fan : Dog Beach, LLC +// https://www.iana.org/domains/root/db/fan.html +fan + +// fans : ZDNS International Limited +// https://www.iana.org/domains/root/db/fans.html +fans + +// farm : Binky Moon, LLC +// https://www.iana.org/domains/root/db/farm.html +farm + +// farmers : Farmers Insurance Exchange +// https://www.iana.org/domains/root/db/farmers.html +farmers + +// fashion : Registry Services, LLC +// https://www.iana.org/domains/root/db/fashion.html +fashion + +// fast : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/fast.html +fast + +// fedex : Federal Express Corporation +// https://www.iana.org/domains/root/db/fedex.html +fedex + +// feedback : Top Level Spectrum, Inc. +// https://www.iana.org/domains/root/db/feedback.html +feedback + +// ferrari : Fiat Chrysler Automobiles N.V. +// https://www.iana.org/domains/root/db/ferrari.html +ferrari + +// ferrero : Ferrero Trading Lux S.A. +// https://www.iana.org/domains/root/db/ferrero.html +ferrero + +// fidelity : Fidelity Brokerage Services LLC +// https://www.iana.org/domains/root/db/fidelity.html +fidelity + +// fido : Rogers Communications Canada Inc. +// https://www.iana.org/domains/root/db/fido.html +fido + +// film : Motion Picture Domain Registry Pty Ltd +// https://www.iana.org/domains/root/db/film.html +film + +// final : Núcleo de Informação e Coordenação do Ponto BR - NIC.br +// https://www.iana.org/domains/root/db/final.html +final + +// finance : Binky Moon, LLC +// https://www.iana.org/domains/root/db/finance.html +finance + +// financial : Binky Moon, LLC +// https://www.iana.org/domains/root/db/financial.html +financial + +// fire : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/fire.html +fire + +// firestone : Bridgestone Licensing Services, Inc +// https://www.iana.org/domains/root/db/firestone.html +firestone + +// firmdale : Firmdale Holdings Limited +// https://www.iana.org/domains/root/db/firmdale.html +firmdale + +// fish : Binky Moon, LLC +// https://www.iana.org/domains/root/db/fish.html +fish + +// fishing : Registry Services, LLC +// https://www.iana.org/domains/root/db/fishing.html +fishing + +// fit : Registry Services, LLC +// https://www.iana.org/domains/root/db/fit.html +fit + +// fitness : Binky Moon, LLC +// https://www.iana.org/domains/root/db/fitness.html +fitness + +// flickr : Flickr, Inc. 
+// https://www.iana.org/domains/root/db/flickr.html +flickr + +// flights : Binky Moon, LLC +// https://www.iana.org/domains/root/db/flights.html +flights + +// flir : FLIR Systems, Inc. +// https://www.iana.org/domains/root/db/flir.html +flir + +// florist : Binky Moon, LLC +// https://www.iana.org/domains/root/db/florist.html +florist + +// flowers : XYZ.COM LLC +// https://www.iana.org/domains/root/db/flowers.html +flowers + +// fly : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/fly.html +fly + +// foo : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/foo.html +foo + +// food : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/food.html +food + +// football : Binky Moon, LLC +// https://www.iana.org/domains/root/db/football.html +football + +// ford : Ford Motor Company +// https://www.iana.org/domains/root/db/ford.html +ford + +// forex : Dog Beach, LLC +// https://www.iana.org/domains/root/db/forex.html +forex + +// forsale : Dog Beach, LLC +// https://www.iana.org/domains/root/db/forsale.html +forsale + +// forum : Waterford Limited +// https://www.iana.org/domains/root/db/forum.html +forum + +// foundation : Public Interest Registry +// https://www.iana.org/domains/root/db/foundation.html +foundation + +// fox : FOX Registry, LLC +// https://www.iana.org/domains/root/db/fox.html +fox + +// free : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/free.html +free + +// fresenius : Fresenius Immobilien-Verwaltungs-GmbH +// https://www.iana.org/domains/root/db/fresenius.html +fresenius + +// frl : FRLregistry B.V. 
+// https://www.iana.org/domains/root/db/frl.html +frl + +// frogans : OP3FT +// https://www.iana.org/domains/root/db/frogans.html +frogans + +// frontier : Frontier Communications Corporation +// https://www.iana.org/domains/root/db/frontier.html +frontier + +// ftr : Frontier Communications Corporation +// https://www.iana.org/domains/root/db/ftr.html +ftr + +// fujitsu : Fujitsu Limited +// https://www.iana.org/domains/root/db/fujitsu.html +fujitsu + +// fun : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/fun.html +fun + +// fund : Binky Moon, LLC +// https://www.iana.org/domains/root/db/fund.html +fund + +// furniture : Binky Moon, LLC +// https://www.iana.org/domains/root/db/furniture.html +furniture + +// futbol : Dog Beach, LLC +// https://www.iana.org/domains/root/db/futbol.html +futbol + +// fyi : Binky Moon, LLC +// https://www.iana.org/domains/root/db/fyi.html +fyi + +// gal : Asociación puntoGAL +// https://www.iana.org/domains/root/db/gal.html +gal + +// gallery : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gallery.html +gallery + +// gallo : Gallo Vineyards, Inc. +// https://www.iana.org/domains/root/db/gallo.html +gallo + +// gallup : Gallup, Inc. +// https://www.iana.org/domains/root/db/gallup.html +gallup + +// game : XYZ.COM LLC +// https://www.iana.org/domains/root/db/game.html +game + +// games : Dog Beach, LLC +// https://www.iana.org/domains/root/db/games.html +games + +// gap : The Gap, Inc. +// https://www.iana.org/domains/root/db/gap.html +gap + +// garden : Registry Services, LLC +// https://www.iana.org/domains/root/db/garden.html +garden + +// gay : Registry Services, LLC +// https://www.iana.org/domains/root/db/gay.html +gay + +// gbiz : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/gbiz.html +gbiz + +// gdn : Joint Stock Company "Navigation-information systems" +// https://www.iana.org/domains/root/db/gdn.html +gdn + +// gea : GEA Group Aktiengesellschaft +// https://www.iana.org/domains/root/db/gea.html +gea + +// gent : Easyhost BV +// https://www.iana.org/domains/root/db/gent.html +gent + +// genting : Resorts World Inc Pte. Ltd. +// https://www.iana.org/domains/root/db/genting.html +genting + +// george : Wal-Mart Stores, Inc. +// https://www.iana.org/domains/root/db/george.html +george + +// ggee : GMO Internet, Inc. +// https://www.iana.org/domains/root/db/ggee.html +ggee + +// gift : DotGift, LLC +// https://www.iana.org/domains/root/db/gift.html +gift + +// gifts : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gifts.html +gifts + +// gives : Public Interest Registry +// https://www.iana.org/domains/root/db/gives.html +gives + +// giving : Public Interest Registry +// https://www.iana.org/domains/root/db/giving.html +giving + +// glass : Binky Moon, LLC +// https://www.iana.org/domains/root/db/glass.html +glass + +// gle : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/gle.html +gle + +// global : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/global.html +global + +// globo : Globo Comunicação e Participações S.A +// https://www.iana.org/domains/root/db/globo.html +globo + +// gmail : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/gmail.html +gmail + +// gmbh : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gmbh.html +gmbh + +// gmo : GMO Internet, Inc. 
+// https://www.iana.org/domains/root/db/gmo.html +gmo + +// gmx : 1&1 Mail & Media GmbH +// https://www.iana.org/domains/root/db/gmx.html +gmx + +// godaddy : Go Daddy East, LLC +// https://www.iana.org/domains/root/db/godaddy.html +godaddy + +// gold : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gold.html +gold + +// goldpoint : YODOBASHI CAMERA CO.,LTD. +// https://www.iana.org/domains/root/db/goldpoint.html +goldpoint + +// golf : Binky Moon, LLC +// https://www.iana.org/domains/root/db/golf.html +golf + +// goo : NTT DOCOMO, INC. +// https://www.iana.org/domains/root/db/goo.html +goo + +// goodyear : The Goodyear Tire & Rubber Company +// https://www.iana.org/domains/root/db/goodyear.html +goodyear + +// goog : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/goog.html +goog + +// google : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/google.html +google + +// gop : Republican State Leadership Committee, Inc. +// https://www.iana.org/domains/root/db/gop.html +gop + +// got : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/got.html +got + +// grainger : Grainger Registry Services, LLC +// https://www.iana.org/domains/root/db/grainger.html +grainger + +// graphics : Binky Moon, LLC +// https://www.iana.org/domains/root/db/graphics.html +graphics + +// gratis : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gratis.html +gratis + +// green : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/green.html +green + +// gripe : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gripe.html +gripe + +// grocery : Wal-Mart Stores, Inc. +// https://www.iana.org/domains/root/db/grocery.html +grocery + +// group : Binky Moon, LLC +// https://www.iana.org/domains/root/db/group.html +group + +// gucci : Guccio Gucci S.p.a. +// https://www.iana.org/domains/root/db/gucci.html +gucci + +// guge : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/guge.html +guge + +// guide : Binky Moon, LLC +// https://www.iana.org/domains/root/db/guide.html +guide + +// guitars : XYZ.COM LLC +// https://www.iana.org/domains/root/db/guitars.html +guitars + +// guru : Binky Moon, LLC +// https://www.iana.org/domains/root/db/guru.html +guru + +// hair : XYZ.COM LLC +// https://www.iana.org/domains/root/db/hair.html +hair + +// hamburg : Hamburg Top-Level-Domain GmbH +// https://www.iana.org/domains/root/db/hamburg.html +hamburg + +// hangout : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/hangout.html +hangout + +// haus : Dog Beach, LLC +// https://www.iana.org/domains/root/db/haus.html +haus + +// hbo : HBO Registry Services, Inc. +// https://www.iana.org/domains/root/db/hbo.html +hbo + +// hdfc : HDFC BANK LIMITED +// https://www.iana.org/domains/root/db/hdfc.html +hdfc + +// hdfcbank : HDFC BANK LIMITED +// https://www.iana.org/domains/root/db/hdfcbank.html +hdfcbank + +// health : Registry Services, LLC +// https://www.iana.org/domains/root/db/health.html +health + +// healthcare : Binky Moon, LLC +// https://www.iana.org/domains/root/db/healthcare.html +healthcare + +// help : Innovation service Limited +// https://www.iana.org/domains/root/db/help.html +help + +// helsinki : City of Helsinki +// https://www.iana.org/domains/root/db/helsinki.html +helsinki + +// here : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/here.html +here + +// hermes : HERMES INTERNATIONAL +// https://www.iana.org/domains/root/db/hermes.html +hermes + +// hiphop : Dot Hip Hop, LLC +// https://www.iana.org/domains/root/db/hiphop.html +hiphop + +// hisamitsu : Hisamitsu Pharmaceutical Co.,Inc. +// https://www.iana.org/domains/root/db/hisamitsu.html +hisamitsu + +// hitachi : Hitachi, Ltd. 
+// https://www.iana.org/domains/root/db/hitachi.html +hitachi + +// hiv : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/hiv.html +hiv + +// hkt : PCCW-HKT DataCom Services Limited +// https://www.iana.org/domains/root/db/hkt.html +hkt + +// hockey : Binky Moon, LLC +// https://www.iana.org/domains/root/db/hockey.html +hockey + +// holdings : Binky Moon, LLC +// https://www.iana.org/domains/root/db/holdings.html +holdings + +// holiday : Binky Moon, LLC +// https://www.iana.org/domains/root/db/holiday.html +holiday + +// homedepot : Home Depot Product Authority, LLC +// https://www.iana.org/domains/root/db/homedepot.html +homedepot + +// homegoods : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/homegoods.html +homegoods + +// homes : XYZ.COM LLC +// https://www.iana.org/domains/root/db/homes.html +homes + +// homesense : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/homesense.html +homesense + +// honda : Honda Motor Co., Ltd. +// https://www.iana.org/domains/root/db/honda.html +honda + +// horse : Registry Services, LLC +// https://www.iana.org/domains/root/db/horse.html +horse + +// hospital : Binky Moon, LLC +// https://www.iana.org/domains/root/db/hospital.html +hospital + +// host : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/host.html +host + +// hosting : XYZ.COM LLC +// https://www.iana.org/domains/root/db/hosting.html +hosting + +// hot : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/hot.html +hot + +// hotel : HOTEL Top-Level-Domain S.a.r.l +// https://www.iana.org/domains/root/db/hotel.html +hotel + +// hotels : Booking.com B.V. +// https://www.iana.org/domains/root/db/hotels.html +hotels + +// hotmail : Microsoft Corporation +// https://www.iana.org/domains/root/db/hotmail.html +hotmail + +// house : Binky Moon, LLC +// https://www.iana.org/domains/root/db/house.html +house + +// how : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/how.html +how + +// hsbc : HSBC Global Services (UK) Limited +// https://www.iana.org/domains/root/db/hsbc.html +hsbc + +// hughes : Hughes Satellite Systems Corporation +// https://www.iana.org/domains/root/db/hughes.html +hughes + +// hyatt : Hyatt GTLD, L.L.C. +// https://www.iana.org/domains/root/db/hyatt.html +hyatt + +// hyundai : Hyundai Motor Company +// https://www.iana.org/domains/root/db/hyundai.html +hyundai + +// ibm : International Business Machines Corporation +// https://www.iana.org/domains/root/db/ibm.html +ibm + +// icbc : Industrial and Commercial Bank of China Limited +// https://www.iana.org/domains/root/db/icbc.html +icbc + +// ice : IntercontinentalExchange, Inc. +// https://www.iana.org/domains/root/db/ice.html +ice + +// icu : ShortDot SA +// https://www.iana.org/domains/root/db/icu.html +icu + +// ieee : IEEE Global LLC +// https://www.iana.org/domains/root/db/ieee.html +ieee + +// ifm : ifm electronic gmbh +// https://www.iana.org/domains/root/db/ifm.html +ifm + +// ikano : Ikano S.A. +// https://www.iana.org/domains/root/db/ikano.html +ikano + +// imamat : Fondation Aga Khan (Aga Khan Foundation) +// https://www.iana.org/domains/root/db/imamat.html +imamat + +// imdb : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/imdb.html +imdb + +// immo : Binky Moon, LLC +// https://www.iana.org/domains/root/db/immo.html +immo + +// immobilien : Dog Beach, LLC +// https://www.iana.org/domains/root/db/immobilien.html +immobilien + +// inc : Intercap Registry Inc. +// https://www.iana.org/domains/root/db/inc.html +inc + +// industries : Binky Moon, LLC +// https://www.iana.org/domains/root/db/industries.html +industries + +// infiniti : NISSAN MOTOR CO., LTD. +// https://www.iana.org/domains/root/db/infiniti.html +infiniti + +// ing : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/ing.html +ing + +// ink : Registry Services, LLC +// https://www.iana.org/domains/root/db/ink.html +ink + +// institute : Binky Moon, LLC +// https://www.iana.org/domains/root/db/institute.html +institute + +// insurance : fTLD Registry Services LLC +// https://www.iana.org/domains/root/db/insurance.html +insurance + +// insure : Binky Moon, LLC +// https://www.iana.org/domains/root/db/insure.html +insure + +// international : Binky Moon, LLC +// https://www.iana.org/domains/root/db/international.html +international + +// intuit : Intuit Administrative Services, Inc. +// https://www.iana.org/domains/root/db/intuit.html +intuit + +// investments : Binky Moon, LLC +// https://www.iana.org/domains/root/db/investments.html +investments + +// ipiranga : Ipiranga Produtos de Petroleo S.A. +// https://www.iana.org/domains/root/db/ipiranga.html +ipiranga + +// irish : Binky Moon, LLC +// https://www.iana.org/domains/root/db/irish.html +irish + +// ismaili : Fondation Aga Khan (Aga Khan Foundation) +// https://www.iana.org/domains/root/db/ismaili.html +ismaili + +// ist : Istanbul Metropolitan Municipality +// https://www.iana.org/domains/root/db/ist.html +ist + +// istanbul : Istanbul Metropolitan Municipality +// https://www.iana.org/domains/root/db/istanbul.html +istanbul + +// itau : Itau Unibanco Holding S.A. +// https://www.iana.org/domains/root/db/itau.html +itau + +// itv : ITV Services Limited +// https://www.iana.org/domains/root/db/itv.html +itv + +// jaguar : Jaguar Land Rover Ltd +// https://www.iana.org/domains/root/db/jaguar.html +jaguar + +// java : Oracle Corporation +// https://www.iana.org/domains/root/db/java.html +java + +// jcb : JCB Co., Ltd. +// https://www.iana.org/domains/root/db/jcb.html +jcb + +// jeep : FCA US LLC. 
+// https://www.iana.org/domains/root/db/jeep.html +jeep + +// jetzt : Binky Moon, LLC +// https://www.iana.org/domains/root/db/jetzt.html +jetzt + +// jewelry : Binky Moon, LLC +// https://www.iana.org/domains/root/db/jewelry.html +jewelry + +// jio : Reliance Industries Limited +// https://www.iana.org/domains/root/db/jio.html +jio + +// jll : Jones Lang LaSalle Incorporated +// https://www.iana.org/domains/root/db/jll.html +jll + +// jmp : Matrix IP LLC +// https://www.iana.org/domains/root/db/jmp.html +jmp + +// jnj : Johnson & Johnson Services, Inc. +// https://www.iana.org/domains/root/db/jnj.html +jnj + +// joburg : ZA Central Registry NPC trading as ZA Central Registry +// https://www.iana.org/domains/root/db/joburg.html +joburg + +// jot : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/jot.html +jot + +// joy : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/joy.html +joy + +// jpmorgan : JPMorgan Chase Bank, National Association +// https://www.iana.org/domains/root/db/jpmorgan.html +jpmorgan + +// jprs : Japan Registry Services Co., Ltd. +// https://www.iana.org/domains/root/db/jprs.html +jprs + +// juegos : Dog Beach, LLC +// https://www.iana.org/domains/root/db/juegos.html +juegos + +// juniper : JUNIPER NETWORKS, INC. +// https://www.iana.org/domains/root/db/juniper.html +juniper + +// kaufen : Dog Beach, LLC +// https://www.iana.org/domains/root/db/kaufen.html +kaufen + +// kddi : KDDI CORPORATION +// https://www.iana.org/domains/root/db/kddi.html +kddi + +// kerryhotels : Kerry Trading Co. Limited +// https://www.iana.org/domains/root/db/kerryhotels.html +kerryhotels + +// kerryproperties : Kerry Trading Co. 
Limited +// https://www.iana.org/domains/root/db/kerryproperties.html +kerryproperties + +// kfh : Kuwait Finance House +// https://www.iana.org/domains/root/db/kfh.html +kfh + +// kia : KIA MOTORS CORPORATION +// https://www.iana.org/domains/root/db/kia.html +kia + +// kids : DotKids Foundation Limited +// https://www.iana.org/domains/root/db/kids.html +kids + +// kim : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/kim.html +kim + +// kindle : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/kindle.html +kindle + +// kitchen : Binky Moon, LLC +// https://www.iana.org/domains/root/db/kitchen.html +kitchen + +// kiwi : DOT KIWI LIMITED +// https://www.iana.org/domains/root/db/kiwi.html +kiwi + +// koeln : dotKoeln GmbH +// https://www.iana.org/domains/root/db/koeln.html +koeln + +// komatsu : Komatsu Ltd. +// https://www.iana.org/domains/root/db/komatsu.html +komatsu + +// kosher : Kosher Marketing Assets LLC +// https://www.iana.org/domains/root/db/kosher.html +kosher + +// kpmg : KPMG International Cooperative (KPMG International Genossenschaft) +// https://www.iana.org/domains/root/db/kpmg.html +kpmg + +// kpn : Koninklijke KPN N.V. +// https://www.iana.org/domains/root/db/kpn.html +kpn + +// krd : KRG Department of Information Technology +// https://www.iana.org/domains/root/db/krd.html +krd + +// kred : KredTLD Pty Ltd +// https://www.iana.org/domains/root/db/kred.html +kred + +// kuokgroup : Kerry Trading Co. Limited +// https://www.iana.org/domains/root/db/kuokgroup.html +kuokgroup + +// kyoto : Academic Institution: Kyoto Jyoho Gakuen +// https://www.iana.org/domains/root/db/kyoto.html +kyoto + +// lacaixa : Fundación Bancaria Caixa d’Estalvis i Pensions de Barcelona, “la Caixa” +// https://www.iana.org/domains/root/db/lacaixa.html +lacaixa + +// lamborghini : Automobili Lamborghini S.p.A. +// https://www.iana.org/domains/root/db/lamborghini.html +lamborghini + +// lamer : The Estée Lauder Companies Inc. 
+// https://www.iana.org/domains/root/db/lamer.html +lamer + +// land : Binky Moon, LLC +// https://www.iana.org/domains/root/db/land.html +land + +// landrover : Jaguar Land Rover Ltd +// https://www.iana.org/domains/root/db/landrover.html +landrover + +// lanxess : LANXESS Corporation +// https://www.iana.org/domains/root/db/lanxess.html +lanxess + +// lasalle : Jones Lang LaSalle Incorporated +// https://www.iana.org/domains/root/db/lasalle.html +lasalle + +// lat : XYZ.COM LLC +// https://www.iana.org/domains/root/db/lat.html +lat + +// latino : Dish DBS Corporation +// https://www.iana.org/domains/root/db/latino.html +latino + +// latrobe : La Trobe University +// https://www.iana.org/domains/root/db/latrobe.html +latrobe + +// law : Registry Services, LLC +// https://www.iana.org/domains/root/db/law.html +law + +// lawyer : Dog Beach, LLC +// https://www.iana.org/domains/root/db/lawyer.html +lawyer + +// lds : IRI Domain Management, LLC +// https://www.iana.org/domains/root/db/lds.html +lds + +// lease : Binky Moon, LLC +// https://www.iana.org/domains/root/db/lease.html +lease + +// leclerc : A.C.D. LEC Association des Centres Distributeurs Edouard Leclerc +// https://www.iana.org/domains/root/db/leclerc.html +leclerc + +// lefrak : LeFrak Organization, Inc. +// https://www.iana.org/domains/root/db/lefrak.html +lefrak + +// legal : Binky Moon, LLC +// https://www.iana.org/domains/root/db/legal.html +legal + +// lego : LEGO Juris A/S +// https://www.iana.org/domains/root/db/lego.html +lego + +// lexus : TOYOTA MOTOR CORPORATION +// https://www.iana.org/domains/root/db/lexus.html +lexus + +// lgbt : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/lgbt.html +lgbt + +// lidl : Schwarz Domains und Services GmbH & Co. 
KG +// https://www.iana.org/domains/root/db/lidl.html +lidl + +// life : Binky Moon, LLC +// https://www.iana.org/domains/root/db/life.html +life + +// lifeinsurance : American Council of Life Insurers +// https://www.iana.org/domains/root/db/lifeinsurance.html +lifeinsurance + +// lifestyle : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/lifestyle.html +lifestyle + +// lighting : Binky Moon, LLC +// https://www.iana.org/domains/root/db/lighting.html +lighting + +// like : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/like.html +like + +// lilly : Eli Lilly and Company +// https://www.iana.org/domains/root/db/lilly.html +lilly + +// limited : Binky Moon, LLC +// https://www.iana.org/domains/root/db/limited.html +limited + +// limo : Binky Moon, LLC +// https://www.iana.org/domains/root/db/limo.html +limo + +// lincoln : Ford Motor Company +// https://www.iana.org/domains/root/db/lincoln.html +lincoln + +// link : Nova Registry Ltd +// https://www.iana.org/domains/root/db/link.html +link + +// live : Dog Beach, LLC +// https://www.iana.org/domains/root/db/live.html +live + +// living : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/living.html +living + +// llc : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/llc.html +llc + +// llp : Intercap Registry Inc. 
+// https://www.iana.org/domains/root/db/llp.html +llp + +// loan : dot Loan Limited +// https://www.iana.org/domains/root/db/loan.html +loan + +// loans : Binky Moon, LLC +// https://www.iana.org/domains/root/db/loans.html +loans + +// locker : Orange Domains LLC +// https://www.iana.org/domains/root/db/locker.html +locker + +// locus : Locus Analytics LLC +// https://www.iana.org/domains/root/db/locus.html +locus + +// lol : XYZ.COM LLC +// https://www.iana.org/domains/root/db/lol.html +lol + +// london : Dot London Domains Limited +// https://www.iana.org/domains/root/db/london.html +london + +// lotte : Lotte Holdings Co., Ltd. +// https://www.iana.org/domains/root/db/lotte.html +lotte + +// lotto : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/lotto.html +lotto + +// love : Waterford Limited +// https://www.iana.org/domains/root/db/love.html +love + +// lpl : LPL Holdings, Inc. +// https://www.iana.org/domains/root/db/lpl.html +lpl + +// lplfinancial : LPL Holdings, Inc. +// https://www.iana.org/domains/root/db/lplfinancial.html +lplfinancial + +// ltd : Binky Moon, LLC +// https://www.iana.org/domains/root/db/ltd.html +ltd + +// ltda : InterNetX, Corp +// https://www.iana.org/domains/root/db/ltda.html +ltda + +// lundbeck : H. 
Lundbeck A/S +// https://www.iana.org/domains/root/db/lundbeck.html +lundbeck + +// luxe : Registry Services, LLC +// https://www.iana.org/domains/root/db/luxe.html +luxe + +// luxury : Luxury Partners, LLC +// https://www.iana.org/domains/root/db/luxury.html +luxury + +// madrid : Comunidad de Madrid +// https://www.iana.org/domains/root/db/madrid.html +madrid + +// maif : Mutuelle Assurance Instituteur France (MAIF) +// https://www.iana.org/domains/root/db/maif.html +maif + +// maison : Binky Moon, LLC +// https://www.iana.org/domains/root/db/maison.html +maison + +// makeup : XYZ.COM LLC +// https://www.iana.org/domains/root/db/makeup.html +makeup + +// man : MAN Truck & Bus SE +// https://www.iana.org/domains/root/db/man.html +man + +// management : Binky Moon, LLC +// https://www.iana.org/domains/root/db/management.html +management + +// mango : PUNTO FA S.L. +// https://www.iana.org/domains/root/db/mango.html +mango + +// map : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/map.html +map + +// market : Dog Beach, LLC +// https://www.iana.org/domains/root/db/market.html +market + +// marketing : Binky Moon, LLC +// https://www.iana.org/domains/root/db/marketing.html +marketing + +// markets : Dog Beach, LLC +// https://www.iana.org/domains/root/db/markets.html +markets + +// marriott : Marriott Worldwide Corporation +// https://www.iana.org/domains/root/db/marriott.html +marriott + +// marshalls : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/marshalls.html +marshalls + +// mattel : Mattel IT Services, Inc. +// https://www.iana.org/domains/root/db/mattel.html +mattel + +// mba : Binky Moon, LLC +// https://www.iana.org/domains/root/db/mba.html +mba + +// mckinsey : McKinsey Holdings, Inc. 
+// https://www.iana.org/domains/root/db/mckinsey.html +mckinsey + +// med : Medistry LLC +// https://www.iana.org/domains/root/db/med.html +med + +// media : Binky Moon, LLC +// https://www.iana.org/domains/root/db/media.html +media + +// meet : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/meet.html +meet + +// melbourne : The Crown in right of the State of Victoria, represented by its Department of State Development, Business and Innovation +// https://www.iana.org/domains/root/db/melbourne.html +melbourne + +// meme : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/meme.html +meme + +// memorial : Dog Beach, LLC +// https://www.iana.org/domains/root/db/memorial.html +memorial + +// men : Exclusive Registry Limited +// https://www.iana.org/domains/root/db/men.html +men + +// menu : Dot Menu Registry, LLC +// https://www.iana.org/domains/root/db/menu.html +menu + +// merck : Merck Registry Holdings, Inc. +// https://www.iana.org/domains/root/db/merck.html +merck + +// merckmsd : MSD Registry Holdings, Inc. +// https://www.iana.org/domains/root/db/merckmsd.html +merckmsd + +// miami : Registry Services, LLC +// https://www.iana.org/domains/root/db/miami.html +miami + +// microsoft : Microsoft Corporation +// https://www.iana.org/domains/root/db/microsoft.html +microsoft + +// mini : Bayerische Motoren Werke Aktiengesellschaft +// https://www.iana.org/domains/root/db/mini.html +mini + +// mint : Intuit Administrative Services, Inc. 
+// https://www.iana.org/domains/root/db/mint.html +mint + +// mit : Massachusetts Institute of Technology +// https://www.iana.org/domains/root/db/mit.html +mit + +// mitsubishi : Mitsubishi Corporation +// https://www.iana.org/domains/root/db/mitsubishi.html +mitsubishi + +// mlb : MLB Advanced Media DH, LLC +// https://www.iana.org/domains/root/db/mlb.html +mlb + +// mls : The Canadian Real Estate Association +// https://www.iana.org/domains/root/db/mls.html +mls + +// mma : MMA IARD +// https://www.iana.org/domains/root/db/mma.html +mma + +// mobile : Dish DBS Corporation +// https://www.iana.org/domains/root/db/mobile.html +mobile + +// moda : Dog Beach, LLC +// https://www.iana.org/domains/root/db/moda.html +moda + +// moe : Interlink Systems Innovation Institute K.K. +// https://www.iana.org/domains/root/db/moe.html +moe + +// moi : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/moi.html +moi + +// mom : XYZ.COM LLC +// https://www.iana.org/domains/root/db/mom.html +mom + +// monash : Monash University +// https://www.iana.org/domains/root/db/monash.html +monash + +// money : Binky Moon, LLC +// https://www.iana.org/domains/root/db/money.html +money + +// monster : XYZ.COM LLC +// https://www.iana.org/domains/root/db/monster.html +monster + +// mormon : IRI Domain Management, LLC +// https://www.iana.org/domains/root/db/mormon.html +mormon + +// mortgage : Dog Beach, LLC +// https://www.iana.org/domains/root/db/mortgage.html +mortgage + +// moscow : Foundation for Assistance for Internet Technologies and Infrastructure Development (FAITID) +// https://www.iana.org/domains/root/db/moscow.html +moscow + +// moto : Motorola Trademark Holdings, LLC +// https://www.iana.org/domains/root/db/moto.html +moto + +// motorcycles : XYZ.COM LLC +// https://www.iana.org/domains/root/db/motorcycles.html +motorcycles + +// mov : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/mov.html +mov + +// movie : Binky Moon, LLC +// https://www.iana.org/domains/root/db/movie.html +movie + +// msd : MSD Registry Holdings, Inc. +// https://www.iana.org/domains/root/db/msd.html +msd + +// mtn : MTN Dubai Limited +// https://www.iana.org/domains/root/db/mtn.html +mtn + +// mtr : MTR Corporation Limited +// https://www.iana.org/domains/root/db/mtr.html +mtr + +// music : DotMusic Limited +// https://www.iana.org/domains/root/db/music.html +music + +// nab : National Australia Bank Limited +// https://www.iana.org/domains/root/db/nab.html +nab + +// nagoya : GMO Registry, Inc. +// https://www.iana.org/domains/root/db/nagoya.html +nagoya + +// navy : Dog Beach, LLC +// https://www.iana.org/domains/root/db/navy.html +navy + +// nba : NBA REGISTRY, LLC +// https://www.iana.org/domains/root/db/nba.html +nba + +// nec : NEC Corporation +// https://www.iana.org/domains/root/db/nec.html +nec + +// netbank : COMMONWEALTH BANK OF AUSTRALIA +// https://www.iana.org/domains/root/db/netbank.html +netbank + +// netflix : Netflix, Inc. +// https://www.iana.org/domains/root/db/netflix.html +netflix + +// network : Binky Moon, LLC +// https://www.iana.org/domains/root/db/network.html +network + +// neustar : NeuStar, Inc. +// https://www.iana.org/domains/root/db/neustar.html +neustar + +// new : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/new.html +new + +// news : Dog Beach, LLC +// https://www.iana.org/domains/root/db/news.html +news + +// next : Next plc +// https://www.iana.org/domains/root/db/next.html +next + +// nextdirect : Next plc +// https://www.iana.org/domains/root/db/nextdirect.html +nextdirect + +// nexus : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/nexus.html +nexus + +// nfl : NFL Reg Ops LLC +// https://www.iana.org/domains/root/db/nfl.html +nfl + +// ngo : Public Interest Registry +// https://www.iana.org/domains/root/db/ngo.html +ngo + +// nhk : Japan Broadcasting Corporation (NHK) +// https://www.iana.org/domains/root/db/nhk.html +nhk + +// nico : DWANGO Co., Ltd. +// https://www.iana.org/domains/root/db/nico.html +nico + +// nike : NIKE, Inc. +// https://www.iana.org/domains/root/db/nike.html +nike + +// nikon : NIKON CORPORATION +// https://www.iana.org/domains/root/db/nikon.html +nikon + +// ninja : Dog Beach, LLC +// https://www.iana.org/domains/root/db/ninja.html +ninja + +// nissan : NISSAN MOTOR CO., LTD. +// https://www.iana.org/domains/root/db/nissan.html +nissan + +// nissay : Nippon Life Insurance Company +// https://www.iana.org/domains/root/db/nissay.html +nissay + +// nokia : Nokia Corporation +// https://www.iana.org/domains/root/db/nokia.html +nokia + +// norton : Gen Digital Inc. +// https://www.iana.org/domains/root/db/norton.html +norton + +// now : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/now.html +now + +// nowruz +// https://www.iana.org/domains/root/db/nowruz.html +nowruz + +// nowtv : Starbucks (HK) Limited +// https://www.iana.org/domains/root/db/nowtv.html +nowtv + +// nra : NRA Holdings Company, INC. +// https://www.iana.org/domains/root/db/nra.html +nra + +// nrw : Minds + Machines GmbH +// https://www.iana.org/domains/root/db/nrw.html +nrw + +// ntt : NIPPON TELEGRAPH AND TELEPHONE CORPORATION +// https://www.iana.org/domains/root/db/ntt.html +ntt + +// nyc : The City of New York by and through the New York City Department of Information Technology & Telecommunications +// https://www.iana.org/domains/root/db/nyc.html +nyc + +// obi : OBI Group Holding SE & Co. 
KGaA +// https://www.iana.org/domains/root/db/obi.html +obi + +// observer : Fegistry, LLC +// https://www.iana.org/domains/root/db/observer.html +observer + +// office : Microsoft Corporation +// https://www.iana.org/domains/root/db/office.html +office + +// okinawa : BRregistry, Inc. +// https://www.iana.org/domains/root/db/okinawa.html +okinawa + +// olayan : Competrol (Luxembourg) Sarl +// https://www.iana.org/domains/root/db/olayan.html +olayan + +// olayangroup : Competrol (Luxembourg) Sarl +// https://www.iana.org/domains/root/db/olayangroup.html +olayangroup + +// ollo : Dish DBS Corporation +// https://www.iana.org/domains/root/db/ollo.html +ollo + +// omega : The Swatch Group Ltd +// https://www.iana.org/domains/root/db/omega.html +omega + +// one : One.com A/S +// https://www.iana.org/domains/root/db/one.html +one + +// ong : Public Interest Registry +// https://www.iana.org/domains/root/db/ong.html +ong + +// onl : iRegistry GmbH +// https://www.iana.org/domains/root/db/onl.html +onl + +// online : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/online.html +online + +// ooo : INFIBEAM AVENUES LIMITED +// https://www.iana.org/domains/root/db/ooo.html +ooo + +// open : American Express Travel Related Services Company, Inc. +// https://www.iana.org/domains/root/db/open.html +open + +// oracle : Oracle Corporation +// https://www.iana.org/domains/root/db/oracle.html +oracle + +// orange : Orange Brand Services Limited +// https://www.iana.org/domains/root/db/orange.html +orange + +// organic : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/organic.html +organic + +// origins : The Estée Lauder Companies Inc. +// https://www.iana.org/domains/root/db/origins.html +origins + +// osaka : Osaka Registry Co., Ltd. +// https://www.iana.org/domains/root/db/osaka.html +osaka + +// otsuka : Otsuka Holdings Co., Ltd. 
+// https://www.iana.org/domains/root/db/otsuka.html +otsuka + +// ott : Dish DBS Corporation +// https://www.iana.org/domains/root/db/ott.html +ott + +// ovh : MédiaBC +// https://www.iana.org/domains/root/db/ovh.html +ovh + +// page : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/page.html +page + +// panasonic : Panasonic Holdings Corporation +// https://www.iana.org/domains/root/db/panasonic.html +panasonic + +// paris : City of Paris +// https://www.iana.org/domains/root/db/paris.html +paris + +// pars +// https://www.iana.org/domains/root/db/pars.html +pars + +// partners : Binky Moon, LLC +// https://www.iana.org/domains/root/db/partners.html +partners + +// parts : Binky Moon, LLC +// https://www.iana.org/domains/root/db/parts.html +parts + +// party : Blue Sky Registry Limited +// https://www.iana.org/domains/root/db/party.html +party + +// pay : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/pay.html +pay + +// pccw : PCCW Enterprises Limited +// https://www.iana.org/domains/root/db/pccw.html +pccw + +// pet : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/pet.html +pet + +// pfizer : Pfizer Inc. +// https://www.iana.org/domains/root/db/pfizer.html +pfizer + +// pharmacy : National Association of Boards of Pharmacy +// https://www.iana.org/domains/root/db/pharmacy.html +pharmacy + +// phd : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/phd.html +phd + +// philips : Koninklijke Philips N.V. 
+// https://www.iana.org/domains/root/db/philips.html +philips + +// phone : Dish DBS Corporation +// https://www.iana.org/domains/root/db/phone.html +phone + +// photo : Registry Services, LLC +// https://www.iana.org/domains/root/db/photo.html +photo + +// photography : Binky Moon, LLC +// https://www.iana.org/domains/root/db/photography.html +photography + +// photos : Binky Moon, LLC +// https://www.iana.org/domains/root/db/photos.html +photos + +// physio : PhysBiz Pty Ltd +// https://www.iana.org/domains/root/db/physio.html +physio + +// pics : XYZ.COM LLC +// https://www.iana.org/domains/root/db/pics.html +pics + +// pictet : Banque Pictet & Cie SA +// https://www.iana.org/domains/root/db/pictet.html +pictet + +// pictures : Binky Moon, LLC +// https://www.iana.org/domains/root/db/pictures.html +pictures + +// pid : Top Level Spectrum, Inc. +// https://www.iana.org/domains/root/db/pid.html +pid + +// pin : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/pin.html +pin + +// ping : Ping Registry Provider, Inc. +// https://www.iana.org/domains/root/db/ping.html +ping + +// pink : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/pink.html +pink + +// pioneer : Pioneer Corporation +// https://www.iana.org/domains/root/db/pioneer.html +pioneer + +// pizza : Binky Moon, LLC +// https://www.iana.org/domains/root/db/pizza.html +pizza + +// place : Binky Moon, LLC +// https://www.iana.org/domains/root/db/place.html +place + +// play : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/play.html +play + +// playstation : Sony Interactive Entertainment Inc. 
+// https://www.iana.org/domains/root/db/playstation.html +playstation + +// plumbing : Binky Moon, LLC +// https://www.iana.org/domains/root/db/plumbing.html +plumbing + +// plus : Binky Moon, LLC +// https://www.iana.org/domains/root/db/plus.html +plus + +// pnc : PNC Domain Co., LLC +// https://www.iana.org/domains/root/db/pnc.html +pnc + +// pohl : Deutsche Vermögensberatung Aktiengesellschaft DVAG +// https://www.iana.org/domains/root/db/pohl.html +pohl + +// poker : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/poker.html +poker + +// politie : Politie Nederland +// https://www.iana.org/domains/root/db/politie.html +politie + +// porn : ICM Registry PN LLC +// https://www.iana.org/domains/root/db/porn.html +porn + +// praxi : Praxi S.p.A. +// https://www.iana.org/domains/root/db/praxi.html +praxi + +// press : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/press.html +press + +// prime : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/prime.html +prime + +// prod : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/prod.html +prod + +// productions : Binky Moon, LLC +// https://www.iana.org/domains/root/db/productions.html +productions + +// prof : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/prof.html +prof + +// progressive : Progressive Casualty Insurance Company +// https://www.iana.org/domains/root/db/progressive.html +progressive + +// promo : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/promo.html +promo + +// properties : Binky Moon, LLC +// https://www.iana.org/domains/root/db/properties.html +properties + +// property : Digital Property Infrastructure Limited +// https://www.iana.org/domains/root/db/property.html +property + +// protection : XYZ.COM LLC +// https://www.iana.org/domains/root/db/protection.html +protection + +// pru : Prudential Financial, Inc. 
+// https://www.iana.org/domains/root/db/pru.html +pru + +// prudential : Prudential Financial, Inc. +// https://www.iana.org/domains/root/db/prudential.html +prudential + +// pub : Dog Beach, LLC +// https://www.iana.org/domains/root/db/pub.html +pub + +// pwc : PricewaterhouseCoopers LLP +// https://www.iana.org/domains/root/db/pwc.html +pwc + +// qpon : dotQPON LLC +// https://www.iana.org/domains/root/db/qpon.html +qpon + +// quebec : PointQuébec Inc +// https://www.iana.org/domains/root/db/quebec.html +quebec + +// quest : XYZ.COM LLC +// https://www.iana.org/domains/root/db/quest.html +quest + +// racing : Premier Registry Limited +// https://www.iana.org/domains/root/db/racing.html +racing + +// radio : European Broadcasting Union (EBU) +// https://www.iana.org/domains/root/db/radio.html +radio + +// read : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/read.html +read + +// realestate : dotRealEstate LLC +// https://www.iana.org/domains/root/db/realestate.html +realestate + +// realtor : Real Estate Domains LLC +// https://www.iana.org/domains/root/db/realtor.html +realtor + +// realty : Waterford Limited +// https://www.iana.org/domains/root/db/realty.html +realty + +// recipes : Binky Moon, LLC +// https://www.iana.org/domains/root/db/recipes.html +recipes + +// red : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/red.html +red + +// redstone : Redstone Haute Couture Co., Ltd. +// https://www.iana.org/domains/root/db/redstone.html +redstone + +// redumbrella : Travelers TLD, LLC +// https://www.iana.org/domains/root/db/redumbrella.html +redumbrella + +// rehab : Dog Beach, LLC +// https://www.iana.org/domains/root/db/rehab.html +rehab + +// reise : Binky Moon, LLC +// https://www.iana.org/domains/root/db/reise.html +reise + +// reisen : Binky Moon, LLC +// https://www.iana.org/domains/root/db/reisen.html +reisen + +// reit : National Association of Real Estate Investment Trusts, Inc. 
+// https://www.iana.org/domains/root/db/reit.html +reit + +// reliance : Reliance Industries Limited +// https://www.iana.org/domains/root/db/reliance.html +reliance + +// ren : ZDNS International Limited +// https://www.iana.org/domains/root/db/ren.html +ren + +// rent : XYZ.COM LLC +// https://www.iana.org/domains/root/db/rent.html +rent + +// rentals : Binky Moon, LLC +// https://www.iana.org/domains/root/db/rentals.html +rentals + +// repair : Binky Moon, LLC +// https://www.iana.org/domains/root/db/repair.html +repair + +// report : Binky Moon, LLC +// https://www.iana.org/domains/root/db/report.html +report + +// republican : Dog Beach, LLC +// https://www.iana.org/domains/root/db/republican.html +republican + +// rest : Punto 2012 Sociedad Anonima Promotora de Inversion de Capital Variable +// https://www.iana.org/domains/root/db/rest.html +rest + +// restaurant : Binky Moon, LLC +// https://www.iana.org/domains/root/db/restaurant.html +restaurant + +// review : dot Review Limited +// https://www.iana.org/domains/root/db/review.html +review + +// reviews : Dog Beach, LLC +// https://www.iana.org/domains/root/db/reviews.html +reviews + +// rexroth : Robert Bosch GMBH +// https://www.iana.org/domains/root/db/rexroth.html +rexroth + +// rich : iRegistry GmbH +// https://www.iana.org/domains/root/db/rich.html +rich + +// richardli : Pacific Century Asset Management (HK) Limited +// https://www.iana.org/domains/root/db/richardli.html +richardli + +// ricoh : Ricoh Company, Ltd. 
+// https://www.iana.org/domains/root/db/ricoh.html +ricoh + +// ril : Reliance Industries Limited +// https://www.iana.org/domains/root/db/ril.html +ril + +// rio : Empresa Municipal de Informática SA - IPLANRIO +// https://www.iana.org/domains/root/db/rio.html +rio + +// rip : Dog Beach, LLC +// https://www.iana.org/domains/root/db/rip.html +rip + +// rocks : Dog Beach, LLC +// https://www.iana.org/domains/root/db/rocks.html +rocks + +// rodeo : Registry Services, LLC +// https://www.iana.org/domains/root/db/rodeo.html +rodeo + +// rogers : Rogers Communications Canada Inc. +// https://www.iana.org/domains/root/db/rogers.html +rogers + +// room : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/room.html +room + +// rsvp : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/rsvp.html +rsvp + +// rugby : World Rugby Strategic Developments Limited +// https://www.iana.org/domains/root/db/rugby.html +rugby + +// ruhr : dotSaarland GmbH +// https://www.iana.org/domains/root/db/ruhr.html +ruhr + +// run : Binky Moon, LLC +// https://www.iana.org/domains/root/db/run.html +run + +// rwe : RWE AG +// https://www.iana.org/domains/root/db/rwe.html +rwe + +// ryukyu : BRregistry, Inc. +// https://www.iana.org/domains/root/db/ryukyu.html +ryukyu + +// saarland : dotSaarland GmbH +// https://www.iana.org/domains/root/db/saarland.html +saarland + +// safe : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/safe.html +safe + +// safety : Safety Registry Services, LLC. +// https://www.iana.org/domains/root/db/safety.html +safety + +// sakura : SAKURA Internet Inc. +// https://www.iana.org/domains/root/db/sakura.html +sakura + +// sale : Dog Beach, LLC +// https://www.iana.org/domains/root/db/sale.html +sale + +// salon : Binky Moon, LLC +// https://www.iana.org/domains/root/db/salon.html +salon + +// samsclub : Wal-Mart Stores, Inc. 
+// https://www.iana.org/domains/root/db/samsclub.html +samsclub + +// samsung : SAMSUNG SDS CO., LTD +// https://www.iana.org/domains/root/db/samsung.html +samsung + +// sandvik : Sandvik AB +// https://www.iana.org/domains/root/db/sandvik.html +sandvik + +// sandvikcoromant : Sandvik AB +// https://www.iana.org/domains/root/db/sandvikcoromant.html +sandvikcoromant + +// sanofi : Sanofi +// https://www.iana.org/domains/root/db/sanofi.html +sanofi + +// sap : SAP AG +// https://www.iana.org/domains/root/db/sap.html +sap + +// sarl : Binky Moon, LLC +// https://www.iana.org/domains/root/db/sarl.html +sarl + +// sas : Research IP LLC +// https://www.iana.org/domains/root/db/sas.html +sas + +// save : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/save.html +save + +// saxo : Saxo Bank A/S +// https://www.iana.org/domains/root/db/saxo.html +saxo + +// sbi : STATE BANK OF INDIA +// https://www.iana.org/domains/root/db/sbi.html +sbi + +// sbs : ShortDot SA +// https://www.iana.org/domains/root/db/sbs.html +sbs + +// scb : The Siam Commercial Bank Public Company Limited ("SCB") +// https://www.iana.org/domains/root/db/scb.html +scb + +// schaeffler : Schaeffler Technologies AG & Co. KG +// https://www.iana.org/domains/root/db/schaeffler.html +schaeffler + +// schmidt : SCHMIDT GROUPE S.A.S. +// https://www.iana.org/domains/root/db/schmidt.html +schmidt + +// scholarships : Scholarships.com, LLC +// https://www.iana.org/domains/root/db/scholarships.html +scholarships + +// school : Binky Moon, LLC +// https://www.iana.org/domains/root/db/school.html +school + +// schule : Binky Moon, LLC +// https://www.iana.org/domains/root/db/schule.html +schule + +// schwarz : Schwarz Domains und Services GmbH & Co. 
KG +// https://www.iana.org/domains/root/db/schwarz.html +schwarz + +// science : dot Science Limited +// https://www.iana.org/domains/root/db/science.html +science + +// scot : Dot Scot Registry Limited +// https://www.iana.org/domains/root/db/scot.html +scot + +// search : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/search.html +search + +// seat : SEAT, S.A. (Sociedad Unipersonal) +// https://www.iana.org/domains/root/db/seat.html +seat + +// secure : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/secure.html +secure + +// security : XYZ.COM LLC +// https://www.iana.org/domains/root/db/security.html +security + +// seek : Seek Limited +// https://www.iana.org/domains/root/db/seek.html +seek + +// select : Registry Services, LLC +// https://www.iana.org/domains/root/db/select.html +select + +// sener : Sener Ingeniería y Sistemas, S.A. +// https://www.iana.org/domains/root/db/sener.html +sener + +// services : Binky Moon, LLC +// https://www.iana.org/domains/root/db/services.html +services + +// seven : Seven West Media Ltd +// https://www.iana.org/domains/root/db/seven.html +seven + +// sew : SEW-EURODRIVE GmbH & Co KG +// https://www.iana.org/domains/root/db/sew.html +sew + +// sex : ICM Registry SX LLC +// https://www.iana.org/domains/root/db/sex.html +sex + +// sexy : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/sexy.html +sexy + +// sfr : Societe Francaise du Radiotelephone - SFR +// https://www.iana.org/domains/root/db/sfr.html +sfr + +// shangrila : Shangri‐La International Hotel Management Limited +// https://www.iana.org/domains/root/db/shangrila.html +shangrila + +// sharp : Sharp Corporation +// https://www.iana.org/domains/root/db/sharp.html +sharp + +// shell : Shell Information Technology International Inc +// https://www.iana.org/domains/root/db/shell.html +shell + +// shia +// https://www.iana.org/domains/root/db/shia.html +shia + +// shiksha : Identity Digital Domains 
Limited +// https://www.iana.org/domains/root/db/shiksha.html +shiksha + +// shoes : Binky Moon, LLC +// https://www.iana.org/domains/root/db/shoes.html +shoes + +// shop : GMO Registry, Inc. +// https://www.iana.org/domains/root/db/shop.html +shop + +// shopping : Binky Moon, LLC +// https://www.iana.org/domains/root/db/shopping.html +shopping + +// shouji : Beijing Qihu Keji Co., Ltd. +// https://www.iana.org/domains/root/db/shouji.html +shouji + +// show : Binky Moon, LLC +// https://www.iana.org/domains/root/db/show.html +show + +// silk : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/silk.html +silk + +// sina : Sina Corporation +// https://www.iana.org/domains/root/db/sina.html +sina + +// singles : Binky Moon, LLC +// https://www.iana.org/domains/root/db/singles.html +singles + +// site : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/site.html +site + +// ski : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/ski.html +ski + +// skin : XYZ.COM LLC +// https://www.iana.org/domains/root/db/skin.html +skin + +// sky : Sky UK Limited +// https://www.iana.org/domains/root/db/sky.html +sky + +// skype : Microsoft Corporation +// https://www.iana.org/domains/root/db/skype.html +skype + +// sling : DISH Technologies L.L.C. +// https://www.iana.org/domains/root/db/sling.html +sling + +// smart : Smart Communications, Inc. (SMART) +// https://www.iana.org/domains/root/db/smart.html +smart + +// smile : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/smile.html +smile + +// sncf : Société Nationale SNCF +// https://www.iana.org/domains/root/db/sncf.html +sncf + +// soccer : Binky Moon, LLC +// https://www.iana.org/domains/root/db/soccer.html +soccer + +// social : Dog Beach, LLC +// https://www.iana.org/domains/root/db/social.html +social + +// softbank : SoftBank Group Corp. 
+// https://www.iana.org/domains/root/db/softbank.html +softbank + +// software : Dog Beach, LLC +// https://www.iana.org/domains/root/db/software.html +software + +// sohu : Sohu.com Limited +// https://www.iana.org/domains/root/db/sohu.html +sohu + +// solar : Binky Moon, LLC +// https://www.iana.org/domains/root/db/solar.html +solar + +// solutions : Binky Moon, LLC +// https://www.iana.org/domains/root/db/solutions.html +solutions + +// song : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/song.html +song + +// sony : Sony Corporation +// https://www.iana.org/domains/root/db/sony.html +sony + +// soy : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/soy.html +soy + +// spa : Asia Spa and Wellness Promotion Council Limited +// https://www.iana.org/domains/root/db/spa.html +spa + +// space : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/space.html +space + +// sport : SportAccord +// https://www.iana.org/domains/root/db/sport.html +sport + +// spot : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/spot.html +spot + +// srl : InterNetX, Corp +// https://www.iana.org/domains/root/db/srl.html +srl + +// stada : STADA Arzneimittel AG +// https://www.iana.org/domains/root/db/stada.html +stada + +// staples : Staples, Inc. 
+// https://www.iana.org/domains/root/db/staples.html +staples + +// star : Star India Private Limited +// https://www.iana.org/domains/root/db/star.html +star + +// statebank : STATE BANK OF INDIA +// https://www.iana.org/domains/root/db/statebank.html +statebank + +// statefarm : State Farm Mutual Automobile Insurance Company +// https://www.iana.org/domains/root/db/statefarm.html +statefarm + +// stc : Saudi Telecom Company +// https://www.iana.org/domains/root/db/stc.html +stc + +// stcgroup : Saudi Telecom Company +// https://www.iana.org/domains/root/db/stcgroup.html +stcgroup + +// stockholm : Stockholms kommun +// https://www.iana.org/domains/root/db/stockholm.html +stockholm + +// storage : XYZ.COM LLC +// https://www.iana.org/domains/root/db/storage.html +storage + +// store : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/store.html +store + +// stream : dot Stream Limited +// https://www.iana.org/domains/root/db/stream.html +stream + +// studio : Dog Beach, LLC +// https://www.iana.org/domains/root/db/studio.html +studio + +// study : Registry Services, LLC +// https://www.iana.org/domains/root/db/study.html +study + +// style : Binky Moon, LLC +// https://www.iana.org/domains/root/db/style.html +style + +// sucks : Vox Populi Registry Ltd. 
+// https://www.iana.org/domains/root/db/sucks.html +sucks + +// supplies : Binky Moon, LLC +// https://www.iana.org/domains/root/db/supplies.html +supplies + +// supply : Binky Moon, LLC +// https://www.iana.org/domains/root/db/supply.html +supply + +// support : Binky Moon, LLC +// https://www.iana.org/domains/root/db/support.html +support + +// surf : Registry Services, LLC +// https://www.iana.org/domains/root/db/surf.html +surf + +// surgery : Binky Moon, LLC +// https://www.iana.org/domains/root/db/surgery.html +surgery + +// suzuki : SUZUKI MOTOR CORPORATION +// https://www.iana.org/domains/root/db/suzuki.html +suzuki + +// swatch : The Swatch Group Ltd +// https://www.iana.org/domains/root/db/swatch.html +swatch + +// swiss : Swiss Confederation +// https://www.iana.org/domains/root/db/swiss.html +swiss + +// sydney : State of New South Wales, Department of Premier and Cabinet +// https://www.iana.org/domains/root/db/sydney.html +sydney + +// systems : Binky Moon, LLC +// https://www.iana.org/domains/root/db/systems.html +systems + +// tab : Tabcorp Holdings Limited +// https://www.iana.org/domains/root/db/tab.html +tab + +// taipei : Taipei City Government +// https://www.iana.org/domains/root/db/taipei.html +taipei + +// talk : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/talk.html +talk + +// taobao : Alibaba Group Holding Limited +// https://www.iana.org/domains/root/db/taobao.html +taobao + +// target : Target Domain Holdings, LLC +// https://www.iana.org/domains/root/db/target.html +target + +// tatamotors : Tata Motors Ltd +// https://www.iana.org/domains/root/db/tatamotors.html +tatamotors + +// tatar : Limited Liability Company "Coordination Center of Regional Domain of Tatarstan Republic" +// https://www.iana.org/domains/root/db/tatar.html +tatar + +// tattoo : Registry Services, LLC +// https://www.iana.org/domains/root/db/tattoo.html +tattoo + +// tax : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tax.html +tax + +// taxi : Binky Moon, LLC +// https://www.iana.org/domains/root/db/taxi.html +taxi + +// tci +// https://www.iana.org/domains/root/db/tci.html +tci + +// tdk : TDK Corporation +// https://www.iana.org/domains/root/db/tdk.html +tdk + +// team : Binky Moon, LLC +// https://www.iana.org/domains/root/db/team.html +team + +// tech : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/tech.html +tech + +// technology : Binky Moon, LLC +// https://www.iana.org/domains/root/db/technology.html +technology + +// temasek : Temasek Holdings (Private) Limited +// https://www.iana.org/domains/root/db/temasek.html +temasek + +// tennis : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tennis.html +tennis + +// teva : Teva Pharmaceutical Industries Limited +// https://www.iana.org/domains/root/db/teva.html +teva + +// thd : Home Depot Product Authority, LLC +// https://www.iana.org/domains/root/db/thd.html +thd + +// theater : Binky Moon, LLC +// https://www.iana.org/domains/root/db/theater.html +theater + +// theatre : XYZ.COM LLC +// https://www.iana.org/domains/root/db/theatre.html +theatre + +// tiaa : Teachers Insurance and Annuity Association of America +// https://www.iana.org/domains/root/db/tiaa.html +tiaa + +// tickets : XYZ.COM LLC +// 
https://www.iana.org/domains/root/db/tickets.html +tickets + +// tienda : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tienda.html +tienda + +// tips : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tips.html +tips + +// tires : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tires.html +tires + +// tirol : punkt Tirol GmbH +// https://www.iana.org/domains/root/db/tirol.html +tirol + +// tjmaxx : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/tjmaxx.html +tjmaxx + +// tjx : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/tjx.html +tjx + +// tkmaxx : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/tkmaxx.html +tkmaxx + +// tmall : Alibaba Group Holding Limited +// https://www.iana.org/domains/root/db/tmall.html +tmall + +// today : Binky Moon, LLC +// https://www.iana.org/domains/root/db/today.html +today + +// tokyo : GMO Registry, Inc. +// https://www.iana.org/domains/root/db/tokyo.html +tokyo + +// tools : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tools.html +tools + +// top : .TOP Registry +// https://www.iana.org/domains/root/db/top.html +top + +// toray : Toray Industries, Inc. 
+// https://www.iana.org/domains/root/db/toray.html +toray + +// toshiba : TOSHIBA Corporation +// https://www.iana.org/domains/root/db/toshiba.html +toshiba + +// total : TotalEnergies SE +// https://www.iana.org/domains/root/db/total.html +total + +// tours : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tours.html +tours + +// town : Binky Moon, LLC +// https://www.iana.org/domains/root/db/town.html +town + +// toyota : TOYOTA MOTOR CORPORATION +// https://www.iana.org/domains/root/db/toyota.html +toyota + +// toys : Binky Moon, LLC +// https://www.iana.org/domains/root/db/toys.html +toys + +// trade : Elite Registry Limited +// https://www.iana.org/domains/root/db/trade.html +trade + +// trading : Dog Beach, LLC +// https://www.iana.org/domains/root/db/trading.html +trading + +// training : Binky Moon, LLC +// https://www.iana.org/domains/root/db/training.html +training + +// travel : Dog Beach, LLC +// https://www.iana.org/domains/root/db/travel.html +travel + +// travelers : Travelers TLD, LLC +// https://www.iana.org/domains/root/db/travelers.html +travelers + +// travelersinsurance : Travelers TLD, LLC +// https://www.iana.org/domains/root/db/travelersinsurance.html +travelersinsurance + +// trust : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/trust.html +trust + +// trv : Travelers TLD, LLC +// https://www.iana.org/domains/root/db/trv.html +trv + +// tube : Latin American Telecom LLC +// https://www.iana.org/domains/root/db/tube.html +tube + +// tui : TUI AG +// https://www.iana.org/domains/root/db/tui.html +tui + +// tunes : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/tunes.html +tunes + +// tushu : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/tushu.html +tushu + +// tvs : T V SUNDRAM IYENGAR & SONS LIMITED +// https://www.iana.org/domains/root/db/tvs.html +tvs + +// ubank : National Australia Bank Limited +// https://www.iana.org/domains/root/db/ubank.html +ubank + +// ubs : UBS AG +// https://www.iana.org/domains/root/db/ubs.html +ubs + +// unicom : China United Network Communications Corporation Limited +// https://www.iana.org/domains/root/db/unicom.html +unicom + +// university : Binky Moon, LLC +// https://www.iana.org/domains/root/db/university.html +university + +// uno : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/uno.html +uno + +// uol : UBN INTERNET LTDA. +// https://www.iana.org/domains/root/db/uol.html +uol + +// ups : UPS Market Driver, Inc. +// https://www.iana.org/domains/root/db/ups.html +ups + +// vacations : Binky Moon, LLC +// https://www.iana.org/domains/root/db/vacations.html +vacations + +// vana : D3 Registry LLC +// https://www.iana.org/domains/root/db/vana.html +vana + +// vanguard : The Vanguard Group, Inc. +// https://www.iana.org/domains/root/db/vanguard.html +vanguard + +// vegas : Dot Vegas, Inc. +// https://www.iana.org/domains/root/db/vegas.html +vegas + +// ventures : Binky Moon, LLC +// https://www.iana.org/domains/root/db/ventures.html +ventures + +// verisign : VeriSign, Inc. +// https://www.iana.org/domains/root/db/verisign.html +verisign + +// versicherung : tldbox GmbH +// https://www.iana.org/domains/root/db/versicherung.html +versicherung + +// vet : Dog Beach, LLC +// https://www.iana.org/domains/root/db/vet.html +vet + +// viajes : Binky Moon, LLC +// https://www.iana.org/domains/root/db/viajes.html +viajes + +// video : Dog Beach, LLC +// https://www.iana.org/domains/root/db/video.html +video + +// vig : VIENNA INSURANCE GROUP AG Wiener Versicherung Gruppe +// https://www.iana.org/domains/root/db/vig.html +vig + +// viking : Viking River Cruises (Bermuda) Ltd. 
+// https://www.iana.org/domains/root/db/viking.html +viking + +// villas : Binky Moon, LLC +// https://www.iana.org/domains/root/db/villas.html +villas + +// vin : Binky Moon, LLC +// https://www.iana.org/domains/root/db/vin.html +vin + +// vip : Registry Services, LLC +// https://www.iana.org/domains/root/db/vip.html +vip + +// virgin : Virgin Enterprises Limited +// https://www.iana.org/domains/root/db/virgin.html +virgin + +// visa : Visa Worldwide Pte. Limited +// https://www.iana.org/domains/root/db/visa.html +visa + +// vision : Binky Moon, LLC +// https://www.iana.org/domains/root/db/vision.html +vision + +// viva : Saudi Telecom Company +// https://www.iana.org/domains/root/db/viva.html +viva + +// vivo : Telefonica Brasil S.A. +// https://www.iana.org/domains/root/db/vivo.html +vivo + +// vlaanderen : DNS.be vzw +// https://www.iana.org/domains/root/db/vlaanderen.html +vlaanderen + +// vodka : Registry Services, LLC +// https://www.iana.org/domains/root/db/vodka.html +vodka + +// volvo : Volvo Holding Sverige Aktiebolag +// https://www.iana.org/domains/root/db/volvo.html +volvo + +// vote : Monolith Registry LLC +// https://www.iana.org/domains/root/db/vote.html +vote + +// voting : Valuetainment Corp. +// https://www.iana.org/domains/root/db/voting.html +voting + +// voto : Monolith Registry LLC +// https://www.iana.org/domains/root/db/voto.html +voto + +// voyage : Binky Moon, LLC +// https://www.iana.org/domains/root/db/voyage.html +voyage + +// wales : Nominet UK +// https://www.iana.org/domains/root/db/wales.html +wales + +// walmart : Wal-Mart Stores, Inc. +// https://www.iana.org/domains/root/db/walmart.html +walmart + +// walter : Sandvik AB +// https://www.iana.org/domains/root/db/walter.html +walter + +// wang : Zodiac Wang Limited +// https://www.iana.org/domains/root/db/wang.html +wang + +// wanggou : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/wanggou.html +wanggou + +// watch : Binky Moon, LLC +// https://www.iana.org/domains/root/db/watch.html +watch + +// watches : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/watches.html +watches + +// weather : International Business Machines Corporation +// https://www.iana.org/domains/root/db/weather.html +weather + +// weatherchannel : International Business Machines Corporation +// https://www.iana.org/domains/root/db/weatherchannel.html +weatherchannel + +// webcam : dot Webcam Limited +// https://www.iana.org/domains/root/db/webcam.html +webcam + +// weber : Saint-Gobain Weber SA +// https://www.iana.org/domains/root/db/weber.html +weber + +// website : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/website.html +website + +// wed +// https://www.iana.org/domains/root/db/wed.html +wed + +// wedding : Registry Services, LLC +// https://www.iana.org/domains/root/db/wedding.html +wedding + +// weibo : Sina Corporation +// https://www.iana.org/domains/root/db/weibo.html +weibo + +// weir : Weir Group IP Limited +// https://www.iana.org/domains/root/db/weir.html +weir + +// whoswho : Who's Who Registry +// https://www.iana.org/domains/root/db/whoswho.html +whoswho + +// wien : punkt.wien GmbH +// https://www.iana.org/domains/root/db/wien.html +wien + +// wiki : Registry Services, LLC +// https://www.iana.org/domains/root/db/wiki.html +wiki + +// williamhill : William Hill Organization Limited +// https://www.iana.org/domains/root/db/williamhill.html +williamhill + +// win : First Registry Limited +// https://www.iana.org/domains/root/db/win.html +win + +// windows : Microsoft Corporation +// https://www.iana.org/domains/root/db/windows.html +windows + +// wine : Binky Moon, LLC +// https://www.iana.org/domains/root/db/wine.html +wine + +// winners : The TJX Companies, Inc. 
+// https://www.iana.org/domains/root/db/winners.html +winners + +// wme : William Morris Endeavor Entertainment, LLC +// https://www.iana.org/domains/root/db/wme.html +wme + +// wolterskluwer : Wolters Kluwer N.V. +// https://www.iana.org/domains/root/db/wolterskluwer.html +wolterskluwer + +// woodside : Woodside Petroleum Limited +// https://www.iana.org/domains/root/db/woodside.html +woodside + +// work : Registry Services, LLC +// https://www.iana.org/domains/root/db/work.html +work + +// works : Binky Moon, LLC +// https://www.iana.org/domains/root/db/works.html +works + +// world : Binky Moon, LLC +// https://www.iana.org/domains/root/db/world.html +world + +// wow : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/wow.html +wow + +// wtc : World Trade Centers Association, Inc. +// https://www.iana.org/domains/root/db/wtc.html +wtc + +// wtf : Binky Moon, LLC +// https://www.iana.org/domains/root/db/wtf.html +wtf + +// xbox : Microsoft Corporation +// https://www.iana.org/domains/root/db/xbox.html +xbox + +// xerox : Xerox DNHC LLC +// https://www.iana.org/domains/root/db/xerox.html +xerox + +// xihuan : Beijing Qihu Keji Co., Ltd. +// https://www.iana.org/domains/root/db/xihuan.html +xihuan + +// xin : Elegant Leader Limited +// https://www.iana.org/domains/root/db/xin.html +xin + +// xn--11b4c3d : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--11b4c3d.html +कॉम + +// xn--1ck2e1b : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--1ck2e1b.html +セール + +// xn--1qqw23a : Guangzhou YU Wei Information Technology Co., Ltd. 
+// https://www.iana.org/domains/root/db/xn--1qqw23a.html +佛山 + +// xn--30rr7y : Excellent First Limited +// https://www.iana.org/domains/root/db/xn--30rr7y.html +慈善 + +// xn--3bst00m : Eagle Horizon Limited +// https://www.iana.org/domains/root/db/xn--3bst00m.html +集团 + +// xn--3ds443g : Beijing TLD Registry Technology Limited +// https://www.iana.org/domains/root/db/xn--3ds443g.html +在线 + +// xn--3pxu8k : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--3pxu8k.html +点看 + +// xn--42c2d9a : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--42c2d9a.html +คอม + +// xn--45q11c : Zodiac Gemini Ltd +// https://www.iana.org/domains/root/db/xn--45q11c.html +八卦 + +// xn--4gbrim : Helium TLDs Ltd +// https://www.iana.org/domains/root/db/xn--4gbrim.html +موقع + +// xn--55qw42g : China Organizational Name Administration Center +// https://www.iana.org/domains/root/db/xn--55qw42g.html +公益 + +// xn--55qx5d : China Internet Network Information Center (CNNIC) +// https://www.iana.org/domains/root/db/xn--55qx5d.html +公司 + +// xn--5su34j936bgsg : Shangri‐La International Hotel Management Limited +// https://www.iana.org/domains/root/db/xn--5su34j936bgsg.html +香格里拉 + +// xn--5tzm5g : Global Website TLD Asia Limited +// https://www.iana.org/domains/root/db/xn--5tzm5g.html +网站 + +// xn--6frz82g : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/xn--6frz82g.html +移动 + +// xn--6qq986b3xl : Tycoon Treasure Limited +// https://www.iana.org/domains/root/db/xn--6qq986b3xl.html +我爱你 + +// xn--80adxhks : Foundation for Assistance for Internet Technologies and Infrastructure Development (FAITID) +// https://www.iana.org/domains/root/db/xn--80adxhks.html +москва + +// xn--80aqecdr1a : Pontificium Consilium de Comunicationibus Socialibus (PCCS) (Pontifical Council for Social Communication) +// https://www.iana.org/domains/root/db/xn--80aqecdr1a.html +католик -// xn--fzc2c9e2c ("Lanka" Sinhalese-Sinhala) : LK -// http://nic.lk -ලංකා +// xn--80asehdb : 
CORE Association +// https://www.iana.org/domains/root/db/xn--80asehdb.html +онлайн -// xn--xkc2al3hye2a ("Ilangai" Tamil) : LK -// http://nic.lk -இலங்கை +// xn--80aswg : CORE Association +// https://www.iana.org/domains/root/db/xn--80aswg.html +сайт -// xn--mgbc0a9azcg ("Morocco / al-Maghrib" Arabic) : MA -المغرب +// xn--8y0a063a : China United Network Communications Corporation Limited +// https://www.iana.org/domains/root/db/xn--8y0a063a.html +联通 -// xn--mgb9awbf ("Oman" Arabic) : OM -عمان +// xn--9dbq2a : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--9dbq2a.html +קום -// xn--ygbi2ammx ("Falasteen" Arabic) : PS -// The Palestinian National Internet Naming Authority (PNINA) -// http://www.pnina.ps -فلسطين +// xn--9et52u : RISE VICTORY LIMITED +// https://www.iana.org/domains/root/db/xn--9et52u.html +时尚 -// xn--90a3ac ("srb" Cyrillic) : RS -срб +// xn--9krt00a : Sina Corporation +// https://www.iana.org/domains/root/db/xn--9krt00a.html +微博 -// xn--p1ai ("rf" Russian-Cyrillic) : RU -// http://www.cctld.ru/en/docs/rulesrf.php -рф +// xn--b4w605ferd : Temasek Holdings (Private) Limited +// https://www.iana.org/domains/root/db/xn--b4w605ferd.html +淡马锡 -// xn--wgbl6a ("Qatar" Arabic) : QA -// http://www.ict.gov.qa/ -قطر +// xn--bck1b9a5dre4c : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--bck1b9a5dre4c.html +ファッション -// xn--mgberp4a5d4ar ("AlSaudiah" Arabic) : SA -// http://www.nic.net.sa/ -السعودية +// xn--c1avg : Public Interest Registry +// https://www.iana.org/domains/root/db/xn--c1avg.html +орг -// xn--mgberp4a5d4a87g ("AlSaudiah" Arabic) variant : SA -السعودیة +// xn--c2br7g : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--c2br7g.html +नेट -// xn--mgbqly7c0a67fbc ("AlSaudiah" Arabic) variant : SA -السعودیۃ +// xn--cck2b3b : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/xn--cck2b3b.html +ストア -// xn--mgbqly7cvafr ("AlSaudiah" Arabic) variant : SA -السعوديه +// xn--cckwcxetd : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--cckwcxetd.html +アマゾン -// xn--ogbpf8fl ("Syria" Arabic) : SY -سورية +// xn--cg4bki : SAMSUNG SDS CO., LTD +// https://www.iana.org/domains/root/db/xn--cg4bki.html +삼성 -// xn--mgbtf8fl ("Syria" Arabic) variant : SY -سوريا +// xn--czr694b : Internet DotTrademark Organisation Limited +// https://www.iana.org/domains/root/db/xn--czr694b.html +商标 -// xn--yfro4i67o Singapore ("Singapore" Chinese-Han) : SG -新加坡 +// xn--czrs0t : Binky Moon, LLC +// https://www.iana.org/domains/root/db/xn--czrs0t.html +商店 -// xn--clchc0ea0b2g2a9gcd ("Singapore" Tamil) : SG -சிங்கப்பூர் +// xn--czru2d : Zodiac Aquarius Limited +// https://www.iana.org/domains/root/db/xn--czru2d.html +商城 -// xn--o3cw4h ("Thai" Thai) : TH -// http://www.thnic.co.th -ไทย +// xn--d1acj3b : The Foundation for Network Initiatives “The Smart Internet” +// https://www.iana.org/domains/root/db/xn--d1acj3b.html +дети -// xn--pgbs0dh ("Tunis") : TN -// http://nic.tn -تونس +// xn--eckvdtc9d : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--eckvdtc9d.html +ポイント -// xn--kpry57d ("Taiwan" Chinese-Han-Traditional) : TW -// http://www.twnic.net/english/dn/dn_07a.htm -台灣 +// xn--efvy88h : Guangzhou YU Wei Information Technology Co., Ltd. +// https://www.iana.org/domains/root/db/xn--efvy88h.html +新闻 -// xn--kprw13d ("Taiwan" Chinese-Han-Simplified) : TW -// http://www.twnic.net/english/dn/dn_07a.htm -台湾 +// xn--fct429k : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/xn--fct429k.html +家電 -// xn--nnx388a ("Taiwan") variant : TW -臺灣 +// xn--fhbei : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--fhbei.html +كوم -// xn--j1amh ("ukr" Cyrillic) : UA -укр +// xn--fiq228c5hs : TLD REGISTRY LIMITED OY +// https://www.iana.org/domains/root/db/xn--fiq228c5hs.html +中文网 -// xn--mgb2ddes ("AlYemen" Arabic) : YE -اليمن +// xn--fiq64b : CITIC Group Corporation +// https://www.iana.org/domains/root/db/xn--fiq64b.html +中信 -// xxx : http://icmregistry.com -xxx +// xn--fjq720a : Binky Moon, LLC +// https://www.iana.org/domains/root/db/xn--fjq720a.html +娱乐 -// ye : http://www.y.net.ye/services/domain_name.htm -*.ye +// xn--flw351e : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/xn--flw351e.html +谷歌 + +// xn--fzys8d69uvgm : PCCW Enterprises Limited +// https://www.iana.org/domains/root/db/xn--fzys8d69uvgm.html +電訊盈科 + +// xn--g2xx48c : Nawang Heli(Xiamen) Network Service Co., LTD. +// https://www.iana.org/domains/root/db/xn--g2xx48c.html +购物 + +// xn--gckr3f0f : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--gckr3f0f.html +クラウド + +// xn--gk3at1e : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--gk3at1e.html +通販 + +// xn--hxt814e : Zodiac Taurus Limited +// https://www.iana.org/domains/root/db/xn--hxt814e.html +网店 + +// xn--i1b6b1a6a2e : Public Interest Registry +// https://www.iana.org/domains/root/db/xn--i1b6b1a6a2e.html +संगठन + +// xn--imr513n : Internet DotTrademark Organisation Limited +// https://www.iana.org/domains/root/db/xn--imr513n.html +餐厅 + +// xn--io0a7i : China Internet Network Information Center (CNNIC) +// https://www.iana.org/domains/root/db/xn--io0a7i.html +网络 + +// xn--j1aef : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--j1aef.html +ком + +// xn--jlq480n2rg : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/xn--jlq480n2rg.html +亚马逊 + +// xn--jvr189m : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--jvr189m.html +食品 + +// xn--kcrx77d1x4a : Koninklijke Philips N.V. +// https://www.iana.org/domains/root/db/xn--kcrx77d1x4a.html +飞利浦 + +// xn--kput3i : Beijing RITT-Net Technology Development Co., Ltd +// https://www.iana.org/domains/root/db/xn--kput3i.html +手机 + +// xn--mgba3a3ejt : Aramco Services Company +// https://www.iana.org/domains/root/db/xn--mgba3a3ejt.html +ارامكو + +// xn--mgba7c0bbn0a : Competrol (Luxembourg) Sarl +// https://www.iana.org/domains/root/db/xn--mgba7c0bbn0a.html +العليان + +// xn--mgbab2bd : CORE Association +// https://www.iana.org/domains/root/db/xn--mgbab2bd.html +بازار + +// xn--mgbca7dzdo : Abu Dhabi Systems and Information Centre +// https://www.iana.org/domains/root/db/xn--mgbca7dzdo.html +ابوظبي + +// xn--mgbi4ecexp : Pontificium Consilium de Comunicationibus Socialibus (PCCS) (Pontifical Council for Social Communication) +// https://www.iana.org/domains/root/db/xn--mgbi4ecexp.html +كاثوليك + +// xn--mgbt3dhd +// https://www.iana.org/domains/root/db/xn--mgbt3dhd.html +همراه + +// xn--mk1bu44c : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--mk1bu44c.html +닷컴 + +// xn--mxtq1m : Net-Chinese Co., Ltd. +// https://www.iana.org/domains/root/db/xn--mxtq1m.html +政府 + +// xn--ngbc5azd : International Domain Registry Pty. Ltd. 
+// https://www.iana.org/domains/root/db/xn--ngbc5azd.html +شبكة + +// xn--ngbe9e0a : Kuwait Finance House +// https://www.iana.org/domains/root/db/xn--ngbe9e0a.html +بيتك + +// xn--ngbrx : League of Arab States +// https://www.iana.org/domains/root/db/xn--ngbrx.html +عرب + +// xn--nqv7f : Public Interest Registry +// https://www.iana.org/domains/root/db/xn--nqv7f.html +机构 + +// xn--nqv7fs00ema : Public Interest Registry +// https://www.iana.org/domains/root/db/xn--nqv7fs00ema.html +组织机构 + +// xn--nyqy26a : Stable Tone Limited +// https://www.iana.org/domains/root/db/xn--nyqy26a.html +健康 + +// xn--otu796d : Jiang Yu Liang Cai Technology Company Limited +// https://www.iana.org/domains/root/db/xn--otu796d.html +招聘 + +// xn--p1acf : Rusnames Limited +// https://www.iana.org/domains/root/db/xn--p1acf.html +рус -// za : http://www.zadna.org.za/slds.html -*.za +// xn--pssy2u : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--pssy2u.html +大拿 -// zm : http://en.wikipedia.org/wiki/.zm -*.zm +// xn--q9jyb4c : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/xn--q9jyb4c.html +みんな -// zw : http://en.wikipedia.org/wiki/.zw -*.zw +// xn--qcka1pmc : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/xn--qcka1pmc.html +グーグル + +// xn--rhqv96g : Stable Tone Limited +// https://www.iana.org/domains/root/db/xn--rhqv96g.html +世界 + +// xn--rovu88b : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--rovu88b.html +書籍 + +// xn--ses554g : KNET Co., Ltd. 
+// https://www.iana.org/domains/root/db/xn--ses554g.html +网址 + +// xn--t60b56a : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--t60b56a.html +닷넷 + +// xn--tckwe : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--tckwe.html +コム + +// xn--tiq49xqyj : Pontificium Consilium de Comunicationibus Socialibus (PCCS) (Pontifical Council for Social Communication) +// https://www.iana.org/domains/root/db/xn--tiq49xqyj.html +天主教 + +// xn--unup4y : Binky Moon, LLC +// https://www.iana.org/domains/root/db/xn--unup4y.html +游戏 + +// xn--vermgensberater-ctb : Deutsche Vermögensberatung Aktiengesellschaft DVAG +// https://www.iana.org/domains/root/db/xn--vermgensberater-ctb.html +vermögensberater + +// xn--vermgensberatung-pwb : Deutsche Vermögensberatung Aktiengesellschaft DVAG +// https://www.iana.org/domains/root/db/xn--vermgensberatung-pwb.html +vermögensberatung + +// xn--vhquv : Binky Moon, LLC +// https://www.iana.org/domains/root/db/xn--vhquv.html +企业 + +// xn--vuq861b : Beijing Tele-info Technology Co., Ltd. +// https://www.iana.org/domains/root/db/xn--vuq861b.html +信息 + +// xn--w4r85el8fhu5dnra : Kerry Trading Co. Limited +// https://www.iana.org/domains/root/db/xn--w4r85el8fhu5dnra.html +嘉里大酒店 + +// xn--w4rs40l : Kerry Trading Co. Limited +// https://www.iana.org/domains/root/db/xn--w4rs40l.html +嘉里 + +// xn--xhq521b : Guangzhou YU Wei Information Technology Co., Ltd. +// https://www.iana.org/domains/root/db/xn--xhq521b.html +广东 + +// xn--zfr164b : China Organizational Name Administration Center +// https://www.iana.org/domains/root/db/xn--zfr164b.html +政务 + +// xyz : XYZ.COM LLC +// https://www.iana.org/domains/root/db/xyz.html +xyz + +// yachts : XYZ.COM LLC +// https://www.iana.org/domains/root/db/yachts.html +yachts + +// yahoo : Yahoo Inc. +// https://www.iana.org/domains/root/db/yahoo.html +yahoo + +// yamaxun : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/yamaxun.html +yamaxun + +// yandex : YANDEX, LLC +// https://www.iana.org/domains/root/db/yandex.html +yandex + +// yodobashi : YODOBASHI CAMERA CO.,LTD. +// https://www.iana.org/domains/root/db/yodobashi.html +yodobashi + +// yoga : Registry Services, LLC +// https://www.iana.org/domains/root/db/yoga.html +yoga + +// yokohama : GMO Registry, Inc. +// https://www.iana.org/domains/root/db/yokohama.html +yokohama + +// you : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/you.html +you + +// youtube : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/youtube.html +youtube + +// yun : Beijing Qihu Keji Co., Ltd. +// https://www.iana.org/domains/root/db/yun.html +yun + +// zappos : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/zappos.html +zappos + +// zara : Industria de Diseño Textil, S.A. (INDITEX, S.A.) +// https://www.iana.org/domains/root/db/zara.html +zara + +// zero : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/zero.html +zero + +// zip : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/zip.html +zip + +// zone : Binky Moon, LLC +// https://www.iana.org/domains/root/db/zone.html +zone + +// zuerich : Kanton Zürich (Canton of Zurich) +// https://www.iana.org/domains/root/db/zuerich.html +zuerich // ===END ICANN DOMAINS=== + // ===BEGIN PRIVATE DOMAINS=== -// Amazon CloudFront : https://aws.amazon.com/cloudfront/ -// Requested by Donavan Miller 2013-03-22 +// (Note: these are in alphabetical order by company name) + +// .KRD : https://nic.krd +co.krd +edu.krd + +// .pl domains (grandfathered) +art.pl +gliwice.pl +krakow.pl +poznan.pl +wroc.pl +zakopane.pl + +// .US +// Submitted by Ed Moore +lib.de.us + +// 12CHARS : https://12chars.com +// Submitted by Kenny Niehage +12chars.dev +12chars.it +12chars.pro + +// 1GB LLC : https://www.1gb.ua/ +// Submitted by 1GB LLC +cc.ua +inf.ua +ltd.ua + +// 611 blockchain domain name system : https://611project.net/ +611.to + +// A2 Hosting +// Submitted by Tyler Hall +a2hosted.com +cpserver.com + +// Acorn Labs : https://acorn.io +// Submitted by Craig Jellick +*.on-acorn.io + +// ActiveTrail : https://www.activetrail.biz/ +// Submitted by Ofer Kalaora +activetrail.biz + +// Adaptable.io : https://adaptable.io +// Submitted by Mark Terrel +adaptable.app + +// addr.tools : https://addr.tools/ +// Submitted by Brian Shea +myaddr.dev +myaddr.io +dyn.addr.tools +myaddr.tools + +// Adobe : https://www.adobe.com/ +// Submitted by Ian Boston and Lars Trieloff +adobeaemcloud.com +*.dev.adobeaemcloud.com +aem.live +hlx.live +adobeaemcloud.net +aem.network +aem.page +hlx.page +aem.reviews + +// Adobe Developer Platform : https://developer.adobe.com +// Submitted by Jesse MacFadyen +adobeio-static.net +adobeioruntime.net + +// Africa.com Web Solutions Ltd : https://registry.africa.com +// Submitted by Gavin Brown +africa.com + +// Agnat sp. z o.o. 
: https://domena.pl +// Submitted by Przemyslaw Plewa +beep.pl + +// Airkit : https://www.airkit.com/ +// Submitted by Grant Cooksey +airkitapps.com +airkitapps-au.com +airkitapps.eu + +// Aiven : https://aiven.io/ +// Submitted by Aiven Security Team +aiven.app +aivencloud.com + +// Akamai : https://www.akamai.com/ +// Submitted by Akamai Team +akadns.net +akamai.net +akamai-staging.net +akamaiedge.net +akamaiedge-staging.net +akamaihd.net +akamaihd-staging.net +akamaiorigin.net +akamaiorigin-staging.net +akamaized.net +akamaized-staging.net +edgekey.net +edgekey-staging.net +edgesuite.net +edgesuite-staging.net + +// alboto.ca : http://alboto.ca +// Submitted by Anton Avramov +barsy.ca + +// Alces Software Ltd : http://alces-software.com +// Submitted by Mark J. Titorenko +*.compute.estate +*.alces.network + +// Alibaba Cloud API Gateway +// Submitted by Alibaba Cloud Security +alibabacloudcs.com + +// all-inkl.com : https://all-inkl.com +// Submitted by Werner Kaltofen +kasserver.com + +// Altervista : https://www.altervista.org +// Submitted by Carlo Cannas +altervista.org + +// alwaysdata : https://www.alwaysdata.com +// Submitted by Cyril +alwaysdata.net + +// Amaze Software : https://amaze.co +// Submitted by Domain Admin +myamaze.net + +// Amazon : https://www.amazon.com/ +// Submitted by AWS Security +// Subsections of Amazon/subsidiaries will appear until "concludes" tag + +// Amazon API Gateway +// Submitted by AWS Security +// Reference: 6a4f5a95-8c7d-4077-a7af-9cf1abec0a53 +execute-api.cn-north-1.amazonaws.com.cn +execute-api.cn-northwest-1.amazonaws.com.cn +execute-api.af-south-1.amazonaws.com +execute-api.ap-east-1.amazonaws.com +execute-api.ap-northeast-1.amazonaws.com +execute-api.ap-northeast-2.amazonaws.com +execute-api.ap-northeast-3.amazonaws.com +execute-api.ap-south-1.amazonaws.com +execute-api.ap-south-2.amazonaws.com +execute-api.ap-southeast-1.amazonaws.com +execute-api.ap-southeast-2.amazonaws.com +execute-api.ap-southeast-3.amazonaws.com 
+execute-api.ap-southeast-4.amazonaws.com +execute-api.ap-southeast-5.amazonaws.com +execute-api.ca-central-1.amazonaws.com +execute-api.ca-west-1.amazonaws.com +execute-api.eu-central-1.amazonaws.com +execute-api.eu-central-2.amazonaws.com +execute-api.eu-north-1.amazonaws.com +execute-api.eu-south-1.amazonaws.com +execute-api.eu-south-2.amazonaws.com +execute-api.eu-west-1.amazonaws.com +execute-api.eu-west-2.amazonaws.com +execute-api.eu-west-3.amazonaws.com +execute-api.il-central-1.amazonaws.com +execute-api.me-central-1.amazonaws.com +execute-api.me-south-1.amazonaws.com +execute-api.sa-east-1.amazonaws.com +execute-api.us-east-1.amazonaws.com +execute-api.us-east-2.amazonaws.com +execute-api.us-gov-east-1.amazonaws.com +execute-api.us-gov-west-1.amazonaws.com +execute-api.us-west-1.amazonaws.com +execute-api.us-west-2.amazonaws.com + +// Amazon CloudFront +// Submitted by Donavan Miller +// Reference: 54144616-fd49-4435-8535-19c6a601bdb3 cloudfront.net -// Amazon Elastic Compute Cloud: https://aws.amazon.com/ec2/ -// Requested by Osman Surkatty 2013-04-02 -compute.amazonaws.com +// Amazon Cognito +// Submitted by AWS Security +// Reference: eb4652f0-20f0-43f5-b323-e6cc6ae02ad7 +auth.af-south-1.amazoncognito.com +auth.ap-east-1.amazoncognito.com +auth.ap-northeast-1.amazoncognito.com +auth.ap-northeast-2.amazoncognito.com +auth.ap-northeast-3.amazoncognito.com +auth.ap-south-1.amazoncognito.com +auth.ap-south-2.amazoncognito.com +auth.ap-southeast-1.amazoncognito.com +auth.ap-southeast-2.amazoncognito.com +auth.ap-southeast-3.amazoncognito.com +auth.ap-southeast-4.amazoncognito.com +auth.ap-southeast-5.amazoncognito.com +auth.ca-central-1.amazoncognito.com +auth.ca-west-1.amazoncognito.com +auth.eu-central-1.amazoncognito.com +auth.eu-central-2.amazoncognito.com +auth.eu-north-1.amazoncognito.com +auth.eu-south-1.amazoncognito.com +auth.eu-south-2.amazoncognito.com +auth.eu-west-1.amazoncognito.com +auth.eu-west-2.amazoncognito.com 
+auth.eu-west-3.amazoncognito.com +auth.il-central-1.amazoncognito.com +auth.me-central-1.amazoncognito.com +auth.me-south-1.amazoncognito.com +auth.sa-east-1.amazoncognito.com +auth.us-east-1.amazoncognito.com +auth-fips.us-east-1.amazoncognito.com +auth.us-east-2.amazoncognito.com +auth-fips.us-east-2.amazoncognito.com +auth-fips.us-gov-east-1.amazoncognito.com +auth-fips.us-gov-west-1.amazoncognito.com +auth.us-west-1.amazoncognito.com +auth-fips.us-west-1.amazoncognito.com +auth.us-west-2.amazoncognito.com +auth-fips.us-west-2.amazoncognito.com + +// Amazon EC2 +// Submitted by Luke Wells +// Reference: 4c38fa71-58ac-4768-99e5-689c1767e537 +*.compute.amazonaws.com.cn +*.compute.amazonaws.com +*.compute-1.amazonaws.com us-east-1.amazonaws.com -compute-1.amazonaws.com -z-1.compute-1.amazonaws.com -z-2.compute-1.amazonaws.com -ap-northeast-1.compute.amazonaws.com -ap-southeast-1.compute.amazonaws.com -ap-southeast-2.compute.amazonaws.com -eu-west-1.compute.amazonaws.com -sa-east-1.compute.amazonaws.com -us-gov-west-1.compute.amazonaws.com -us-west-1.compute.amazonaws.com -us-west-2.compute.amazonaws.com - -// Amazon Elastic Beanstalk : https://aws.amazon.com/elasticbeanstalk/ -// Requested by Adam Stein 2013-04-02 -elasticbeanstalk.com - -// Amazon Elastic Load Balancing : https://aws.amazon.com/elasticloadbalancing/ -// Requested by Scott Vidmar 2013-03-27 -elb.amazonaws.com -// Amazon S3 : https://aws.amazon.com/s3/ -// Requested by Courtney Eckhardt 2013-03-22 +// Amazon EMR +// Submitted by AWS Security +// Reference: 82f43f9f-bbb8-400e-8349-854f5a62f20d +emrappui-prod.cn-north-1.amazonaws.com.cn +emrnotebooks-prod.cn-north-1.amazonaws.com.cn +emrstudio-prod.cn-north-1.amazonaws.com.cn +emrappui-prod.cn-northwest-1.amazonaws.com.cn +emrnotebooks-prod.cn-northwest-1.amazonaws.com.cn +emrstudio-prod.cn-northwest-1.amazonaws.com.cn +emrappui-prod.af-south-1.amazonaws.com +emrnotebooks-prod.af-south-1.amazonaws.com +emrstudio-prod.af-south-1.amazonaws.com 
+emrappui-prod.ap-east-1.amazonaws.com +emrnotebooks-prod.ap-east-1.amazonaws.com +emrstudio-prod.ap-east-1.amazonaws.com +emrappui-prod.ap-northeast-1.amazonaws.com +emrnotebooks-prod.ap-northeast-1.amazonaws.com +emrstudio-prod.ap-northeast-1.amazonaws.com +emrappui-prod.ap-northeast-2.amazonaws.com +emrnotebooks-prod.ap-northeast-2.amazonaws.com +emrstudio-prod.ap-northeast-2.amazonaws.com +emrappui-prod.ap-northeast-3.amazonaws.com +emrnotebooks-prod.ap-northeast-3.amazonaws.com +emrstudio-prod.ap-northeast-3.amazonaws.com +emrappui-prod.ap-south-1.amazonaws.com +emrnotebooks-prod.ap-south-1.amazonaws.com +emrstudio-prod.ap-south-1.amazonaws.com +emrappui-prod.ap-south-2.amazonaws.com +emrnotebooks-prod.ap-south-2.amazonaws.com +emrstudio-prod.ap-south-2.amazonaws.com +emrappui-prod.ap-southeast-1.amazonaws.com +emrnotebooks-prod.ap-southeast-1.amazonaws.com +emrstudio-prod.ap-southeast-1.amazonaws.com +emrappui-prod.ap-southeast-2.amazonaws.com +emrnotebooks-prod.ap-southeast-2.amazonaws.com +emrstudio-prod.ap-southeast-2.amazonaws.com +emrappui-prod.ap-southeast-3.amazonaws.com +emrnotebooks-prod.ap-southeast-3.amazonaws.com +emrstudio-prod.ap-southeast-3.amazonaws.com +emrappui-prod.ap-southeast-4.amazonaws.com +emrnotebooks-prod.ap-southeast-4.amazonaws.com +emrstudio-prod.ap-southeast-4.amazonaws.com +emrappui-prod.ca-central-1.amazonaws.com +emrnotebooks-prod.ca-central-1.amazonaws.com +emrstudio-prod.ca-central-1.amazonaws.com +emrappui-prod.ca-west-1.amazonaws.com +emrnotebooks-prod.ca-west-1.amazonaws.com +emrstudio-prod.ca-west-1.amazonaws.com +emrappui-prod.eu-central-1.amazonaws.com +emrnotebooks-prod.eu-central-1.amazonaws.com +emrstudio-prod.eu-central-1.amazonaws.com +emrappui-prod.eu-central-2.amazonaws.com +emrnotebooks-prod.eu-central-2.amazonaws.com +emrstudio-prod.eu-central-2.amazonaws.com +emrappui-prod.eu-north-1.amazonaws.com +emrnotebooks-prod.eu-north-1.amazonaws.com +emrstudio-prod.eu-north-1.amazonaws.com 
+emrappui-prod.eu-south-1.amazonaws.com +emrnotebooks-prod.eu-south-1.amazonaws.com +emrstudio-prod.eu-south-1.amazonaws.com +emrappui-prod.eu-south-2.amazonaws.com +emrnotebooks-prod.eu-south-2.amazonaws.com +emrstudio-prod.eu-south-2.amazonaws.com +emrappui-prod.eu-west-1.amazonaws.com +emrnotebooks-prod.eu-west-1.amazonaws.com +emrstudio-prod.eu-west-1.amazonaws.com +emrappui-prod.eu-west-2.amazonaws.com +emrnotebooks-prod.eu-west-2.amazonaws.com +emrstudio-prod.eu-west-2.amazonaws.com +emrappui-prod.eu-west-3.amazonaws.com +emrnotebooks-prod.eu-west-3.amazonaws.com +emrstudio-prod.eu-west-3.amazonaws.com +emrappui-prod.il-central-1.amazonaws.com +emrnotebooks-prod.il-central-1.amazonaws.com +emrstudio-prod.il-central-1.amazonaws.com +emrappui-prod.me-central-1.amazonaws.com +emrnotebooks-prod.me-central-1.amazonaws.com +emrstudio-prod.me-central-1.amazonaws.com +emrappui-prod.me-south-1.amazonaws.com +emrnotebooks-prod.me-south-1.amazonaws.com +emrstudio-prod.me-south-1.amazonaws.com +emrappui-prod.sa-east-1.amazonaws.com +emrnotebooks-prod.sa-east-1.amazonaws.com +emrstudio-prod.sa-east-1.amazonaws.com +emrappui-prod.us-east-1.amazonaws.com +emrnotebooks-prod.us-east-1.amazonaws.com +emrstudio-prod.us-east-1.amazonaws.com +emrappui-prod.us-east-2.amazonaws.com +emrnotebooks-prod.us-east-2.amazonaws.com +emrstudio-prod.us-east-2.amazonaws.com +emrappui-prod.us-gov-east-1.amazonaws.com +emrnotebooks-prod.us-gov-east-1.amazonaws.com +emrstudio-prod.us-gov-east-1.amazonaws.com +emrappui-prod.us-gov-west-1.amazonaws.com +emrnotebooks-prod.us-gov-west-1.amazonaws.com +emrstudio-prod.us-gov-west-1.amazonaws.com +emrappui-prod.us-west-1.amazonaws.com +emrnotebooks-prod.us-west-1.amazonaws.com +emrstudio-prod.us-west-1.amazonaws.com +emrappui-prod.us-west-2.amazonaws.com +emrnotebooks-prod.us-west-2.amazonaws.com +emrstudio-prod.us-west-2.amazonaws.com + +// Amazon Managed Workflows for Apache Airflow +// Submitted by AWS Security +// Reference: 
f5ea5d0a-ec6a-4f23-ac1c-553fbff13f5c +*.cn-north-1.airflow.amazonaws.com.cn +*.cn-northwest-1.airflow.amazonaws.com.cn +*.af-south-1.airflow.amazonaws.com +*.ap-east-1.airflow.amazonaws.com +*.ap-northeast-1.airflow.amazonaws.com +*.ap-northeast-2.airflow.amazonaws.com +*.ap-northeast-3.airflow.amazonaws.com +*.ap-south-1.airflow.amazonaws.com +*.ap-south-2.airflow.amazonaws.com +*.ap-southeast-1.airflow.amazonaws.com +*.ap-southeast-2.airflow.amazonaws.com +*.ap-southeast-3.airflow.amazonaws.com +*.ap-southeast-4.airflow.amazonaws.com +*.ca-central-1.airflow.amazonaws.com +*.ca-west-1.airflow.amazonaws.com +*.eu-central-1.airflow.amazonaws.com +*.eu-central-2.airflow.amazonaws.com +*.eu-north-1.airflow.amazonaws.com +*.eu-south-1.airflow.amazonaws.com +*.eu-south-2.airflow.amazonaws.com +*.eu-west-1.airflow.amazonaws.com +*.eu-west-2.airflow.amazonaws.com +*.eu-west-3.airflow.amazonaws.com +*.il-central-1.airflow.amazonaws.com +*.me-central-1.airflow.amazonaws.com +*.me-south-1.airflow.amazonaws.com +*.sa-east-1.airflow.amazonaws.com +*.us-east-1.airflow.amazonaws.com +*.us-east-2.airflow.amazonaws.com +*.us-west-1.airflow.amazonaws.com +*.us-west-2.airflow.amazonaws.com + +// Amazon S3 +// Submitted by AWS Security +// Reference: ada5c9df-55e1-4195-a1ce-732d6c81e357 +s3.dualstack.cn-north-1.amazonaws.com.cn +s3-accesspoint.dualstack.cn-north-1.amazonaws.com.cn +s3-website.dualstack.cn-north-1.amazonaws.com.cn +s3.cn-north-1.amazonaws.com.cn +s3-accesspoint.cn-north-1.amazonaws.com.cn +s3-deprecated.cn-north-1.amazonaws.com.cn +s3-object-lambda.cn-north-1.amazonaws.com.cn +s3-website.cn-north-1.amazonaws.com.cn +s3.dualstack.cn-northwest-1.amazonaws.com.cn +s3-accesspoint.dualstack.cn-northwest-1.amazonaws.com.cn +s3.cn-northwest-1.amazonaws.com.cn +s3-accesspoint.cn-northwest-1.amazonaws.com.cn +s3-object-lambda.cn-northwest-1.amazonaws.com.cn +s3-website.cn-northwest-1.amazonaws.com.cn +s3.dualstack.af-south-1.amazonaws.com 
+s3-accesspoint.dualstack.af-south-1.amazonaws.com +s3-website.dualstack.af-south-1.amazonaws.com +s3.af-south-1.amazonaws.com +s3-accesspoint.af-south-1.amazonaws.com +s3-object-lambda.af-south-1.amazonaws.com +s3-website.af-south-1.amazonaws.com +s3.dualstack.ap-east-1.amazonaws.com +s3-accesspoint.dualstack.ap-east-1.amazonaws.com +s3.ap-east-1.amazonaws.com +s3-accesspoint.ap-east-1.amazonaws.com +s3-object-lambda.ap-east-1.amazonaws.com +s3-website.ap-east-1.amazonaws.com +s3.dualstack.ap-northeast-1.amazonaws.com +s3-accesspoint.dualstack.ap-northeast-1.amazonaws.com +s3-website.dualstack.ap-northeast-1.amazonaws.com +s3.ap-northeast-1.amazonaws.com +s3-accesspoint.ap-northeast-1.amazonaws.com +s3-object-lambda.ap-northeast-1.amazonaws.com +s3-website.ap-northeast-1.amazonaws.com +s3.dualstack.ap-northeast-2.amazonaws.com +s3-accesspoint.dualstack.ap-northeast-2.amazonaws.com +s3-website.dualstack.ap-northeast-2.amazonaws.com +s3.ap-northeast-2.amazonaws.com +s3-accesspoint.ap-northeast-2.amazonaws.com +s3-object-lambda.ap-northeast-2.amazonaws.com +s3-website.ap-northeast-2.amazonaws.com +s3.dualstack.ap-northeast-3.amazonaws.com +s3-accesspoint.dualstack.ap-northeast-3.amazonaws.com +s3-website.dualstack.ap-northeast-3.amazonaws.com +s3.ap-northeast-3.amazonaws.com +s3-accesspoint.ap-northeast-3.amazonaws.com +s3-object-lambda.ap-northeast-3.amazonaws.com +s3-website.ap-northeast-3.amazonaws.com +s3.dualstack.ap-south-1.amazonaws.com +s3-accesspoint.dualstack.ap-south-1.amazonaws.com +s3-website.dualstack.ap-south-1.amazonaws.com +s3.ap-south-1.amazonaws.com +s3-accesspoint.ap-south-1.amazonaws.com +s3-object-lambda.ap-south-1.amazonaws.com +s3-website.ap-south-1.amazonaws.com +s3.dualstack.ap-south-2.amazonaws.com +s3-accesspoint.dualstack.ap-south-2.amazonaws.com +s3-website.dualstack.ap-south-2.amazonaws.com +s3.ap-south-2.amazonaws.com +s3-accesspoint.ap-south-2.amazonaws.com +s3-object-lambda.ap-south-2.amazonaws.com 
+s3-website.ap-south-2.amazonaws.com +s3.dualstack.ap-southeast-1.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-1.amazonaws.com +s3-website.dualstack.ap-southeast-1.amazonaws.com +s3.ap-southeast-1.amazonaws.com +s3-accesspoint.ap-southeast-1.amazonaws.com +s3-object-lambda.ap-southeast-1.amazonaws.com +s3-website.ap-southeast-1.amazonaws.com +s3.dualstack.ap-southeast-2.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-2.amazonaws.com +s3-website.dualstack.ap-southeast-2.amazonaws.com +s3.ap-southeast-2.amazonaws.com +s3-accesspoint.ap-southeast-2.amazonaws.com +s3-object-lambda.ap-southeast-2.amazonaws.com +s3-website.ap-southeast-2.amazonaws.com +s3.dualstack.ap-southeast-3.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-3.amazonaws.com +s3-website.dualstack.ap-southeast-3.amazonaws.com +s3.ap-southeast-3.amazonaws.com +s3-accesspoint.ap-southeast-3.amazonaws.com +s3-object-lambda.ap-southeast-3.amazonaws.com +s3-website.ap-southeast-3.amazonaws.com +s3.dualstack.ap-southeast-4.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-4.amazonaws.com +s3-website.dualstack.ap-southeast-4.amazonaws.com +s3.ap-southeast-4.amazonaws.com +s3-accesspoint.ap-southeast-4.amazonaws.com +s3-object-lambda.ap-southeast-4.amazonaws.com +s3-website.ap-southeast-4.amazonaws.com +s3.dualstack.ap-southeast-5.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-5.amazonaws.com +s3-website.dualstack.ap-southeast-5.amazonaws.com +s3.ap-southeast-5.amazonaws.com +s3-accesspoint.ap-southeast-5.amazonaws.com +s3-deprecated.ap-southeast-5.amazonaws.com +s3-object-lambda.ap-southeast-5.amazonaws.com +s3-website.ap-southeast-5.amazonaws.com +s3.dualstack.ca-central-1.amazonaws.com +s3-accesspoint.dualstack.ca-central-1.amazonaws.com +s3-accesspoint-fips.dualstack.ca-central-1.amazonaws.com +s3-fips.dualstack.ca-central-1.amazonaws.com +s3-website.dualstack.ca-central-1.amazonaws.com +s3.ca-central-1.amazonaws.com +s3-accesspoint.ca-central-1.amazonaws.com 
+s3-accesspoint-fips.ca-central-1.amazonaws.com +s3-fips.ca-central-1.amazonaws.com +s3-object-lambda.ca-central-1.amazonaws.com +s3-website.ca-central-1.amazonaws.com +s3.dualstack.ca-west-1.amazonaws.com +s3-accesspoint.dualstack.ca-west-1.amazonaws.com +s3-accesspoint-fips.dualstack.ca-west-1.amazonaws.com +s3-fips.dualstack.ca-west-1.amazonaws.com +s3-website.dualstack.ca-west-1.amazonaws.com +s3.ca-west-1.amazonaws.com +s3-accesspoint.ca-west-1.amazonaws.com +s3-accesspoint-fips.ca-west-1.amazonaws.com +s3-fips.ca-west-1.amazonaws.com +s3-object-lambda.ca-west-1.amazonaws.com +s3-website.ca-west-1.amazonaws.com +s3.dualstack.eu-central-1.amazonaws.com +s3-accesspoint.dualstack.eu-central-1.amazonaws.com +s3-website.dualstack.eu-central-1.amazonaws.com +s3.eu-central-1.amazonaws.com +s3-accesspoint.eu-central-1.amazonaws.com +s3-object-lambda.eu-central-1.amazonaws.com +s3-website.eu-central-1.amazonaws.com +s3.dualstack.eu-central-2.amazonaws.com +s3-accesspoint.dualstack.eu-central-2.amazonaws.com +s3-website.dualstack.eu-central-2.amazonaws.com +s3.eu-central-2.amazonaws.com +s3-accesspoint.eu-central-2.amazonaws.com +s3-object-lambda.eu-central-2.amazonaws.com +s3-website.eu-central-2.amazonaws.com +s3.dualstack.eu-north-1.amazonaws.com +s3-accesspoint.dualstack.eu-north-1.amazonaws.com +s3.eu-north-1.amazonaws.com +s3-accesspoint.eu-north-1.amazonaws.com +s3-object-lambda.eu-north-1.amazonaws.com +s3-website.eu-north-1.amazonaws.com +s3.dualstack.eu-south-1.amazonaws.com +s3-accesspoint.dualstack.eu-south-1.amazonaws.com +s3-website.dualstack.eu-south-1.amazonaws.com +s3.eu-south-1.amazonaws.com +s3-accesspoint.eu-south-1.amazonaws.com +s3-object-lambda.eu-south-1.amazonaws.com +s3-website.eu-south-1.amazonaws.com +s3.dualstack.eu-south-2.amazonaws.com +s3-accesspoint.dualstack.eu-south-2.amazonaws.com +s3-website.dualstack.eu-south-2.amazonaws.com +s3.eu-south-2.amazonaws.com +s3-accesspoint.eu-south-2.amazonaws.com 
+s3-object-lambda.eu-south-2.amazonaws.com +s3-website.eu-south-2.amazonaws.com +s3.dualstack.eu-west-1.amazonaws.com +s3-accesspoint.dualstack.eu-west-1.amazonaws.com +s3-website.dualstack.eu-west-1.amazonaws.com +s3.eu-west-1.amazonaws.com +s3-accesspoint.eu-west-1.amazonaws.com +s3-deprecated.eu-west-1.amazonaws.com +s3-object-lambda.eu-west-1.amazonaws.com +s3-website.eu-west-1.amazonaws.com +s3.dualstack.eu-west-2.amazonaws.com +s3-accesspoint.dualstack.eu-west-2.amazonaws.com +s3.eu-west-2.amazonaws.com +s3-accesspoint.eu-west-2.amazonaws.com +s3-object-lambda.eu-west-2.amazonaws.com +s3-website.eu-west-2.amazonaws.com +s3.dualstack.eu-west-3.amazonaws.com +s3-accesspoint.dualstack.eu-west-3.amazonaws.com +s3-website.dualstack.eu-west-3.amazonaws.com +s3.eu-west-3.amazonaws.com +s3-accesspoint.eu-west-3.amazonaws.com +s3-object-lambda.eu-west-3.amazonaws.com +s3-website.eu-west-3.amazonaws.com +s3.dualstack.il-central-1.amazonaws.com +s3-accesspoint.dualstack.il-central-1.amazonaws.com +s3-website.dualstack.il-central-1.amazonaws.com +s3.il-central-1.amazonaws.com +s3-accesspoint.il-central-1.amazonaws.com +s3-object-lambda.il-central-1.amazonaws.com +s3-website.il-central-1.amazonaws.com +s3.dualstack.me-central-1.amazonaws.com +s3-accesspoint.dualstack.me-central-1.amazonaws.com +s3-website.dualstack.me-central-1.amazonaws.com +s3.me-central-1.amazonaws.com +s3-accesspoint.me-central-1.amazonaws.com +s3-object-lambda.me-central-1.amazonaws.com +s3-website.me-central-1.amazonaws.com +s3.dualstack.me-south-1.amazonaws.com +s3-accesspoint.dualstack.me-south-1.amazonaws.com +s3.me-south-1.amazonaws.com +s3-accesspoint.me-south-1.amazonaws.com +s3-object-lambda.me-south-1.amazonaws.com +s3-website.me-south-1.amazonaws.com s3.amazonaws.com -s3-us-west-2.amazonaws.com -s3-us-west-1.amazonaws.com -s3-eu-west-1.amazonaws.com +s3-1.amazonaws.com +s3-ap-east-1.amazonaws.com +s3-ap-northeast-1.amazonaws.com +s3-ap-northeast-2.amazonaws.com 
+s3-ap-northeast-3.amazonaws.com +s3-ap-south-1.amazonaws.com s3-ap-southeast-1.amazonaws.com s3-ap-southeast-2.amazonaws.com -s3-ap-northeast-1.amazonaws.com +s3-ca-central-1.amazonaws.com +s3-eu-central-1.amazonaws.com +s3-eu-north-1.amazonaws.com +s3-eu-west-1.amazonaws.com +s3-eu-west-2.amazonaws.com +s3-eu-west-3.amazonaws.com +s3-external-1.amazonaws.com +s3-fips-us-gov-east-1.amazonaws.com +s3-fips-us-gov-west-1.amazonaws.com +mrap.accesspoint.s3-global.amazonaws.com +s3-me-south-1.amazonaws.com s3-sa-east-1.amazonaws.com +s3-us-east-2.amazonaws.com +s3-us-gov-east-1.amazonaws.com s3-us-gov-west-1.amazonaws.com -s3-fips-us-gov-west-1.amazonaws.com -s3-website-us-east-1.amazonaws.com -s3-website-us-west-2.amazonaws.com -s3-website-us-west-1.amazonaws.com -s3-website-eu-west-1.amazonaws.com +s3-us-west-1.amazonaws.com +s3-us-west-2.amazonaws.com +s3-website-ap-northeast-1.amazonaws.com s3-website-ap-southeast-1.amazonaws.com s3-website-ap-southeast-2.amazonaws.com -s3-website-ap-northeast-1.amazonaws.com +s3-website-eu-west-1.amazonaws.com s3-website-sa-east-1.amazonaws.com +s3-website-us-east-1.amazonaws.com s3-website-us-gov-west-1.amazonaws.com - -// BetaInABox -// Requested by adrian@betainabox.com 2012-09-13 -betainabox.com - -// CentralNic : http://www.centralnic.com/names/domains -// Requested by registry 2012-09-27 -ae.org -ar.com +s3-website-us-west-1.amazonaws.com +s3-website-us-west-2.amazonaws.com +s3.dualstack.sa-east-1.amazonaws.com +s3-accesspoint.dualstack.sa-east-1.amazonaws.com +s3-website.dualstack.sa-east-1.amazonaws.com +s3.sa-east-1.amazonaws.com +s3-accesspoint.sa-east-1.amazonaws.com +s3-object-lambda.sa-east-1.amazonaws.com +s3-website.sa-east-1.amazonaws.com +s3.dualstack.us-east-1.amazonaws.com +s3-accesspoint.dualstack.us-east-1.amazonaws.com +s3-accesspoint-fips.dualstack.us-east-1.amazonaws.com +s3-fips.dualstack.us-east-1.amazonaws.com +s3-website.dualstack.us-east-1.amazonaws.com +s3.us-east-1.amazonaws.com 
+s3-accesspoint.us-east-1.amazonaws.com +s3-accesspoint-fips.us-east-1.amazonaws.com +s3-deprecated.us-east-1.amazonaws.com +s3-fips.us-east-1.amazonaws.com +s3-object-lambda.us-east-1.amazonaws.com +s3-website.us-east-1.amazonaws.com +s3.dualstack.us-east-2.amazonaws.com +s3-accesspoint.dualstack.us-east-2.amazonaws.com +s3-accesspoint-fips.dualstack.us-east-2.amazonaws.com +s3-fips.dualstack.us-east-2.amazonaws.com +s3-website.dualstack.us-east-2.amazonaws.com +s3.us-east-2.amazonaws.com +s3-accesspoint.us-east-2.amazonaws.com +s3-accesspoint-fips.us-east-2.amazonaws.com +s3-deprecated.us-east-2.amazonaws.com +s3-fips.us-east-2.amazonaws.com +s3-object-lambda.us-east-2.amazonaws.com +s3-website.us-east-2.amazonaws.com +s3.dualstack.us-gov-east-1.amazonaws.com +s3-accesspoint.dualstack.us-gov-east-1.amazonaws.com +s3-accesspoint-fips.dualstack.us-gov-east-1.amazonaws.com +s3-fips.dualstack.us-gov-east-1.amazonaws.com +s3.us-gov-east-1.amazonaws.com +s3-accesspoint.us-gov-east-1.amazonaws.com +s3-accesspoint-fips.us-gov-east-1.amazonaws.com +s3-fips.us-gov-east-1.amazonaws.com +s3-object-lambda.us-gov-east-1.amazonaws.com +s3-website.us-gov-east-1.amazonaws.com +s3.dualstack.us-gov-west-1.amazonaws.com +s3-accesspoint.dualstack.us-gov-west-1.amazonaws.com +s3-accesspoint-fips.dualstack.us-gov-west-1.amazonaws.com +s3-fips.dualstack.us-gov-west-1.amazonaws.com +s3.us-gov-west-1.amazonaws.com +s3-accesspoint.us-gov-west-1.amazonaws.com +s3-accesspoint-fips.us-gov-west-1.amazonaws.com +s3-fips.us-gov-west-1.amazonaws.com +s3-object-lambda.us-gov-west-1.amazonaws.com +s3-website.us-gov-west-1.amazonaws.com +s3.dualstack.us-west-1.amazonaws.com +s3-accesspoint.dualstack.us-west-1.amazonaws.com +s3-accesspoint-fips.dualstack.us-west-1.amazonaws.com +s3-fips.dualstack.us-west-1.amazonaws.com +s3-website.dualstack.us-west-1.amazonaws.com +s3.us-west-1.amazonaws.com +s3-accesspoint.us-west-1.amazonaws.com +s3-accesspoint-fips.us-west-1.amazonaws.com 
+s3-fips.us-west-1.amazonaws.com +s3-object-lambda.us-west-1.amazonaws.com +s3-website.us-west-1.amazonaws.com +s3.dualstack.us-west-2.amazonaws.com +s3-accesspoint.dualstack.us-west-2.amazonaws.com +s3-accesspoint-fips.dualstack.us-west-2.amazonaws.com +s3-fips.dualstack.us-west-2.amazonaws.com +s3-website.dualstack.us-west-2.amazonaws.com +s3.us-west-2.amazonaws.com +s3-accesspoint.us-west-2.amazonaws.com +s3-accesspoint-fips.us-west-2.amazonaws.com +s3-deprecated.us-west-2.amazonaws.com +s3-fips.us-west-2.amazonaws.com +s3-object-lambda.us-west-2.amazonaws.com +s3-website.us-west-2.amazonaws.com + +// Amazon SageMaker Ground Truth +// Submitted by AWS Security +// Reference: 98dbfde4-7802-48c3-8751-b60f204e0d9c +labeling.ap-northeast-1.sagemaker.aws +labeling.ap-northeast-2.sagemaker.aws +labeling.ap-south-1.sagemaker.aws +labeling.ap-southeast-1.sagemaker.aws +labeling.ap-southeast-2.sagemaker.aws +labeling.ca-central-1.sagemaker.aws +labeling.eu-central-1.sagemaker.aws +labeling.eu-west-1.sagemaker.aws +labeling.eu-west-2.sagemaker.aws +labeling.us-east-1.sagemaker.aws +labeling.us-east-2.sagemaker.aws +labeling.us-west-2.sagemaker.aws + +// Amazon SageMaker Notebook Instances +// Submitted by AWS Security +// Reference: b5ea56df-669e-43cc-9537-14aa172f5dfc +notebook.af-south-1.sagemaker.aws +notebook.ap-east-1.sagemaker.aws +notebook.ap-northeast-1.sagemaker.aws +notebook.ap-northeast-2.sagemaker.aws +notebook.ap-northeast-3.sagemaker.aws +notebook.ap-south-1.sagemaker.aws +notebook.ap-south-2.sagemaker.aws +notebook.ap-southeast-1.sagemaker.aws +notebook.ap-southeast-2.sagemaker.aws +notebook.ap-southeast-3.sagemaker.aws +notebook.ap-southeast-4.sagemaker.aws +notebook.ca-central-1.sagemaker.aws +notebook-fips.ca-central-1.sagemaker.aws +notebook.ca-west-1.sagemaker.aws +notebook-fips.ca-west-1.sagemaker.aws +notebook.eu-central-1.sagemaker.aws +notebook.eu-central-2.sagemaker.aws +notebook.eu-north-1.sagemaker.aws +notebook.eu-south-1.sagemaker.aws 
+notebook.eu-south-2.sagemaker.aws +notebook.eu-west-1.sagemaker.aws +notebook.eu-west-2.sagemaker.aws +notebook.eu-west-3.sagemaker.aws +notebook.il-central-1.sagemaker.aws +notebook.me-central-1.sagemaker.aws +notebook.me-south-1.sagemaker.aws +notebook.sa-east-1.sagemaker.aws +notebook.us-east-1.sagemaker.aws +notebook-fips.us-east-1.sagemaker.aws +notebook.us-east-2.sagemaker.aws +notebook-fips.us-east-2.sagemaker.aws +notebook.us-gov-east-1.sagemaker.aws +notebook-fips.us-gov-east-1.sagemaker.aws +notebook.us-gov-west-1.sagemaker.aws +notebook-fips.us-gov-west-1.sagemaker.aws +notebook.us-west-1.sagemaker.aws +notebook-fips.us-west-1.sagemaker.aws +notebook.us-west-2.sagemaker.aws +notebook-fips.us-west-2.sagemaker.aws +notebook.cn-north-1.sagemaker.com.cn +notebook.cn-northwest-1.sagemaker.com.cn + +// Amazon SageMaker Studio +// Submitted by AWS Security +// Reference: 475f237e-ab88-4041-9f41-7cfccdf66aeb +studio.af-south-1.sagemaker.aws +studio.ap-east-1.sagemaker.aws +studio.ap-northeast-1.sagemaker.aws +studio.ap-northeast-2.sagemaker.aws +studio.ap-northeast-3.sagemaker.aws +studio.ap-south-1.sagemaker.aws +studio.ap-southeast-1.sagemaker.aws +studio.ap-southeast-2.sagemaker.aws +studio.ap-southeast-3.sagemaker.aws +studio.ca-central-1.sagemaker.aws +studio.eu-central-1.sagemaker.aws +studio.eu-central-2.sagemaker.aws +studio.eu-north-1.sagemaker.aws +studio.eu-south-1.sagemaker.aws +studio.eu-south-2.sagemaker.aws +studio.eu-west-1.sagemaker.aws +studio.eu-west-2.sagemaker.aws +studio.eu-west-3.sagemaker.aws +studio.il-central-1.sagemaker.aws +studio.me-central-1.sagemaker.aws +studio.me-south-1.sagemaker.aws +studio.sa-east-1.sagemaker.aws +studio.us-east-1.sagemaker.aws +studio.us-east-2.sagemaker.aws +studio.us-gov-east-1.sagemaker.aws +studio-fips.us-gov-east-1.sagemaker.aws +studio.us-gov-west-1.sagemaker.aws +studio-fips.us-gov-west-1.sagemaker.aws +studio.us-west-1.sagemaker.aws +studio.us-west-2.sagemaker.aws +studio.cn-north-1.sagemaker.com.cn 
+studio.cn-northwest-1.sagemaker.com.cn + +// Amazon SageMaker with MLflow +// Submitted by: AWS Security +// Reference: c19f92b3-a82a-452d-8189-831b572eea7e +*.experiments.sagemaker.aws + +// Analytics on AWS +// Submitted by AWS Security +// Reference: 955f9f40-a495-4e73-ae85-67b77ac9cadd +analytics-gateway.ap-northeast-1.amazonaws.com +analytics-gateway.ap-northeast-2.amazonaws.com +analytics-gateway.ap-south-1.amazonaws.com +analytics-gateway.ap-southeast-1.amazonaws.com +analytics-gateway.ap-southeast-2.amazonaws.com +analytics-gateway.eu-central-1.amazonaws.com +analytics-gateway.eu-west-1.amazonaws.com +analytics-gateway.us-east-1.amazonaws.com +analytics-gateway.us-east-2.amazonaws.com +analytics-gateway.us-west-2.amazonaws.com + +// AWS Amplify +// Submitted by AWS Security +// Reference: c35bed18-6f4f-424f-9298-5756f2f7d72b +amplifyapp.com + +// AWS App Runner +// Submitted by AWS Security +// Reference: 6828c008-ba5d-442f-ade5-48da4e7c2316 +*.awsapprunner.com + +// AWS Cloud9 +// Submitted by: AWS Security +// Reference: 30717f72-4007-4f0f-8ed4-864c6f2efec9 +webview-assets.aws-cloud9.af-south-1.amazonaws.com +vfs.cloud9.af-south-1.amazonaws.com +webview-assets.cloud9.af-south-1.amazonaws.com +webview-assets.aws-cloud9.ap-east-1.amazonaws.com +vfs.cloud9.ap-east-1.amazonaws.com +webview-assets.cloud9.ap-east-1.amazonaws.com +webview-assets.aws-cloud9.ap-northeast-1.amazonaws.com +vfs.cloud9.ap-northeast-1.amazonaws.com +webview-assets.cloud9.ap-northeast-1.amazonaws.com +webview-assets.aws-cloud9.ap-northeast-2.amazonaws.com +vfs.cloud9.ap-northeast-2.amazonaws.com +webview-assets.cloud9.ap-northeast-2.amazonaws.com +webview-assets.aws-cloud9.ap-northeast-3.amazonaws.com +vfs.cloud9.ap-northeast-3.amazonaws.com +webview-assets.cloud9.ap-northeast-3.amazonaws.com +webview-assets.aws-cloud9.ap-south-1.amazonaws.com +vfs.cloud9.ap-south-1.amazonaws.com +webview-assets.cloud9.ap-south-1.amazonaws.com +webview-assets.aws-cloud9.ap-southeast-1.amazonaws.com 
+vfs.cloud9.ap-southeast-1.amazonaws.com +webview-assets.cloud9.ap-southeast-1.amazonaws.com +webview-assets.aws-cloud9.ap-southeast-2.amazonaws.com +vfs.cloud9.ap-southeast-2.amazonaws.com +webview-assets.cloud9.ap-southeast-2.amazonaws.com +webview-assets.aws-cloud9.ca-central-1.amazonaws.com +vfs.cloud9.ca-central-1.amazonaws.com +webview-assets.cloud9.ca-central-1.amazonaws.com +webview-assets.aws-cloud9.eu-central-1.amazonaws.com +vfs.cloud9.eu-central-1.amazonaws.com +webview-assets.cloud9.eu-central-1.amazonaws.com +webview-assets.aws-cloud9.eu-north-1.amazonaws.com +vfs.cloud9.eu-north-1.amazonaws.com +webview-assets.cloud9.eu-north-1.amazonaws.com +webview-assets.aws-cloud9.eu-south-1.amazonaws.com +vfs.cloud9.eu-south-1.amazonaws.com +webview-assets.cloud9.eu-south-1.amazonaws.com +webview-assets.aws-cloud9.eu-west-1.amazonaws.com +vfs.cloud9.eu-west-1.amazonaws.com +webview-assets.cloud9.eu-west-1.amazonaws.com +webview-assets.aws-cloud9.eu-west-2.amazonaws.com +vfs.cloud9.eu-west-2.amazonaws.com +webview-assets.cloud9.eu-west-2.amazonaws.com +webview-assets.aws-cloud9.eu-west-3.amazonaws.com +vfs.cloud9.eu-west-3.amazonaws.com +webview-assets.cloud9.eu-west-3.amazonaws.com +webview-assets.aws-cloud9.il-central-1.amazonaws.com +vfs.cloud9.il-central-1.amazonaws.com +webview-assets.aws-cloud9.me-south-1.amazonaws.com +vfs.cloud9.me-south-1.amazonaws.com +webview-assets.cloud9.me-south-1.amazonaws.com +webview-assets.aws-cloud9.sa-east-1.amazonaws.com +vfs.cloud9.sa-east-1.amazonaws.com +webview-assets.cloud9.sa-east-1.amazonaws.com +webview-assets.aws-cloud9.us-east-1.amazonaws.com +vfs.cloud9.us-east-1.amazonaws.com +webview-assets.cloud9.us-east-1.amazonaws.com +webview-assets.aws-cloud9.us-east-2.amazonaws.com +vfs.cloud9.us-east-2.amazonaws.com +webview-assets.cloud9.us-east-2.amazonaws.com +webview-assets.aws-cloud9.us-west-1.amazonaws.com +vfs.cloud9.us-west-1.amazonaws.com +webview-assets.cloud9.us-west-1.amazonaws.com 
+webview-assets.aws-cloud9.us-west-2.amazonaws.com +vfs.cloud9.us-west-2.amazonaws.com +webview-assets.cloud9.us-west-2.amazonaws.com + +// AWS Directory Service +// Submitted by AWS Security +// Reference: a13203e8-42dc-4045-a0d2-2ee67bed1068 +awsapps.com + +// AWS Elastic Beanstalk +// Submitted by AWS Security +// Reference: bb5a965c-dec3-4967-aa22-e306ad064797 +cn-north-1.eb.amazonaws.com.cn +cn-northwest-1.eb.amazonaws.com.cn +elasticbeanstalk.com +af-south-1.elasticbeanstalk.com +ap-east-1.elasticbeanstalk.com +ap-northeast-1.elasticbeanstalk.com +ap-northeast-2.elasticbeanstalk.com +ap-northeast-3.elasticbeanstalk.com +ap-south-1.elasticbeanstalk.com +ap-southeast-1.elasticbeanstalk.com +ap-southeast-2.elasticbeanstalk.com +ap-southeast-3.elasticbeanstalk.com +ca-central-1.elasticbeanstalk.com +eu-central-1.elasticbeanstalk.com +eu-north-1.elasticbeanstalk.com +eu-south-1.elasticbeanstalk.com +eu-west-1.elasticbeanstalk.com +eu-west-2.elasticbeanstalk.com +eu-west-3.elasticbeanstalk.com +il-central-1.elasticbeanstalk.com +me-south-1.elasticbeanstalk.com +sa-east-1.elasticbeanstalk.com +us-east-1.elasticbeanstalk.com +us-east-2.elasticbeanstalk.com +us-gov-east-1.elasticbeanstalk.com +us-gov-west-1.elasticbeanstalk.com +us-west-1.elasticbeanstalk.com +us-west-2.elasticbeanstalk.com + +// (AWS) Elastic Load Balancing +// Submitted by Luke Wells +// Reference: 12a3d528-1bac-4433-a359-a395867ffed2 +*.elb.amazonaws.com.cn +*.elb.amazonaws.com + +// AWS Global Accelerator +// Submitted by Daniel Massaguer +// Reference: d916759d-a08b-4241-b536-4db887383a6a +awsglobalaccelerator.com + +// AWS re:Post Private +// Submitted by AWS Security +// Reference: 83385945-225f-416e-9aa0-ad0632bfdcee +*.private.repost.aws + +// AWS Transfer Family web apps +// Submitted by AWS Security +// Reference: 67e9cfe6-ac57-49c7-b197-6652711c8e8d +transfer-webapp.ap-northeast-1.on.aws +transfer-webapp.ap-southeast-1.on.aws +transfer-webapp.ap-southeast-2.on.aws 
+transfer-webapp.eu-central-1.on.aws +transfer-webapp.eu-north-1.on.aws +transfer-webapp.eu-west-1.on.aws +transfer-webapp.us-east-1.on.aws +transfer-webapp.us-east-2.on.aws +transfer-webapp.us-west-2.on.aws + +// eero +// Submitted by Yue Kang +// Reference: 264afe70-f62c-4c02-8ab9-b5281ed24461 +eero.online +eero-stage.online + +// concludes Amazon + +// Apigee : https://apigee.com/ +// Submitted by Apigee Security Team +apigee.io + +// Apis Networks : https://apisnetworks.com +// Submitted by Matt Saladna +panel.dev + +// Apphud : https://apphud.com +// Submitted by Alexander Selivanov +siiites.com + +// Appspace : https://www.appspace.com +// Submitted by Appspace Security Team +appspacehosted.com +appspaceusercontent.com + +// Appudo UG (haftungsbeschränkt) : https://www.appudo.com +// Submitted by Alexander Hochbaum +appudo.net + +// Appwrite : https://appwrite.io +// Submitted by Steven Nguyen +appwrite.global +*.appwrite.run + +// Aptible : https://www.aptible.com/ +// Submitted by Thomas Orozco +on-aptible.com + +// Aquapal : https://aquapal.net/ +// Submitted by Aki Ueno +f5.si + +// ArvanCloud EdgeCompute +// Submitted by ArvanCloud CDN +arvanedge.ir + +// ASEINet : https://www.aseinet.com/ +// Submitted by Asei SEKIGUCHI +user.aseinet.ne.jp +gv.vc +d.gv.vc + +// Asociación Amigos de la Informática "Euskalamiga" : http://encounter.eus/ +// Submitted by Hector Martin +user.party.eus + +// Association potager.org : https://potager.org/ +// Submitted by Lunar +pimienta.org +poivron.org +potager.org +sweetpepper.org + +// ASUSTOR Inc. : http://www.asustor.com +// Submitted by Vincent Tseng +myasustor.com + +// Atlassian : https://atlassian.com +// Submitted by Sam Smyth +cdn.prod.atlassian-dev.net + +// Authentick UG (haftungsbeschränkt) : https://authentick.net +// Submitted by Lukas Reschke +translated.page + +// AVM : https://avm.de +// Submitted by Andreas Weise +myfritz.link +myfritz.net + +// AVStack Pte. Ltd. 
: https://avstack.io +// Submitted by Jasper Hugo +onavstack.net + +// AW AdvisorWebsites.com Software Inc : https://advisorwebsites.com +// Submitted by James Kennedy +*.awdev.ca +*.advisor.ws + +// AZ.pl sp. z.o.o : https://az.pl +// Submitted by Krzysztof Wolski +ecommerce-shop.pl + +// b-data GmbH : https://www.b-data.io +// Submitted by Olivier Benz +b-data.io + +// Balena : https://www.balena.io +// Submitted by Petros Angelatos +balena-devices.com + +// BASE, Inc. : https://binc.jp +// Submitted by Yuya NAGASAWA +base.ec +official.ec +buyshop.jp +fashionstore.jp +handcrafted.jp +kawaiishop.jp +supersale.jp +theshop.jp +shopselect.net +base.shop + +// BeagleBoard.org Foundation : https://beagleboard.org +// Submitted by Jason Kridner +beagleboard.io + +// Beget Ltd +// Submitted by Lev Nekrasov +*.beget.app + +// Besties : https://besties.house +// Submitted by Hazel Cora +pages.gay + +// BinaryLane : http://www.binarylane.com +// Submitted by Nathan O'Sullivan +bnr.la + +// Bitbucket : http://bitbucket.org +// Submitted by Andy Ortlieb +bitbucket.io + +// Blackbaud, Inc. : https://www.blackbaud.com +// Submitted by Paul Crowder +blackbaudcdn.net + +// Blatech : http://www.blatech.net +// Submitted by Luke Bratch +of.je + +// Block, Inc. 
: https://block.xyz +// Submitted by Jonathan Boice +square.site + +// Blue Bite, LLC : https://bluebite.com +// Submitted by Joshua Weiss +bluebite.io + +// Boomla : https://boomla.com +// Submitted by Tibor Halter +boomla.net + +// Boutir : https://www.boutir.com +// Submitted by Eric Ng Ka Ka +boutir.com + +// Boxfuse : https://boxfuse.com +// Submitted by Axel Fontaine +boxfuse.io + +// bplaced : https://www.bplaced.net/ +// Submitted by Miroslav Bozic +square7.ch +bplaced.com +bplaced.de +square7.de +bplaced.net +square7.net + +// Brave : https://brave.com +// Submitted by Andrea Brancaleoni +brave.app +*.s.brave.app +brave.io +*.s.brave.io + +// Brendly : https://brendly.rs +// Submitted by Dusan Radovanovic +shop.brendly.hr +shop.brendly.rs + +// BrowserSafetyMark +// Submitted by Dave Tharp +browsersafetymark.io + +// BRS Media : https://brsmedia.com/ +// Submitted by Gavin Brown +radio.am +radio.fm + +// Bubble : https://bubble.io/ +// Submitted by Merlin Zhao +cdn.bubble.io +bubbleapps.io + +// Bytemark Hosting : https://www.bytemark.co.uk +// Submitted by Paul Cammish +uk0.bigv.io +dh.bytemark.co.uk +vm.bytemark.co.uk + +// Caf.js Labs LLC : https://www.cafjs.com +// Submitted by Antonio Lain +cafjs.com + +// Canva Pty Ltd : https://canva.com/ +// Submitted by Joel Aquilina +canva-apps.cn +my.canvasite.cn +canva-apps.com +my.canva.site + +// Carrd : https://carrd.co +// Submitted by AJ +drr.ac +uwu.ai +carrd.co +crd.co +ju.mp + +// CDDO : https://www.gov.uk/guidance/get-an-api-domain-on-govuk +// Submitted by Jamie Tanna +api.gov.uk + +// CDN77.com : http://www.cdn77.com +// Submitted by Jan Krpes +cdn77-storage.com +rsc.contentproxy9.cz +r.cdn77.net +cdn77-ssl.net +c.cdn77.org +rsc.cdn77.org +ssl.origin.cdn77-secure.org + +// CentralNic : https://teaminternet.com/ +// Submitted by registry +za.bz br.com cn.com -com.de de.com eu.com -gb.com -gb.net -gr.com -hu.com -hu.net -jp.net jpn.com -kr.com -no.com -qc.com +mex.com ru.com sa.com -se.com -se.net 
uk.com -uk.net us.com -us.org -uy.com za.com - -// c.la : http://www.c.la/ -c.la - -// cloudControl : https://www.cloudcontrol.com/ -// Requested by Tobias Wilken 2013-07-23 -cloudcontrolled.com -cloudcontrolapp.com +com.de +gb.net +hu.net +jp.net +se.net +uk.net +ae.org +com.se + +// Cityhost LLC : https://cityhost.ua +// Submitted by Maksym Rivtin +cx.ua + +// Civilized Discourse Construction Kit, Inc. : https://www.discourse.org/ +// Submitted by Rishabh Nambiar & Michael Brown +discourse.group +discourse.team + +// Clerk : https://www.clerk.dev +// Submitted by Colin Sidoti +clerk.app +clerkstage.app +*.lcl.dev +*.lclstage.dev +*.stg.dev +*.stgstage.dev + +// Clever Cloud : https://www.clever-cloud.com/ +// Submitted by Quentin Adam +cleverapps.cc +*.services.clever-cloud.com +cleverapps.io +cleverapps.tech + +// ClickRising : https://clickrising.com/ +// Submitted by Umut Gumeli +clickrising.net + +// Cloud DNS Ltd : http://www.cloudns.net +// Submitted by Aleksander Hristov & Boyan Peychev +cloudns.asia +cloudns.be +cloud-ip.biz +cloudns.biz +cloudns.cc +cloudns.ch +cloudns.cl +cloudns.club +dnsabr.com +ip-ddns.com +cloudns.cx +cloudns.eu +cloudns.in +cloudns.info +ddns-ip.net +dns-cloud.net +dns-dynamic.net +cloudns.nz +cloudns.org +ip-dynamic.org +cloudns.ph +cloudns.pro +cloudns.pw +cloudns.us + +// Cloud66 : https://www.cloud66.com/ +// Submitted by Khash Sajadi +c66.me +cloud66.ws + +// CloudAccess.net : https://www.cloudaccess.net/ +// Submitted by Pawel Panek +jdevcloud.com +wpdevcloud.com +cloudaccess.host +freesite.host +cloudaccess.net + +// Cloudbees, Inc. : https://www.cloudbees.com/ +// Submitted by Mohideen Shajith +cloudbeesusercontent.io + +// Cloudera, Inc. : https://www.cloudera.com/ +// Submitted by Kedarnath Waikar +*.cloudera.site + +// Cloudflare, Inc. 
: https://www.cloudflare.com/ +// Submitted by Cloudflare Team +cf-ipfs.com +cloudflare-ipfs.com +trycloudflare.com +pages.dev +r2.dev +workers.dev +cloudflare.net +cdn.cloudflare.net +cdn.cloudflareanycast.net +cdn.cloudflarecn.net +cdn.cloudflareglobal.net + +// cloudscale.ch AG : https://www.cloudscale.ch/ +// Submitted by Gaudenz Steinlin +cust.cloudscale.ch +objects.lpg.cloudscale.ch +objects.rma.cloudscale.ch +lpg.objectstorage.ch +rma.objectstorage.ch + +// Clovyr : https://clovyr.io +// Submitted by Patrick Nielsen +wnext.app + +// CNPY : https://cnpy.gdn +// Submitted by Angelo Gladding +cnpy.gdn + +// Co & Co : https://co-co.nl/ +// Submitted by Govert Versluis +*.otap.co // co.ca : http://registry.co.ca/ co.ca +// co.com Registry, LLC : https://registry.co.com +// Submitted by Gavin Brown +co.com + +// Codeberg e. V. : https://codeberg.org +// Submitted by Moritz Marquardt +codeberg.page + +// CodeSandbox B.V. : https://codesandbox.io +// Submitted by Ives van Hoorne +csb.app +preview.csb.app + // CoDNS B.V. co.nl co.no +// Cognition AI, Inc. : https://cognition.ai +// Submitted by Philip Papurt +*.devinapps.com + +// Combell.com : https://www.combell.com +// Submitted by Thomas Wouters +webhosting.be +hosting-cluster.nl + +// Contentful GmbH : https://www.contentful.com +// Submitted by Contentful Developer Experience Team +ctfcloud.net + +// Convex : https://convex.dev/ +// Submitted by James Cowling +convex.app +convex.cloud +convex.site + +// Coordination Center for TLD RU and XN--P1AI : https://cctld.ru/en/domains/domens_ru/reserved/ +// Submitted by George Georgievsky +ac.ru +edu.ru +gov.ru +int.ru +mil.ru + +// COSIMO GmbH : http://www.cosimo.de +// Submitted by Rene Marticke +dyn.cosidns.de +dnsupdater.de +dynamisches-dns.de +internet-dns.de +l-o-g-i-n.de +dynamic-dns.info +feste-ip.net +knx-server.net +static-access.net + +// Craft Docs Ltd : https://www.craft.do/ +// Submitted by Zsombor Fuszenecker +craft.me + +// Craynic, s.r.o. 
: http://www.craynic.com/ +// Submitted by Ales Krajnik +realm.cz + +// Crisp IM SAS : https://crisp.chat/ +// Submitted by Baptiste Jamin +on.crisp.email + +// Cryptonomic : https://cryptonomic.net/ +// Submitted by Andrew Cady +*.cryptonomic.net + +// cyber_Folks S.A. : https://cyberfolks.pl +// Submitted by Bartlomiej Kida +cfolks.pl + +// cyon GmbH : https://www.cyon.ch/ +// Submitted by Dominic Luechinger +cyon.link +cyon.site + +// Dansk.net : http://www.dansk.net/ +// Submitted by Anani Voule +biz.dk +co.dk +firm.dk +reg.dk +store.dk + +// dappnode.io : https://dappnode.io/ +// Submitted by Abel Boldu / DAppNode Team +dyndns.dappnode.io + +// Dark, Inc. : https://darklang.com +// Submitted by Paul Biggar +builtwithdark.com +darklang.io + +// DataDetect, LLC. : https://datadetect.com +// Submitted by Andrew Banchich +demo.datadetect.com +instance.datadetect.com + +// Datawire, Inc : https://www.datawire.io +// Submitted by Richard Li +edgestack.me + +// Datto, Inc. : https://www.datto.com/ +// Submitted by Philipp Heckel +dattolocal.com +dattorelay.com +dattoweb.com +mydatto.com +dattolocal.net +mydatto.net + +// ddnss.de : https://www.ddnss.de/ +// Submitted by Robert Niedziela +ddnss.de +dyn.ddnss.de +dyndns.ddnss.de +dyn-ip24.de +dyndns1.de +home-webserver.de +dyn.home-webserver.de +myhome-server.de +ddnss.org + +// Debian : https://www.debian.org/ +// Submitted by Peter Palfrader / Debian Sysadmin Team +debian.net + +// Definima : http://www.definima.com/ +// Submitted by Maxence Bitterli +definima.io +definima.net + +// Deno Land Inc : https://deno.com/ +// Submitted by Luca Casonato +deno.dev +deno-staging.dev +deno.net + +// deSEC : https://desec.io/ +// Submitted by Peter Thomassen +dedyn.io + +// Deta : https://www.deta.sh/ +// Submitted by Aavash Shrestha +deta.app +deta.dev + +// Dfinity Foundation: https://dfinity.org/ +// Submitted by Dfinity Team +caffeine.ai +id.ai +icp-api.io +icp0.io +*.raw.icp0.io +icp1.io +*.raw.icp1.io +caffeine.site + +// 
dhosting.pl Sp. z o.o. : https://dhosting.pl/ +// Submitted by Michal Kokoszkiewicz +dfirma.pl +dkonto.pl +you2.pl + +// DigitalOcean App Platform : https://www.digitalocean.com/products/app-platform/ +// Submitted by Braxton Huggins +ondigitalocean.app + +// DigitalOcean Spaces : https://www.digitalocean.com/products/spaces/ +// Submitted by Robin H. Johnson +*.digitaloceanspaces.com + +// DigitalPlat : https://www.digitalplat.org/ +// Submitted by Edward Hsing +qzz.io +us.kg +xx.kg +dpdns.org + +// Discord Inc : https://discord.com +// Submitted by Sahn Lam +discordsays.com +discordsez.com + +// DNS Africa Ltd : https://dns.business +// Submitted by Calvin Browne +jozi.biz + +// DNShome : https://www.dnshome.de/ +// Submitted by Norbert Auler +dnshome.de + +// DotArai : https://www.dotarai.com/ +// Submitted by Atsadawat Netcharadsang +online.th +shop.th + +// DrayTek Corp. : https://www.draytek.com/ +// Submitted by Paul Fang +drayddns.com + +// DreamCommerce : https://shoper.pl/ +// Submitted by Konrad Kotarba +shoparena.pl + // DreamHost : http://www.dreamhost.com/ -// Requested by Andrew Farmer 2012-10-02 +// Submitted by Andrew Farmer dreamhosters.com +// Dreamyoungs, Inc. 
: https://durumis.com +// Submitted by Infra Team +durumis.com + +// Drobo : http://www.drobo.com/ +// Submitted by Ricardo Padilha +mydrobo.com + +// DuckDNS : http://www.duckdns.org/ +// Submitted by Richard Harper +duckdns.org + +// dy.fi : http://dy.fi/ +// Submitted by Heikki Hannikainen +dy.fi +tunk.org + // DynDNS.com : http://www.dyndns.com/services/dns/dyndns/ +dyndns.biz +for-better.biz +for-more.biz +for-some.biz +for-the.biz +selfip.biz +webhop.biz +ftpaccess.cc +game-server.cc +myphotos.cc +scrapping.cc +blogdns.com +cechire.com +dnsalias.com +dnsdojo.com +doesntexist.com +dontexist.com +doomdns.com +dyn-o-saur.com +dynalias.com dyndns-at-home.com dyndns-at-work.com dyndns-blog.com @@ -6676,64 +12823,14 @@ dyndns-server.com dyndns-web.com dyndns-wiki.com dyndns-work.com -dyndns.biz -dyndns.info -dyndns.org -dyndns.tv -at-band-camp.net -ath.cx -barrel-of-knowledge.info -barrell-of-knowledge.info -better-than.tv -blogdns.com -blogdns.net -blogdns.org -blogsite.org -boldlygoingnowhere.org -broke-it.net -buyshouses.net -cechire.com -dnsalias.com -dnsalias.net -dnsalias.org -dnsdojo.com -dnsdojo.net -dnsdojo.org -does-it.net -doesntexist.com -doesntexist.org -dontexist.com -dontexist.net -dontexist.org -doomdns.com -doomdns.org -dvrdns.org -dyn-o-saur.com -dynalias.com -dynalias.net -dynalias.org -dynathome.net -dyndns.ws -endofinternet.net -endofinternet.org -endoftheinternet.org est-a-la-maison.com est-a-la-masion.com est-le-patron.com est-mon-blogueur.com -for-better.biz -for-more.biz -for-our.info -for-some.biz -for-the.biz -forgot.her.name -forgot.his.name from-ak.com from-al.com from-ar.com -from-az.net from-ca.com -from-co.net from-ct.com from-dc.com from-de.com @@ -6746,10 +12843,8 @@ from-il.com from-in.com from-ks.com from-ky.com -from-la.net from-ma.com from-md.com -from-me.org from-mi.com from-mn.com from-mo.com @@ -6762,7 +12857,6 @@ from-nh.com from-nj.com from-nm.com from-nv.com -from-ny.net from-oh.com from-ok.com from-or.com @@ -6779,46 
+12873,19 @@ from-vt.com from-wa.com from-wi.com from-wv.com -from-wy.com -ftpaccess.cc -fuettertdasnetz.de -game-host.org -game-server.cc -getmyip.com -gets-it.net -go.dyndns.org -gotdns.com -gotdns.org -groks-the.info -groks-this.info -ham-radio-op.net -here-for-more.info +from-wy.com +getmyip.com +gotdns.com hobby-site.com -hobby-site.org -home.dyndns.org -homedns.org -homeftp.net -homeftp.org -homeip.net homelinux.com -homelinux.net -homelinux.org homeunix.com -homeunix.net -homeunix.org iamallama.com -in-the-band.net is-a-anarchist.com is-a-blogger.com is-a-bookkeeper.com -is-a-bruinsfan.org is-a-bulls-fan.com -is-a-candidate.org is-a-caterer.com -is-a-celticsfan.org is-a-chef.com -is-a-chef.net -is-a-chef.org is-a-conservative.com is-a-cpa.com is-a-cubicle-slave.com @@ -6827,31 +12894,25 @@ is-a-designer.com is-a-doctor.com is-a-financialadvisor.com is-a-geek.com -is-a-geek.net -is-a-geek.org is-a-green.com is-a-guru.com is-a-hard-worker.com is-a-hunter.com -is-a-knight.org is-a-landscaper.com is-a-lawyer.com is-a-liberal.com is-a-libertarian.com -is-a-linux-user.org is-a-llama.com is-a-musician.com is-a-nascarfan.com is-a-nurse.com is-a-painter.com -is-a-patsfan.org is-a-personaltrainer.com is-a-photographer.com is-a-player.com is-a-republican.com is-a-rockstar.com is-a-socialist.com -is-a-soxfan.org is-a-student.com is-a-teacher.com is-a-techie.com @@ -6863,183 +12924,2961 @@ is-an-anarchist.com is-an-artist.com is-an-engineer.com is-an-entertainer.com -is-by.us is-certified.com -is-found.org is-gone.com is-into-anime.com is-into-cars.com is-into-cartoons.com is-into-games.com is-leet.com -is-lost.org is-not-certified.com -is-saved.org is-slick.com is-uberleet.com -is-very-bad.org -is-very-evil.org -is-very-good.org -is-very-nice.org -is-very-sweet.org is-with-theband.com isa-geek.com -isa-geek.net -isa-geek.org isa-hockeynut.com issmarterthanyou.com +likes-pie.com +likescandy.com +neat-url.com +saves-the-whales.com +selfip.com +sells-for-less.com 
+sells-for-u.com +servebbs.com +simple-url.com +space-to-rent.com +teaches-yoga.com +writesthisblog.com +ath.cx +fuettertdasnetz.de isteingeek.de istmein.de -kicks-ass.net -kicks-ass.org -knowsitall.info -land-4-sale.us lebtimnetz.de leitungsen.de -likes-pie.com -likescandy.com +traeumtgerade.de +barrel-of-knowledge.info +barrell-of-knowledge.info +dyndns.info +for-our.info +groks-the.info +groks-this.info +here-for-more.info +knowsitall.info +selfip.info +webhop.info +forgot.her.name +forgot.his.name +at-band-camp.net +blogdns.net +broke-it.net +buyshouses.net +dnsalias.net +dnsdojo.net +does-it.net +dontexist.net +dynalias.net +dynathome.net +endofinternet.net +from-az.net +from-co.net +from-la.net +from-ny.net +gets-it.net +ham-radio-op.net +homeftp.net +homeip.net +homelinux.net +homeunix.net +in-the-band.net +is-a-chef.net +is-a-geek.net +isa-geek.net +kicks-ass.net +office-on-the.net +podzone.net +scrapper-site.net +selfip.net +sells-it.net +servebbs.net +serveftp.net +thruhere.net +webhop.net merseine.nu mine.nu +shacknet.nu +blogdns.org +blogsite.org +boldlygoingnowhere.org +dnsalias.org +dnsdojo.org +doesntexist.org +dontexist.org +doomdns.org +dvrdns.org +dynalias.org +dyndns.org +go.dyndns.org +home.dyndns.org +endofinternet.org +endoftheinternet.org +from-me.org +game-host.org +gotdns.org +hobby-site.org +homedns.org +homeftp.org +homelinux.org +homeunix.org +is-a-bruinsfan.org +is-a-candidate.org +is-a-celticsfan.org +is-a-chef.org +is-a-geek.org +is-a-knight.org +is-a-linux-user.org +is-a-patsfan.org +is-a-soxfan.org +is-found.org +is-lost.org +is-saved.org +is-very-bad.org +is-very-evil.org +is-very-good.org +is-very-nice.org +is-very-sweet.org +isa-geek.org +kicks-ass.org misconfused.org -mypets.ws -myphotos.cc -neat-url.com -office-on-the.net -on-the-web.tv -podzone.net podzone.org readmyblog.org -saves-the-whales.com -scrapper-site.net -scrapping.cc -selfip.biz -selfip.com -selfip.info -selfip.net selfip.org -sells-for-less.com -sells-for-u.com 
-sells-it.net sellsyourhome.org -servebbs.com -servebbs.net servebbs.org -serveftp.net serveftp.org servegame.org -shacknet.nu -simple-url.com -space-to-rent.com stuff-4-sale.org -stuff-4-sale.us -teaches-yoga.com -thruhere.net -traeumtgerade.de -webhop.biz -webhop.info -webhop.net webhop.org +better-than.tv +dyndns.tv +on-the-web.tv worse-than.tv -writesthisblog.com +is-by.us +land-4-sale.us +stuff-4-sale.us +dyndns.ws +mypets.ws -// Fastly Inc. http://www.fastly.com/ -// Requested by Vladimir Vuksan 2013-05-31 +// Dynu.com : https://www.dynu.com/ +// Submitted by Sue Ye +ddnsfree.com +ddnsgeek.com +giize.com +gleeze.com +kozow.com +loseyourip.com +ooguy.com +theworkpc.com +casacam.net +dynu.net +accesscam.org +camdvr.org +freeddns.org +mywire.org +webredirect.org +myddns.rocks + +// dynv6 : https://dynv6.com +// Submitted by Dominik Menke +dynv6.net + +// E4YOU spol. s.r.o. : https://e4you.cz/ +// Submitted by Vladimir Dudr +e4.cz + +// Easypanel : https://easypanel.io +// Submitted by Andrei Canta +easypanel.app +easypanel.host + +// EasyWP : https://www.easywp.com +// Submitted by +*.ewp.live + +// eDirect Corp. : https://hosting.url.com.tw/ +// Submitted by C.S. chang +twmail.cc +twmail.net +twmail.org +mymailer.com.tw +url.tw + +// Electromagnetic Field : https://www.emfcamp.org +// Submitted by +at.emf.camp + +// Elefunc, Inc. : https://elefunc.com +// Submitted by Cetin Sert +rt.ht + +// Elementor : Elementor Ltd. 
+// Submitted by Anton Barkan +elementor.cloud +elementor.cool + +// En root‽ : https://en-root.org +// Submitted by Emmanuel Raviart +en-root.fr + +// Enalean SAS : https://www.enalean.com +// Submitted by Enalean Security Team +mytuleap.com +tuleap-partners.com + +// Encoretivity AB : https://encore.cloud +// Submitted by André Eriksson +encr.app +frontend.encr.app +encoreapi.com +lp.dev +api.lp.dev +objects.lp.dev + +// encoway GmbH : https://www.encoway.de +// Submitted by Marcel Daus +eu.encoway.cloud + +// EU.org : https://eu.org/ +// Submitted by Pierre Beyssac +eu.org +al.eu.org +asso.eu.org +at.eu.org +au.eu.org +be.eu.org +bg.eu.org +ca.eu.org +cd.eu.org +ch.eu.org +cn.eu.org +cy.eu.org +cz.eu.org +de.eu.org +dk.eu.org +edu.eu.org +ee.eu.org +es.eu.org +fi.eu.org +fr.eu.org +gr.eu.org +hr.eu.org +hu.eu.org +ie.eu.org +il.eu.org +in.eu.org +int.eu.org +is.eu.org +it.eu.org +jp.eu.org +kr.eu.org +lt.eu.org +lu.eu.org +lv.eu.org +me.eu.org +mk.eu.org +mt.eu.org +my.eu.org +net.eu.org +ng.eu.org +nl.eu.org +no.eu.org +nz.eu.org +pl.eu.org +pt.eu.org +ro.eu.org +ru.eu.org +se.eu.org +si.eu.org +sk.eu.org +tr.eu.org +uk.eu.org +us.eu.org + +// Eurobyte : https://eurobyte.ru +// Submitted by Evgeniy Subbotin +eurodir.ru + +// Evennode : http://www.evennode.com/ +// Submitted by Michal Kralik +eu-1.evennode.com +eu-2.evennode.com +eu-3.evennode.com +eu-4.evennode.com +us-1.evennode.com +us-2.evennode.com +us-3.evennode.com +us-4.evennode.com + +// Evervault : https://evervault.com +// Submitted by Hannah Neary +relay.evervault.app +relay.evervault.dev + +// Expo : https://expo.dev/ +// Submitted by James Ide +expo.app +staging.expo.app + +// Fabrica Technologies, Inc. 
: https://www.fabrica.dev/ +// Submitted by Eric Jiang +onfabrica.com + +// FAITID : https://faitid.org/ +// Submitted by Maxim Alzoba +// https://www.flexireg.net/stat_info +ru.net +adygeya.ru +bashkiria.ru +bir.ru +cbg.ru +com.ru +dagestan.ru +grozny.ru +kalmykia.ru +kustanai.ru +marine.ru +mordovia.ru +msk.ru +mytis.ru +nalchik.ru +nov.ru +pyatigorsk.ru +spb.ru +vladikavkaz.ru +vladimir.ru +abkhazia.su +adygeya.su +aktyubinsk.su +arkhangelsk.su +armenia.su +ashgabad.su +azerbaijan.su +balashov.su +bashkiria.su +bryansk.su +bukhara.su +chimkent.su +dagestan.su +east-kazakhstan.su +exnet.su +georgia.su +grozny.su +ivanovo.su +jambyl.su +kalmykia.su +kaluga.su +karacol.su +karaganda.su +karelia.su +khakassia.su +krasnodar.su +kurgan.su +kustanai.su +lenug.su +mangyshlak.su +mordovia.su +msk.su +murmansk.su +nalchik.su +navoi.su +north-kazakhstan.su +nov.su +obninsk.su +penza.su +pokrovsk.su +sochi.su +spb.su +tashkent.su +termez.su +togliatti.su +troitsk.su +tselinograd.su +tula.su +tuva.su +vladikavkaz.su +vladimir.su +vologda.su + +// Fancy Bits, LLC : http://getchannels.com +// Submitted by Aman Gupta +channelsdvr.net +u.channelsdvr.net + +// Fastly Inc. : http://www.fastly.com/ +// Submitted by Fastly Security +edgecompute.app +fastly-edge.com +fastly-terrarium.com +freetls.fastly.net +map.fastly.net +a.prod.fastly.net +global.prod.fastly.net a.ssl.fastly.net b.ssl.fastly.net global.ssl.fastly.net -a.prod.fastly.net -global.prod.fastly.net +fastlylb.net +map.fastlylb.net + +// Fastmail : https://www.fastmail.com/ +// Submitted by Marc Bradshaw +*.user.fm + +// FASTVPS EESTI OU : https://fastvps.ru/ +// Submitted by Likhachev Vasiliy +fastvps-server.com +fastvps.host +myfast.host +fastvps.site +myfast.space + +// FearWorks Media Ltd. 
: https://fearworksmedia.co.uk +// Submitted by Keith Fairley +conn.uk +copro.uk +hosp.uk + +// Fedora : https://fedoraproject.org/ +// Submitted by Patrick Uiterwijk +fedorainfracloud.org +fedorapeople.org +cloud.fedoraproject.org +app.os.fedoraproject.org +app.os.stg.fedoraproject.org + +// Fermax : https://fermax.com/ +// Submitted by Koen Van Isterdael +mydobiss.com + +// FH Muenster : https://www.fh-muenster.de +// Submitted by Robin Naundorf +fh-muenster.io + +// Figma : https://www.figma.com +// Submitted by Nick Frost +figma.site +preview.site + +// Filegear Inc. : https://www.filegear.com +// Submitted by Jason Zhu +filegear.me + +// Firebase, Inc. +// Submitted by Chris Raynor +firebaseapp.com + +// FlashDrive : https://flashdrive.io +// Submitted by Eric Chan +fldrv.com + +// Fleek Labs Inc : https://fleek.xyz +// Submitted by Parsa Ghadimi +on-fleek.app + +// FlutterFlow : https://flutterflow.io +// Submitted by Anton Emelyanov +flutterflow.app + +// fly.io : https://fly.io +// Submitted by Kurt Mackey +fly.dev +shw.io +edgeapp.net + +// Forgerock : https://www.forgerock.com +// Submitted by Roderick Parr +forgeblocks.com +id.forgerock.io + +// FoundryLabs, Inc : https://e2b.dev/ +// Submitted by Jiri Sveceny +e2b.app + +// Framer : https://www.framer.com +// Submitted by Koen Rouwhorst +framer.ai +framer.app +framercanvas.com +framer.media +framer.photos +framer.website +framer.wiki + +// Frederik Braun : https://frederik-braun.com +// Submitted by Frederik Braun +*.0e.vc + +// Freebox : http://www.freebox.fr +// Submitted by Romain Fliedel +freebox-os.com +freeboxos.com +fbx-os.fr +fbxos.fr +freebox-os.fr +freeboxos.fr + +// freedesktop.org : https://www.freedesktop.org +// Submitted by Daniel Stone +freedesktop.org + +// freemyip.com : https://freemyip.com +// Submitted by Cadence +freemyip.com + +// Frusky MEDIA&PR : https://www.frusky.de +// Submitted by Victor Pupynin +*.frusky.de + +// FunkFeuer - Verein zur Förderung freier Netze : 
https://www.funkfeuer.at +// Submitted by Daniel A. Maierhofer +wien.funkfeuer.at + +// Future Versatile Group. : https://www.fvg-on.net/ +// T.Kabu +daemon.asia +dix.asia +mydns.bz +0am.jp +0g0.jp +0j0.jp +0t0.jp +mydns.jp +pgw.jp +wjg.jp +keyword-on.net +live-on.net +server-on.net +mydns.tw +mydns.vc + +// Futureweb GmbH : https://www.futureweb.at +// Submitted by Andreas Schnederle-Wagner +*.futurecms.at +*.ex.futurecms.at +*.in.futurecms.at +futurehosting.at +futuremailing.at +*.ex.ortsinfo.at +*.kunden.ortsinfo.at +*.statics.cloud + +// GCom Internet : https://www.gcom.net.au +// Submitted by Leo Julius +aliases121.com + +// GDS : https://www.gov.uk/service-manual/technology/managing-domain-names +// Submitted by Stephen Ford +campaign.gov.uk +service.gov.uk +independent-commission.uk +independent-inquest.uk +independent-inquiry.uk +independent-panel.uk +independent-review.uk +public-inquiry.uk +royal-commission.uk + +// Gehirn Inc. : https://www.gehirn.co.jp/ +// Submitted by Kohei YOSHIDA +gehirn.ne.jp +usercontent.jp + +// Gentlent, Inc. : https://www.gentlent.com +// Submitted by Tom Klein +gentapps.com +gentlentapis.com +cdn-edges.net + +// GignoSystemJapan : http://gsj.bz +// Submitted by GignoSystemJapan +gsj.bz // GitHub, Inc. -// Requested by Ben Toews 2013-04-18 +// Submitted by Patrick Toomey +github.app +githubusercontent.com +githubpreview.dev github.io -// GlobeHosting, Inc. -// Requested by Zoltan Egresi 2013-07-12 -ro.com +// GitLab, Inc. : https://about.gitlab.com/ +// Submitted by Alex Hanselka +gitlab.io + +// Gitplac.si : https://gitplac.si +// Submitted by Aljaž Starc +gitapp.si +gitpage.si + +// Glitch, Inc : https://glitch.com +// Submitted by Mads Hartmann +glitch.me + +// Global NOG Alliance : https://nogalliance.org/ +// Submitted by Sander Steffann +nog.community + +// Globe Hosting SRL : https://www.globehosting.com/ +// Submitted by Gavin Brown +co.ro +shop.ro + +// GMO Pepabo, Inc. 
: https://pepabo.com/ +// Submitted by Hosting Div +lolipop.io +angry.jp +babyblue.jp +babymilk.jp +backdrop.jp +bambina.jp +bitter.jp +blush.jp +boo.jp +boy.jp +boyfriend.jp +but.jp +candypop.jp +capoo.jp +catfood.jp +cheap.jp +chicappa.jp +chillout.jp +chips.jp +chowder.jp +chu.jp +ciao.jp +cocotte.jp +coolblog.jp +cranky.jp +cutegirl.jp +daa.jp +deca.jp +deci.jp +digick.jp +egoism.jp +fakefur.jp +fem.jp +flier.jp +floppy.jp +fool.jp +frenchkiss.jp +girlfriend.jp +girly.jp +gloomy.jp +gonna.jp +greater.jp +hacca.jp +heavy.jp +her.jp +hiho.jp +hippy.jp +holy.jp +hungry.jp +icurus.jp +itigo.jp +jellybean.jp +kikirara.jp +kill.jp +kilo.jp +kuron.jp +littlestar.jp +lolipopmc.jp +lolitapunk.jp +lomo.jp +lovepop.jp +lovesick.jp +main.jp +mods.jp +mond.jp +mongolian.jp +moo.jp +namaste.jp +nikita.jp +nobushi.jp +noor.jp +oops.jp +parallel.jp +parasite.jp +pecori.jp +peewee.jp +penne.jp +pepper.jp +perma.jp +pigboat.jp +pinoko.jp +punyu.jp +pupu.jp +pussycat.jp +pya.jp +raindrop.jp +readymade.jp +sadist.jp +schoolbus.jp +secret.jp +staba.jp +stripper.jp +sub.jp +sunnyday.jp +thick.jp +tonkotsu.jp +under.jp +upper.jp +velvet.jp +verse.jp +versus.jp +vivian.jp +watson.jp +weblike.jp +whitesnow.jp +zombie.jp +heteml.net + +// GoDaddy Registry : https://registry.godaddy +// Submitted by Rohan Durrant +graphic.design + +// GoIP DNS Services : http://www.goip.de +// Submitted by Christian Poulter +goip.de // Google, Inc. 
-// Requested by Eduardo Vela 2012-10-24 +// Submitted by Shannon McCabe +*.hosted.app +*.run.app +*.mtls.run.app +web.app +*.0emm.com appspot.com -blogspot.be -blogspot.bj -blogspot.ca -blogspot.cf -blogspot.ch -blogspot.co.at -blogspot.co.il -blogspot.co.nz -blogspot.co.uk +*.r.appspot.com blogspot.com -blogspot.com.ar -blogspot.com.au -blogspot.com.br -blogspot.com.es -blogspot.cv -blogspot.cz -blogspot.de -blogspot.dk -blogspot.fi -blogspot.fr -blogspot.gr -blogspot.hk -blogspot.hu -blogspot.ie -blogspot.in -blogspot.it -blogspot.jp -blogspot.kr -blogspot.mr -blogspot.mx -blogspot.nl -blogspot.no -blogspot.pt -blogspot.re -blogspot.ro -blogspot.se -blogspot.sg -blogspot.sk -blogspot.td -blogspot.tw codespot.com googleapis.com googlecode.com +pagespeedmobilizer.com +withgoogle.com +withyoutube.com +*.gateway.dev +cloud.goog +translate.goog +*.usercontent.goog +cloudfunctions.net + +// Goupile : https://goupile.fr +// Submitted by Niels Martignene +goupile.fr + +// GOV.UK Pay : https://www.payments.service.gov.uk/ +// Submitted by Richard Baker +pymnt.uk + +// GOV.UK Platform as a Service : https://www.cloud.service.gov.uk/ +// Submitted by Tom Whitwell +cloudapps.digital +london.cloudapps.digital + +// Government of the Netherlands : https://www.government.nl +// Submitted by +gov.nl + +// Grafana Labs : https://grafana.com/ +// Submitted by Platform Engineering +grafana-dev.net + +// GrayJay Web Solutions Inc. : https://grayjaysports.ca +// Submitted by Matt Yamkowy +grayjayleagues.com + +// GünstigBestellen : https://günstigbestellen.de +// Submitted by Furkan Akkoc +günstigbestellen.de +günstigliefern.de + +// Häkkinen.fi : https://www.häkkinen.fi/ +// Submitted by Eero Häkkinen +häkkinen.fi + +// Hashbang : https://hashbang.sh +hashbang.sh + +// Hasura : https://hasura.io +// Submitted by Shahidh K Muhammed +hasura.app +hasura-app.io + +// Hatena Co., Ltd. 
: https://hatena.co.jp +// Submitted by Masato Nakamura +hatenablog.com +hatenadiary.com +hateblo.jp +hatenablog.jp +hatenadiary.jp +hatenadiary.org + +// Heilbronn University of Applied Sciences - Faculty Informatics (GitLab Pages) : https://www.hs-heilbronn.de +// Submitted by Richard Zowalla +pages.it.hs-heilbronn.de +pages-research.it.hs-heilbronn.de + +// HeiyuSpace : https://lazycat.cloud +// Submitted by Xia Bin +heiyu.space + +// Helio Networks : https://heliohost.org +// Submitted by Ben Frede +helioho.st +heliohost.us + +// Hepforge : https://www.hepforge.org +// Submitted by David Grellscheid +hepforge.org // Heroku : https://www.heroku.com/ -// Requested by Tom Maher 2013-05-02 +// Submitted by Shumon Huque herokuapp.com -herokussl.com + +// Heyflow : https://www.heyflow.com +// Submitted by Mirko Nitschke +heyflow.page +heyflow.site + +// Hibernating Rhinos +// Submitted by Oren Eini +ravendb.cloud +ravendb.community +development.run +ravendb.run + +// home.pl S.A. : https://home.pl +// Submitted by Krzysztof Wolski +homesklep.pl + +// Homebase : https://homebase.id/ +// Submitted by Jason Babo +*.kin.one +*.id.pub +*.kin.pub + +// Hoplix : https://www.hoplix.com +// Submitted by Danilo De Franco +hoplix.shop + +// HOSTBIP REGISTRY : https://www.hostbip.com/ +// Submitted by Atanunu Igbunuroghene +orx.biz +biz.gl +biz.ng +co.biz.ng +dl.biz.ng +go.biz.ng +lg.biz.ng +on.biz.ng +col.ng +firm.ng +gen.ng +ltd.ng +ngo.ng +plc.ng + +// HostyHosting : https://hostyhosting.com +hostyhosting.io + +// Hugging Face : https://huggingface.co +// Submitted by Eliott Coyac +hf.space +static.hf.space + +// Hypernode B.V. : https://www.hypernode.com/ +// Submitted by Cipriano Groenendal +hypernode.io + +// I-O DATA DEVICE, INC. : http://www.iodata.com/ +// Submitted by Yuji Minagawa +iobb.net + +// i-registry s.r.o. 
: http://www.i-registry.cz/ +// Submitted by Martin Semrad +co.cz + +// Ici la Lune : http://www.icilalune.com/ +// Submitted by Simon Morvan +*.moonscale.io +moonscale.net + +// iDOT Services Limited : http://www.domain.gr.com +// Submitted by Gavin Brown +gr.com // iki.fi -// Requested by Hannu Aronsson 2009-11-05 +// Submitted by Hannu Aronsson iki.fi +// iliad italia : https://www.iliad.it +// Submitted by Marios Makassikis +ibxos.it +iliadboxos.it + +// Incsub, LLC : https://incsub.com/ +// Submitted by Aaron Edwards +smushcdn.com +wphostedmail.com +wpmucdn.com +tempurl.host +wpmudev.host + +// Individual Network Berlin e.V. : https://www.in-berlin.de/ +// Submitted by Christian Seitz +dyn-berlin.de +in-berlin.de +in-brb.de +in-butter.de +in-dsl.de +in-vpn.de +in-dsl.net +in-vpn.net +in-dsl.org +in-vpn.org + +// Inferno Communications : https://inferno.co.uk +// Submitted by Connor McFarlane +oninferno.net + // info.at : http://www.info.at/ biz.at info.at +// info.cx : http://info.cx +// Submitted by June Slater +info.cx + +// Interlegis : http://www.interlegis.leg.br +// Submitted by Gabriel Ferreira +ac.leg.br +al.leg.br +am.leg.br +ap.leg.br +ba.leg.br +ce.leg.br +df.leg.br +es.leg.br +go.leg.br +ma.leg.br +mg.leg.br +ms.leg.br +mt.leg.br +pa.leg.br +pb.leg.br +pe.leg.br +pi.leg.br +pr.leg.br +rj.leg.br +rn.leg.br +ro.leg.br +rr.leg.br +rs.leg.br +sc.leg.br +se.leg.br +sp.leg.br +to.leg.br + +// intermetrics GmbH : https://pixolino.com/ +// Submitted by Wolfgang Schwarz +pixolino.com + +// Internet-Pro, LLP : https://netangels.ru/ +// Submitted by Vasiliy Sheredeko +na4u.ru + +// Inventor Services : https://inventor.gg/ +// Submitted by Inventor Team +botdash.app +botdash.dev +botdash.gg +botdash.net +botda.sh +botdash.xyz + +// IONOS SE : https://www.ionos.com/ +// IONOS Group SE : https://www.ionos-group.com/ +// Submitted by Henrik Willert +apps-1and1.com +live-website.com +webspace-host.com +apps-1and1.net +websitebuilder.online +app-ionos.space + +// 
iopsys software solutions AB : https://iopsys.eu/ +// Submitted by Roman Azarenko +iopsys.se + +// IPFS Project : https://ipfs.tech/ +// Submitted by Interplanetary Shipyard +*.inbrowser.dev +*.dweb.link +*.inbrowser.link + +// IPiFony Systems, Inc. : https://www.ipifony.com/ +// Submitted by Matthew Hardeman +ipifony.net + +// ir.md : https://nic.ir.md +// Submitted by Ali Soizi +ir.md + +// is-a-good.dev : https://is-a-good.dev +// Submitted by William Harrison +is-a-good.dev + +// is-a.dev : https://is-a.dev +// Submitted by William Harrison +is-a.dev + +// IServ GmbH : https://iserv.de +// Submitted by Kim Brodowski +iservschule.de +mein-iserv.de +schuldock.de +schulplattform.de +schulserver.de +test-iserv.de +iserv.dev +iserv.host + +// Jelastic, Inc. : https://jelastic.com/ +// Submitted by Ihor Kolodyuk +mel.cloudlets.com.au +cloud.interhostsolutions.be +alp1.ae.flow.ch +appengine.flow.ch +es-1.axarnet.cloud +diadem.cloud +vip.jelastic.cloud +jele.cloud +it1.eur.aruba.jenv-aruba.cloud +it1.jenv-aruba.cloud +keliweb.cloud +cs.keliweb.cloud +oxa.cloud +tn.oxa.cloud +uk.oxa.cloud +primetel.cloud +uk.primetel.cloud +ca.reclaim.cloud +uk.reclaim.cloud +us.reclaim.cloud +ch.trendhosting.cloud +de.trendhosting.cloud +jele.club +dopaas.com +paas.hosted-by-previder.com +rag-cloud.hosteur.com +rag-cloud-ch.hosteur.com +jcloud.ik-server.com +jcloud-ver-jpc.ik-server.com +demo.jelastic.com +paas.massivegrid.com +jed.wafaicloud.com +ryd.wafaicloud.com +j.scaleforce.com.cy +jelastic.dogado.eu +fi.cloudplatform.fi +demo.datacenter.fi +paas.datacenter.fi +jele.host +mircloud.host +paas.beebyte.io +sekd1.beebyteapp.io +jele.io +jc.neen.it +jcloud.kz +cloudjiffy.net +fra1-de.cloudjiffy.net +west1-us.cloudjiffy.net +jls-sto1.elastx.net +jls-sto2.elastx.net +jls-sto3.elastx.net +fr-1.paas.massivegrid.net +lon-1.paas.massivegrid.net +lon-2.paas.massivegrid.net +ny-1.paas.massivegrid.net +ny-2.paas.massivegrid.net +sg-1.paas.massivegrid.net +jelastic.saveincloud.net 
+nordeste-idc.saveincloud.net +j.scaleforce.net +sdscloud.pl +unicloud.pl +mircloud.ru +enscaled.sg +jele.site +jelastic.team +orangecloud.tn +j.layershift.co.uk +phx.enscaled.us +mircloud.us + +// Jino : https://www.jino.ru +// Submitted by Sergey Ulyashin +myjino.ru +*.hosting.myjino.ru +*.landing.myjino.ru +*.spectrum.myjino.ru +*.vps.myjino.ru + +// Jotelulu S.L. : https://jotelulu.com +// Submitted by Daniel Fariña +jotelulu.cloud + +// JouwWeb B.V. : https://www.jouwweb.nl +// Submitted by Camilo Sperberg +webadorsite.com +jouwweb.site + +// Joyent : https://www.joyent.com/ +// Submitted by Brian Bennett +*.cns.joyent.com +*.triton.zone + +// JS.ORG : http://dns.js.org +// Submitted by Stefan Keim +js.org + +// KaasHosting : http://www.kaashosting.nl/ +// Submitted by Wouter Bakker +kaas.gg +khplay.nl + +// Kapsi : https://kapsi.fi +// Submitted by Tomi Juntunen +kapsi.fi + +// Katholieke Universiteit Leuven : https://www.kuleuven.be +// Submitted by Abuse KU Leuven +ezproxy.kuleuven.be +kuleuven.cloud + +// Keyweb AG : https://www.keyweb.de +// Submitted by Martin Dannehl +keymachine.de + +// KingHost : https://king.host +// Submitted by Felipe Keller Braz +kinghost.net +uni5.net + +// KnightPoint Systems, LLC : http://www.knightpoint.com/ +// Submitted by Roy Keene +knightpoint.systems + +// KoobinEvent, SL : https://www.koobin.com +// Submitted by Iván Oliva +koobin.events + +// Krellian Ltd. : https://krellian.com +// Submitted by Ben Francis +webthings.io +krellian.net + +// KUROKU LTD : https://kuroku.ltd/ +// Submitted by DisposaBoy +oya.to + +// Laravel Holdings, Inc. : https://laravel.com +// Submitted by André Valentin +laravel.cloud + +// LCube - Professional hosting e.K. 
: https://www.lcube-webhosting.de +// Submitted by Lars Laehn +git-repos.de +lcube-server.de +svn-repos.de + +// Leadpages : https://www.leadpages.net +// Submitted by Greg Dallavalle +leadpages.co +lpages.co +lpusercontent.com + +// Liara : https://liara.ir +// Submitted by Amirhossein Badinloo +liara.run +iran.liara.run + +// libp2p project : https://libp2p.io +// Submitted by Interplanetary Shipyard +libp2p.direct + +// Libre IT Ltd : https://libre.nz +// Submitted by Tomas Maggio +runcontainers.dev + +// Lifetime Hosting : https://Lifetime.Hosting/ +// Submitted by Mike Fillator +co.business +co.education +co.events +co.financial +co.network +co.place +co.technology + +// linkyard ldt : https://www.linkyard.ch/ +// Submitted by Mario Siegenthaler +linkyard-cloud.ch +linkyard.cloud + +// Linode : https://linode.com +// Submitted by +members.linode.com +*.nodebalancer.linode.com +*.linodeobjects.com +ip.linodeusercontent.com + +// LiquidNet Ltd : http://www.liquidnetlimited.com/ +// Submitted by Victor Velchev +we.bs + +// Listen53 : https://www.l53.net +// Submitted by Gerry Keh +filegear-sg.me +ggff.net + +// Localcert : https://localcert.dev +// Submitted by Lann Martin +*.user.localcert.dev + +// LocalCert : https://localcert.net +// Submitted by William Harrison +localcert.net + +// Localtonet : https://localtonet.com/ +// Submitted by Burak Isleyici +localtonet.com +*.localto.net + +// Lodz University of Technology LODMAN regional domains : https://www.man.lodz.pl/dns +// Submitted by Piotr Wilk +lodz.pl +pabianice.pl +plock.pl +sieradz.pl +skierniewice.pl +zgierz.pl + +// Log'in Line : https://www.loginline.com/ +// Submitted by Rémi Mach +loginline.app +loginline.dev +loginline.io +loginline.services +loginline.site + +// Lõhmus Family, The : https://lohmus.me/ +// Submitted by Heiki Lõhmus +lohmus.me + +// Lokalized : https://lokalized.nl +// Submitted by Noah Taheij +servers.run + +// Lovable : https://lovable.dev +// Submitted by Fabian Hedin 
+lovable.app +lovableproject.com + +// LubMAN UMCS Sp. z o.o : https://lubman.pl/ +// Submitted by Ireneusz Maliszewski +krasnik.pl +leczna.pl +lubartow.pl +lublin.pl +poniatowa.pl +swidnik.pl + +// Lug.org.uk : https://lug.org.uk +// Submitted by Jon Spriggs +glug.org.uk +lug.org.uk +lugs.org.uk + +// Lukanet Ltd : https://lukanet.com +// Submitted by Anton Avramov +barsy.bg +barsy.club +barsycenter.com +barsyonline.com +barsy.de +barsy.dev +barsy.eu +barsy.gr +barsy.in +barsy.info +barsy.io +barsy.me +barsy.menu +barsyonline.menu +barsy.mobi +barsy.net +barsy.online +barsy.org +barsy.pro +barsy.pub +barsy.ro +barsy.rs +barsy.shop +barsyonline.shop +barsy.site +barsy.store +barsy.support +barsy.uk +barsy.co.uk +barsyonline.co.uk + +// Lutra : https://lutra.ai +// Submitted by Joshua Newman +*.lutrausercontent.com + +// Luyani Inc. : https://luyani.com/ +// Submitted by Umut Gumeli +luyani.app +luyani.net + +// Magento Commerce +// Submitted by Damien Tournoud +*.magentosite.cloud + +// Mail.Ru Group : https://hb.cldmail.ru +// Submitted by Ilya Zaretskiy +hb.cldmail.ru + +// MathWorks : https://www.mathworks.com/ +// Submitted by Emily Reed +matlab.cloud +modelscape.com +mwcloudnonprod.com +polyspace.com + +// May First - People Link : https://mayfirst.org/ +// Submitted by Jamie McClelland +mayfirst.info +mayfirst.org + +// Maze Play : https://www.mazeplay.com +// Submitted by Adam Humpherys +mazeplay.com + +// McHost : https://mchost.ru +// Submitted by Evgeniy Subbotin +mcdir.me +mcdir.ru +vps.mcdir.ru +mcpre.ru + +// Mediatech : https://mediatech.by +// Submitted by Evgeniy Kozhuhovskiy +mediatech.by +mediatech.dev + +// Medicom Health : https://medicomhealth.com +// Submitted by Michael Olson +hra.health + +// MedusaJS, Inc : https://medusajs.com/ +// Submitted by Stevche Radevski +medusajs.app + +// Memset hosting : https://www.memset.com +// Submitted by Tom Whitwell +miniserver.com +memset.net + +// Messerli Informatik AG : https://www.messerli.ch/ +// 
Submitted by Ruben Schmidmeister +messerli.app + +// Meta Platforms, Inc. : https://meta.com/ +// Submitted by Jacob Cordero +atmeta.com +apps.fbsbx.com + +// MetaCentrum, CESNET z.s.p.o. : https://www.metacentrum.cz/en/ +// Submitted by Zdeněk Šustr and Radim Janča +*.cloud.metacentrum.cz +custom.metacentrum.cz +flt.cloud.muni.cz +usr.cloud.muni.cz + +// Meteor Development Group : https://www.meteor.com/hosting +// Submitted by Pierre Carrier +meteorapp.com +eu.meteorapp.com + // Michau Enterprises Limited : http://www.co.pl/ co.pl -// NYC.mn : http://www.information.nyc.mn -// Requested by Matthew Brown 2013-03-11 +// Microsoft Corporation : http://microsoft.com +// Submitted by Public Suffix List Admin +// Managed by Corporate Domains +// Microsoft Azure : https://home.azure +*.azurecontainer.io +azure-api.net +azure-mobile.net +azureedge.net +azurefd.net +azurestaticapps.net +1.azurestaticapps.net +2.azurestaticapps.net +3.azurestaticapps.net +4.azurestaticapps.net +5.azurestaticapps.net +6.azurestaticapps.net +7.azurestaticapps.net +centralus.azurestaticapps.net +eastasia.azurestaticapps.net +eastus2.azurestaticapps.net +westeurope.azurestaticapps.net +westus2.azurestaticapps.net +azurewebsites.net +cloudapp.net +trafficmanager.net +blob.core.windows.net +servicebus.windows.net + +// MikroTik : https://mikrotik.com +// Submitted by MikroTik SysAdmin Team +routingthecloud.com +sn.mynetname.net +routingthecloud.net +routingthecloud.org + +// minion.systems : http://minion.systems +// Submitted by Robert Böttinger +csx.cc + +// Mittwald CM Service GmbH & Co. 
KG : https://mittwald.de +// Submitted by Marco Rieger +mydbserver.com +webspaceconfig.de +mittwald.info +mittwaldserver.info +typo3server.info +project.space + +// MODX Systems LLC : https://modx.com +// Submitted by Elizabeth Southwell +modx.dev + +// Mozilla Foundation : https://mozilla.org/ +// Submitted by glob +bmoattachments.org + +// MSK-IX : https://www.msk-ix.ru/ +// Submitted by Khannanov Roman +net.ru +org.ru +pp.ru + +// Mythic Beasts : https://www.mythic-beasts.com +// Submitted by Paul Cammish +hostedpi.com +caracal.mythic-beasts.com +customer.mythic-beasts.com +fentiger.mythic-beasts.com +lynx.mythic-beasts.com +ocelot.mythic-beasts.com +oncilla.mythic-beasts.com +onza.mythic-beasts.com +sphinx.mythic-beasts.com +vs.mythic-beasts.com +x.mythic-beasts.com +yali.mythic-beasts.com +cust.retrosnub.co.uk + +// Nabu Casa : https://www.nabucasa.com +// Submitted by Paulus Schoutsen +ui.nabu.casa + +// Net at Work Gmbh : https://www.netatwork.de +// Submitted by Jan Jaeschke +cloud.nospamproxy.com +o365.cloud.nospamproxy.com + +// Net libre : https://www.netlib.re +// Submitted by Philippe PITTOLI +netlib.re + +// Netfy Domains : https://netfy.domains +// Submitted by Suranga Ranasinghe +netfy.app + +// Netlify : https://www.netlify.com +// Submitted by Jessica Parsons +netlify.app + +// Neustar Inc. +// Submitted by Trung Tran +4u.com + +// NFSN, Inc. : https://www.NearlyFreeSpeech.NET/ +// Submitted by Jeff Wheelhouse +nfshost.com + +// NFT.Storage : https://nft.storage/ +// Submitted by Vasco Santos or +ipfs.nftstorage.link + +// NGO.US Registry : https://nic.ngo.us +// Submitted by Alstra Solutions Ltd. 
Networking Team +ngo.us + +// ngrok : https://ngrok.com/ +// Submitted by Alan Shreve +ngrok.app +ngrok-free.app +ngrok.dev +ngrok-free.dev +ngrok.io +ap.ngrok.io +au.ngrok.io +eu.ngrok.io +in.ngrok.io +jp.ngrok.io +sa.ngrok.io +us.ngrok.io +ngrok.pizza +ngrok.pro + +// Nicolaus Copernicus University in Torun - MSK TORMAN : https://www.man.torun.pl +torun.pl + +// Nimbus Hosting Ltd. : https://www.nimbushosting.co.uk/ +// Submitted by Nicholas Ford +nh-serv.co.uk +nimsite.uk + +// No-IP.com : https://noip.com/ +// Submitted by Deven Reza +mmafan.biz +myftp.biz +no-ip.biz +no-ip.ca +fantasyleague.cc +gotdns.ch +3utilities.com +blogsyte.com +ciscofreak.com +damnserver.com +ddnsking.com +ditchyourip.com +dnsiskinky.com +dynns.com +geekgalaxy.com +health-carereform.com +homesecuritymac.com +homesecuritypc.com +myactivedirectory.com +mysecuritycamera.com +myvnc.com +net-freaks.com +onthewifi.com +point2this.com +quicksytes.com +securitytactics.com +servebeer.com +servecounterstrike.com +serveexchange.com +serveftp.com +servegame.com +servehalflife.com +servehttp.com +servehumour.com +serveirc.com +servemp3.com +servep2p.com +servepics.com +servequake.com +servesarcasm.com +stufftoread.com +unusualperson.com +workisboring.com +dvrcam.info +ilovecollege.info +no-ip.info +brasilia.me +ddns.me +dnsfor.me +hopto.me +loginto.me +noip.me +webhop.me +bounceme.net +ddns.net +eating-organic.net +mydissent.net +myeffect.net +mymediapc.net +mypsx.net +mysecuritycamera.net +nhlfan.net +no-ip.net +pgafan.net +privatizehealthinsurance.net +redirectme.net +serveblog.net +serveminecraft.net +sytes.net +cable-modem.org +collegefan.org +couchpotatofries.org +hopto.org +mlbfan.org +myftp.org +mysecuritycamera.org +nflfan.org +no-ip.org +read-books.org +ufcfan.org +zapto.org +no-ip.co.uk +golffan.us +noip.us +pointto.us + +// NodeArt : https://nodeart.io +// Submitted by Konstantin Nosov +stage.nodeart.io + +// Noop : https://noop.app +// Submitted by Nathaniel Schweinberg +*.developer.app 
+noop.app + +// Northflank Ltd. : https://northflank.com/ +// Submitted by Marco Suter +*.northflank.app +*.build.run +*.code.run +*.database.run +*.migration.run + +// Noticeable : https://noticeable.io +// Submitted by Laurent Pellegrino +noticeable.news + +// Notion Labs, Inc : https://www.notion.so/ +// Submitted by Jess Yao +notion.site + +// Now-DNS : https://now-dns.com +// Submitted by Steve Russell +dnsking.ch +mypi.co +myiphost.com +forumz.info +soundcast.me +tcp4.me +dnsup.net +hicam.net +now-dns.net +ownip.net +vpndns.net +dynserv.org +now-dns.org +x443.pw +ntdll.top +freeddns.us + +// nsupdate.info : https://www.nsupdate.info/ +// Submitted by Thomas Waldmann +nsupdate.info +nerdpol.ovh + +// NYC.mn : https://dot.nyc.mn/ +// Submitted by NYC.mn Subdomain Service nyc.mn +// O3O.Foundation : https://o3o.foundation/ +// Submitted by the prvcy.page Registry Team +prvcy.page + +// Obl.ong : https://obl.ong +// Submitted by Reese Armstrong +obl.ong + +// Observable, Inc. : https://observablehq.com +// Submitted by Mike Bostock +observablehq.cloud +static.observableusercontent.com + +// OMG.LOL : https://omg.lol +// Submitted by Adam Newbold +omg.lol + +// Omnibond Systems, LLC. 
: https://www.omnibond.com +// Submitted by Cole Estep +cloudycluster.net + +// OmniWe Limited : https://omniwe.com +// Submitted by Vicary Archangel +omniwe.site + +// One.com : https://www.one.com/ +// Submitted by Jacob Bunk Nielsen +123webseite.at +123website.be +simplesite.com.br +123website.ch +simplesite.com +123webseite.de +123hjemmeside.dk +123miweb.es +123kotisivu.fi +123siteweb.fr +simplesite.gr +123homepage.it +123website.lu +123website.nl +123hjemmeside.no +service.one +simplesite.pl +123paginaweb.pt +123minsida.se + +// ONID : https://get.onid.ca +// Submitted by ONID Engineering Team +onid.ca + +// Open Domains : https://open-domains.net +// Submitted by William Harrison +is-a-fullstack.dev +is-cool.dev +is-not-a.dev +localplayer.dev +is-local.org + +// Open Social : https://www.getopensocial.com/ +// Submitted by Alexander Varwijk +opensocial.site + +// OpenAI : https://openai.com +// Submitted by Thomas Shadwell +*.oaiusercontent.com + +// OpenCraft GmbH : http://opencraft.com/ +// Submitted by Sven Marnach +opencraft.hosting + +// OpenHost : https://registry.openhost.uk +// Submitted by OpenHost Registry Team +16-b.it +32-b.it +64-b.it + +// OpenResearch GmbH : https://openresearch.com/ +// Submitted by Philipp Schmid +orsites.com + // Opera Software, A.S.A. -// Requested by Yngve Pettersen 2009-11-26 +// Submitted by Yngve Pettersen operaunite.com -// Red Hat, Inc. OpenShift : https://openshift.redhat.com/ -// Requested by Tim Kramer 2012-10-24 -rhcloud.com +// Oracle Dyn : https://cloud.oracle.com/home https://dyn.com/dns/ +// Submitted by Gregory Drake +// Note: This is intended to also include customer-oci.com due to wildcards implicitly including the current label +*.customer-oci.com +*.oci.customer-oci.com +*.ocp.customer-oci.com +*.ocs.customer-oci.com +*.oraclecloudapps.com +*.oraclegovcloudapps.com +*.oraclegovcloudapps.uk + +// Orange : https://www.orange.com +// Submitted by Alexandre Linte +tech.orange + +// OsSav Technology Ltd. 
: https://ossav.com/ +// Submitted by OsSav Technology Ltd. +// https://nic.can.re +can.re + +// Oursky Limited : https://authgear.com/ +// Submitted by Authgear Team & Skygear Developer +authgear-staging.com +authgearapps.com +skygearapp.com + +// OutSystems +// Submitted by Duarte Santos +outsystemscloud.com + +// OVHcloud : https://ovhcloud.com +// Submitted by Vincent Cassé +*.hosting.ovh.net +*.webpaas.ovh.net + +// OwnProvider GmbH : http://www.ownprovider.com +// Submitted by Jan Moennich +ownprovider.com +own.pm + +// OwO : https://whats-th.is/ +// Submitted by Dean Sheather +*.owo.codes + +// OX : http://www.ox.rs +// Submitted by Adam Grand +ox.rs + +// oy.lc +// Submitted by Charly Coste +oy.lc + +// Pagefog : https://pagefog.com/ +// Submitted by Derek Myers +pgfog.com + +// PageXL : https://pagexl.com +// Submitted by Yann Guichard +pagexl.com + +// Pantheon Systems, Inc. : https://pantheon.io/ +// Submitted by Gary Dylina +gotpantheon.com +pantheonsite.io + +// Paywhirl, Inc : https://paywhirl.com/ +// Submitted by Daniel Netzer +*.paywhirl.com + +// pcarrier.ca Software Inc : https://pcarrier.ca/ +// Submitted by Pierre Carrier +*.xmit.co +xmit.dev +madethis.site +srv.us +gh.srv.us +gl.srv.us + +// PE Ulyanov Kirill Sergeevich : https://airy.host +// Submitted by Kirill Ulyanov +lk3.ru + +// Peplink | Pepwave : http://peplink.com/ +// Submitted by Steve Leung +mypep.link + +// Perspecta : https://perspecta.com/ +// Submitted by Kenneth Van Alstyne +perspecta.cloud + +// Planet-Work : https://www.planet-work.com/ +// Submitted by Frédéric VANNIÈRE +on-web.fr + +// Platform.sh : https://platform.sh +// Submitted by Nikola Kotur +*.upsun.app +upsunapp.com +ent.platform.sh +eu.platform.sh +us.platform.sh +*.platformsh.site +*.tst.site + +// Platter : https://platter.dev +// Submitted by Patrick Flor +platter-app.dev +platterp.us + +// Pley AB : https://www.pley.com/ +// Submitted by Henning Pohl +pley.games + +// Porter : https://porter.run/ +// 
Submitted by Rudraksh MK +onporter.run + +// Positive Codes Technology Company : http://co.bn/faq.html +// Submitted by Zulfais +co.bn + +// Postman, Inc : https://postman.com +// Submitted by Rahul Dhawan +postman-echo.com +pstmn.io +mock.pstmn.io +httpbin.org + +// prequalifyme.today : https://prequalifyme.today +// Submitted by DeepakTiwari deepak@ivylead.io +prequalifyme.today + +// prgmr.com : https://prgmr.com/ +// Submitted by Sarah Newman +xen.prgmr.com // priv.at : http://www.nic.priv.at/ -// Requested by registry 2008-06-09 +// Submitted by registry priv.at +// PROJECT ELIV : https://eliv.kr/ +// Submitted by PROJECT ELIV Domain Team +c01.kr +eliv-cdn.kr +eliv-dns.kr +mmv.kr +vki.kr + +// project-study : https://project-study.com +// Submitted by yumenewa +dev.project-study.com + +// Protonet GmbH : http://protonet.io +// Submitted by Martin Meier +protonet.io + +// Publication Presse Communication SARL : https://ppcom.fr +// Submitted by Yaacov Akiba Slama +chirurgiens-dentistes-en-france.fr +byen.site + +// pubtls.org : https://www.pubtls.org +// Submitted by Kor Nielsen +pubtls.org + +// PythonAnywhere LLP : https://www.pythonanywhere.com +// Submitted by Giles Thomas +pythonanywhere.com +eu.pythonanywhere.com + +// QA2 +// Submitted by Daniel Dent : https://www.danieldent.com/ +qa2.com + +// QCX +// Submitted by Cassandra Beelen +qcx.io +*.sys.qcx.io + +// QNAP System Inc : https://www.qnap.com +// Submitted by Nick Chang +myqnapcloud.cn +alpha-myqnapcloud.com +dev-myqnapcloud.com +mycloudnas.com +mynascloud.com +myqnapcloud.com + +// QOTO, Org. 
+// Submitted by Jeffrey Phillips Freeman +qoto.io + +// Qualifio : https://qualifio.com/ +// Submitted by Xavier De Cock +qualifioapp.com + +// Quality Unit : https://qualityunit.com +// Submitted by Vasyl Tsalko +ladesk.com + +// QuickBackend : https://www.quickbackend.com +// Submitted by Dani Biro +qbuser.com + +// Quip : https://quip.com +// Submitted by Patrick Linehan +*.quipelements.com + +// Qutheory LLC : http://qutheory.io +// Submitted by Jonas Schwartz +vapor.cloud +vaporcloud.io + +// Rackmaze LLC : https://www.rackmaze.com +// Submitted by Kirill Pertsev +rackmaze.com +rackmaze.net + +// Rad Web Hosting : https://radwebhosting.com +// Submitted by Scott Claeys +cloudsite.builders +myradweb.net +servername.us + +// Radix FZC : http://domains.in.net +// Submitted by Gavin Brown +web.in +in.net + +// Raidboxes GmbH : https://raidboxes.de +// Submitted by Auke Tembrink +myrdbx.io +site.rb-hosting.io + +// Rancher Labs, Inc : https://rancher.com +// Submitted by Vincent Fiduccia +*.on-rancher.cloud +*.on-k3s.io +*.on-rio.io + +// RavPage : https://www.ravpage.co.il +// Submitted by Roni Horowitz +ravpage.co.il + +// Read The Docs, Inc : https://www.readthedocs.org +// Submitted by David Fischer +readthedocs-hosted.com +readthedocs.io + +// Red Hat, Inc. 
OpenShift : https://openshift.redhat.com/ +// Submitted by Tim Kramer +rhcloud.com + +// Redgate Software : https://red-gate.com +// Submitted by Andrew Farries +instances.spawn.cc + +// Render : https://render.com +// Submitted by Anurag Goel +onrender.com +app.render.com + +// Repl.it : https://repl.it +// Submitted by Lincoln Bergeson +replit.app +id.replit.app +firewalledreplit.co +id.firewalledreplit.co +repl.co +id.repl.co +replit.dev +archer.replit.dev +bones.replit.dev +canary.replit.dev +global.replit.dev +hacker.replit.dev +id.replit.dev +janeway.replit.dev +kim.replit.dev +kira.replit.dev +kirk.replit.dev +odo.replit.dev +paris.replit.dev +picard.replit.dev +pike.replit.dev +prerelease.replit.dev +reed.replit.dev +riker.replit.dev +sisko.replit.dev +spock.replit.dev +staging.replit.dev +sulu.replit.dev +tarpit.replit.dev +teams.replit.dev +tucker.replit.dev +wesley.replit.dev +worf.replit.dev +repl.run + +// Resin.io : https://resin.io +// Submitted by Tim Perry +resindevice.io +devices.resinstaging.io + +// RethinkDB : https://www.rethinkdb.com/ +// Submitted by Chris Kastorff +hzc.io + +// Rico Developments Limited : https://adimo.co +// Submitted by Colin Brown +adimo.co.uk + +// Riseup Networks : https://riseup.net +// Submitted by Micah Anderson +itcouldbewor.se + +// Roar Domains LLC : https://roar.basketball/ +// Submitted by Gavin Brown +aus.basketball +nz.basketball + +// ROBOT PAYMENT INC. 
: https://www.robotpayment.co.jp/ +// Submitted by Kentaro Takamori +subsc-pay.com +subsc-pay.net + +// Rochester Institute of Technology : http://www.rit.edu/ +// Submitted by Jennifer Herting +git-pages.rit.edu + +// Rocky Enterprise Software Foundation : https://resf.org +// Submitted by Neil Hanlon +rocky.page + +// Ruhr University Bochum : https://www.ruhr-uni-bochum.de/ +// Submitted by Andreas Jobs +rub.de +ruhr-uni-bochum.de +io.noc.ruhr-uni-bochum.de + +// Rusnames Limited : http://rusnames.ru/ +// Submitted by Sergey Zotov +биз.рус +ком.рус +крым.рус +мир.рус +мск.рус +орг.рус +самара.рус +сочи.рус +спб.рус +я.рус + +// Russian Academy of Sciences +// Submitted by Tech Support +ras.ru + +// Sakura Frp : https://www.natfrp.com +// Submitted by Bobo Liu +nyat.app + +// SAKURA Internet Inc. : https://www.sakura.ad.jp/ +// Submitted by Internet Service Department +180r.com +dojin.com +sakuratan.com +sakuraweb.com +x0.com +2-d.jp +bona.jp +crap.jp +daynight.jp +eek.jp +flop.jp +halfmoon.jp +jeez.jp +matrix.jp +mimoza.jp +ivory.ne.jp +mail-box.ne.jp +mints.ne.jp +mokuren.ne.jp +opal.ne.jp +sakura.ne.jp +sumomo.ne.jp +topaz.ne.jp +netgamers.jp +nyanta.jp +o0o0.jp +rdy.jp +rgr.jp +rulez.jp +s3.isk01.sakurastorage.jp +s3.isk02.sakurastorage.jp +saloon.jp +sblo.jp +skr.jp +tank.jp +uh-oh.jp +undo.jp +rs.webaccel.jp +user.webaccel.jp +websozai.jp +xii.jp +squares.net +jpn.org +kirara.st +x0.to +from.tv +sakura.tv + +// Salesforce.com, Inc. : https://salesforce.com/ +// Submitted by Salesforce Public Suffix List Team +*.builder.code.com +*.dev-builder.code.com +*.stg-builder.code.com +*.001.test.code-builder-stg.platform.salesforce.com +*.d.crm.dev +*.w.crm.dev +*.wa.crm.dev +*.wb.crm.dev +*.wc.crm.dev +*.wd.crm.dev +*.we.crm.dev +*.wf.crm.dev + +// Sandstorm Development Group, Inc. 
: https://sandcats.io/ +// Submitted by Asheesh Laroia +sandcats.io + +// SBE network solutions GmbH : https://www.sbe.de/ +// Submitted by Norman Meilick +logoip.com +logoip.de + +// Scaleway : https://www.scaleway.com/ +// Submitted by Scaleway PSL Maintainer +fr-par-1.baremetal.scw.cloud +fr-par-2.baremetal.scw.cloud +nl-ams-1.baremetal.scw.cloud +cockpit.fr-par.scw.cloud +ddl.fr-par.scw.cloud +dtwh.fr-par.scw.cloud +fnc.fr-par.scw.cloud +functions.fnc.fr-par.scw.cloud +ifr.fr-par.scw.cloud +k8s.fr-par.scw.cloud +nodes.k8s.fr-par.scw.cloud +kafk.fr-par.scw.cloud +mgdb.fr-par.scw.cloud +rdb.fr-par.scw.cloud +s3.fr-par.scw.cloud +s3-website.fr-par.scw.cloud +scbl.fr-par.scw.cloud +whm.fr-par.scw.cloud +priv.instances.scw.cloud +pub.instances.scw.cloud +k8s.scw.cloud +cockpit.nl-ams.scw.cloud +ddl.nl-ams.scw.cloud +dtwh.nl-ams.scw.cloud +ifr.nl-ams.scw.cloud +k8s.nl-ams.scw.cloud +nodes.k8s.nl-ams.scw.cloud +kafk.nl-ams.scw.cloud +mgdb.nl-ams.scw.cloud +rdb.nl-ams.scw.cloud +s3.nl-ams.scw.cloud +s3-website.nl-ams.scw.cloud +scbl.nl-ams.scw.cloud +whm.nl-ams.scw.cloud +cockpit.pl-waw.scw.cloud +ddl.pl-waw.scw.cloud +dtwh.pl-waw.scw.cloud +ifr.pl-waw.scw.cloud +k8s.pl-waw.scw.cloud +nodes.k8s.pl-waw.scw.cloud +kafk.pl-waw.scw.cloud +mgdb.pl-waw.scw.cloud +rdb.pl-waw.scw.cloud +s3.pl-waw.scw.cloud +s3-website.pl-waw.scw.cloud +scbl.pl-waw.scw.cloud +scalebook.scw.cloud +smartlabeling.scw.cloud +dedibox.fr + +// schokokeks.org GbR : https://schokokeks.org/ +// Submitted by Hanno Böck +schokokeks.net + +// Scottish Government : https://www.gov.scot +// Submitted by Martin Ellis +gov.scot +service.gov.scot + +// Scry Security : http://www.scrysec.com +// Submitted by Shante Adam +scrysec.com + +// Scrypted : https://scrypted.app +// Submitted by Koushik Dutta +client.scrypted.io + +// Securepoint GmbH : https://www.securepoint.de +// Submitted by Erik Anders +firewall-gateway.com +firewall-gateway.de +my-gateway.de +my-router.de +spdns.de +spdns.eu +firewall-gateway.net 
+my-firewall.org +myfirewall.org +spdns.org + +// Seidat : https://www.seidat.com +// Submitted by Artem Kondratev +seidat.net + +// Sellfy : https://sellfy.com +// Submitted by Yuriy Romadin +sellfy.store + +// Sendmsg : https://www.sendmsg.co.il +// Submitted by Assaf Stern +minisite.ms + +// Senseering GmbH : https://www.senseering.de +// Submitted by Felix Mönckemeyer +senseering.net + +// Servebolt AS : https://servebolt.com +// Submitted by Daniel Kjeserud +servebolt.cloud + +// Service Online LLC : http://drs.ua/ +// Submitted by Serhii Bulakh +biz.ua +co.ua +pp.ua + +// Shanghai Accounting Society : https://www.sasf.org.cn +// Submitted by Information Administration +as.sh.cn + +// Sheezy.Art : https://sheezy.art +// Submitted by Nyoom +sheezy.games + +// Shopblocks : http://www.shopblocks.com/ +// Submitted by Alex Bowers +myshopblocks.com + +// Shopify : https://www.shopify.com +// Submitted by Alex Richter +myshopify.com + +// Shopit : https://www.shopitcommerce.com/ +// Submitted by Craig McMahon +shopitsite.com + +// shopware AG : https://shopware.com +// Submitted by Jens Küper +shopware.shop +shopware.store + +// Siemens Mobility GmbH +// Submitted by Oliver Graebner +mo-siemens.io + +// SinaAppEngine : http://sae.sina.com.cn/ +// Submitted by SinaAppEngine +1kapp.com +appchizi.com +applinzi.com +sinaapp.com +vipsinaapp.com + +// Siteleaf : https://www.siteleaf.com/ +// Submitted by Skylar Challand +siteleaf.net + +// Small Technology Foundation : https://small-tech.org +// Submitted by Aral Balkan +small-web.org + +// Smallregistry by Promopixel SARL : https://www.smallregistry.net +// Former AFNIC's SLDs +// Submitted by Jérôme Lipowicz +aeroport.fr +avocat.fr +chambagri.fr +chirurgiens-dentistes.fr +experts-comptables.fr +medecin.fr +notaires.fr +pharmacien.fr +port.fr +veterinaire.fr + +// Smoove.io : https://www.smoove.io/ +// Submitted by Dan Kozak +vp4.me + +// Snowflake Inc : https://www.snowflake.com/ +// Submitted by Sam Haar 
+*.snowflake.app +*.privatelink.snowflake.app +streamlit.app +streamlitapp.com + +// Snowplow Analytics : https://snowplowanalytics.com/ +// Submitted by Ian Streeter +try-snowplow.com + +// Software Consulting Michal Zalewski : https://www.mafelo.com +// Submitted by Michal Zalewski +mafelo.net + +// Sony Interactive Entertainment LLC : https://sie.com/ +// Submitted by David Coles +playstation-cloud.com + +// SourceHut : https://sourcehut.org +// Submitted by Drew DeVault +srht.site + +// SourceLair PC : https://www.sourcelair.com +// Submitted by Antonis Kalipetis +apps.lair.io +*.stolos.io + +// sourceWAY GmbH : https://sourceway.de +// Submitted by Richard Reiber +4.at +my.at +my.de +*.nxa.eu +nx.gw + +// SpeedPartner GmbH : https://www.speedpartner.de/ +// Submitted by Stefan Neufeind +customer.speedpartner.de + +// Spreadshop (sprd.net AG) : https://www.spreadshop.com/ +// Submitted by Martin Breest +myspreadshop.at +myspreadshop.com.au +myspreadshop.be +myspreadshop.ca +myspreadshop.ch +myspreadshop.com +myspreadshop.de +myspreadshop.dk +myspreadshop.es +myspreadshop.fi +myspreadshop.fr +myspreadshop.ie +myspreadshop.it +myspreadshop.net +myspreadshop.nl +myspreadshop.no +myspreadshop.pl +myspreadshop.se +myspreadshop.co.uk + +// StackBlitz : https://stackblitz.com +// Submitted by Dominic Elm +w-corp-staticblitz.com +w-credentialless-staticblitz.com +w-staticblitz.com + +// Stackhero : https://www.stackhero.io +// Submitted by Adrien Gillon +stackhero-network.com + +// STACKIT GmbH & Co. 
KG : https://www.stackit.de/en/ +// Submitted by STACKIT-DNS Team (Simon Stier) +runs.onstackit.cloud +stackit.gg +stackit.rocks +stackit.run +stackit.zone + +// Staclar : https://staclar.com +// Submitted by Q Misell +// Submitted by Matthias Merkel +musician.io +novecore.site + +// Standard Library : https://stdlib.com +// Submitted by Jacob Lee +api.stdlib.com + +// stereosense GmbH : https://www.involve.me +// Submitted by Florian Burmann +feedback.ac +forms.ac +assessments.cx +calculators.cx +funnels.cx +paynow.cx +quizzes.cx +researched.cx +tests.cx +surveys.so + +// Storacha Network : https://storacha.network +// Submitted by Alan Shaw +ipfs.storacha.link +ipfs.w3s.link + +// Storebase : https://www.storebase.io +// Submitted by Tony Schirmer +storebase.store + +// Storipress : https://storipress.com +// Submitted by Benno Liu +storipress.app + +// Storj Labs Inc. : https://storj.io/ +// Submitted by Philip Hutchins +storj.farm + +// Strapi : https://strapi.io/ +// Submitted by Florent Baldino +strapiapp.com +media.strapiapp.com + +// Strategic System Consulting (eApps Hosting) : https://www.eapps.com/ +// Submitted by Alex Oancea +vps-host.net +atl.jelastic.vps-host.net +njs.jelastic.vps-host.net +ric.jelastic.vps-host.net + +// Streak : https://streak.com +// Submitted by Blake Kadatz +streak-link.com +streaklinks.com +streakusercontent.com + +// Student-Run Computing Facility : https://www.srcf.net/ +// Submitted by Edwin Balani +soc.srcf.net +user.srcf.net + +// Studenten Net Twente : http://www.snt.utwente.nl/ +// Submitted by Silke Hofstra +utwente.io + +// Sub 6 Limited : http://www.sub6.com +// Submitted by Dan Miller +temp-dns.com + +// Supabase : https://supabase.io +// Submitted by Inian Parameshwaran +supabase.co +supabase.in +supabase.net + +// Syncloud : https://syncloud.org +// Submitted by Boris Rybalkin +syncloud.it + +// Synology, Inc. 
: https://www.synology.com/ +// Submitted by Rony Weng +dscloud.biz +direct.quickconnect.cn +dsmynas.com +familyds.com +diskstation.me +dscloud.me +i234.me +myds.me +synology.me +dscloud.mobi +dsmynas.net +familyds.net +dsmynas.org +familyds.org +direct.quickconnect.to +vpnplus.to + +// Tabit Technologies Ltd. : https://tabit.cloud/ +// Submitted by Oren Agiv +mytabit.com +mytabit.co.il +tabitorder.co.il + +// TAIFUN Software AG : http://taifun-software.de +// Submitted by Bjoern Henke +taifun-dns.de + +// Tailscale Inc. : https://www.tailscale.com +// Submitted by David Anderson +ts.net +*.c.ts.net + +// TASK geographical domains : https://task.gda.pl/en/services/for-entrepreneurs/ +gda.pl +gdansk.pl +gdynia.pl +med.pl +sopot.pl + +// Tave Creative Corp : https://tave.com/ +// Submitted by Adrian Ziemkowski +taveusercontent.com + +// tawk.to, Inc : https://www.tawk.to +// Submitted by tawk.to developer team +p.tawk.email +p.tawkto.email + +// Tche.br : https://tche.br +// Submitted by Bruno Lorensi +tche.br + +// team.blue : https://team.blue +// Submitted by Cedric Dubois +site.tb-hosting.com + +// Teckids e.V. : https://www.teckids.org +// Submitted by Dominik George +edugit.io +s3.teckids.org + +// Telebit : https://telebit.cloud +// Submitted by AJ ONeal +telebit.app +telebit.io +*.telebit.xyz + +// Thingdust AG : https://thingdust.com/ +// Submitted by Adrian Imboden +*.firenet.ch +*.svc.firenet.ch +reservd.com +thingdustdata.com +cust.dev.thingdust.io +reservd.dev.thingdust.io +cust.disrec.thingdust.io +reservd.disrec.thingdust.io +cust.prod.thingdust.io +cust.testing.thingdust.io +reservd.testing.thingdust.io + +// ticket i/O GmbH : https://ticket.io +// Submitted by Christian Franke +tickets.io + +// Tlon.io : https://tlon.io +// Submitted by Mark Staarink +arvo.network +azimuth.network +tlon.network + +// Tor Project, Inc. 
: https://torproject.org +// Submitted by Antoine Beaupré +torproject.net +pages.torproject.net + +// TownNews.com : http://www.townnews.com +// Submitted by Dustin Ward +townnews-staging.com + +// TrafficPlex GmbH : https://www.trafficplex.de/ +// Submitted by Phillipp Röll +12hp.at +2ix.at +4lima.at +lima-city.at +12hp.ch +2ix.ch +4lima.ch +lima-city.ch +trafficplex.cloud +de.cool +12hp.de +2ix.de +4lima.de +lima-city.de +1337.pictures +clan.rip +lima-city.rocks +webspace.rocks +lima.zone + +// TransIP : https://www.transip.nl +// Submitted by Rory Breuk and Cedric Dubois +*.transurl.be +*.transurl.eu +site.transip.me +*.transurl.nl + +// TuxFamily : http://tuxfamily.org +// Submitted by TuxFamily administrators +tuxfamily.org + +// TwoDNS : https://www.twodns.de/ +// Submitted by TwoDNS-Support +dd-dns.de +dray-dns.de +draydns.de +dyn-vpn.de +dynvpn.de +mein-vigor.de +my-vigor.de +my-wan.de +syno-ds.de +synology-diskstation.de +synology-ds.de +diskstation.eu +diskstation.org + +// Typedream : https://typedream.com +// Submitted by Putri Karunia +typedream.app + +// Typeform : https://www.typeform.com +// Submitted by Typeform +pro.typeform.com + +// Uberspace : https://uberspace.de +// Submitted by Moritz Werner +*.uberspace.de +uber.space + +// UDR Limited : http://www.udr.hk.com +// Submitted by registry +hk.com +inc.hk +ltd.hk +hk.org + +// UK Intis Telecom LTD : https://it.com +// Submitted by ITComdomains +it.com + +// Unison Computing, PBC : https://unison.cloud +// Submitted by Simon Højberg +unison-services.cloud + +// United Gameserver GmbH : https://united-gameserver.de +// Submitted by Stefan Schwarz +virtual-user.de +virtualuser.de + +// United States Writing Corporation : https://uswriting.co +// Submitted by Andrew Sampson +obj.ag + +// UNIVERSAL DOMAIN REGISTRY : https://www.udr.org.yt/ +// see also: whois -h whois.udr.org.yt help +// Submitted by Atanunu Igbunuroghene +name.pm +sch.tf +biz.wf +sch.wf +org.yt + +// University of Banja Luka : 
https://unibl.org +// Domains for Republic of Srpska administrative entity. +// Submitted by Marko Ivanovic +rs.ba + +// University of Bielsko-Biala regional domain : http://dns.bielsko.pl/ +// Submitted by Marcin +bielsko.pl + +// urown.net : https://urown.net +// Submitted by Hostmaster +urown.cloud +dnsupdate.info + +// US REGISTRY LLC : http://us.org +// Submitted by Gavin Brown +us.org + +// V.UA Domain Registry: https://www.v.ua/ +// Submitted by Serhii Rostilo +v.ua + +// Val Town, Inc : https://val.town/ +// Submitted by Tom MacWright +val.run +web.val.run + +// Vercel, Inc : https://vercel.com/ +// Submitted by Laurens Duijvesteijn +vercel.app +v0.build +vercel.dev +vusercontent.net +vercel.run +now.sh + +// VeryPositive SIA : http://very.lv +// Submitted by Danko Aleksejevs +2038.io + +// Virtual-Info : https://www.virtual-info.info/ +// Submitted by Adnan RIHAN +v-info.info + +// VistaBlog : https://vistablog.ir/ +// Submitted by Hossein Piri +vistablog.ir + +// Viva Republica, Inc. : https://toss.im/ +// Submitted by Deus Team +deus-canvas.com + +// Voorloper.com : https://voorloper.com +// Submitted by Nathan van Bakel +voorloper.cloud + +// Vultr Objects : https://www.vultr.com/products/object-storage/ +// Submitted by Niels Maumenee +*.vultrobjects.com + +// Waffle Computer Inc., Ltd. : https://docs.waffleinfo.com +// Submitted by Masayuki Note +wafflecell.com + +// Webflow, Inc. 
: https://www.webflow.com +// Submitted by Webflow Security Team +webflow.io +webflowtest.io + +// WebHare bv : https://www.webhare.com/ +// Submitted by Arnold Hendriks +*.webhare.dev + +// WebHotelier Technologies Ltd : https://www.webhotelier.net/ +// Submitted by Apostolos Tsakpinis +bookonline.app +hotelwithflight.com +reserve-online.com +reserve-online.net + +// WebPros International, LLC : https://webpros.com/ +// Submitted by Nicolas Rochelemagne +cprapid.com +pleskns.com +wp2.host +pdns.page +plesk.page +cpanel.site +wpsquared.site + +// WebWaddle Ltd : https://webwaddle.com/ +// Submitted by Merlin Glander +*.wadl.top + +// Western Digital Technologies, Inc : https://www.wdc.com +// Submitted by Jung Jin +remotewd.com + +// Whatbox Inc. : https://whatbox.ca/ +// Submitted by Anthony Ryan +box.ca + +// WIARD Enterprises : https://wiardweb.com +// Submitted by Kidd Hustle +pages.wiardweb.com + +// Wikimedia Labs : https://wikitech.wikimedia.org +// Submitted by Arturo Borrero Gonzalez +toolforge.org +wmcloud.org +wmflabs.org + +// William Harrison : https://wharrison.com.au +// Submitted by William Harrison +wdh.app +hrsn.dev + +// Windsurf : https://windsurf.com +// Submitted by Douglas Chen +windsurf.app +windsurf.build + +// WISP : https://wisp.gg +// Submitted by Stepan Fedotov +panel.gg +daemon.panel.gg + +// Wix.com, Inc. 
: https://www.wix.com +// Submitted by Shahar Talmi / Alon Kochba +wixsite.com +wixstudio.com +editorx.io +wixstudio.io +wix.run + +// Wizard Zines : https://wizardzines.com +// Submitted by Julia Evans +messwithdns.com + +// WoltLab GmbH : https://www.woltlab.com +// Submitted by Tim Düsterhus +woltlab-demo.com +myforum.community +community-pro.de +diskussionsbereich.de +community-pro.net +meinforum.net + +// Woods Valldata : https://www.woodsvalldata.co.uk/ +// Submitted by Chris Whittle +affinitylottery.org.uk +raffleentry.org.uk +weeklylottery.org.uk + +// WP Engine : https://wpengine.com/ +// Submitted by Michael Smith +// Submitted by Brandon DuRette +wpenginepowered.com +js.wpenginepowered.com + +// XenonCloud GbR : https://xenoncloud.net +// Submitted by Julian Uphoff +half.host + +// XnBay Technology : http://www.xnbay.com/ +// Submitted by XnBay Developer +xnbay.com +u2.xnbay.com +u2-local.xnbay.com + +// XS4ALL Internet bv : https://www.xs4all.nl/ +// Submitted by Daniel Mostertman +cistron.nl +demon.nl +xs4all.space + +// Yandex.Cloud LLC : https://cloud.yandex.com +// Submitted by Alexander Lodin +yandexcloud.net +storage.yandexcloud.net +website.yandexcloud.net + +// YesCourse Pty Ltd : https://yescourse.com +// Submitted by Atul Bhouraskar +official.academy + +// Yola : https://www.yola.com/ +// Submitted by Stefano Rivera +yolasite.com + +// Yunohost : https://yunohost.org +// Submitted by Valentin Grimaud +ynh.fr +nohost.me +noho.st + // ZaNiC : http://www.za.net/ -// Requested by registry 2009-10-03 +// Submitted by registry za.net za.org +// ZAP-Hosting GmbH & Co. 
KG : https://zap-hosting.com +// Submitted by Julian Alker +zap.cloud + +// Zeabur : https://zeabur.com/ +// Submitted by Zeabur Team +zeabur.app + +// Zerops : https://zerops.io/ +// Submitted by Zerops Team +*.zerops.app + +// Zine EOOD : https://zine.bg/ +// Submitted by Martin Angelov +bss.design + +// Zitcom A/S : https://www.zitcom.dk +// Submitted by Emil Stahl +basicserver.io +virtualserver.io +enterprisecloud.nu + +// Zone.ID: https://zone.id +// Submitted by Gx1.org +zone.id + // ===END PRIVATE DOMAINS=== From 71fe7e17a491fe396e1dd28ed13068bdf2f7c3a8 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 15 Jul 2025 10:38:50 +0900 Subject: [PATCH 083/169] Update CHANGES.md for 2.0.2 --- CHANGES.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index a6c131a5..baab5de0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,13 @@ +2.0.2 +----- + +### Fixes + +* Fixes for `org.archive.net.PublicSuffixes` [#110](https://github.com/iipc/webarchive-commons/pull/110) + * Updated to the latest version of the public suffix list. + * Fixed parsing failures with newer list versions. + * Moved `effective_tld_names.dat` to `org/archive/effective_tld_names.dat` to prevent conflict with `crawler-commons`. 
+ 2.0.1 ----- From 1765320fb111a96097407f2222c3eaa38b775b55 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 15 Jul 2025 10:54:25 +0900 Subject: [PATCH 084/169] Update from OSSRH to Central portal https://central.sonatype.org/news/20250326_ossrh_sunset/ --- pom.xml | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/pom.xml b/pom.xml index e03dd34d..b3c103e1 100644 --- a/pom.xml +++ b/pom.xml @@ -203,25 +203,22 @@ release - - - ossrh - https://oss.sonatype.org/content/repositories/snapshots - - - org.sonatype.plugins - nexus-staging-maven-plugin - 1.7.0 + org.sonatype.central + central-publishing-maven-plugin + 0.7.0 true - ossrh - https://oss.sonatype.org/ - true + central + + org.apache.maven.plugins + maven-release-plugin + 3.1.1 + org.apache.maven.plugins maven-source-plugin From e7fdd309c6e3daeaa06f3758a3d9856839a08b59 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 15 Jul 2025 10:57:27 +0900 Subject: [PATCH 085/169] Bump junit-jupiter from 5.12.2 to 5.13.3 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b3c103e1..68b912c7 100644 --- a/pom.xml +++ b/pom.xml @@ -54,7 +54,7 @@ org.junit.jupiter junit-jupiter - 5.12.2 + 5.13.3 test From 40f11d8ea65da31904e8fb60a420511b147bd494 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 15 Jul 2025 10:58:13 +0900 Subject: [PATCH 086/169] [maven-release-plugin] prepare release webarchive-commons-2.0.2 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 68b912c7..b7be51aa 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 2.0.2-SNAPSHOT + 2.0.2 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-2.0.0 + webarchive-commons-2.0.2 From 
7c848622cc5de634c0ecd41eab9ebb0bedbf6df3 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 15 Jul 2025 10:58:17 +0900 Subject: [PATCH 087/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index b7be51aa..02e06e76 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 2.0.2 + 2.0.3-SNAPSHOT jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-2.0.2 + webarchive-commons-2.0.0 From 7c1cb7f54a970116ebca6a76beb6e91695b8ca97 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Tue, 15 Jul 2025 10:34:13 +0900 Subject: [PATCH 088/169] Upgrade from commons-lang 2.6 to commons-lang3 3.18.0 commons-lang 2.x was last released in 2011 and has unpatched vulnerabilities. --- CHANGES.md | 19 +++++++++++++++++++ pom.xml | 6 +++--- .../extract/JSONViewExtractorOutput.java | 2 +- .../archive/format/cdx/FieldSplitLine.java | 2 +- .../TimestampBestPickDedupIterator.java | 2 +- .../format/json/CrossProductOfLists.java | 2 +- .../org/archive/format/json/JSONView.java | 2 +- .../archive/hadoop/ArchiveJSONViewLoader.java | 2 +- .../java/org/archive/io/arc/ARCRecord.java | 2 +- .../java/org/archive/io/warc/WARCReader.java | 2 +- .../java/org/archive/io/warc/WARCWriter.java | 2 +- src/main/java/org/archive/util/FileUtils.java | 18 +++++++++--------- .../java/org/archive/util/PropertyUtils.java | 2 +- src/main/java/org/archive/util/Recorder.java | 2 +- src/main/java/org/archive/util/TextUtils.java | 8 ++++---- .../archive/io/ArchiveReaderFactoryTest.java | 2 +- .../org/archive/url/UsableURIFactoryTest.java | 2 +- .../java/org/archive/util/FileUtilsTest.java | 6 +++--- 18 files changed, 51 insertions(+), 32 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index baab5de0..54056f65 
100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,22 @@ +Unreleased +---------- + +### Changes + +`FileUtils.pagedLines()` and `FileUtils.expandRange()` now return the Apache Commons Lang 3 version of `LongRange`. +Users of these methods may need to make the following changes: + +| Old | New | +|-------------------------------------------------|---------------------------------------------| +| `import org.apache.commons.lang.math.LongRange` | `import org.apache.commons.lang3.LongRange` | +| `new LongRange(min, max)` | `LongRange.of(min, max)` | +| `longRange.getMaximumLong()` | `longRange.getMaximum()` | +| `longRange.getMinimumLong()` | `longRange.getMinimum()` | + +### Dependency upgrades + +- **commons-lang**: 2.6 → 3.18.0 + 2.0.2 ----- diff --git a/pom.xml b/pom.xml index 02e06e76..93dd4514 100644 --- a/pom.xml +++ b/pom.xml @@ -133,9 +133,9 @@
- commons-lang - commons-lang - 2.6 + org.apache.commons + commons-lang3 + 3.18.0 diff --git a/src/main/java/org/archive/extract/JSONViewExtractorOutput.java b/src/main/java/org/archive/extract/JSONViewExtractorOutput.java index 530dadd0..fb6dc847 100644 --- a/src/main/java/org/archive/extract/JSONViewExtractorOutput.java +++ b/src/main/java/org/archive/extract/JSONViewExtractorOutput.java @@ -5,7 +5,7 @@ import java.io.PrintStream; import java.util.List; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.archive.format.json.JSONView; import org.archive.resource.Resource; import org.archive.util.StreamCopy; diff --git a/src/main/java/org/archive/format/cdx/FieldSplitLine.java b/src/main/java/org/archive/format/cdx/FieldSplitLine.java index 7e965b2f..2da61808 100644 --- a/src/main/java/org/archive/format/cdx/FieldSplitLine.java +++ b/src/main/java/org/archive/format/cdx/FieldSplitLine.java @@ -3,7 +3,7 @@ import java.util.ArrayList; import java.util.List; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * Base class for text lines that are split by a delimiter Some examples will be diff --git a/src/main/java/org/archive/format/gzip/zipnum/TimestampBestPickDedupIterator.java b/src/main/java/org/archive/format/gzip/zipnum/TimestampBestPickDedupIterator.java index 4afb58c4..8c4616a3 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/TimestampBestPickDedupIterator.java +++ b/src/main/java/org/archive/format/gzip/zipnum/TimestampBestPickDedupIterator.java @@ -1,6 +1,6 @@ package org.archive.format.gzip.zipnum; -import org.apache.commons.lang.math.NumberUtils; +import org.apache.commons.lang3.math.NumberUtils; import org.archive.util.iterator.CloseableIterator; public class TimestampBestPickDedupIterator extends TimestampDedupIterator { diff --git a/src/main/java/org/archive/format/json/CrossProductOfLists.java 
b/src/main/java/org/archive/format/json/CrossProductOfLists.java index 7be11fda..f9e2abd2 100644 --- a/src/main/java/org/archive/format/json/CrossProductOfLists.java +++ b/src/main/java/org/archive/format/json/CrossProductOfLists.java @@ -8,7 +8,7 @@ import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; public class CrossProductOfLists { private static final Logger LOG = diff --git a/src/main/java/org/archive/format/json/JSONView.java b/src/main/java/org/archive/format/json/JSONView.java index b73c0666..7a984ebe 100644 --- a/src/main/java/org/archive/format/json/JSONView.java +++ b/src/main/java/org/archive/format/json/JSONView.java @@ -5,7 +5,7 @@ import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.json.JSONObject; /** diff --git a/src/main/java/org/archive/hadoop/ArchiveJSONViewLoader.java b/src/main/java/org/archive/hadoop/ArchiveJSONViewLoader.java index e92ed7e1..d31e31c9 100644 --- a/src/main/java/org/archive/hadoop/ArchiveJSONViewLoader.java +++ b/src/main/java/org/archive/hadoop/ArchiveJSONViewLoader.java @@ -6,7 +6,7 @@ import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java index dafc63b6..0815c18a 100644 --- a/src/main/java/org/archive/io/arc/ARCRecord.java +++ b/src/main/java/org/archive/io/arc/ARCRecord.java @@ -32,7 +32,7 @@ import java.util.logging.Logger; import java.util.regex.Matcher; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import 
org.archive.format.http.HttpHeader; import org.archive.io.ArchiveRecord; import org.archive.io.ArchiveRecordHeader; diff --git a/src/main/java/org/archive/io/warc/WARCReader.java b/src/main/java/org/archive/io/warc/WARCReader.java index f9b41af7..d33874a3 100644 --- a/src/main/java/org/archive/io/warc/WARCReader.java +++ b/src/main/java/org/archive/io/warc/WARCReader.java @@ -31,7 +31,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; -import org.apache.commons.lang.NotImplementedException; +import org.apache.commons.lang3.NotImplementedException; import org.archive.io.ArchiveReader; import org.archive.io.ArchiveRecord; diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java index 1e6135c8..5c6a6854 100644 --- a/src/main/java/org/archive/io/warc/WARCWriter.java +++ b/src/main/java/org/archive/io/warc/WARCWriter.java @@ -36,7 +36,7 @@ import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.archive.format.ArchiveFileConstants; import org.archive.io.UTF8Bytes; import org.archive.io.WriterPoolMember; diff --git a/src/main/java/org/archive/util/FileUtils.java b/src/main/java/org/archive/util/FileUtils.java index 55255e2e..b7bdcee8 100644 --- a/src/main/java/org/archive/util/FileUtils.java +++ b/src/main/java/org/archive/util/FileUtils.java @@ -39,7 +39,7 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.io.filefilter.IOFileFilter; -import org.apache.commons.lang.math.LongRange; +import org.apache.commons.lang3.LongRange; /** Utility methods for manipulating files and directories. 
@@ -473,7 +473,7 @@ public static LongRange pagedLines(File file, long position, if(signedDesiredLineCount>0) { if(startPosition+bufferSize == fileEnd) { // nothing more to read: return nothing - return new LongRange(fileEnd,fileEnd); + return LongRange.of(fileEnd,fileEnd); } else { // retry with larger lineEstimate return pagedLines(file, position, signedDesiredLineCount, lines, Math.max(bufferSize,lineEstimate)); @@ -501,7 +501,7 @@ public static LongRange pagedLines(File file, long position, } int firstLine = lineStarts.getFirst(); int partialLine = lineStarts.getLast(); - LongRange range = new LongRange(startPosition + firstLine, startPosition + partialLine); + LongRange range = LongRange.of(startPosition + firstLine, startPosition + partialLine); List foundLines = IOUtils.readLines(new ByteArrayInputStream(buf,firstLine,partialLine-firstLine)); @@ -510,7 +510,7 @@ public static LongRange pagedLines(File file, long position, range = expandRange( range, pagedLines(file, - range.getMinimumLong()-1, + range.getMinimum()-1, signedDesiredLineCount+foundFullLines, lines, bufferSize/foundFullLines)); @@ -519,7 +519,7 @@ public static LongRange pagedLines(File file, long position, lines.addAll(foundLines); - if(signedDesiredLineCount < 0 && range.getMaximumLong() < position) { + if(signedDesiredLineCount < 0 && range.getMaximum() < position) { // did not get line containining start position range = expandRange( range, @@ -530,12 +530,12 @@ public static LongRange pagedLines(File file, long position, bufferSize/foundFullLines)); } - if(signedDesiredLineCount > 0 && foundFullLines < desiredLineCount && range.getMaximumLong() < fileEnd) { + if(signedDesiredLineCount > 0 && foundFullLines < desiredLineCount && range.getMaximum() < fileEnd) { // need more forward lines range = expandRange( range, pagedLines(file, - range.getMaximumLong(), + range.getMaximum(), desiredLineCount - foundFullLines, lines, bufferSize/foundFullLines)); @@ -545,8 +545,8 @@ public static LongRange 
pagedLines(File file, long position, } public static LongRange expandRange(LongRange range1, LongRange range2) { - return new LongRange(Math.min(range1.getMinimumLong(), range2.getMinimumLong()), - Math.max(range1.getMaximumLong(), range2.getMaximumLong())); + return LongRange.of(Math.min(range1.getMinimum(), range2.getMinimum()), + Math.max(range1.getMaximum(), range2.getMaximum())); } diff --git a/src/main/java/org/archive/util/PropertyUtils.java b/src/main/java/org/archive/util/PropertyUtils.java index e29862bf..659b2820 100644 --- a/src/main/java/org/archive/util/PropertyUtils.java +++ b/src/main/java/org/archive/util/PropertyUtils.java @@ -21,7 +21,7 @@ import java.util.Properties; import java.util.regex.Matcher; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * Utilities for dealing with Java Properties (incl. System Properties) diff --git a/src/main/java/org/archive/util/Recorder.java b/src/main/java/org/archive/util/Recorder.java index e67cfb48..6a7a53d7 100644 --- a/src/main/java/org/archive/util/Recorder.java +++ b/src/main/java/org/archive/util/Recorder.java @@ -34,7 +34,7 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.archive.io.GenericReplayCharSequence; import org.archive.io.RecordingInputStream; import org.archive.io.RecordingOutputStream; diff --git a/src/main/java/org/archive/util/TextUtils.java b/src/main/java/org/archive/util/TextUtils.java index 42e5f31f..98b471f8 100644 --- a/src/main/java/org/archive/util/TextUtils.java +++ b/src/main/java/org/archive/util/TextUtils.java @@ -34,7 +34,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang3.StringEscapeUtils; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; @@ -198,7 +198,7 @@ 
public static String getFirstWord(String s) { * @return The same string escaped. */ public static String escapeForHTMLJavascript(String s) { - return escapeForHTML(StringEscapeUtils.escapeJavaScript(s)); + return escapeForHTML(StringEscapeUtils.escapeEcmaScript(s)); } /** @@ -239,7 +239,7 @@ public static void writeEscapedForHTML(String s, Writer w) BufferedReader reader = new BufferedReader(new StringReader(s)); String line; while((line=reader.readLine()) != null){ - out.println(StringEscapeUtils.escapeHtml(line)); + out.println(StringEscapeUtils.escapeHtml3(line)); } } @@ -253,7 +253,7 @@ public static CharSequence unescapeHtml(final CharSequence cs) { return cs; } - return StringEscapeUtils.unescapeHtml(cs.toString()); + return StringEscapeUtils.unescapeHtml4(cs.toString()); } /** diff --git a/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java b/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java index f7ad75d2..791a1148 100644 --- a/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java +++ b/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java @@ -24,7 +24,7 @@ import java.net.URL; import java.util.Iterator; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.archive.io.arc.ARCWriterTest; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; diff --git a/src/test/java/org/archive/url/UsableURIFactoryTest.java b/src/test/java/org/archive/url/UsableURIFactoryTest.java index 85d423c0..8daebe12 100644 --- a/src/test/java/org/archive/url/UsableURIFactoryTest.java +++ b/src/test/java/org/archive/url/UsableURIFactoryTest.java @@ -21,7 +21,7 @@ import java.util.TreeMap; -import org.apache.commons.lang.SerializationUtils; +import org.apache.commons.lang3.SerializationUtils; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*; diff --git a/src/test/java/org/archive/util/FileUtilsTest.java b/src/test/java/org/archive/util/FileUtilsTest.java 
index 6142913f..bd58bd09 100644 --- a/src/test/java/org/archive/util/FileUtilsTest.java +++ b/src/test/java/org/archive/util/FileUtilsTest.java @@ -27,7 +27,7 @@ import java.util.List; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang.math.LongRange; +import org.apache.commons.lang3.LongRange; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -216,7 +216,7 @@ private List getTestTailLines(File file, int count, int estimate) throws LongRange range = FileUtils.pagedLines(file,pos,-count,returnedLines,estimate); Collections.reverse(returnedLines); testLines.addAll(returnedLines); - pos = range.getMinimumLong()-1; + pos = range.getMinimum()-1; } while (pos>=0); Collections.reverse(testLines); return testLines; @@ -291,7 +291,7 @@ private List getTestHeadLines(File file, int count, int estimate) throws List testLines = new LinkedList(); do { LongRange range = FileUtils.pagedLines(file,pos,count,testLines,estimate); - pos = range.getMaximumLong(); + pos = range.getMaximum(); } while (pos Date: Tue, 15 Jul 2025 16:32:28 +0900 Subject: [PATCH 089/169] Add .idea to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index fc8f67e9..feee77d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea *.pydevproject .project .metadata From 9a2b1d8d8aeaf4b31bef7c41cad54c8208362a3d Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Mon, 21 Jul 2025 12:00:45 +0900 Subject: [PATCH 090/169] Bump commons-io from 2.19.0 to 2.20.0 --- CHANGES.md | 1 + pom.xml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 54056f65..15ebd4eb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,6 +15,7 @@ Users of these methods may need to make the following changes: ### Dependency upgrades +- **commons-io**: 2.19.0 → 2.20.0 - **commons-lang**: 2.6 → 3.18.0 2.0.2 diff --git a/pom.xml b/pom.xml index 93dd4514..7678c038 
100644 --- a/pom.xml +++ b/pom.xml @@ -141,7 +141,7 @@ commons-io commons-io - 2.19.0 + 2.20.0 From eeb10f87759130d1677f79b99036c9bc8c010552 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Mon, 21 Jul 2025 16:42:27 +0900 Subject: [PATCH 091/169] Update CHANGES.md for 3.0.0 --- CHANGES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 15ebd4eb..0f66d386 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,9 @@ Unreleased ---------- +3.0.0 +----- + ### Changes `FileUtils.pagedLines()` and `FileUtils.expandRange()` now return the Apache Commons Lang 3 version of `LongRange`. From b30ff6ff2c01c66d26e6667cf3f49333e85fcd80 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Mon, 21 Jul 2025 16:43:18 +0900 Subject: [PATCH 092/169] [maven-release-plugin] prepare release webarchive-commons-3.0.0 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 7678c038..f6224560 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 2.0.3-SNAPSHOT + 3.0.0 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-2.0.0 + webarchive-commons-3.0.0 From 83ffa442fcf4098a22590b3e6b305638a9a74631 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Mon, 21 Jul 2025 16:43:23 +0900 Subject: [PATCH 093/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index f6224560..e64711f7 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 3.0.0 + 3.0.1-SNAPSHOT jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-3.0.0 + 
webarchive-commons-2.0.0 From e39b5936729161cc69a64dd45f0600f70d5fd85f Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 24 Oct 2025 20:41:34 +0900 Subject: [PATCH 094/169] Enable Dependabot for monthly updates --- .github/dependabot.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..69a75a8b --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: + - package-ecosystem: "maven" + directory: "/" + open-pull-requests-limit: 10 + schedule: + interval: "monthly" From b8542dd25aa3307f6a67ce1ebb74f54bef775714 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:42:29 +0000 Subject: [PATCH 095/169] Bump org.apache.pig:pig from 0.17.0 to 0.18.0 Bumps org.apache.pig:pig from 0.17.0 to 0.18.0. --- updated-dependencies: - dependency-name: org.apache.pig:pig dependency-version: 0.18.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e64711f7..3855017b 100644 --- a/pom.xml +++ b/pom.xml @@ -122,7 +122,7 @@ org.apache.pig pig - 0.17.0 + 0.18.0 provided From 0db2d0c09e8402531ba123deafb649aa2910f00f Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 24 Oct 2025 20:47:05 +0900 Subject: [PATCH 096/169] Remove maven-enforcer-plugin I don't think we need to worry about people using versions of Maven more than 12 years old anymore. 
--- pom.xml | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/pom.xml b/pom.xml index 3855017b..e553e03b 100644 --- a/pom.xml +++ b/pom.xml @@ -168,27 +168,6 @@ 8 - - org.apache.maven.plugins - maven-enforcer-plugin - 1.3.1 - - - enforce-maven - - enforce - - - - - This project requires Maven 3.0.5 or higher - 3.0.5 - - - - - - From e05650d0c4befc654461c05b0f576884338af6a9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:42:57 +0000 Subject: [PATCH 097/169] Bump com.google.guava:guava from 33.4.8-jre to 33.5.0-jre Bumps [com.google.guava:guava](https://github.com/google/guava) from 33.4.8-jre to 33.5.0-jre. - [Release notes](https://github.com/google/guava/releases) - [Commits](https://github.com/google/guava/commits) --- updated-dependencies: - dependency-name: com.google.guava:guava dependency-version: 33.5.0-jre dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e553e03b..8e7e8eb7 100644 --- a/pom.xml +++ b/pom.xml @@ -61,7 +61,7 @@ com.google.guava guava - 33.4.8-jre + 33.5.0-jre From f3877f6ea001f23500317e67eeb7c36cf9f33a21 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:42:41 +0000 Subject: [PATCH 098/169] Bump commons-cli:commons-cli from 1.9.0 to 1.10.0 Bumps [commons-cli:commons-cli](https://github.com/apache/commons-cli) from 1.9.0 to 1.10.0. - [Changelog](https://github.com/apache/commons-cli/blob/master/RELEASE-NOTES.txt) - [Commits](https://github.com/apache/commons-cli/compare/rel/commons-cli-1.9.0...rel/commons-cli-1.10.0) --- updated-dependencies: - dependency-name: commons-cli:commons-cli dependency-version: 1.10.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8e7e8eb7..6167c38c 100644 --- a/pom.xml +++ b/pom.xml @@ -90,7 +90,7 @@ commons-cli commons-cli - 1.9.0 + 1.10.0 From 83af1301a554e7d4ea4f002d6ddfa6a7359445f4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:42:47 +0000 Subject: [PATCH 099/169] Bump org.apache.commons:commons-lang3 from 3.18.0 to 3.19.0 Bumps org.apache.commons:commons-lang3 from 3.18.0 to 3.19.0. --- updated-dependencies: - dependency-name: org.apache.commons:commons-lang3 dependency-version: 3.19.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6167c38c..acc795a6 100644 --- a/pom.xml +++ b/pom.xml @@ -135,7 +135,7 @@ org.apache.commons commons-lang3 - 3.18.0 + 3.19.0 From 797f3adbbc226d8223b4eca3ebb645df3b395a34 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:42:37 +0000 Subject: [PATCH 100/169] Bump org.apache.maven.plugins:maven-gpg-plugin from 3.2.7 to 3.2.8 Bumps [org.apache.maven.plugins:maven-gpg-plugin](https://github.com/apache/maven-gpg-plugin) from 3.2.7 to 3.2.8. - [Release notes](https://github.com/apache/maven-gpg-plugin/releases) - [Commits](https://github.com/apache/maven-gpg-plugin/compare/maven-gpg-plugin-3.2.7...maven-gpg-plugin-3.2.8) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-gpg-plugin dependency-version: 3.2.8 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index acc795a6..4c2470bd 100644 --- a/pom.xml +++ b/pom.xml @@ -227,7 +227,7 @@ org.apache.maven.plugins maven-gpg-plugin - 3.2.7 + 3.2.8 sign-artifacts From 3f4e9dbf09e06a0dc9e690a889f54703106f9ec2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:42:45 +0000 Subject: [PATCH 101/169] Bump org.apache.maven.plugins:maven-javadoc-plugin from 3.11.2 to 3.12.0 Bumps [org.apache.maven.plugins:maven-javadoc-plugin](https://github.com/apache/maven-javadoc-plugin) from 3.11.2 to 3.12.0. - [Release notes](https://github.com/apache/maven-javadoc-plugin/releases) - [Commits](https://github.com/apache/maven-javadoc-plugin/compare/maven-javadoc-plugin-3.11.2...maven-javadoc-plugin-3.12.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-javadoc-plugin dependency-version: 3.12.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 4c2470bd..358b0e26 100644 --- a/pom.xml +++ b/pom.xml @@ -214,7 +214,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.11.2 + 3.12.0 attach-javadocs From 6225cc811a8562fe7bb837523b7a93169a284c39 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:42:52 +0000 Subject: [PATCH 102/169] Bump org.sonatype.central:central-publishing-maven-plugin Bumps [org.sonatype.central:central-publishing-maven-plugin](https://github.com/sonatype/central-publishing-maven-plugin) from 0.7.0 to 0.9.0. 
- [Commits](https://github.com/sonatype/central-publishing-maven-plugin/commits) --- updated-dependencies: - dependency-name: org.sonatype.central:central-publishing-maven-plugin dependency-version: 0.9.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 358b0e26..d41205f7 100644 --- a/pom.xml +++ b/pom.xml @@ -187,7 +187,7 @@ org.sonatype.central central-publishing-maven-plugin - 0.7.0 + 0.9.0 true central From a3a46baa6134a12c971508ecbbf7e0822ec2d014 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 24 Oct 2025 20:51:52 +0900 Subject: [PATCH 103/169] Test on JDK 25 instead of 22 --- .github/workflows/maven.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 2caac444..2421cef3 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -13,7 +13,7 @@ jobs: build: strategy: matrix: - jdk: [8, 11, 17, 21, 22] + jdk: [8, 11, 17, 21, 25] runs-on: ubuntu-latest timeout-minutes: 30 @@ -34,4 +34,4 @@ jobs: restore-keys: | ${{ runner.os }}-maven- - name: Build with Maven - run: mvn -B package --file pom.xml \ No newline at end of file + run: mvn -B package --file pom.xml From 292f4796fb54adcf48f9e74e6e50d7a881cc4484 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:42:58 +0000 Subject: [PATCH 104/169] Bump org.apache.hadoop:hadoop-common from 3.4.1 to 3.4.2 Bumps org.apache.hadoop:hadoop-common from 3.4.1 to 3.4.2. --- updated-dependencies: - dependency-name: org.apache.hadoop:hadoop-common dependency-version: 3.4.2 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d41205f7..7755f544 100644 --- a/pom.xml +++ b/pom.xml @@ -96,7 +96,7 @@ org.apache.hadoop hadoop-common - 3.4.1 + 3.4.2 true From 8144985a4a97018f5f46851aff596c60ab1faac5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:55:04 +0000 Subject: [PATCH 105/169] Bump commons-codec:commons-codec from 1.18.0 to 1.19.0 Bumps [commons-codec:commons-codec](https://github.com/apache/commons-codec) from 1.18.0 to 1.19.0. - [Changelog](https://github.com/apache/commons-codec/blob/master/RELEASE-NOTES.txt) - [Commits](https://github.com/apache/commons-codec/compare/rel/commons-codec-1.18.0...rel/commons-codec-1.19.0) --- updated-dependencies: - dependency-name: commons-codec:commons-codec dependency-version: 1.19.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7755f544..d6cffe8a 100644 --- a/pom.xml +++ b/pom.xml @@ -84,7 +84,7 @@ commons-codec commons-codec - 1.18.0 + 1.19.0 From ae7a8e7caa8c24ae947c11db808aa302fa4efe35 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:55:10 +0000 Subject: [PATCH 106/169] Bump org.apache.maven.plugins:maven-compiler-plugin Bumps [org.apache.maven.plugins:maven-compiler-plugin](https://github.com/apache/maven-compiler-plugin) from 3.14.0 to 3.14.1. 
- [Release notes](https://github.com/apache/maven-compiler-plugin/releases) - [Commits](https://github.com/apache/maven-compiler-plugin/compare/maven-compiler-plugin-3.14.0...maven-compiler-plugin-3.14.1) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-compiler-plugin dependency-version: 3.14.1 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d6cffe8a..a7f88430 100644 --- a/pom.xml +++ b/pom.xml @@ -162,7 +162,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.14.0 + 3.14.1 8 8 From b0c88879c406486cb6af4304c0ae1054d2c4b123 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 11:55:17 +0000 Subject: [PATCH 107/169] Bump org.apache.hadoop:hadoop-mapreduce-client-core from 3.4.1 to 3.4.2 Bumps org.apache.hadoop:hadoop-mapreduce-client-core from 3.4.1 to 3.4.2. --- updated-dependencies: - dependency-name: org.apache.hadoop:hadoop-mapreduce-client-core dependency-version: 3.4.2 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a7f88430..a6cf26cd 100644 --- a/pom.xml +++ b/pom.xml @@ -109,7 +109,7 @@ org.apache.hadoop hadoop-mapreduce-client-core - 3.4.1 + 3.4.2 true From 5a68ead883eaf55dbbc950988142c9cf3a770c22 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Mon, 27 Oct 2025 09:05:19 +0900 Subject: [PATCH 108/169] FileUtils: Ensure streams are closed with a try-block --- src/main/java/org/archive/util/FileUtils.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/archive/util/FileUtils.java b/src/main/java/org/archive/util/FileUtils.java index b7bdcee8..70b5ffae 100644 --- a/src/main/java/org/archive/util/FileUtils.java +++ b/src/main/java/org/archive/util/FileUtils.java @@ -424,12 +424,12 @@ public static LongRange pagedLines(File file, long position, } // read that reasonable chunk - FileInputStream fis = new FileInputStream(file); - fis.getChannel().position(startPosition); byte[] buf = new byte[bufferSize]; - ArchiveUtils.readFully(fis, buf); - IOUtils.closeQuietly(fis); - + try (FileInputStream fis = new FileInputStream(file)) { + fis.getChannel().position(startPosition); + ArchiveUtils.readFully(fis, buf); + } + // find all line starts fully in buffer // (positions after a line-end, per line-end definition in // BufferedReader.readLine) @@ -700,13 +700,12 @@ public static File tryToCanonicalize(File file) { public static void appendTo(File fileToAppendTo, File fileToAppendFrom) throws IOException { // optimal io block size according to http://lingrok.org/xref/coreutils/src/ioblksize.h byte[] buf = new byte[65536]; - FileOutputStream out = new FileOutputStream(fileToAppendTo, true); - FileInputStream in = new FileInputStream(fileToAppendFrom); - for (int n = in.read(buf); n > 0; n = in.read(buf)) { - out.write(buf, 0, n); - } - in.close(); - out.flush(); - out.close(); + try (FileInputStream 
in = new FileInputStream(fileToAppendFrom); + FileOutputStream out = new FileOutputStream(fileToAppendTo, true)) { + for (int n = in.read(buf); n > 0; n = in.read(buf)) { + out.write(buf, 0, n); + } + out.flush(); + } } } \ No newline at end of file From fa22e4854d6f3c84eb28e84d65fc1306431693f4 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Mon, 27 Oct 2025 09:48:45 +0900 Subject: [PATCH 109/169] Update CHANGES.md for 3.0.1 release --- CHANGES.md | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 0f66d386..21b30eb1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,24 @@ Unreleased ---------- -3.0.0 ------ +3.0.1 (2025-10-27) +------------------ + +### Fixes + +* Fixed a file handle leak in `FileUtils.pagedLines()` and `FileUtils.appendTo()` that could occur during I/O errors. + +### Dependency Upgrades + +* **commons-codec**: 1.18.0 → 1.19.0 +* **commons-lang3**: 3.18.0 → 3.19.0 +* **commons-cli**: 1.9.0 → 1.10.0 +* **guava**: 33.4.8-jre → 33.5.0-jre +* **hadoop**: 3.4.1 → 3.4.2 +* **pig**: 0.17.0 → 0.18.0 + +3.0.0 (2025-07-21) +------------------ ### Changes @@ -21,8 +37,8 @@ Users of these methods may need to make the following changes: - **commons-io**: 2.19.0 → 2.20.0 - **commons-lang**: 2.6 → 3.18.0 -2.0.2 ------ +2.0.2 (2025-07-15) +------------------ ### Fixes @@ -31,16 +47,16 @@ Users of these methods may need to make the following changes: * Fixed parsing failures with newer list versions. * Moved `effective_tld_names.dat` to `org/archive/effective_tld_names.dat` to prevent conflict with `crawler-commons`. -2.0.1 ------ +2.0.1 (2025-05-21) +------------------ ### Changes * Re-added `Reporter.shortReportLineTo(PrintWriter)` as it turned out to be important to Heritrix. -2.0.0 ------ +2.0.0 (2025-05-21) +------------------ ### New features @@ -115,8 +131,8 @@ Static imports should be used instead. 
- **json**: 20240303 → 20250517 - **junit**: 4.13.2 → 5.12.2 -1.3.0 ------ +1.3.0 (2024-12-20) +------------------ #### URL Canonicalization Changed @@ -152,8 +168,8 @@ Apache Commons HttpClient 3.1. * org.archive.util.binsearch.impl.http.ApacheHttp31SLRFactory * org.archive.util.binsearch.impl.http.HTTPSeekableLineReaderFactory.HttpLibs.APACHE_31 -1.2.0 ------ +1.2.0 (2024-11-29) +------------------ #### New features @@ -170,15 +186,15 @@ Apache Commons HttpClient 3.1. * json 20240303 * junit 4.13.2 -1.1.11 ------- +1.1.11 (2024-11-27) +------------------- #### Bug fixes * Fixed URLParser and WaybackURLKeyMaker failing on URLs with IPv6 address hostnames [#100](https://github.com/iipc/webarchive-commons/pull/100) -1.1.10 ------- +1.1.10 (2024-10-15) +------------------- #### Bug fixes From 381c878f804121d977e06e66718c1d939d677fc2 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Mon, 27 Oct 2025 09:50:19 +0900 Subject: [PATCH 110/169] [maven-release-plugin] prepare release webarchive-commons-3.0.1 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index a6cf26cd..5e20e918 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 3.0.1-SNAPSHOT + 3.0.1 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-2.0.0 + webarchive-commons-3.0.1 From 856973739aeae3584265ccb2b5be1f6443d8dda4 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Mon, 27 Oct 2025 09:50:25 +0900 Subject: [PATCH 111/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 5e20e918..4fc92ec4 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 3.0.1 + 3.0.2-SNAPSHOT jar webarchive-commons @@ -41,7 +41,7 @@ 
scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-3.0.1 + webarchive-commons-2.0.0 From 7e24a1905d266d0cb55de06e160619260c640afd Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 11 Nov 2025 12:10:47 +0100 Subject: [PATCH 112/169] Require a minimum Maven surefire plugin version --- pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pom.xml b/pom.xml index 4fc92ec4..73ba9ba2 100644 --- a/pom.xml +++ b/pom.xml @@ -168,6 +168,11 @@ 8 + + org.apache.maven.plugins + maven-surefire-plugin + 3.2.5 + From 3ef28b5dd76249ae7e9f9fdc028e94edef566d0f Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 11 Nov 2025 15:45:52 +0100 Subject: [PATCH 113/169] BasicURLCanonicalizer: more efficient normalization of dots in host name --- .../archive/url/BasicURLCanonicalizer.java | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/archive/url/BasicURLCanonicalizer.java b/src/main/java/org/archive/url/BasicURLCanonicalizer.java index 37b448c1..fe2e0d42 100644 --- a/src/main/java/org/archive/url/BasicURLCanonicalizer.java +++ b/src/main/java/org/archive/url/BasicURLCanonicalizer.java @@ -34,7 +34,9 @@ public class BasicURLCanonicalizer implements URLCanonicalizer { .compile("^(0[0-7]*)(\\.[0-7]+)?(\\.[0-7]+)?(\\.[0-7]+)?$"); Pattern DECIMAL_IP = Pattern .compile("^([1-9][0-9]*)(\\.[0-9]+)?(\\.[0-9]+)?(\\.[0-9]+)?$"); + Pattern MULTIDOT = Pattern.compile("\\.{2,}"); + @Override public void canonicalize(HandyURL url) { url.setHash(null); url.setAuthUser(minimalEscape(url.getAuthUser())); @@ -55,8 +57,7 @@ public void canonicalize(HandyURL url) { host = hostE; } - host = host.replaceAll("^\\.+", "").replaceAll("\\.\\.+", ".") - .replaceAll("\\.$", ""); + host = normalizeDots(host); } String ip = null; @@ -74,6 +75,36 @@ public void canonicalize(HandyURL url) { 
url.setPath(escapeOnce(normalizePath(path))); } + /** + * Normalize dots in the host name. + * + * @param host + * @return host name with all sequences of dots replaced with a single dot, + * and all leading and trailing dots removed + */ + private String normalizeDots(String host) { + if (host.indexOf('.') == -1) { + return host; + } + int start = 0, end = host.length(); + boolean changed = false; + while (host.charAt(start) == '.') { + start++; + changed = true; + } + while (host.charAt(end - 1) == '.') { + end--; + changed = true; + } + if (changed) { + host = host.substring(start, end); + } + if (host.contains("..")) { + host = MULTIDOT.matcher(host).replaceAll("."); + } + return host; + } + private static final Pattern SINGLE_FORWARDSLASH_PATTERN = Pattern .compile("/"); From bf3fcb9e787ae7f4f740416a91c2d74b79f31fe7 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 11 Nov 2025 15:11:12 +0100 Subject: [PATCH 114/169] Add forbiddenAPIs Maven plugin to fail the build when methods relying on default locale or charset are used. Also forbid usage of URL.equals and .hashCode which may resolve host names via DNS lookup. 
--- pom.xml | 25 +++++++++++++++++++ .../resources/forbidden-apis-signatures.txt | 2 ++ 2 files changed, 27 insertions(+) create mode 100644 src/test/resources/forbidden-apis-signatures.txt diff --git a/pom.xml b/pom.xml index 73ba9ba2..c1c17e9b 100644 --- a/pom.xml +++ b/pom.xml @@ -173,6 +173,31 @@ maven-surefire-plugin 3.2.5 + + de.thetaphi + forbiddenapis + 3.10 + + + false + + jdk-unsafe + jdk-deprecated + jdk-non-portable + + + src/test/resources/forbidden-apis-signatures.txt + + + + + + check + testCheck + + + + diff --git a/src/test/resources/forbidden-apis-signatures.txt b/src/test/resources/forbidden-apis-signatures.txt new file mode 100644 index 00000000..1eda9eec --- /dev/null +++ b/src/test/resources/forbidden-apis-signatures.txt @@ -0,0 +1,2 @@ +java.net.URL#equals(java.lang.Object) @ may trigger a DNS lookup to resolve the host part +java.net.URL#hashCode() @ may trigger a DNS lookup to resolve the host part From c94928e324b633a882783b72c99b4e24a8a23bbb Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 11 Nov 2025 18:00:01 +0100 Subject: [PATCH 115/169] Add Locale.ROOT as parameter to all occurrences of String.toLowerCase(), String.toUpperCase() and String.format(...) 
--- .../extract/ExtractingResourceFactoryMapper.java | 11 ++++++----- .../extract/ExtractingResourceProducer.java | 3 ++- .../archive/extract/RealCDXExtractorOutput.java | 15 ++++++++------- .../org/archive/extract/ResourceExtractor.java | 7 ++++--- .../org/archive/extract/WATExtractorOutput.java | 3 ++- .../org/archive/format/gzip/GZIPMemberSeries.java | 9 +++++---- .../java/org/archive/format/http/HttpHeader.java | 3 ++- .../java/org/archive/format/http/HttpHeaders.java | 5 +++-- .../archive/format/http/HttpMessageParser.java | 7 ++++--- .../format/http/HttpRequestMessageParser.java | 3 ++- .../archive/format/http/HttpResponseMessage.java | 6 ++++-- .../archive/format/json/CrossProductOfLists.java | 7 ++++--- .../java/org/archive/format/json/JSONView.java | 3 ++- .../format/text/charset/CharsetDetector.java | 7 ++++--- .../org/archive/format/text/html/NodeUtils.java | 10 ++++++---- .../org/archive/hadoop/ArchiveMetadataLoader.java | 3 ++- .../org/archive/hadoop/ResourceRecordReader.java | 5 +++-- src/main/java/org/archive/io/ArchiveReader.java | 5 +++-- .../java/org/archive/io/ArchiveReaderFactory.java | 5 +++-- src/main/java/org/archive/io/ArchiveRecord.java | 3 ++- .../org/archive/io/HeaderedArchiveRecord.java | 5 +++-- src/main/java/org/archive/io/arc/ARCReader.java | 3 ++- .../java/org/archive/io/arc/ARCReaderFactory.java | 9 +++++---- src/main/java/org/archive/io/arc/ARCRecord.java | 3 ++- src/main/java/org/archive/io/arc/ARCUtils.java | 5 +++-- src/main/java/org/archive/io/warc/WARCReader.java | 5 +++-- .../org/archive/io/warc/WARCReaderFactory.java | 7 ++++--- src/main/java/org/archive/net/PublicSuffixes.java | 3 ++- .../resource/generic/GenericResourceProducer.java | 3 ++- .../resource/gzip/GZIPResourceContainer.java | 3 ++- .../org/archive/resource/warc/WARCResource.java | 3 ++- .../org/archive/streamcontext/HTTP11Stream.java | 3 ++- .../org/archive/url/BasicURLCanonicalizer.java | 9 +++++---- src/main/java/org/archive/url/HandyURL.java | 3 ++- 
.../java/org/archive/url/IAURLCanonicalizer.java | 11 ++++++----- src/main/java/org/archive/url/LaxURI.java | 3 ++- src/main/java/org/archive/url/URI.java | 4 ++-- .../java/org/archive/url/URLRegexTransformer.java | 3 ++- .../java/org/archive/url/UsableURIFactory.java | 5 +++-- src/main/java/org/archive/util/ArchiveUtils.java | 6 +++--- src/main/java/org/archive/util/FileNameSpec.java | 3 ++- src/main/java/org/archive/util/FileUtils.java | 9 +++++---- src/main/java/org/archive/util/Recorder.java | 5 +++-- src/main/java/org/archive/util/SurtPrefixSet.java | 5 +++-- .../archive/util/binsearch/SortedTextFile.java | 7 ++++--- 45 files changed, 145 insertions(+), 100 deletions(-) diff --git a/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java b/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java index 0afe16fb..567b1cd8 100644 --- a/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java +++ b/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java @@ -1,6 +1,7 @@ package org.archive.extract; import java.util.Iterator; +import java.util.Locale; import java.util.logging.Logger; import org.archive.format.arc.ARCConstants; @@ -68,14 +69,14 @@ private boolean childFieldStartsWith(MetaData m, String child, String key, String search) { String val = getChildField(m,child,key); return val == null ? false : - val.toLowerCase().startsWith(search.toLowerCase()); + val.toLowerCase(Locale.ROOT).startsWith(search.toLowerCase(Locale.ROOT)); } private boolean childFieldContains(MetaData m, String child, String key, String search) { String val = getChildField(m,child,key); return val == null ? 
false : - val.toLowerCase().contains(search.toLowerCase()); + val.toLowerCase(Locale.ROOT).contains(search.toLowerCase(Locale.ROOT)); } private boolean childFieldEquals(MetaData m, String child, @@ -88,7 +89,7 @@ private boolean childFieldEquals(MetaData m, String child, private String caseInsensitiveKeyScan(MetaData m, String child, String k) { try { if(m.has(child)) { - String kLC = k.toLowerCase(); + String kLC = k.toLowerCase(Locale.ROOT); JSONObject childJSObj = m.getJSONObject(child); @SuppressWarnings("rawtypes") Iterator i = childJSObj.keys(); @@ -96,7 +97,7 @@ private String caseInsensitiveKeyScan(MetaData m, String child, String k) { Object kObj = i.next(); if(kObj instanceof String) { String kString = (String) kObj; - if(kString.toLowerCase().equals(kLC)) { + if(kString.toLowerCase(Locale.ROOT).equals(kLC)) { return childJSObj.getString(kString); } } @@ -128,7 +129,7 @@ private boolean isHTTPARCResource(MetaData envelope) { private boolean isHTMLHttpResource(MetaData m) { String type = caseInsensitiveKeyScan(m,HTTP_HEADERS_LIST, "Content-Type"); - return type == null ? false : type.toLowerCase().contains("html"); + return type == null ? 
false : type.toLowerCase(Locale.ROOT).contains("html"); } private boolean isWARCType(MetaData envelope, WARCRecordType type) { diff --git a/src/main/java/org/archive/extract/ExtractingResourceProducer.java b/src/main/java/org/archive/extract/ExtractingResourceProducer.java index de671bee..07cdb88a 100644 --- a/src/main/java/org/archive/extract/ExtractingResourceProducer.java +++ b/src/main/java/org/archive/extract/ExtractingResourceProducer.java @@ -1,6 +1,7 @@ package org.archive.extract; import java.io.IOException; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; @@ -33,7 +34,7 @@ public Resource getNext() throws ResourceParseException, IOException { return current; } if(LOG.isLoggable(Level.INFO)) { - LOG.info(String.format("Extracting (%s) with (%s)\n", + LOG.info(String.format(Locale.ROOT, "Extracting (%s) with (%s)\n", current.getClass().toString(), f.getClass().toString())); } diff --git a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java index e6f6e82f..b8f06034 100644 --- a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java +++ b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java @@ -8,6 +8,7 @@ import java.net.URISyntaxException; import java.net.URL; import java.util.List; +import java.util.Locale; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -131,7 +132,7 @@ public void output(Resource resource) throws IOException { } else { meta = "-"; } - if(mime.toLowerCase().contains("html")) { + if(mime.toLowerCase(Locale.ROOT).contains("html")) { if(redir.equals("-")) { // maybe an obvious meta-refresh? 
redir = extractHTMLMetaRefresh(origUrl,m); @@ -202,7 +203,7 @@ public void output(Resource resource) throws IOException { } else { meta = "-"; } - if(mime.toLowerCase().contains("html")) { + if(mime.toLowerCase(Locale.ROOT).contains("html")) { if(redir.equals("-")) { // maybe an obvious meta-refresh? redir = extractHTMLMetaRefresh(origUrl,m); @@ -269,7 +270,7 @@ private String extractHTMLRobots(MetaData m) { if(meta != null) { String name = scanHeadersLC(meta, "name", null); if(name != null) { - if(name.toLowerCase().equals("robots")) { + if(name.toLowerCase(Locale.ROOT).equals("robots")) { // alright - some robot instructions: String content = scanHeadersLC(meta, "content", null); if(content != null) { @@ -291,7 +292,7 @@ private String extractHTMLMetaRefresh(String origUrl, MetaData m) { if(meta != null) { String name = scanHeadersLC(meta, "http-equiv", null); if(name != null) { - if(name.toLowerCase().equals("refresh")) { + if(name.toLowerCase(Locale.ROOT).equals("refresh")) { // alright - some robot instructions: String content = scanHeadersLC(meta, "content", null); if(content != null) { @@ -330,7 +331,7 @@ private String scanHeadersLC(JSONObject o, String match, String defaultVal) { if(o.length() == 0) { return defaultVal; } - String lc = match.toLowerCase().trim(); + String lc = match.toLowerCase(Locale.ROOT).trim(); // try { // System.err.println("REC:" + o.toString(1)); // } catch (JSONException e1) { @@ -338,7 +339,7 @@ private String scanHeadersLC(JSONObject o, String match, String defaultVal) { // e1.printStackTrace(); // } for(String key : JSONObject.getNames(o)) { - if(lc.equals(key.toLowerCase().trim())) { + if(lc.equals(key.toLowerCase(Locale.ROOT).trim())) { try { return o.getString(key).trim(); } catch (JSONException e) { @@ -472,7 +473,7 @@ private String parseRobotInstructions(String input) { if(input == null) { return "-"; } - String up = input.replaceAll("-", "").toUpperCase(); + String up = input.replaceAll("-", "").toUpperCase(Locale.ROOT); 
StringBuilder sb = new StringBuilder(3); if(up.contains(NO_FOLLOW_MATCH)) { sb.append("F"); diff --git a/src/main/java/org/archive/extract/ResourceExtractor.java b/src/main/java/org/archive/extract/ResourceExtractor.java index 2812aa5b..a6fa0a00 100644 --- a/src/main/java/org/archive/extract/ResourceExtractor.java +++ b/src/main/java/org/archive/extract/ResourceExtractor.java @@ -8,6 +8,7 @@ import java.io.PrintWriter; import java.net.URISyntaxException; import java.nio.charset.Charset; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; @@ -138,7 +139,7 @@ public int run(String[] args) out.output(r); } catch(GZIPFormatException e) { - LOG.severe(String.format("%s: %s",exProducer.getContext(),e.getMessage())); + LOG.severe(String.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage())); //Log is not coming out for some damn reason....needs to be studied System.err.format("%s: %s",exProducer.getContext(),e.getMessage()); @@ -147,7 +148,7 @@ public int run(String[] args) } e.printStackTrace(); } catch(ResourceParseException e) { - LOG.severe(String.format("%s: %s",exProducer.getContext(),e.getMessage())); + LOG.severe(String.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage())); //Log is not coming out for some damn reason....needs to be studied System.err.format("%s: %s",exProducer.getContext(),e.getMessage()); @@ -157,7 +158,7 @@ public int run(String[] args) e.printStackTrace(); } catch(RecoverableRecordFormatException e) { // this should not get here - ResourceFactory et al should wrap as ResourceParseExceptions... 
- LOG.severe(String.format("RECOVERABLE - %s: %s",exProducer.getContext(),e.getMessage())); + LOG.severe(String.format(Locale.ROOT, "RECOVERABLE - %s: %s",exProducer.getContext(),e.getMessage())); //Log is not coming out for some damn reason....needs to be studied System.err.format("%s: %s",exProducer.getContext(),e.getMessage()); diff --git a/src/main/java/org/archive/extract/WATExtractorOutput.java b/src/main/java/org/archive/extract/WATExtractorOutput.java index 4b5f72ed..dbe979e5 100644 --- a/src/main/java/org/archive/extract/WATExtractorOutput.java +++ b/src/main/java/org/archive/extract/WATExtractorOutput.java @@ -10,6 +10,7 @@ import java.text.ParseException; import java.net.UnknownHostException; import java.util.Date; +import java.util.Locale; import org.archive.format.gzip.GZIPMemberWriter; import org.archive.format.gzip.GZIPMemberWriterCommittedOutputStream; @@ -143,7 +144,7 @@ private void writeARC(OutputStream recOut, MetaData md) throws IOException { String capDateString = extractOrIO(md, "Envelope.ARC-Header-Metadata.Date"); String filename = extractOrIO(md, "Container.Filename"); String offset = extractOrIO(md, "Container.Offset"); - String recId = String.format("",filename,offset); + String recId = String.format(Locale.ROOT, "",filename,offset); writeWARCMDRecord(recOut,md,targetURI,capDateString,recId); } diff --git a/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java b/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java index d70bf394..154cf5f1 100644 --- a/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java +++ b/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; import java.util.zip.Inflater; @@ -227,7 +228,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException } if(LOG.isLoggable(Level.INFO)) { - LOG.info(String.format( + 
LOG.info(String.format(Locale.ROOT, "Got EOF after %d bytes before finding magic in %s\n", amtSkipped * -1, streamContext)); } @@ -237,7 +238,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException if(amtSkipped > 0) { if(strict) { if(state == STATE_START) { - LOG.info(String.format( + LOG.info(String.format(Locale.ROOT, "Strict mode Skipped %d bytes in (%s) before finding magic at offset(%d)\n", amtSkipped, streamContext, offset-3)); } else { @@ -248,7 +249,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException } if(LOG.isLoggable(Level.INFO)) { - LOG.info(String.format( + LOG.info(String.format(Locale.ROOT, "Skipped %d bytes in (%s) before finding magic at offset(%d)\n", amtSkipped, streamContext, offset-3)); } @@ -268,7 +269,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException } offset = currentMemberStartOffset + 3; stream.setOffset(currentMemberStartOffset + 3); - LOG.warning(String.format( + LOG.warning(String.format(Locale.ROOT, "GZIPFormatException with record around offset(%d) in (%s)\n", offset, streamContext)); } diff --git a/src/main/java/org/archive/format/http/HttpHeader.java b/src/main/java/org/archive/format/http/HttpHeader.java index 57b70e1f..9ebe860f 100755 --- a/src/main/java/org/archive/format/http/HttpHeader.java +++ b/src/main/java/org/archive/format/http/HttpHeader.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.OutputStream; +import java.util.Locale; public class HttpHeader implements HttpConstants { private String name = null; @@ -27,7 +28,7 @@ public void write(OutputStream out) throws IOException { public String toString() { StringBuilder sb = new StringBuilder(name.length() + value.length()+20); - sb.append(String.format("HttpHeader(%s)(%s)",name,value)); + sb.append(String.format(Locale.ROOT, "HttpHeader(%s)(%s)",name,value)); return sb.toString(); } } diff --git a/src/main/java/org/archive/format/http/HttpHeaders.java 
b/src/main/java/org/archive/format/http/HttpHeaders.java index ed8061d7..a65dd8fb 100755 --- a/src/main/java/org/archive/format/http/HttpHeaders.java +++ b/src/main/java/org/archive/format/http/HttpHeaders.java @@ -4,6 +4,7 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.Date; +import java.util.Locale; import java.util.logging.Logger; import org.archive.util.ByteOp; @@ -54,9 +55,9 @@ public String getValue(String name) { } public String getValueCaseInsensitive(String name) { - String lc = name.toLowerCase(); + String lc = name.toLowerCase(Locale.ROOT); for(HttpHeader h : this) { - if(h.getName().toLowerCase().equals(lc)) { + if(h.getName().toLowerCase(Locale.ROOT).equals(lc)) { return h.getValue(); } } diff --git a/src/main/java/org/archive/format/http/HttpMessageParser.java b/src/main/java/org/archive/format/http/HttpMessageParser.java index c4fcdf92..24e59e03 100644 --- a/src/main/java/org/archive/format/http/HttpMessageParser.java +++ b/src/main/java/org/archive/format/http/HttpMessageParser.java @@ -1,5 +1,6 @@ package org.archive.format.http; +import java.util.Locale; public class HttpMessageParser implements HttpConstants { @@ -22,11 +23,11 @@ protected int parseVersionLax(byte buf[], int start, int len) throws HttpParseException { String v = new String(buf,start,len,UTF8); - if(v.toLowerCase().compareTo(VERSION_0_STATUS.toLowerCase()) == 0) { + if(v.toLowerCase(Locale.ROOT).compareTo(VERSION_0_STATUS.toLowerCase(Locale.ROOT)) == 0) { return VERSION_0; - } else if(v.toLowerCase().compareTo(VERSION_1_STATUS.toLowerCase()) == 0) { + } else if(v.toLowerCase(Locale.ROOT).compareTo(VERSION_1_STATUS.toLowerCase(Locale.ROOT)) == 0) { return VERSION_1; - } else if(v.toLowerCase().compareTo(VERSION_9_STATUS.toLowerCase()) == 0) { + } else if(v.toLowerCase(Locale.ROOT).compareTo(VERSION_9_STATUS.toLowerCase(Locale.ROOT)) == 0) { return VERSION_9; } return VERSION_0; diff --git 
a/src/main/java/org/archive/format/http/HttpRequestMessageParser.java b/src/main/java/org/archive/format/http/HttpRequestMessageParser.java index f7bc43c7..759bbe5d 100644 --- a/src/main/java/org/archive/format/http/HttpRequestMessageParser.java +++ b/src/main/java/org/archive/format/http/HttpRequestMessageParser.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.Locale; public class HttpRequestMessageParser extends HttpMessageParser { public int maxBytes = 1024 * 1024; @@ -223,7 +224,7 @@ protected int parseMethodStrict(byte buf[], int start, int len) protected int parseMethodLax(byte buf[], int start, int len) throws HttpParseException { - String v = new String(buf,start,len,UTF8).toUpperCase(); + String v = new String(buf,start,len,UTF8).toUpperCase(Locale.ROOT); if(v.compareTo(METHOD_GET_STRING) == 0) { return METHOD_GET; } else if(v.compareTo(METHOD_HEAD_STRING) == 0) { diff --git a/src/main/java/org/archive/format/http/HttpResponseMessage.java b/src/main/java/org/archive/format/http/HttpResponseMessage.java index 0cb7b7e5..6d3f5c35 100755 --- a/src/main/java/org/archive/format/http/HttpResponseMessage.java +++ b/src/main/java/org/archive/format/http/HttpResponseMessage.java @@ -1,5 +1,7 @@ package org.archive.format.http; +import java.util.Locale; + public class HttpResponseMessage extends HttpMessage implements HttpResponseMessageObserver { private int status = 0; private String reason = null; @@ -20,10 +22,10 @@ public String getReason() { return reason; } public String toString() { - return String.format("%s %d %s%s", getVersionString(), status, reason, CRLF); + return String.format(Locale.ROOT, "%s %d %s%s", getVersionString(), status, reason, CRLF); } public String toDebugString() { - return String.format("Message(%d):(%s) (%d) (%s)\n", + return String.format(Locale.ROOT, "Message(%d):(%s) (%d) (%s)\n", reason.length(),getVersionString(),status,reason,CRLF); } diff --git 
a/src/main/java/org/archive/format/json/CrossProductOfLists.java b/src/main/java/org/archive/format/json/CrossProductOfLists.java index f9e2abd2..69cdae33 100644 --- a/src/main/java/org/archive/format/json/CrossProductOfLists.java +++ b/src/main/java/org/archive/format/json/CrossProductOfLists.java @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.Deque; import java.util.List; +import java.util.Locale; import java.util.Stack; import java.util.logging.Level; import java.util.logging.Logger; @@ -18,12 +19,12 @@ public List> crossProduct(List>> listOfLists) { if(LOG.isLoggable(Level.INFO)) { int count = listOfLists.size(); - LOG.info(String.format("Total of (%d) lists to cross product",count)); + LOG.info(String.format(Locale.ROOT, "Total of (%d) lists to cross product",count)); for(int i = 0; i < count; i++) { - LOG.info(String.format("Field (%d) is (%d) deep",i,listOfLists.get(i).size())); + LOG.info(String.format(Locale.ROOT, "Field (%d) is (%d) deep",i,listOfLists.get(i).size())); for(List inner : listOfLists.get(i)) { LOG.info( - String.format("----(%d):(%s)" + String.format(Locale.ROOT, "----(%d):(%s)" ,i,StringUtils.join(inner.toArray(),",") ) ); } } diff --git a/src/main/java/org/archive/format/json/JSONView.java b/src/main/java/org/archive/format/json/JSONView.java index 7a984ebe..444ea7e6 100644 --- a/src/main/java/org/archive/format/json/JSONView.java +++ b/src/main/java/org/archive/format/json/JSONView.java @@ -2,6 +2,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; @@ -28,7 +29,7 @@ public class JSONView { public JSONView(String... 
pathSpecs) { this.pathSpecs = new ArrayList(pathSpecs.length); if(LOG.isLoggable(Level.INFO)) { - LOG.info(String.format("Creating JSONView with(%s)", + LOG.info(String.format(Locale.ROOT, "Creating JSONView with(%s)", StringUtils.join(pathSpecs,","))); } for(String pathSpec : pathSpecs) { diff --git a/src/main/java/org/archive/format/text/charset/CharsetDetector.java b/src/main/java/org/archive/format/text/charset/CharsetDetector.java index 214fde07..49286764 100644 --- a/src/main/java/org/archive/format/text/charset/CharsetDetector.java +++ b/src/main/java/org/archive/format/text/charset/CharsetDetector.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -106,7 +107,7 @@ protected boolean isCharsetSupported(String charsetName) { } } protected String mapCharset(String orig) { - String lc = orig.toLowerCase(); + String lc = orig.toLowerCase(Locale.ROOT); if(lc.contains("iso8859-1") || lc.contains("iso-8859-1")) { return "cp1252"; } @@ -114,7 +115,7 @@ protected String mapCharset(String orig) { } protected String contentTypeToCharset(final String contentType) { int offset = - contentType.toUpperCase().indexOf(CHARSET_TOKEN.toUpperCase()); + contentType.toUpperCase(Locale.ROOT).indexOf(CHARSET_TOKEN.toUpperCase(Locale.ROOT)); if (offset != -1) { String cs = contentType.substring(offset + CHARSET_TOKEN.length()); @@ -148,7 +149,7 @@ protected String getCharsetFromHeaders(HttpHeaders headers) return null; } for(HttpHeader header : headers) { - if(header.getName().toUpperCase().trim().equals( + if(header.getName().toUpperCase(Locale.ROOT).trim().equals( HTTP_CONTENT_TYPE_HEADER)) { return contentTypeToCharset(header.getValue()); } diff --git a/src/main/java/org/archive/format/text/html/NodeUtils.java b/src/main/java/org/archive/format/text/html/NodeUtils.java index 625d9099..f231b91a 100644 --- 
a/src/main/java/org/archive/format/text/html/NodeUtils.java +++ b/src/main/java/org/archive/format/text/html/NodeUtils.java @@ -19,6 +19,8 @@ */ package org.archive.format.text.html; +import java.util.Locale; + import org.htmlparser.Node; import org.htmlparser.nodes.RemarkNode; import org.htmlparser.nodes.TagNode; @@ -41,7 +43,7 @@ public static boolean isTagNodeNamed(Node node, String name) { if(isTagNode(node)) { TagNode tagNode = (TagNode) node; String nodeName = tagNode.getTagName(); - return nodeName.equals(name.toUpperCase()); + return nodeName.equals(name.toUpperCase(Locale.ROOT)); } return false; } @@ -50,7 +52,7 @@ public static boolean isOpenTagNodeNamed(Node node, String name) { TagNode tagNode = (TagNode) node; if(!tagNode.isEndTag()) { String nodeName = tagNode.getTagName(); - return nodeName.equals(name.toUpperCase()); + return nodeName.equals(name.toUpperCase(Locale.ROOT)); } } return false; @@ -60,7 +62,7 @@ public static boolean isNonEmptyOpenTagNodeNamed(Node node, String name) { TagNode tagNode = (TagNode) node; if(!tagNode.isEndTag() && !tagNode.isEmptyXmlTag()) { String nodeName = tagNode.getTagName(); - return nodeName.equals(name.toUpperCase()); + return nodeName.equals(name.toUpperCase(Locale.ROOT)); } } return false; @@ -70,7 +72,7 @@ public static boolean isCloseTagNodeNamed(Node node, String name) { TagNode tagNode = (TagNode) node; if(tagNode.isEndTag()) { String nodeName = tagNode.getTagName(); - return nodeName.equals(name.toUpperCase()); + return nodeName.equals(name.toUpperCase(Locale.ROOT)); } } return false; diff --git a/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java b/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java index 37c8af99..a3cbb26c 100644 --- a/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java +++ b/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Locale; import java.util.logging.Logger; 
import org.apache.hadoop.mapreduce.InputFormat; @@ -54,7 +55,7 @@ public Tuple getNext() throws IOException { try { key = reader.getCurrentKey(); - LOG.info(String.format("Loaded key-offset %d\n", key.offset)); + LOG.info(String.format(Locale.ROOT, "Loaded key-offset %d\n", key.offset)); value = reader.getCurrentValue(); } catch (InterruptedException e) { // is this needed and the right way? diff --git a/src/main/java/org/archive/hadoop/ResourceRecordReader.java b/src/main/java/org/archive/hadoop/ResourceRecordReader.java index 06d3ce2e..88b93dd2 100644 --- a/src/main/java/org/archive/hadoop/ResourceRecordReader.java +++ b/src/main/java/org/archive/hadoop/ResourceRecordReader.java @@ -1,6 +1,7 @@ package org.archive.hadoop; import java.io.IOException; +import java.util.Locale; import java.util.logging.Logger; import org.apache.hadoop.fs.FSDataInputStream; @@ -111,7 +112,7 @@ public boolean nextKeyValue() throws IOException, InterruptedException { if(r != null) { StreamCopy.readToEOF(r.getInputStream()); - LOG.info(String.format("Extracted offset %d\n", + LOG.info(String.format(Locale.ROOT, "Extracted offset %d\n", series.getCurrentMemberStartOffset())); cachedK = new ResourceContext(name, series.getCurrentMemberStartOffset()); @@ -121,7 +122,7 @@ public boolean nextKeyValue() throws IOException, InterruptedException { } catch (ResourceParseException e) { e.printStackTrace(); throw new IOException( - String.format("ResourceParseException at(%s)(%d)", + String.format(Locale.ROOT, "ResourceParseException at(%s)(%d)", name,series.getCurrentMemberStartOffset()), e); } diff --git a/src/main/java/org/archive/io/ArchiveReader.java b/src/main/java/org/archive/io/ArchiveReader.java index 449cdc24..53b8167b 100644 --- a/src/main/java/org/archive/io/ArchiveReader.java +++ b/src/main/java/org/archive/io/ArchiveReader.java @@ -32,6 +32,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.logging.Level; 
import java.util.logging.Logger; @@ -615,7 +616,7 @@ protected static boolean getTrueOrFalse(final String value) { if (value == null || value.length() <= 0) { return false; } - return Boolean.TRUE.toString().equals(value.toLowerCase()); + return Boolean.TRUE.toString().equals(value.toLowerCase(Locale.ROOT)); } /** @@ -757,4 +758,4 @@ protected static Options getOptions() { "'or 'nohead'. Default: 'cdx'.")); return options; } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/ArchiveReaderFactory.java b/src/main/java/org/archive/io/ArchiveReaderFactory.java index bc316893..fe72236b 100644 --- a/src/main/java/org/archive/io/ArchiveReaderFactory.java +++ b/src/main/java/org/archive/io/ArchiveReaderFactory.java @@ -25,6 +25,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; +import java.util.Locale; import org.archive.io.arc.ARCReaderFactory; import org.archive.io.warc.WARCReaderFactory; @@ -296,7 +297,7 @@ protected void addUserAgent(final HttpURLConnection connection) { * @throws IOException */ protected boolean isCompressed(final File f) throws IOException { - return f.getName().toLowerCase(). + return f.getName().toLowerCase(Locale.ROOT). 
endsWith(DOT_COMPRESSED_FILE_EXTENSION); } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/ArchiveRecord.java b/src/main/java/org/archive/io/ArchiveRecord.java index 4bd1fa02..01e8d5ec 100644 --- a/src/main/java/org/archive/io/ArchiveRecord.java +++ b/src/main/java/org/archive/io/ArchiveRecord.java @@ -23,6 +23,7 @@ import java.io.OutputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.Locale; import java.util.logging.Level; import org.archive.format.ArchiveFileConstants; @@ -393,7 +394,7 @@ public boolean hasContentHeaders() { return false; } - if (!url.toLowerCase().startsWith("http")) { + if (!url.toLowerCase(Locale.ROOT).startsWith("http")) { return false; } diff --git a/src/main/java/org/archive/io/HeaderedArchiveRecord.java b/src/main/java/org/archive/io/HeaderedArchiveRecord.java index 809a9e54..70c4fb04 100644 --- a/src/main/java/org/archive/io/HeaderedArchiveRecord.java +++ b/src/main/java/org/archive/io/HeaderedArchiveRecord.java @@ -25,6 +25,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; +import java.util.Locale; import org.archive.format.http.HttpHeader; import org.archive.format.arc.ARCConstants; @@ -156,8 +157,8 @@ private InputStream readContentHeaders() throws IOException { boolean isHttpResponse = statusLine.startsWith("HTTP"); boolean isHttpRequest = false; if (!isHttpResponse) { - isHttpRequest = statusLine.toUpperCase().startsWith("GET") || - !statusLine.toUpperCase().startsWith("POST"); + isHttpRequest = statusLine.toUpperCase(Locale.ROOT).startsWith("GET") || + !statusLine.toUpperCase(Locale.ROOT).startsWith("POST"); } if (!isHttpResponse && !isHttpRequest) { throw new UnexpectedStartLineIOException("Failed parse of " + diff --git a/src/main/java/org/archive/io/arc/ARCReader.java b/src/main/java/org/archive/io/arc/ARCReader.java index c9a88415..ecc742a5 100644 --- a/src/main/java/org/archive/io/arc/ARCReader.java 
+++ b/src/main/java/org/archive/io/arc/ARCReader.java @@ -27,6 +27,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Logger; @@ -493,7 +494,7 @@ public static void main(String [] args) break; case 'f': - format = cmdlineOptions[i].getValue().toLowerCase(); + format = cmdlineOptions[i].getValue().toLowerCase(Locale.ROOT); boolean match = false; // List of supported formats. final String [] supportedFormats = diff --git a/src/main/java/org/archive/io/arc/ARCReaderFactory.java b/src/main/java/org/archive/io/arc/ARCReaderFactory.java index d2f10842..bbcc8b6f 100644 --- a/src/main/java/org/archive/io/arc/ARCReaderFactory.java +++ b/src/main/java/org/archive/io/arc/ARCReaderFactory.java @@ -27,6 +27,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; +import java.util.Locale; import java.util.logging.Level; import org.archive.io.ArchiveReader; @@ -230,7 +231,7 @@ public static boolean testCompressedARCFile(File arcFile, throws IOException { boolean compressedARCFile = false; FileUtils.assertReadable(arcFile); - if(!skipSuffixCheck && !arcFile.getName().toLowerCase() + if(!skipSuffixCheck && !arcFile.getName().toLowerCase(Locale.ROOT) .endsWith(COMPRESSED_ARC_FILE_EXTENSION)) { return compressedARCFile; } @@ -247,9 +248,9 @@ public static boolean testCompressedARCFile(File arcFile, public static boolean isARCSuffix(final String arcName) { return (arcName == null)? false: - (arcName.toLowerCase().endsWith(DOT_COMPRESSED_ARC_FILE_EXTENSION))? + (arcName.toLowerCase(Locale.ROOT).endsWith(DOT_COMPRESSED_ARC_FILE_EXTENSION))? true: - (arcName.toLowerCase().endsWith(DOT_ARC_FILE_EXTENSION))? + (arcName.toLowerCase(Locale.ROOT).endsWith(DOT_ARC_FILE_EXTENSION))? 
true: false; } @@ -452,4 +453,4 @@ protected void gotoEOR(ArchiveRecord rec) throws IOException { logStdErr(Level.WARNING, message); } } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java index 0815c18a..14e80728 100644 --- a/src/main/java/org/archive/io/arc/ARCRecord.java +++ b/src/main/java/org/archive/io/arc/ARCRecord.java @@ -27,6 +27,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; @@ -376,7 +377,7 @@ private ARCRecordMetaData computeMetaData(List keys, if (keys.size() != values.size()) { // Early ARCs had a space in mimetype. if (values.size() == (keys.size() + 1) && - values.get(4).toLowerCase().startsWith("charset=")) { + values.get(4).toLowerCase(Locale.ROOT).startsWith("charset=")) { List nuvalues = new ArrayList(keys.size()); nuvalues.add(0, values.get(0)); diff --git a/src/main/java/org/archive/io/arc/ARCUtils.java b/src/main/java/org/archive/io/arc/ARCUtils.java index 5bcb4cc3..05c15abb 100644 --- a/src/main/java/org/archive/io/arc/ARCUtils.java +++ b/src/main/java/org/archive/io/arc/ARCUtils.java @@ -27,6 +27,7 @@ import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; +import java.util.Locale; import org.archive.url.UsableURI; import org.archive.util.zip.GzipHeader; @@ -94,7 +95,7 @@ public static boolean testCompressedARCFile(File arcFile, throws IOException { boolean compressedARCFile = false; isReadable(arcFile); - if(!skipSuffixCheck && !arcFile.getName().toLowerCase() + if(!skipSuffixCheck && !arcFile.getName().toLowerCase(Locale.ROOT) .endsWith(COMPRESSED_ARC_FILE_EXTENSION)) { return compressedARCFile; } @@ -197,7 +198,7 @@ public static boolean testUncompressedARCFile(File arcFile) throws IOException { boolean uncompressedARCFile = false; isReadable(arcFile); - 
if(arcFile.getName().toLowerCase().endsWith(ARC_FILE_EXTENSION)) { + if(arcFile.getName().toLowerCase(Locale.ROOT).endsWith(ARC_FILE_EXTENSION)) { FileInputStream fis = new FileInputStream(arcFile); try { byte [] b = new byte[ARC_MAGIC_NUMBER.length()]; diff --git a/src/main/java/org/archive/io/warc/WARCReader.java b/src/main/java/org/archive/io/warc/WARCReader.java index d33874a3..02756cb1 100644 --- a/src/main/java/org/archive/io/warc/WARCReader.java +++ b/src/main/java/org/archive/io/warc/WARCReader.java @@ -24,6 +24,7 @@ import java.io.InputStream; import java.util.Iterator; import java.util.List; +import java.util.Locale; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; @@ -233,7 +234,7 @@ public static void main(String [] args) break; case 'f': - format = cmdlineOptions[i].getValue().toLowerCase(); + format = cmdlineOptions[i].getValue().toLowerCase(Locale.ROOT); boolean match = false; // List of supported formats. final String [] supportedFormats = @@ -286,4 +287,4 @@ public static void main(String [] args) } } } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/warc/WARCReaderFactory.java b/src/main/java/org/archive/io/warc/WARCReaderFactory.java index 881da869..70b80340 100644 --- a/src/main/java/org/archive/io/warc/WARCReaderFactory.java +++ b/src/main/java/org/archive/io/warc/WARCReaderFactory.java @@ -26,6 +26,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; +import java.util.Locale; import org.archive.io.ArchiveReader; import org.archive.io.ArchiveReaderFactory; @@ -307,9 +308,9 @@ protected void gotoEOR(ArchiveRecord rec) throws IOException { public static boolean isWARCSuffix(final String f) { return (f == null)? false: - (f.toLowerCase().endsWith(DOT_COMPRESSED_WARC_FILE_EXTENSION))? + (f.toLowerCase(Locale.ROOT).endsWith(DOT_COMPRESSED_WARC_FILE_EXTENSION))? true: - (f.toLowerCase().endsWith(DOT_WARC_FILE_EXTENSION))? 
+ (f.toLowerCase(Locale.ROOT).endsWith(DOT_WARC_FILE_EXTENSION))? true: false; } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/net/PublicSuffixes.java b/src/main/java/org/archive/net/PublicSuffixes.java index e436b8dc..a2a2bfb2 100644 --- a/src/main/java/org/archive/net/PublicSuffixes.java +++ b/src/main/java/org/archive/net/PublicSuffixes.java @@ -31,6 +31,7 @@ import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -231,7 +232,7 @@ protected static Node readPublishedFileToSurtTrie(BufferedReader reader) throws // discard utf8 notation after entry line = line.split("\\s+")[0]; // TODO: maybe we don't need to create lower-cased String - line = line.toLowerCase(); + line = line.toLowerCase(Locale.ROOT); // SURT-order domain segments String[] segs = line.split("\\."); StringBuilder sb = new StringBuilder(); diff --git a/src/main/java/org/archive/resource/generic/GenericResourceProducer.java b/src/main/java/org/archive/resource/generic/GenericResourceProducer.java index 812a3f0d..b111dc1e 100644 --- a/src/main/java/org/archive/resource/generic/GenericResourceProducer.java +++ b/src/main/java/org/archive/resource/generic/GenericResourceProducer.java @@ -1,6 +1,7 @@ package org.archive.resource.generic; import java.io.IOException; +import java.util.Locale; import org.archive.resource.MetaData; import org.archive.resource.Resource; @@ -45,6 +46,6 @@ public void close() throws IOException { stream.close(); } public String getContext() { - return String.format("Context(%s)(%d)", name, stream.getOffset()); + return String.format(Locale.ROOT, "Context(%s)(%d)", name, stream.getOffset()); } } diff --git a/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java b/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java index 39611ab8..5267a0f9 100644 --- 
a/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java +++ b/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java @@ -1,6 +1,7 @@ package org.archive.resource.gzip; import java.io.IOException; +import java.util.Locale; import org.archive.format.gzip.GZIPMemberSeries; import org.archive.format.gzip.GZIPSeriesMember; @@ -54,6 +55,6 @@ public void close() throws IOException { series.close(); } public String getContext() { - return String.format("Context(%s)(%d)", series.getStreamContext(), series.getCurrentMemberStartOffset()); + return String.format(Locale.ROOT, "Context(%s)(%d)", series.getStreamContext(), series.getCurrentMemberStartOffset()); } } diff --git a/src/main/java/org/archive/resource/warc/WARCResource.java b/src/main/java/org/archive/resource/warc/WARCResource.java index a9c3fcc3..a5e5ac35 100644 --- a/src/main/java/org/archive/resource/warc/WARCResource.java +++ b/src/main/java/org/archive/resource/warc/WARCResource.java @@ -5,6 +5,7 @@ import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.Locale; import org.archive.format.http.HttpHeader; import org.archive.format.http.HttpResponse; @@ -43,7 +44,7 @@ public WARCResource(MetaData metaData, ResourceContainer container, String name = h.getName(); String value = h.getValue(); fields.putString(name,value); - if(name.toLowerCase().equals("content-length")) { + if(name.toLowerCase(Locale.ROOT).equals("content-length")) { // TODO: catch formatexception length = Long.parseLong(value); } diff --git a/src/main/java/org/archive/streamcontext/HTTP11Stream.java b/src/main/java/org/archive/streamcontext/HTTP11Stream.java index 06f51409..995dc53e 100755 --- a/src/main/java/org/archive/streamcontext/HTTP11Stream.java +++ b/src/main/java/org/archive/streamcontext/HTTP11Stream.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.net.URL; import java.net.URLConnection; +import java.util.Locale; public 
class HTTP11Stream extends AbstractBufferingStream { private URL url; @@ -42,7 +43,7 @@ public int doRead(byte[] b, int off, int len) throws IOException { public void doSeek(long offset) throws IOException { doClose(); conn = url.openConnection(); - conn.setRequestProperty("Range", String.format("bytes=%d-", offset)); + conn.setRequestProperty("Range", String.format(Locale.ROOT, "bytes=%d-", offset)); conn.connect(); is = conn.getInputStream(); } diff --git a/src/main/java/org/archive/url/BasicURLCanonicalizer.java b/src/main/java/org/archive/url/BasicURLCanonicalizer.java index 37b448c1..632d1ea7 100644 --- a/src/main/java/org/archive/url/BasicURLCanonicalizer.java +++ b/src/main/java/org/archive/url/BasicURLCanonicalizer.java @@ -7,6 +7,7 @@ import java.nio.charset.CharsetDecoder; import java.nio.charset.CoderResult; import java.util.ArrayList; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -64,7 +65,7 @@ public void canonicalize(HandyURL url) { if (ip != null) { host = ip; } else if (host != null) { - host = escapeOnce(host.toLowerCase()); + host = escapeOnce(host.toLowerCase(Locale.ROOT)); } url.setHost(host); // now the path: @@ -159,7 +160,7 @@ public String attemptIPFormats(String host) { // throws URIException { } ip[i] = octet; } - return String.format("%d.%d.%d.%d", ip[0], ip[1], ip[2], ip[3]); + return String.format(Locale.ROOT, "%d.%d.%d.%d", ip[0], ip[1], ip[2], ip[3]); } else { Matcher m2 = DECIMAL_IP.matcher(host); if (m2.matches()) { @@ -190,7 +191,7 @@ public String attemptIPFormats(String host) { // throws URIException { } ip[i] = octet; } - return String.format("%d.%d.%d.%d", ip[0], ip[1], ip[2], + return String.format(Locale.ROOT, "%d.%d.%d.%d", ip[0], ip[1], ip[2], ip[3]); } @@ -261,7 +262,7 @@ public String escapeOnce(String input) { } sb.append("%"); - String hex = Integer.toHexString(b).toUpperCase(); + String hex = Integer.toHexString(b).toUpperCase(Locale.ROOT); if (hex.length() == 1) { 
sb.append('0'); } diff --git a/src/main/java/org/archive/url/HandyURL.java b/src/main/java/org/archive/url/HandyURL.java index 91539b3f..0c2c81f7 100644 --- a/src/main/java/org/archive/url/HandyURL.java +++ b/src/main/java/org/archive/url/HandyURL.java @@ -2,6 +2,7 @@ import java.net.MalformedURLException; import java.net.URL; +import java.util.Locale; public class HandyURL { public final static int DEFAULT_PORT = -1; @@ -277,7 +278,7 @@ public void setOpaque(String opaque) { } public String toDebugString() { - return String.format("Scheme(%s) UserName(%s) UserPass(%s) Host(%s) port(%d) Path(%s) Query(%s) Frag(%s)", + return String.format(Locale.ROOT, "Scheme(%s) UserName(%s) UserPass(%s) Host(%s) port(%d) Path(%s) Query(%s) Frag(%s)", scheme, authUser, authPass, host, port, path, query, hash); } diff --git a/src/main/java/org/archive/url/IAURLCanonicalizer.java b/src/main/java/org/archive/url/IAURLCanonicalizer.java index 0cf7c8a4..e964cd00 100644 --- a/src/main/java/org/archive/url/IAURLCanonicalizer.java +++ b/src/main/java/org/archive/url/IAURLCanonicalizer.java @@ -2,6 +2,7 @@ import java.util.Arrays; import java.util.Comparator; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -20,11 +21,11 @@ public void canonicalize(HandyURL url) { } if (rules.isSet(SCHEME_SETTINGS, SCHEME_LOWERCASE)) { if (url.getScheme() != null) { - url.setScheme(url.getScheme().toLowerCase()); + url.setScheme(url.getScheme().toLowerCase(Locale.ROOT)); } } if(rules.isSet(HOST_SETTINGS, HOST_LOWERCASE)) { - url.setHost(url.getHost().toLowerCase()); + url.setHost(url.getHost().toLowerCase(Locale.ROOT)); } if(rules.isSet(HOST_SETTINGS, HOST_MASSAGE)) { url.setHost(massageHost(url.getHost())); @@ -46,7 +47,7 @@ public void canonicalize(HandyURL url) { url.setPath(null); } else { if(rules.isSet(PATH_SETTINGS, PATH_LOWERCASE)) { - path = path.toLowerCase(); + path = path.toLowerCase(Locale.ROOT); } if(rules.isSet(PATH_SETTINGS, 
PATH_STRIP_SESSION_ID)) { path = URLRegexTransformer.stripPathSessionID(path); @@ -71,7 +72,7 @@ public void canonicalize(HandyURL url) { } // lower-case: if(rules.isSet(QUERY_SETTINGS, QUERY_LOWERCASE)) { - query = query.toLowerCase(); + query = query.toLowerCase(Locale.ROOT); } // re-order? if(rules.isSet(QUERY_SETTINGS, QUERY_ALPHA_REORDER)) { @@ -155,7 +156,7 @@ public static String massageHost(String host) { return host; } public static int getDefaultPort(String scheme) { - String lcScheme = scheme.toLowerCase(); + String lcScheme = scheme.toLowerCase(Locale.ROOT); if(lcScheme.equals("http")) { return 80; } else if(lcScheme.equals("https")) { diff --git a/src/main/java/org/archive/url/LaxURI.java b/src/main/java/org/archive/url/LaxURI.java index 57071460..4210c303 100644 --- a/src/main/java/org/archive/url/LaxURI.java +++ b/src/main/java/org/archive/url/LaxURI.java @@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.BitSet; +import java.util.Locale; /** * URI subclass which allows partial/inconsistent encoding, matching @@ -321,7 +322,7 @@ protected void parseUriReference(String original, boolean escaped) *

*/ if (at > 0 && at < length && tmp.charAt(at) == ':') { - char[] target = tmp.substring(0, at).toLowerCase().toCharArray(); + char[] target = tmp.substring(0, at).toLowerCase(Locale.ROOT).toCharArray(); if (validate(target, scheme)) { _scheme = target; from = ++at; diff --git a/src/main/java/org/archive/url/URI.java b/src/main/java/org/archive/url/URI.java index 374e0574..38219556 100644 --- a/src/main/java/org/archive/url/URI.java +++ b/src/main/java/org/archive/url/URI.java @@ -261,7 +261,7 @@ public URI(String scheme, String schemeSpecificPart, String fragment) if (scheme == null) { throw new URIException(URIException.PARSING, "scheme required"); } - char[] s = scheme.toLowerCase().toCharArray(); + char[] s = scheme.toLowerCase(Locale.ROOT).toCharArray(); if (validate(s, URI.scheme)) { _scheme = s; // is_absoluteURI } else { @@ -1954,7 +1954,7 @@ protected void parseUriReference(String original, boolean escaped) *

*/ if (at > 0 && at < length && tmp.charAt(at) == ':') { - char[] target = tmp.substring(0, at).toLowerCase().toCharArray(); + char[] target = tmp.substring(0, at).toLowerCase(Locale.ROOT).toCharArray(); if (validate(target, scheme)) { _scheme = target; } else { diff --git a/src/main/java/org/archive/url/URLRegexTransformer.java b/src/main/java/org/archive/url/URLRegexTransformer.java index 5f31c81c..182eb218 100644 --- a/src/main/java/org/archive/url/URLRegexTransformer.java +++ b/src/main/java/org/archive/url/URLRegexTransformer.java @@ -1,5 +1,6 @@ package org.archive.url; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -27,7 +28,7 @@ public class URLRegexTransformer { public static String stripOpts(String orig, OptimizedPattern op[]) { - String origLC = orig.toLowerCase(); + String origLC = orig.toLowerCase(Locale.ROOT); StringBuilder sb = null; int i = 0; int max = op.length; diff --git a/src/main/java/org/archive/url/UsableURIFactory.java b/src/main/java/org/archive/url/UsableURIFactory.java index 08f18999..3038ada5 100644 --- a/src/main/java/org/archive/url/UsableURIFactory.java +++ b/src/main/java/org/archive/url/UsableURIFactory.java @@ -23,6 +23,7 @@ import java.io.UnsupportedEncodingException; import java.util.BitSet; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; @@ -609,7 +610,7 @@ private String fixupDomainlabel(String label) throw ue; } } - label = label.toLowerCase(); + label = label.toLowerCase(Locale.ROOT); return label; } @@ -755,6 +756,6 @@ private String checkUriElement(String element) { */ private String checkUriElementAndLowerCase(String element) { String tmp = checkUriElement(element); - return (tmp != null)? tmp.toLowerCase(): tmp; + return (tmp != null)? 
tmp.toLowerCase(Locale.ROOT): tmp; } } diff --git a/src/main/java/org/archive/util/ArchiveUtils.java b/src/main/java/org/archive/util/ArchiveUtils.java index 22ba2787..50307b43 100644 --- a/src/main/java/org/archive/util/ArchiveUtils.java +++ b/src/main/java/org/archive/util/ArchiveUtils.java @@ -900,7 +900,7 @@ private static String loadVersion() { if (line.startsWith("#")) { continue; } - TLDS.add(line.trim().toLowerCase()); + TLDS.add(line.trim().toLowerCase(Locale.ROOT)); } } catch (Exception e) { LOGGER.log(Level.SEVERE,"TLD list unavailable",e); @@ -917,7 +917,7 @@ private static String loadVersion() { * @return boolean true if recognized as TLD */ public static boolean isTld(String dom) { - return TLDS.contains(dom.toLowerCase()); + return TLDS.contains(dom.toLowerCase(Locale.ROOT)); } public static void closeQuietly(Object input) { @@ -981,7 +981,7 @@ public static int readFully(InputStream input, byte[] buf) */ public static BufferedReader getBufferedReader(File source) throws IOException { InputStream is = new BufferedInputStream(new FileInputStream(source)); - boolean isGzipped = source.getName().toLowerCase(). + boolean isGzipped = source.getName().toLowerCase(Locale.ROOT). 
endsWith(GZIP_SUFFIX); if(isGzipped) { is = new GZIPInputStream(is); diff --git a/src/main/java/org/archive/util/FileNameSpec.java b/src/main/java/org/archive/util/FileNameSpec.java index a3312cfc..7ace8b59 100644 --- a/src/main/java/org/archive/util/FileNameSpec.java +++ b/src/main/java/org/archive/util/FileNameSpec.java @@ -1,5 +1,6 @@ package org.archive.util; +import java.util.Locale; import java.util.concurrent.atomic.AtomicInteger; public class FileNameSpec { @@ -15,7 +16,7 @@ public FileNameSpec(String prefix, String suffix) { public String getNextName() { StringBuilder sb = new StringBuilder(); sb.append(prefix); - sb.append(String.format("%06d",aInt.incrementAndGet())); + sb.append(String.format(Locale.ROOT, "%06d",aInt.incrementAndGet())); sb.append(suffix); return sb.toString(); } diff --git a/src/main/java/org/archive/util/FileUtils.java b/src/main/java/org/archive/util/FileUtils.java index 70b5ffae..6886e08c 100644 --- a/src/main/java/org/archive/util/FileUtils.java +++ b/src/main/java/org/archive/util/FileUtils.java @@ -32,6 +32,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Locale; import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; @@ -219,8 +220,8 @@ protected static void workaroundCopyFile(final File src, FileFilter prefixFilter = new FileFilter() { public boolean accept(File pathname) { - return pathname.getName().toLowerCase(). - startsWith(prefix.toLowerCase()); + return pathname.getName().toLowerCase(Locale.ROOT). 
+ startsWith(prefix.toLowerCase(Locale.ROOT)); } }; return dir.listFiles(prefixFilter); @@ -283,7 +284,7 @@ public static boolean isReadableWithExtensionAndMagic(final File f, throws IOException { boolean result = false; FileUtils.assertReadable(f); - if(f.getName().toLowerCase().endsWith(uncompressedExtension)) { + if(f.getName().toLowerCase(Locale.ROOT).endsWith(uncompressedExtension)) { FileInputStream fis = new FileInputStream(f); try { byte [] b = new byte[magic.length()]; @@ -708,4 +709,4 @@ public static void appendTo(File fileToAppendTo, File fileToAppendFrom) throws I out.flush(); } } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/util/Recorder.java b/src/main/java/org/archive/util/Recorder.java index 6a7a53d7..6f9e0117 100644 --- a/src/main/java/org/archive/util/Recorder.java +++ b/src/main/java/org/archive/util/Recorder.java @@ -26,6 +26,7 @@ import java.io.OutputStream; import java.nio.charset.Charset; import java.util.HashSet; +import java.util.Locale; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -338,8 +339,8 @@ public void setInputIsChunked(boolean chunked) { * @param contentEncoding declared content-encoding of input recording. 
*/ public void setContentEncoding(String contentEncoding) { - String lowerCoding = contentEncoding.toLowerCase(); - if(!SUPPORTED_ENCODINGS.contains(contentEncoding.toLowerCase())) { + String lowerCoding = contentEncoding.toLowerCase(Locale.ROOT); + if(!SUPPORTED_ENCODINGS.contains(contentEncoding.toLowerCase(Locale.ROOT))) { throw new IllegalArgumentException("contentEncoding unsupported: "+contentEncoding); } this.contentEncoding = lowerCoding; diff --git a/src/main/java/org/archive/util/SurtPrefixSet.java b/src/main/java/org/archive/util/SurtPrefixSet.java index 6925cc83..32a34d53 100644 --- a/src/main/java/org/archive/util/SurtPrefixSet.java +++ b/src/main/java/org/archive/util/SurtPrefixSet.java @@ -31,6 +31,7 @@ import java.io.PrintStream; import java.io.Reader; import java.util.Iterator; +import java.util.Locale; import org.archive.url.UsableURI; import org.archive.util.iterator.LineReadingIterator; @@ -70,7 +71,7 @@ public void importFrom(Reader r) { while (iter.hasNext()) { s = (String) iter.next(); - add(s.toLowerCase()); + add(s.toLowerCase(Locale.ROOT)); } } @@ -145,7 +146,7 @@ public boolean considerAsAddDirective(String suri) { } if(u.indexOf("(")>0) { // formal SURT prefix; toLowerCase just in case - add(u.toLowerCase()); + add(u.toLowerCase(Locale.ROOT)); } else { // hostname/normal form URI from which // to deduce SURT prefix diff --git a/src/main/java/org/archive/util/binsearch/SortedTextFile.java b/src/main/java/org/archive/util/binsearch/SortedTextFile.java index ab8118b7..a4326dc0 100644 --- a/src/main/java/org/archive/util/binsearch/SortedTextFile.java +++ b/src/main/java/org/archive/util/binsearch/SortedTextFile.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.util.Comparator; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; @@ -142,14 +143,14 @@ public long binaryFindOffset(SeekableLineReader slr, final String key, Comparato if (comparator.compare(key, line) > 0) { 
if(LOGGER.isLoggable(Level.FINE)) { - LOGGER.fine(String.format("Search(%d) (%s)/(%s) : After", + LOGGER.fine(String.format(Locale.ROOT, "Search(%d) (%s)/(%s) : After", mid * blockSize, key,line)); } min = mid; } else { if(LOGGER.isLoggable(Level.FINE)) { - LOGGER.fine(String.format("Search(%d) (%s)/(%s) : Before", + LOGGER.fine(String.format(Locale.ROOT, "Search(%d) (%s)/(%s) : Before", mid * blockSize, key,line)); } max = mid; @@ -391,7 +392,7 @@ private CloseableIterator search(SeekableLineReader slr, long min = binaryFindOffset(slr, key, comparator); if (LOGGER.isLoggable(Level.FINE)) { - LOGGER.fine(String.format("Aligning(%d)",min)); + LOGGER.fine(String.format(Locale.ROOT, "Aligning(%d)",min)); } slr.seek(min); From 72d8a808e7d61173a435cca7ee5a7ae2b24b61d1 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 11 Nov 2025 18:27:41 +0100 Subject: [PATCH 116/169] Add Locale.ROOT as parameter to all occurrences of PrintStream.format(...) and number formatters --- .../java/org/archive/extract/RealCDXExtractorOutput.java | 6 ++++-- src/main/java/org/archive/extract/ResourceExtractor.java | 6 +++--- .../extract/WARCMetadataRecordExtractorOutput.java | 5 +++-- .../java/org/archive/extract/WATExtractorOutput.java | 2 +- .../archive/format/http/DumpingHTTPParseObserver.java | 5 +++-- .../java/org/archive/io/GenericReplayCharSequence.java | 9 +++++---- src/main/java/org/archive/io/WriterPoolMember.java | 9 ++++++++- .../java/org/archive/resource/html/HTMLMetaData.java | 3 ++- 8 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java index b8f06034..ff0b9e83 100644 --- a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java +++ b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java @@ -223,7 +223,8 @@ public void output(Resource resource) throws IOException { canUrl = keyMaker.makeKey(origUrl); // URL DATE OURL MIME 
HTTP-CODE SHA1 META REDIR OFFSET LENGTH FILE if(dumpJSON) { - out.format("%s %s %s %s %s %s %s %s %s %s %s %s\n", + out.format(Locale.ROOT, + "%s %s %s %s %s %s %s %s %s %s %s %s\n", canUrl, date, origUrl, @@ -237,7 +238,8 @@ public void output(Resource resource) throws IOException { filename, m.toString(1)); } else { - out.format("%s %s %s %s %s %s %s %s %s %s %s\n", + out.format(Locale.ROOT, + "%s %s %s %s %s %s %s %s %s %s %s\n", canUrl, date, origUrl, diff --git a/src/main/java/org/archive/extract/ResourceExtractor.java b/src/main/java/org/archive/extract/ResourceExtractor.java index a6fa0a00..dcbfc122 100644 --- a/src/main/java/org/archive/extract/ResourceExtractor.java +++ b/src/main/java/org/archive/extract/ResourceExtractor.java @@ -141,7 +141,7 @@ public int run(String[] args) } catch(GZIPFormatException e) { LOG.severe(String.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage())); //Log is not coming out for some damn reason....needs to be studied - System.err.format("%s: %s",exProducer.getContext(),e.getMessage()); + System.err.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage()); if(ProducerUtils.STRICT_GZ) { throw e; @@ -150,7 +150,7 @@ public int run(String[] args) } catch(ResourceParseException e) { LOG.severe(String.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage())); //Log is not coming out for some damn reason....needs to be studied - System.err.format("%s: %s",exProducer.getContext(),e.getMessage()); + System.err.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage()); if(ProducerUtils.STRICT_GZ) { throw e; @@ -160,7 +160,7 @@ public int run(String[] args) // this should not get here - ResourceFactory et al should wrap as ResourceParseExceptions... 
LOG.severe(String.format(Locale.ROOT, "RECOVERABLE - %s: %s",exProducer.getContext(),e.getMessage())); //Log is not coming out for some damn reason....needs to be studied - System.err.format("%s: %s",exProducer.getContext(),e.getMessage()); + System.err.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage()); e.printStackTrace(); diff --git a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java index 68f9d1c8..426acb02 100644 --- a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java +++ b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java @@ -7,6 +7,7 @@ import java.net.URISyntaxException; import java.net.URL; import java.util.List; +import java.util.Locale; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -87,7 +88,7 @@ public void output(Resource resource) throws IOException { String[] linkParts = outLinkValue.split(" "); if(linkParts.length > 2) //'outlinks': 'origUrl date origOutlinkUrl linktype linktext' - out.format("%s\t%s\t%s\t%s\t\n",origUrl,date,linkParts[0],linkParts[2]); + out.format(Locale.ROOT,"%s\t%s\t%s\t%s\t\n",origUrl,date,linkParts[0],linkParts[2]); } } else if(outputType.equals("hopinfo")) { String key = obj.get("Name").toString(); @@ -103,7 +104,7 @@ public void output(Resource resource) throws IOException { } if(outputType.equals("hopinfo")) { //'hopinfo': 'origCrawledUrl date origViaUrl hopPathFromVia sourceTag' - out.format("%s\t%s\t%s\t%s\t%s\n",origUrl,date,viaUrl,viaPath,sourceTag); + out.format(Locale.ROOT,"%s\t%s\t%s\t%s\t%s\n",origUrl,date,viaUrl,viaPath,sourceTag); } } } diff --git a/src/main/java/org/archive/extract/WATExtractorOutput.java b/src/main/java/org/archive/extract/WATExtractorOutput.java index dbe979e5..79cb0870 100644 --- a/src/main/java/org/archive/extract/WATExtractorOutput.java +++ 
b/src/main/java/org/archive/extract/WATExtractorOutput.java @@ -157,7 +157,7 @@ private void writeWARC(OutputStream recOut, MetaData md) throws IOException { targetURI = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Target-URI"); } // handle date of generation in WARC format - DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss"); + DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.ROOT); String capDateString = dateFormat.format(new Date()); String recId = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Record-ID"); writeWARCMDRecord(recOut,md,targetURI,capDateString,recId); diff --git a/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java b/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java index ed5dfcb2..11cd9276 100755 --- a/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java +++ b/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java @@ -2,6 +2,7 @@ import java.io.PrintStream; import java.nio.charset.Charset; +import java.util.Locale; public class DumpingHTTPParseObserver implements HttpHeaderObserver { private static final Charset UTF8 = Charset.forName("UTF-8"); @@ -15,13 +16,13 @@ public DumpingHTTPParseObserver(PrintStream ps) { public void headerParsed(byte[] name, int ns, int nl, byte[] value, int vs, int vl) { - ps.format("headerParsed:(%d:%d)(%s)(%d:%d)(%s)\n", + ps.format(Locale.ROOT,"headerParsed:(%d:%d)(%s)(%d:%d)(%s)\n", ns,nl,new String(name,0,nl,UTF8), vs,vl,new String(value,0,vl,UTF8)); } public void headersComplete(int bytesRead) { - ps.format("headersComplete(%d)\n",bytesRead); + ps.format(Locale.ROOT,"headersComplete(%d)\n",bytesRead); } public void headersCorrupt() { ps.println("headersCorrupted\n"); diff --git a/src/main/java/org/archive/io/GenericReplayCharSequence.java b/src/main/java/org/archive/io/GenericReplayCharSequence.java index c427550b..7aacb25a 100644 --- a/src/main/java/org/archive/io/GenericReplayCharSequence.java +++ 
b/src/main/java/org/archive/io/GenericReplayCharSequence.java @@ -34,6 +34,7 @@ import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.text.NumberFormat; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; @@ -168,8 +169,8 @@ private void updateMemoryMappedBuffer() { long charLength = (long) this.length() - (long) prefixBuffer.limit(); // in characters long mapSize = Math.min((charLength * bytesPerChar) - mapByteOffset, MAP_MAX_BYTES); logger.fine("updateMemoryMappedBuffer: mapOffset=" - + NumberFormat.getInstance().format(mapByteOffset) - + " mapSize=" + NumberFormat.getInstance().format(mapSize)); + + NumberFormat.getInstance(Locale.ROOT).format(mapByteOffset) + + " mapSize=" + NumberFormat.getInstance(Locale.ROOT).format(mapSize)); try { // TODO: stress-test without these possibly-costly requests! // System.gc(); @@ -255,9 +256,9 @@ protected void decode(InputStream inStream, int prefixMax, this.length = Ints.saturatedCast(count); if(count>Integer.MAX_VALUE) { logger.warning("input stream is longer than Integer.MAX_VALUE=" - + NumberFormat.getInstance().format(Integer.MAX_VALUE) + + NumberFormat.getInstance(Locale.ROOT).format(Integer.MAX_VALUE) + " characters -- only first " - + NumberFormat.getInstance().format(Integer.MAX_VALUE) + + NumberFormat.getInstance(Locale.ROOT).format(Integer.MAX_VALUE) + " are accessible through this GenericReplayCharSequence"); } diff --git a/src/main/java/org/archive/io/WriterPoolMember.java b/src/main/java/org/archive/io/WriterPoolMember.java index a488354a..4679ea78 100644 --- a/src/main/java/org/archive/io/WriterPoolMember.java +++ b/src/main/java/org/archive/io/WriterPoolMember.java @@ -26,9 +26,11 @@ import java.io.InputStream; import java.io.OutputStream; import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; import java.text.NumberFormat; import java.util.Iterator; import java.util.List; +import java.util.Locale; import 
java.util.Properties; import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Logger; @@ -103,12 +105,17 @@ public abstract class WriterPoolMember { */ protected static int roundRobinIndex = 0; + /** + * Symbol set for serial number formatter. + */ + protected static DecimalFormatSymbols serialNoFormatterSymbols = new DecimalFormatSymbols(Locale.ROOT); + /** * NumberFormat instance for formatting serial number. * * Pads serial number with zeros. */ - protected static NumberFormat serialNoFormatter = new DecimalFormat("00000"); + protected static NumberFormat serialNoFormatter = new DecimalFormat("00000", serialNoFormatterSymbols); /** diff --git a/src/main/java/org/archive/resource/html/HTMLMetaData.java b/src/main/java/org/archive/resource/html/HTMLMetaData.java index 024d9677..d995cf65 100644 --- a/src/main/java/org/archive/resource/html/HTMLMetaData.java +++ b/src/main/java/org/archive/resource/html/HTMLMetaData.java @@ -1,6 +1,7 @@ package org.archive.resource.html; import java.util.List; +import java.util.Locale; import java.util.logging.Logger; import org.archive.resource.MetaData; @@ -98,7 +99,7 @@ private void appendObj2(JSONObject o, String arr, String... 
a) { } catch(JSONException e) { try { - System.err.format("GotErr(%s) JSON(%s)(%s)", e.getMessage(), + System.err.format(Locale.ROOT, "GotErr(%s) JSON(%s)(%s)", e.getMessage(), o.toString(1),a.toString()); } catch (JSONException e1) { // TODO Auto-generated catch block From 56941573a8ea7ef729b550581aadc45647f9826f Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 11 Nov 2025 18:34:34 +0100 Subject: [PATCH 117/169] Initialize InputStreamReaders using UTF-8 charset --- src/main/java/org/archive/format/cdx/CDXFile.java | 4 +++- .../record/WARCJSONMetaDataResourceFactory.java | 7 +++---- src/main/java/org/archive/util/ArchiveUtils.java | 14 ++++++++------ src/main/java/org/archive/util/DevUtils.java | 3 ++- src/main/java/org/archive/util/IAUtils.java | 4 +++- src/main/java/org/archive/util/ProcessUtils.java | 4 +++- 6 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/archive/format/cdx/CDXFile.java b/src/main/java/org/archive/format/cdx/CDXFile.java index 7dca0464..612f7454 100644 --- a/src/main/java/org/archive/format/cdx/CDXFile.java +++ b/src/main/java/org/archive/format/cdx/CDXFile.java @@ -18,6 +18,8 @@ import org.archive.util.iterator.CloseableIterator; import org.archive.util.zip.OpenJDK7GZIPInputStream; +import static java.nio.charset.StandardCharsets.UTF_8; + public class CDXFile extends SortedTextFile implements CDXInputSource { public CDXFile(String uri) throws IOException { @@ -94,7 +96,7 @@ public static BufferedReader createStreamingLineReader(String uri, boolean gzipp input = new OpenJDK7GZIPInputStream(swis); } - BufferedReader reader = new BufferedReader(new InputStreamReader(input)); + BufferedReader reader = new BufferedReader(new InputStreamReader(input, UTF_8)); return reader; } diff --git a/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java b/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java index 43041efb..8cc8c146 100644 --- 
a/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java +++ b/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java @@ -3,7 +3,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.nio.charset.Charset; import org.archive.resource.MetaData; import org.archive.resource.Resource; @@ -14,9 +13,9 @@ import org.json.JSONException; import org.json.JSONTokener; -public class WARCJSONMetaDataResourceFactory implements ResourceFactory, ResourceConstants { - private static final Charset UTF8 = Charset.forName("UTF-8"); +import static java.nio.charset.StandardCharsets.UTF_8; +public class WARCJSONMetaDataResourceFactory implements ResourceFactory, ResourceConstants { public WARCJSONMetaDataResourceFactory() { } @@ -27,7 +26,7 @@ public Resource getResource(InputStream is, MetaData parentMetaData, MetaData md; try { - md = new MetaData(new JSONTokener(new InputStreamReader(is, UTF8))); + md = new MetaData(new JSONTokener(new InputStreamReader(is, UTF_8))); } catch (JSONException e) { throw new ResourceParseException(e); } diff --git a/src/main/java/org/archive/util/ArchiveUtils.java b/src/main/java/org/archive/util/ArchiveUtils.java index 50307b43..cce411df 100644 --- a/src/main/java/org/archive/util/ArchiveUtils.java +++ b/src/main/java/org/archive/util/ArchiveUtils.java @@ -49,6 +49,8 @@ import org.archive.format.gzip.GZIPDecoder; import org.archive.format.gzip.GZIPFormatException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Miscellaneous useful methods. 
* @@ -851,7 +853,7 @@ private static String loadVersion() { BufferedReader br = null; String version; try { - br = new BufferedReader(new InputStreamReader(input)); + br = new BufferedReader(new InputStreamReader(input, UTF_8)); version = br.readLine(); br.readLine(); } catch (IOException e) { @@ -873,7 +875,7 @@ private static String loadVersion() { br = null; String timestamp; try { - br = new BufferedReader(new InputStreamReader(input)); + br = new BufferedReader(new InputStreamReader(input, UTF_8)); timestamp = br.readLine(); } catch (IOException e) { return version; @@ -894,7 +896,7 @@ private static String loadVersion() { TLDS = new HashSet(); InputStream is = ArchiveUtils.class.getResourceAsStream("tlds-alpha-by-domain.txt"); try { - BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8)); String line; while((line = reader.readLine())!=null) { if (line.startsWith("#")) { @@ -986,7 +988,7 @@ public static BufferedReader getBufferedReader(File source) throws IOException { if(isGzipped) { is = new GZIPInputStream(is); } - return new BufferedReader(new InputStreamReader(is)); + return new BufferedReader(new InputStreamReader(is, UTF_8)); } /** @@ -1002,8 +1004,8 @@ public static BufferedReader getBufferedReader(URL source) throws IOException { || conn.getContentEncoding() != null && conn.getContentEncoding().equalsIgnoreCase("gzip"); InputStream uis = conn.getInputStream(); return new BufferedReader(isGzipped? 
- new InputStreamReader(new GZIPInputStream(uis)): - new InputStreamReader(uis)); + new InputStreamReader(new GZIPInputStream(uis), UTF_8): + new InputStreamReader(uis, UTF_8)); } /** diff --git a/src/main/java/org/archive/util/DevUtils.java b/src/main/java/org/archive/util/DevUtils.java index f2a1d044..7ee4b13a 100644 --- a/src/main/java/org/archive/util/DevUtils.java +++ b/src/main/java/org/archive/util/DevUtils.java @@ -25,6 +25,7 @@ import java.io.StringWriter; import java.util.logging.Logger; +import static java.nio.charset.StandardCharsets.UTF_8; /** * Write a message and stack trace to the 'org.archive.util.DevUtils' logger. @@ -92,7 +93,7 @@ public static void sigquitSelf() { Process p = Runtime.getRuntime().exec( new String[] {"perl", "-e", "print getppid(). \"\n\";"}); BufferedReader br = - new BufferedReader(new InputStreamReader(p.getInputStream())); + new BufferedReader(new InputStreamReader(p.getInputStream(), UTF_8)); String ppid = br.readLine(); Runtime.getRuntime().exec( new String[] {"sh", "-c", "kill -3 "+ppid}).waitFor(); diff --git a/src/main/java/org/archive/util/IAUtils.java b/src/main/java/org/archive/util/IAUtils.java index 4597d723..b0c448f0 100644 --- a/src/main/java/org/archive/util/IAUtils.java +++ b/src/main/java/org/archive/util/IAUtils.java @@ -29,6 +29,8 @@ import java.nio.charset.Charset; import java.util.Properties; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Miscellaneous useful methods. 
* @@ -53,7 +55,7 @@ public static String loadCommonsVersion() { BufferedReader br = null; String version; try { - br = new BufferedReader(new InputStreamReader(input)); + br = new BufferedReader(new InputStreamReader(input, UTF_8)); version = br.readLine(); br.readLine(); } catch (IOException e) { diff --git a/src/main/java/org/archive/util/ProcessUtils.java b/src/main/java/org/archive/util/ProcessUtils.java index af792981..0a3eeb67 100644 --- a/src/main/java/org/archive/util/ProcessUtils.java +++ b/src/main/java/org/archive/util/ProcessUtils.java @@ -26,6 +26,8 @@ import java.util.logging.Level; import java.util.logging.Logger; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Class to run an external process. * @author stack @@ -55,7 +57,7 @@ protected StreamGobbler(InputStream is, String name) { public void run() { try { BufferedReader br = - new BufferedReader(new InputStreamReader(this.is)); + new BufferedReader(new InputStreamReader(this.is, UTF_8)); for (String line = null; (line = br.readLine()) != null;) { this.sink.append(line); } From c013b258be71c1c00b2a016641d60a2fc65195ff Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 11 Nov 2025 19:44:46 +0100 Subject: [PATCH 118/169] Add charset to invocations of String constructor --- .../java/org/archive/format/http/HttpHeaderParser.java | 4 +++- .../archive/format/http/HttpResponseMessageParser.java | 5 +++-- src/main/java/org/archive/io/CompositeFileReader.java | 4 +++- .../java/org/archive/io/HeaderedArchiveRecord.java | 5 +++-- src/main/java/org/archive/io/arc/ARCRecord.java | 4 ++-- src/main/java/org/archive/url/LaxURI.java | 9 ++++++--- src/main/java/org/archive/url/URI.java | 10 +++++++--- src/main/java/org/archive/util/LaxHttpParser.java | 3 ++- 8 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/archive/format/http/HttpHeaderParser.java b/src/main/java/org/archive/format/http/HttpHeaderParser.java index bee3c28b..ddbb6e47 100755 --- 
a/src/main/java/org/archive/format/http/HttpHeaderParser.java +++ b/src/main/java/org/archive/format/http/HttpHeaderParser.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; public class HttpHeaderParser implements HttpConstants { private static final int DEFAULT_MAX_NAME_LENGTH = 1024 * 100; @@ -288,7 +289,8 @@ public ParseState handleByte(byte b, HttpHeaderParser parser) return parser.postColonState; } if(parser.isStrict) { - throw new HttpParseException("Illegal char after name("+new String(name,0,nameLength)+")"); + throw new HttpParseException("Illegal char after name(" + + new String(name, 0, nameLength, StandardCharsets.ISO_8859_1) + ")"); } parser.headersCorrupted(); return parser.laxLineEatParseState; diff --git a/src/main/java/org/archive/format/http/HttpResponseMessageParser.java b/src/main/java/org/archive/format/http/HttpResponseMessageParser.java index 3aee7c48..4ddef2ad 100755 --- a/src/main/java/org/archive/format/http/HttpResponseMessageParser.java +++ b/src/main/java/org/archive/format/http/HttpResponseMessageParser.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; public class HttpResponseMessageParser extends HttpMessageParser { public int maxBytes = 1024 * 128; @@ -97,7 +98,7 @@ public int parseStrict(byte buf[], int len, HttpResponseMessageObserver obs) version = parseVersionStrict(buf, vs, vl); status = parseStatusStrict(buf,ss,sl); - reason = new String(buf,idx+1,(len - idx)-1); + reason = new String(buf,idx+1,(len - idx)-1,StandardCharsets.ISO_8859_1); obs.messageParsed(version, status, reason, len); @@ -155,7 +156,7 @@ private int parseLax(byte buf[], int len, HttpResponseMessageObserver obs) idx++; int reasonLen = bufferEnd - idx; if(reasonLen > 0) { - reason = new String(buf,idx,reasonLen); + reason = new String(buf,idx,reasonLen,StandardCharsets.ISO_8859_1); } } else { // missed some: diff --git 
a/src/main/java/org/archive/io/CompositeFileReader.java b/src/main/java/org/archive/io/CompositeFileReader.java index 14b56219..6e331565 100644 --- a/src/main/java/org/archive/io/CompositeFileReader.java +++ b/src/main/java/org/archive/io/CompositeFileReader.java @@ -23,6 +23,8 @@ import java.io.InputStreamReader; import java.util.List; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * @author gojomo @@ -34,7 +36,7 @@ public class CompositeFileReader extends InputStreamReader { * @throws IOException */ public CompositeFileReader(List filenames) throws IOException { - super(new CompositeFileInputStream(filenames)); + super(new CompositeFileInputStream(filenames), UTF_8); } } diff --git a/src/main/java/org/archive/io/HeaderedArchiveRecord.java b/src/main/java/org/archive/io/HeaderedArchiveRecord.java index 70c4fb04..a149acac 100644 --- a/src/main/java/org/archive/io/HeaderedArchiveRecord.java +++ b/src/main/java/org/archive/io/HeaderedArchiveRecord.java @@ -25,6 +25,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; +import java.nio.charset.StandardCharsets; import java.util.Locale; import org.archive.format.http.HttpHeader; @@ -145,7 +146,7 @@ private InputStream readContentHeaders() throws IOException { int eolCharCount = getEolCharsCount(statusBytes); if (eolCharCount <= 0) { throw new IOException("Failed to read raw lie where one " + - " was expected: " + new String(statusBytes)); + " was expected: " + new String(statusBytes, ARCConstants.DEFAULT_ENCODING)); } String statusLine = new String(statusBytes, 0, statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); @@ -186,7 +187,7 @@ private InputStream readContentHeaders() throws IOException { eolCharCount = getEolCharsCount(lineBytes); if (eolCharCount <= 0) { throw new IOException("Failed reading headers: " + - ((lineBytes != null)? new String(lineBytes): null)); + ((lineBytes != null)? 
new String(lineBytes, StandardCharsets.ISO_8859_1): null)); } // Save the bytes read. baos.write(lineBytes); diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java index 14e80728..c14426a5 100644 --- a/src/main/java/org/archive/io/arc/ARCRecord.java +++ b/src/main/java/org/archive/io/arc/ARCRecord.java @@ -589,7 +589,7 @@ private InputStream readHttpHeader() throws IOException { if (eolCharCount <= 0) { throw new RecoverableIOException( "Failed to read http status where one was expected: " - + ((statusBytes == null) ? "" : new String(statusBytes))); + + ((statusBytes == null) ? "" : new String(statusBytes, DEFAULT_ENCODING))); } statusLine = new String(statusBytes, 0, @@ -659,7 +659,7 @@ private InputStream readHttpHeader() throws IOException { break; } else { throw new IOException("Failed reading http headers: " + - ((lineBytes != null)? new String(lineBytes): null)); + ((lineBytes != null)? new String(lineBytes, DEFAULT_ENCODING): null)); } } else { httpHeaderBytesRead += lineBytes.length; diff --git a/src/main/java/org/archive/url/LaxURI.java b/src/main/java/org/archive/url/LaxURI.java index 4210c303..3b27e045 100644 --- a/src/main/java/org/archive/url/LaxURI.java +++ b/src/main/java/org/archive/url/LaxURI.java @@ -19,6 +19,8 @@ package org.archive.url; import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.BitSet; @@ -122,9 +124,10 @@ protected static String decode(String component, String charset) byte[] rawdata = null; rawdata = LaxURLCodec.decodeUrlLoose(component.getBytes(StandardCharsets.US_ASCII)); try { - return new String(rawdata, charset); - } catch (UnsupportedEncodingException e) { - return new String(rawdata); + Charset cs = Charset.forName(charset); + return new String(rawdata, cs); + } catch (IllegalCharsetNameException e) { + return 
new String(rawdata, StandardCharsets.US_ASCII); } } diff --git a/src/main/java/org/archive/url/URI.java b/src/main/java/org/archive/url/URI.java index 38219556..ff53775e 100644 --- a/src/main/java/org/archive/url/URI.java +++ b/src/main/java/org/archive/url/URI.java @@ -34,6 +34,8 @@ import org.apache.commons.codec.net.URLCodec; import java.io.*; +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.BitSet; @@ -1780,11 +1782,13 @@ protected static String decode(String component, String charset) throw new URIException(e.getMessage()); } try { - return new String(rawdata, charset); - } catch (UnsupportedEncodingException e) { - return new String(rawdata); + Charset cs = Charset.forName(charset); + return new String(rawdata, cs); + } catch (IllegalCharsetNameException e) { + return new String(rawdata, StandardCharsets.US_ASCII); } } + /** * Pre-validate the unescaped URI string within a specific component. 
* diff --git a/src/main/java/org/archive/util/LaxHttpParser.java b/src/main/java/org/archive/util/LaxHttpParser.java index 0545fd95..05d2469c 100644 --- a/src/main/java/org/archive/util/LaxHttpParser.java +++ b/src/main/java/org/archive/util/LaxHttpParser.java @@ -36,6 +36,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.logging.Logger; @@ -127,7 +128,7 @@ public static String readLine(InputStream inputStream, String charset) throws IO try { return new String(rawdata, 0, len - offset, charset); } catch (UnsupportedEncodingException e) { - return new String(rawdata, 0, len - offset); + return new String(rawdata, 0, len - offset, StandardCharsets.ISO_8859_1); } } From 88ac2989028ed35d52e0e46076d1322040362de3 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 11 Nov 2025 22:38:54 +0100 Subject: [PATCH 119/169] Initialize PrintStreams using UTF-8 charset, call String.getBytes() with charset. 
--- .../archive/extract/DumpingExtractorOutput.java | 8 +++++++- .../archive/extract/JSONViewExtractorOutput.java | 8 +++++++- .../format/gzip/zipnum/ZipNumCluster.java | 3 +++ src/main/java/org/archive/io/arc/ARC2WCDX.java | 4 +++- .../java/org/archive/io/warc/WARCWriter.java | 6 ++++-- src/main/java/org/archive/url/URI.java | 4 +++- src/main/java/org/archive/util/SURT.java | 4 +++- .../java/org/archive/util/SurtPrefixSet.java | 6 ++++-- src/main/java/org/archive/util/TextUtils.java | 16 ++++++---------- .../archive/util/binsearch/SortedTextFile.java | 6 ++++-- 10 files changed, 44 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/archive/extract/DumpingExtractorOutput.java b/src/main/java/org/archive/extract/DumpingExtractorOutput.java index 69591931..1ccbf771 100644 --- a/src/main/java/org/archive/extract/DumpingExtractorOutput.java +++ b/src/main/java/org/archive/extract/DumpingExtractorOutput.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.io.OutputStream; import java.io.PrintStream; +import java.io.UnsupportedEncodingException; import java.util.logging.Logger; import org.archive.resource.Resource; @@ -12,13 +13,18 @@ import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; +import static java.nio.charset.StandardCharsets.UTF_8; + public class DumpingExtractorOutput implements ExtractorOutput { private static final Logger LOG = Logger.getLogger(DumpingExtractorOutput.class.getName()); private PrintStream out; public DumpingExtractorOutput(OutputStream out) { - this.out = new PrintStream(out); + try { + this.out = new PrintStream(out, false, UTF_8.name()); + } catch (UnsupportedEncodingException e) { + } } public void output(Resource resource) throws IOException { diff --git a/src/main/java/org/archive/extract/JSONViewExtractorOutput.java b/src/main/java/org/archive/extract/JSONViewExtractorOutput.java index fb6dc847..6cb7c445 100644 --- a/src/main/java/org/archive/extract/JSONViewExtractorOutput.java 
+++ b/src/main/java/org/archive/extract/JSONViewExtractorOutput.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.io.OutputStream; import java.io.PrintStream; +import java.io.UnsupportedEncodingException; import java.util.List; import org.apache.commons.lang3.StringUtils; @@ -10,12 +11,17 @@ import org.archive.resource.Resource; import org.archive.util.StreamCopy; +import static java.nio.charset.StandardCharsets.UTF_8; + public class JSONViewExtractorOutput implements ExtractorOutput { private PrintStream out; private JSONView view; public JSONViewExtractorOutput(OutputStream out, String filterPath) { view = new JSONView(filterPath.split(",")); - this.out = new PrintStream(out); + try { + this.out = new PrintStream(out, false, UTF_8.name()); + } catch (UnsupportedEncodingException e) { + } } public void output(Resource resource) throws IOException { StreamCopy.readToEOF(resource.getInputStream()); diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java index a3d34a4b..edf5857c 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java +++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java @@ -13,6 +13,7 @@ import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -35,6 +36,8 @@ import org.archive.util.binsearch.impl.HTTPSeekableLineReader; import org.archive.util.iterator.CloseableIterator; +import static java.nio.charset.StandardCharsets.UTF_8; + public class ZipNumCluster extends ZipNumIndex { final static Logger LOGGER = Logger.getLogger(ZipNumCluster.class.getName()); diff --git a/src/main/java/org/archive/io/arc/ARC2WCDX.java b/src/main/java/org/archive/io/arc/ARC2WCDX.java index f0515694..aec571e9 100644 --- a/src/main/java/org/archive/io/arc/ARC2WCDX.java +++ 
b/src/main/java/org/archive/io/arc/ARC2WCDX.java @@ -32,6 +32,8 @@ import org.archive.util.ArchiveUtils; import org.archive.util.SURT; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Create a 'Wide' CDX from an ARC. Takes one argument, the path to the ARC. * Writes .wcdx.gz in same directory. @@ -61,7 +63,7 @@ public static Object[] createWcdx(ARCReader reader) { PrintStream writer = null; long count = 0; try { - writer = new PrintStream(new GZIPOutputStream(new FileOutputStream(wcdxFile))); + writer = new PrintStream(new GZIPOutputStream(new FileOutputStream(wcdxFile)), false, UTF_8.name()); // write header: legend + timestamp StringBuilder legend = new StringBuilder(); diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java index 5c6a6854..8b571fad 100644 --- a/src/main/java/org/archive/io/warc/WARCWriter.java +++ b/src/main/java/org/archive/io/warc/WARCWriter.java @@ -45,6 +45,8 @@ import static org.archive.format.warc.WARCConstants.*; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * WARC implementation. @@ -357,12 +359,12 @@ public URI writeWarcinfoRecord(String filename, final String description) byte [] warcinfoBody = null; if (settings.getMetadata() == null) { // TODO: What to write into a warcinfo? What to associate? 
- warcinfoBody = "TODO: Unimplemented".getBytes(); + warcinfoBody = "TODO: Unimplemented".getBytes(UTF_8); } else { ByteArrayOutputStream baos = new ByteArrayOutputStream(); for (final Iterator i = settings.getMetadata().iterator(); i.hasNext();) { - baos.write(i.next().toString().getBytes(UTF8Bytes.UTF8)); + baos.write(i.next().toString().getBytes(UTF_8)); } warcinfoBody = baos.toByteArray(); } diff --git a/src/main/java/org/archive/url/URI.java b/src/main/java/org/archive/url/URI.java index ff53775e..b19151cd 100644 --- a/src/main/java/org/archive/url/URI.java +++ b/src/main/java/org/archive/url/URI.java @@ -42,6 +42,8 @@ import java.util.Hashtable; import java.util.Locale; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396. * This class has the purpose of supportting of parsing a URI reference to @@ -1696,7 +1698,7 @@ private static byte[] getBytes(String original, String charset) { try { return original.getBytes(charset); } catch (UnsupportedEncodingException e) { - return original.getBytes(); + return original.getBytes(UTF_8); } } diff --git a/src/main/java/org/archive/util/SURT.java b/src/main/java/org/archive/util/SURT.java index 059b2ec6..c52582e1 100644 --- a/src/main/java/org/archive/util/SURT.java +++ b/src/main/java/org/archive/util/SURT.java @@ -32,6 +32,8 @@ import org.archive.url.URIException; import org.archive.url.UsableURIFactory; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Sort-friendly URI Reordering Transform. * @@ -238,7 +240,7 @@ public static void main(String[] args) throws IOException { InputStream in = args.length > 0 ? new BufferedInputStream( new FileInputStream(args[0])) : System.in; PrintStream out = args.length > 1 ? 
new PrintStream( - new BufferedOutputStream(new FileOutputStream(args[1]))) + new BufferedOutputStream(new FileOutputStream(args[1])), false, UTF_8.name()) : System.out; BufferedReader br = new BufferedReader(new InputStreamReader(in)); diff --git a/src/main/java/org/archive/util/SurtPrefixSet.java b/src/main/java/org/archive/util/SurtPrefixSet.java index 32a34d53..b2f0ea4f 100644 --- a/src/main/java/org/archive/util/SurtPrefixSet.java +++ b/src/main/java/org/archive/util/SurtPrefixSet.java @@ -37,6 +37,8 @@ import org.archive.util.iterator.LineReadingIterator; import org.archive.util.iterator.RegexLineIterator; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Specialized TreeSet for keeping a set of String prefixes. * @@ -343,10 +345,10 @@ public static void main(String[] args) throws IOException { InputStream in = args.length > 0 ? new BufferedInputStream( new FileInputStream(args[0])) : System.in; PrintStream out = args.length > 1 ? new PrintStream( - new BufferedOutputStream(new FileOutputStream(args[1]))) + new BufferedOutputStream(new FileOutputStream(args[1])), false, UTF_8.name()) : System.out; BufferedReader br = - new BufferedReader(new InputStreamReader(in)); + new BufferedReader(new InputStreamReader(in, UTF_8.name())); String line; while((line = br.readLine())!=null) { if(line.indexOf("#")>0) line=line.substring(0,line.indexOf("#")); diff --git a/src/main/java/org/archive/util/TextUtils.java b/src/main/java/org/archive/util/TextUtils.java index 98b471f8..df3de58b 100644 --- a/src/main/java/org/archive/util/TextUtils.java +++ b/src/main/java/org/archive/util/TextUtils.java @@ -40,6 +40,8 @@ import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; +import static java.nio.charset.StandardCharsets.UTF_8; + public class TextUtils { private static final String FIRSTWORD = "^([^\\s]*).*$"; @@ -279,14 +281,11 @@ public static String exceptionToString(String message, Throwable e) { * @param s String to escape * 
@return URL-escaped string */ - @SuppressWarnings("deprecation") public static String urlEscape(String s) { try { - return URLEncoder.encode(s,"UTF8"); + return URLEncoder.encode(s, UTF_8.name()); } catch (UnsupportedEncodingException e) { - // should be impossible; all JVMs must support UTF8 - // but have a fallback just in case - return URLEncoder.encode(s); + return s; } } @@ -296,14 +295,11 @@ public static String urlEscape(String s) { * @param s String do unescape * @return URL-unescaped String */ - @SuppressWarnings("deprecation") public static String urlUnescape(String s) { try { - return URLDecoder.decode(s, "UTF8"); + return URLDecoder.decode(s, UTF_8.name()); } catch (UnsupportedEncodingException e) { - // should be impossible; all JVMs must support UTF8 - // but have a fallback just in case - return URLDecoder.decode(s); + return s; } } } \ No newline at end of file diff --git a/src/main/java/org/archive/util/binsearch/SortedTextFile.java b/src/main/java/org/archive/util/binsearch/SortedTextFile.java index a4326dc0..bb4a1f66 100644 --- a/src/main/java/org/archive/util/binsearch/SortedTextFile.java +++ b/src/main/java/org/archive/util/binsearch/SortedTextFile.java @@ -9,6 +9,8 @@ import org.archive.util.GeneralURIStreamFactory; import org.archive.util.iterator.CloseableIterator; +import static java.nio.charset.StandardCharsets.UTF_8; + public class SortedTextFile { public static class NumericComparator implements Comparator @@ -371,7 +373,7 @@ private long searchOffset(SeekableLineReader slr, String prev = null; while(true) { if (line != null) { - offset += line.getBytes().length + 1; + offset += line.getBytes(UTF_8).length + 1; } line = slr.readLine(); if(line == null) break; @@ -380,7 +382,7 @@ private long searchOffset(SeekableLineReader slr, } if (lessThan && prev != null) { - offset -= prev.getBytes().length + 1; + offset -= prev.getBytes(UTF_8).length + 1; } return offset; From c1e4cd323b73715e04f17502f3abcb12a09da89c Mon Sep 17 00:00:00 2001 From: 
Sebastian Nagel Date: Tue, 11 Nov 2025 22:54:57 +0100 Subject: [PATCH 120/169] Replace Charset.forName("utf-8") by StandardCharsets.UTF-8 --- src/main/java/org/archive/extract/ResourceExtractor.java | 5 ++--- src/main/java/org/archive/extract/WATExtractorOutput.java | 7 +++---- src/main/java/org/archive/format/arc/ARCConstants.java | 3 ++- .../java/org/archive/format/gzip/zipnum/ZipNumWriter.java | 6 +++--- .../org/archive/format/http/DumpingHTTPParseObserver.java | 3 +-- src/main/java/org/archive/format/http/HttpConstants.java | 3 ++- src/main/java/org/archive/url/BasicURLCanonicalizer.java | 6 ++---- src/main/java/org/archive/url/SURT.java | 4 ++-- src/main/java/org/archive/util/IAUtils.java | 2 +- .../archive/util/binsearch/AbstractSeekableLineReader.java | 3 ++- 10 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/archive/extract/ResourceExtractor.java b/src/main/java/org/archive/extract/ResourceExtractor.java index dcbfc122..d9b9f396 100644 --- a/src/main/java/org/archive/extract/ResourceExtractor.java +++ b/src/main/java/org/archive/extract/ResourceExtractor.java @@ -7,7 +7,7 @@ import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.net.URISyntaxException; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; @@ -27,7 +27,6 @@ public class ResourceExtractor implements ResourceConstants, Tool { private final static Logger LOG = Logger.getLogger(ResourceExtractor.class.getName()); - Charset UTF8 = Charset.forName("utf-8"); public final static String TOOL_NAME = "extractor"; public static final String TOOL_DESCRIPTION = "A tool for extracting metadata from WARC, ARC, and WAT files"; @@ -66,7 +65,7 @@ public static void main(String[] args) throws Exception { private PrintWriter makePrintWriter(OutputStream os) { - return new PrintWriter(new OutputStreamWriter(os, Charset.forName("UTF-8"))); + return new 
PrintWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)); } public int run(String[] args) diff --git a/src/main/java/org/archive/extract/WATExtractorOutput.java b/src/main/java/org/archive/extract/WATExtractorOutput.java index 79cb0870..bb179fd1 100644 --- a/src/main/java/org/archive/extract/WATExtractorOutput.java +++ b/src/main/java/org/archive/extract/WATExtractorOutput.java @@ -1,12 +1,10 @@ package org.archive.extract; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; -import java.nio.charset.Charset; import java.text.ParseException; import java.net.UnknownHostException; import java.util.Date; @@ -31,13 +29,14 @@ import java.util.logging.Logger; +import static java.nio.charset.StandardCharsets.UTF_8; + public class WATExtractorOutput implements ExtractorOutput { WARCRecordWriter recW; private boolean wroteFirst; private GZIPMemberWriter gzW; private static int DEFAULT_BUFFER_RAM = 1024 * 1024; private int bufferRAM = DEFAULT_BUFFER_RAM; - private final static Charset UTF8 = Charset.forName("UTF-8"); private String outputFile; private static final Logger LOG = Logger.getLogger(WATExtractorOutput.class.getName()); @@ -169,7 +168,7 @@ private void writeWARCMDRecord(OutputStream recOut, MetaData md, ByteArrayOutputStream bos = new ByteArrayOutputStream(); - OutputStreamWriter osw = new OutputStreamWriter(bos, UTF8); + OutputStreamWriter osw = new OutputStreamWriter(bos, UTF_8); try { md.write(osw); } catch (JSONException e1) { diff --git a/src/main/java/org/archive/format/arc/ARCConstants.java b/src/main/java/org/archive/format/arc/ARCConstants.java index 5987b49f..39dbf7ed 100755 --- a/src/main/java/org/archive/format/arc/ARCConstants.java +++ b/src/main/java/org/archive/format/arc/ARCConstants.java @@ -1,6 +1,7 @@ package org.archive.format.arc; import java.nio.charset.Charset; +import 
java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; import java.util.zip.Deflater; @@ -16,7 +17,7 @@ */ public interface ARCConstants extends ArchiveFileConstants { public final static int MAX_META_LENGTH = 1024 * 32; - public final static Charset ARC_META_CHARSET = Charset.forName("utf-8"); + public final static Charset ARC_META_CHARSET = StandardCharsets.UTF_8; public final static int NEW_LINE_ORD = 10; public static final int CARRIAGE_RETURN_ORD = 13; public final static String DELIMITER = " "; diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java index a104244a..c0e4e01d 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java +++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java @@ -3,18 +3,18 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.Charset; import org.archive.format.gzip.GZIPMemberWriter; import org.archive.format.gzip.GZIPMemberWriterCommittedOutputStream; +import static java.nio.charset.StandardCharsets.UTF_8; + public class ZipNumWriter extends GZIPMemberWriterCommittedOutputStream { int limit; int count; OutputStream manifestOut; ByteArrayOutputStream manifestBuffer; char delimiter = '\t'; - private static final Charset UTF8 = Charset.forName("utf-8"); public ZipNumWriter(OutputStream main, OutputStream manifest, int limit) { super(new GZIPMemberWriter(main)); manifestOut = manifest; @@ -51,7 +51,7 @@ private void finishCurrent() throws IOException { sb.append(delimiter); sb.append(len); sb.append(delimiter); - manifestOut.write(sb.toString().getBytes(UTF8)); + manifestOut.write(sb.toString().getBytes(UTF_8)); manifestBuffer.writeTo(manifestOut); manifestOut.flush(); count = 0; diff --git a/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java 
b/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java index 11cd9276..f1ac16c6 100755 --- a/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java +++ b/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java @@ -1,11 +1,10 @@ package org.archive.format.http; import java.io.PrintStream; -import java.nio.charset.Charset; import java.util.Locale; + public class DumpingHTTPParseObserver implements HttpHeaderObserver { - private static final Charset UTF8 = Charset.forName("UTF-8"); private PrintStream ps = null; public DumpingHTTPParseObserver() { ps = System.out; diff --git a/src/main/java/org/archive/format/http/HttpConstants.java b/src/main/java/org/archive/format/http/HttpConstants.java index fa0a7e10..8ae4d4db 100755 --- a/src/main/java/org/archive/format/http/HttpConstants.java +++ b/src/main/java/org/archive/format/http/HttpConstants.java @@ -1,9 +1,10 @@ package org.archive.format.http; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; public interface HttpConstants { - public static final Charset UTF8 = Charset.forName("UTF-8"); + public static final Charset UTF8 = StandardCharsets.UTF_8; public static final byte CR = 13; public static final byte LF = 10; public static final byte SP = 32; diff --git a/src/main/java/org/archive/url/BasicURLCanonicalizer.java b/src/main/java/org/archive/url/BasicURLCanonicalizer.java index 632d1ea7..dd0d9ac7 100644 --- a/src/main/java/org/archive/url/BasicURLCanonicalizer.java +++ b/src/main/java/org/archive/url/BasicURLCanonicalizer.java @@ -6,6 +6,7 @@ import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CoderResult; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Locale; import java.util.regex.Matcher; @@ -204,12 +205,9 @@ public String minimalEscape(String input) { return escapeOnce(unescapeRepeatedly(input)); } - protected static Charset _UTF8 = null; + protected static 
Charset _UTF8 = StandardCharsets.UTF_8; protected static Charset UTF8() { - if (_UTF8 == null) { - _UTF8 = Charset.forName("UTF-8"); - } return _UTF8; } diff --git a/src/main/java/org/archive/url/SURT.java b/src/main/java/org/archive/url/SURT.java index 3e0bcd55..9598f458 100644 --- a/src/main/java/org/archive/url/SURT.java +++ b/src/main/java/org/archive/url/SURT.java @@ -2,7 +2,7 @@ import java.io.BufferedReader; import java.io.InputStreamReader; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.logging.Logger; @@ -33,7 +33,7 @@ public static String toSURT(String input) { } public static void main(String[] args) { String line; - InputStreamReader isr = new InputStreamReader(System.in,Charset.forName("UTF-8")); + InputStreamReader isr = new InputStreamReader(System.in, StandardCharsets.UTF_8); BufferedReader br = new BufferedReader(isr); Iterator i = AbstractPeekableIterator.wrapReader(br); while(i.hasNext()) { diff --git a/src/main/java/org/archive/util/IAUtils.java b/src/main/java/org/archive/util/IAUtils.java index b0c448f0..1d15256e 100644 --- a/src/main/java/org/archive/util/IAUtils.java +++ b/src/main/java/org/archive/util/IAUtils.java @@ -37,7 +37,7 @@ * @author gojomo & others */ public class IAUtils { - public final static Charset UTF8 = Charset.forName("utf-8"); + public final static Charset UTF8 = UTF_8; final public static String COMMONS_VERSION = loadCommonsVersion(); final public static String PUBLISHER = loadCommons("publisher"); diff --git a/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java index de57278e..17d411fa 100644 --- a/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java +++ b/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java @@ -7,13 +7,14 @@ import java.io.InputStream; import java.io.InputStreamReader; import 
java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import org.archive.util.zip.GZIPMembersInputStream; import com.google.common.io.ByteStreams; public abstract class AbstractSeekableLineReader implements SeekableLineReader { - public final static Charset UTF8 = Charset.forName("UTF-8"); + public final static Charset UTF8 = StandardCharsets.UTF_8; protected int blockSize = 128 * 1024; From ed0070b7f6486fe48df0c00b03a9385fbd608fe5 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 11 Nov 2025 23:29:57 +0100 Subject: [PATCH 121/169] Replace FileReader and FileWriter using classes allowing to configure the charset. Use default charset for main methods when reading from stdin. --- .../org/archive/format/gzip/zipnum/ZipNumCluster.java | 6 +++--- src/main/java/org/archive/io/ArchiveReader.java | 7 +++++-- src/main/java/org/archive/net/PublicSuffixes.java | 9 ++++++--- src/main/java/org/archive/util/Grep.java | 11 +++++++---- src/main/java/org/archive/util/SURT.java | 3 ++- .../archive/util/binsearch/SeekCDXBenchmarker.java | 3 ++- 6 files changed, 25 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java index edf5857c..0a3fa1bf 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java +++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java @@ -11,9 +11,9 @@ */ import java.io.BufferedReader; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; -import java.nio.charset.StandardCharsets; +import java.io.InputStreamReader; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -370,7 +370,7 @@ protected void loadLastBlockSizes(String filename) totalAdjustment = 0; try { - reader = new BufferedReader(new FileReader(filename)); + reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), UTF_8)); while 
((line = reader.readLine()) != null) { String[] splits = line.split("\t"); diff --git a/src/main/java/org/archive/io/ArchiveReader.java b/src/main/java/org/archive/io/ArchiveReader.java index 53b8167b..070455a5 100644 --- a/src/main/java/org/archive/io/ArchiveReader.java +++ b/src/main/java/org/archive/io/ArchiveReader.java @@ -26,9 +26,10 @@ import java.io.EOFException; import java.io.File; import java.io.FileInputStream; -import java.io.FileWriter; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -45,6 +46,8 @@ import static org.archive.format.ArchiveFileConstants.*; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Reader for an Archive file of Archive {@link ArchiveRecord}s. @@ -660,7 +663,7 @@ protected void cdxOutput(boolean toFile) DOT_COMPRESSED_FILE_EXTENSION); cdxFilename = stripExtension(cdxFilename, getDotFileExtension()); cdxFilename += ('.' + CDX); - cdxWriter = new BufferedWriter(new FileWriter(cdxFilename)); + cdxWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(cdxFilename), UTF_8)); } String header = "CDX b e a m s c " + ((isCompressed()) ? 
"V" : "v") diff --git a/src/main/java/org/archive/net/PublicSuffixes.java b/src/main/java/org/archive/net/PublicSuffixes.java index a2a2bfb2..5b3219d5 100644 --- a/src/main/java/org/archive/net/PublicSuffixes.java +++ b/src/main/java/org/archive/net/PublicSuffixes.java @@ -22,13 +22,14 @@ import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileInputStream; -import java.io.FileWriter; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; import java.util.Locale; @@ -38,6 +39,8 @@ import org.apache.commons.io.IOUtils; import org.archive.util.TextUtils; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Utility class for making use of the information about 'public suffixes' at * http://publicsuffix.org. 
@@ -198,11 +201,11 @@ public static void main(String args[]) throws IOException { BufferedWriter writer; if (args.length >= 2) { // write to specified file - writer = new BufferedWriter(new FileWriter(args[1])); + writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]), UTF_8)); needsClose = true; } else { // write to stdout - writer = new BufferedWriter(new OutputStreamWriter(System.out)); + writer = new BufferedWriter(new OutputStreamWriter(System.out, Charset.defaultCharset())); } writer.append(regex); writer.flush(); diff --git a/src/main/java/org/archive/util/Grep.java b/src/main/java/org/archive/util/Grep.java index e446e47e..892429bd 100644 --- a/src/main/java/org/archive/util/Grep.java +++ b/src/main/java/org/archive/util/Grep.java @@ -1,10 +1,13 @@ package org.archive.util; +import static java.nio.charset.StandardCharsets.UTF_8; + import java.io.BufferedReader; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintStream; +import java.nio.charset.Charset; import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; @@ -119,14 +122,14 @@ protected void doTheGrepThing() throws Exception { if (files != null) { if (files.size() == 1) { - grep(new BufferedReader(new FileReader(files.get(0))), ""); + grep(new BufferedReader(new InputStreamReader(new FileInputStream(files.get(0)), UTF_8)), ""); } else { for (String path : files) { - grep(new BufferedReader(new FileReader(path)), path + ": "); + grep(new BufferedReader(new InputStreamReader(new FileInputStream(path), UTF_8)), path + ": "); } } } else { - grep(new BufferedReader(new InputStreamReader(System.in)), ""); + grep(new BufferedReader(new InputStreamReader(System.in, Charset.defaultCharset())), ""); } } diff --git a/src/main/java/org/archive/util/SURT.java b/src/main/java/org/archive/util/SURT.java index c52582e1..99347e9f 100644 --- 
a/src/main/java/org/archive/util/SURT.java +++ b/src/main/java/org/archive/util/SURT.java @@ -27,6 +27,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintStream; +import java.nio.charset.Charset; import java.util.regex.Matcher; import org.archive.url.URIException; @@ -243,7 +244,7 @@ public static void main(String[] args) throws IOException { new BufferedOutputStream(new FileOutputStream(args[1])), false, UTF_8.name()) : System.out; BufferedReader br = - new BufferedReader(new InputStreamReader(in)); + new BufferedReader(new InputStreamReader(in, Charset.defaultCharset())); String line; while((line = br.readLine())!=null) { if(line.indexOf("#")>0) line=line.substring(0,line.indexOf("#")); diff --git a/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java b/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java index 76b7b2b9..45c2ee04 100644 --- a/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java +++ b/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java @@ -3,6 +3,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.Charset; import org.archive.url.WaybackURLKeyMaker; import org.archive.util.binsearch.impl.MappedSeekableLineReaderFactory; @@ -52,7 +53,7 @@ public static void main(String[] args) throws IOException { SortedTextFile sorted = new SortedTextFile(factory); sorted.setBinsearchBlockSize(blocksize); - BufferedReader reader = new BufferedReader(new InputStreamReader(System.in)); + BufferedReader reader = new BufferedReader(new InputStreamReader(System.in, Charset.defaultCharset())); WaybackURLKeyMaker keymaker = new WaybackURLKeyMaker(true); From e3c06efb091377fd0474edd8eb18e0e67b80c3b3 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Wed, 12 Nov 2025 12:51:15 +0100 Subject: [PATCH 122/169] Unit tests: pass charset to all occurrences of String.getBytes() --- .../archive/io/HeaderedArchiveRecordTest.java | 10 
+++-- .../archive/io/RecordingInputStreamTest.java | 8 ++-- .../archive/io/RecordingOutputStreamTest.java | 40 ++++++++++--------- .../archive/io/ReplayCharSequenceTest.java | 6 ++- .../org/archive/io/arc/ARCWriterPoolTest.java | 8 ++-- .../org/archive/io/arc/ARCWriterTest.java | 12 +++--- .../org/archive/io/warc/WARCWriterTest.java | 8 ++-- 7 files changed, 53 insertions(+), 39 deletions(-) diff --git a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java index 005e2c49..65027395 100644 --- a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java +++ b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java @@ -31,6 +31,8 @@ import org.archive.io.warc.WARCRecord; import org.junit.jupiter.api.Test; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -68,7 +70,7 @@ public void testParseHttpHeadersInWARC() throws IOException { final String hdr = warcHeader + HTTPHEADER + BODY; - WARCRecord r = new WARCRecord(new ByteArrayInputStream(hdr.getBytes()), + WARCRecord r = new WARCRecord(new ByteArrayInputStream(hdr.getBytes(UTF_8)), "READER_IDENTIFIER", 0, false, true); HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true); @@ -156,7 +158,7 @@ public String getVersion() { } }; - ARCRecord r = new ARCRecord(new ByteArrayInputStream(hdr.getBytes()), + ARCRecord r = new ARCRecord(new ByteArrayInputStream(hdr.getBytes(UTF_8)), arh, 0, false, true, false); HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true); @@ -175,7 +177,7 @@ public void testEasierParseHttpHeadersInARC() throws IOException { + " 192.168.0.1 20070515111004 text/html 167568\n"; final String hdr = arcHeader + HTTPHEADER + BODY; - ARCRecord r = new ARCRecord(new ByteArrayInputStream(hdr.getBytes()), + ARCRecord r = new ARCRecord(new ByteArrayInputStream(hdr.getBytes(UTF_8)), 
"READER_IDENTIFIER", 0, false, true, false); HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true); @@ -205,7 +207,7 @@ public void testNoheaderWARC() throws IOException { String c = "WARC/0.12\r\nContent-Type: text/plain\r\n" + "Content-Length: " + b.length() + "\r\n\r\n" + b; org.archive.io.warc.WARCRecord r = new org.archive.io.warc.WARCRecord( - new ByteArrayInputStream(c.getBytes()), "READER_IDENTIFIER", 0, + new ByteArrayInputStream(c.getBytes(UTF_8)), "READER_IDENTIFIER", 0, false, true); HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true); assertTrue(har.isStrict()); diff --git a/src/test/java/org/archive/io/RecordingInputStreamTest.java b/src/test/java/org/archive/io/RecordingInputStreamTest.java index 49160aa3..8ccee986 100644 --- a/src/test/java/org/archive/io/RecordingInputStreamTest.java +++ b/src/test/java/org/archive/io/RecordingInputStreamTest.java @@ -28,6 +28,8 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -55,7 +57,7 @@ public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException, RecordingInputStream ris = new RecordingInputStream(16384, (new File( tempDir, "testReadFullyOrUntil").getAbsolutePath())); ByteArrayInputStream bais = new ByteArrayInputStream( - "abcdefghijklmnopqrstuvwxyz".getBytes()); + "abcdefghijklmnopqrstuvwxyz".getBytes(UTF_8)); // test soft max ris.open(bais); ris.setLimits(10,0,0); @@ -87,7 +89,7 @@ public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException, PipedOutputStream pout = new PipedOutputStream(pin); ris.open(pin); exceptionThrown = false; - trickle("abcdefghijklmnopqrstuvwxyz".getBytes(),pout); + trickle("abcdefghijklmnopqrstuvwxyz".getBytes(UTF_8),pout); int timeout = 200; try { ris.setLimits(0, timeout,0); @@ -133,7 +135,7 @@ public void 
testAsOutputStream() throws IOException { RecordingInputStream ris = new RecordingInputStream(16384, (new File( tempDir, "testAsOutputStream").getAbsolutePath())); ris.open(null); - ris.asOutputStream().write("hello".getBytes()); + ris.asOutputStream().write("hello".getBytes(UTF_8)); ris.close(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); ris.getReplayInputStream().readFullyTo(baos); diff --git a/src/test/java/org/archive/io/RecordingOutputStreamTest.java b/src/test/java/org/archive/io/RecordingOutputStreamTest.java index c94f8245..0dba910e 100644 --- a/src/test/java/org/archive/io/RecordingOutputStreamTest.java +++ b/src/test/java/org/archive/io/RecordingOutputStreamTest.java @@ -28,6 +28,8 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -266,61 +268,61 @@ public void testMessageBodyBegin() throws IOException { ros.setSha1Digest(); ros.open(new ByteArrayOutputStream()); - ros.write("0123456789\n\nabcdefghij".getBytes()); + ros.write("0123456789\n\nabcdefghij".getBytes(UTF_8)); assertEquals(12, ros.getMessageBodyBegin()); assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); ros.close(); ros.open(new ByteArrayOutputStream()); - ros.write("0123456789\r\n\r\nabcdefghij".getBytes()); + ros.write("0123456789\r\n\r\nabcdefghij".getBytes(UTF_8)); assertEquals(14, ros.getMessageBodyBegin()); assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); ros.close(); ros.open(new ByteArrayOutputStream()); - ros.write("0123456789\n\r\nabcdefghij".getBytes()); + ros.write("0123456789\n\r\nabcdefghij".getBytes(UTF_8)); assertEquals(13, ros.getMessageBodyBegin()); assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); ros.close(); ros.open(new 
ByteArrayOutputStream()); - ros.write("0123456789\n".getBytes()); + ros.write("0123456789\n".getBytes(UTF_8)); assertEquals(-1, ros.getMessageBodyBegin()); - ros.write("\nabcdefghij".getBytes()); + ros.write("\nabcdefghij".getBytes(UTF_8)); assertEquals(12, ros.getMessageBodyBegin()); assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); ros.close(); ros.open(new ByteArrayOutputStream()); - ros.write("0123456789\n".getBytes()); + ros.write("0123456789\n".getBytes(UTF_8)); assertEquals(-1, ros.getMessageBodyBegin()); - ros.write("\r\nabcdefghij".getBytes()); + ros.write("\r\nabcdefghij".getBytes(UTF_8)); assertEquals(13, ros.getMessageBodyBegin()); assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); ros.close(); ros.open(new ByteArrayOutputStream()); - ros.write("0123456789\n\r".getBytes()); + ros.write("0123456789\n\r".getBytes(UTF_8)); assertEquals(-1, ros.getMessageBodyBegin()); - ros.write("\nabcdefghij".getBytes()); + ros.write("\nabcdefghij".getBytes(UTF_8)); assertEquals(13, ros.getMessageBodyBegin()); assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); ros.close(); ros.open(new ByteArrayOutputStream()); - ros.write("0123456789".getBytes()); + ros.write("0123456789".getBytes(UTF_8)); ros.write('\n'); assertEquals(-1, ros.getMessageBodyBegin()); - ros.write("\nabcdefghij".getBytes()); + ros.write("\nabcdefghij".getBytes(UTF_8)); assertEquals(12, ros.getMessageBodyBegin()); assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); ros.close(); ros.open(new ByteArrayOutputStream()); - ros.write("0123456789".getBytes()); + ros.write("0123456789".getBytes(UTF_8)); ros.write('\n'); ros.write('\n'); - for (int b: "abcdefghij".getBytes()) { + for (int b: "abcdefghij".getBytes(UTF_8)) { ros.write(b); } assertEquals(12, ros.getMessageBodyBegin()); @@ -328,11 +330,11 @@ public void testMessageBodyBegin() throws IOException { ros.close(); 
ros.open(new ByteArrayOutputStream()); - ros.write("0123456789".getBytes()); + ros.write("0123456789".getBytes(UTF_8)); ros.write('\n'); ros.write('\r'); ros.write('\n'); - for (int b: "abcdefghij".getBytes()) { + for (int b: "abcdefghij".getBytes(UTF_8)) { ros.write(b); } assertEquals(13, ros.getMessageBodyBegin()); @@ -340,17 +342,17 @@ public void testMessageBodyBegin() throws IOException { ros.close(); ros.open(new ByteArrayOutputStream()); - ros.write("0123456789\n".getBytes()); + ros.write("0123456789\n".getBytes(UTF_8)); ros.write('\n'); - ros.write("abcdefghij".getBytes()); + ros.write("abcdefghij".getBytes(UTF_8)); assertEquals(12, ros.getMessageBodyBegin()); assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); ros.close(); ros.open(new ByteArrayOutputStream()); - ros.write("0123456789\n\r".getBytes()); + ros.write("0123456789\n\r".getBytes(UTF_8)); ros.write('\n'); - ros.write("abcdefghij".getBytes()); + ros.write("abcdefghij".getBytes(UTF_8)); assertEquals(13, ros.getMessageBodyBegin()); assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); ros.close(); diff --git a/src/test/java/org/archive/io/ReplayCharSequenceTest.java b/src/test/java/org/archive/io/ReplayCharSequenceTest.java index 3234259c..f0b688a9 100644 --- a/src/test/java/org/archive/io/ReplayCharSequenceTest.java +++ b/src/test/java/org/archive/io/ReplayCharSequenceTest.java @@ -36,6 +36,8 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.*; /** @@ -143,7 +145,7 @@ public void testGetReplayCharSequenceMultiByteZeroOffset() @Test public void testReplayCharSequenceByteToString() throws IOException { String fileContent = "Some file content"; - byte [] buffer = fileContent.getBytes(); + byte [] buffer = fileContent.getBytes(UTF_8); RecordingOutputStream ros = writeTestStream( buffer,1, 
"testReplayCharSequenceByteToString.txt",0); @@ -207,7 +209,7 @@ public void testSingleByteEncodings() throws IOException { @Test public void testReplayCharSequenceByteToStringOverflow() throws IOException { String fileContent = "Some file content. "; // ascii - byte [] buffer = fileContent.getBytes(); + byte [] buffer = fileContent.getBytes(UTF_8); RecordingOutputStream ros = writeTestStream( buffer,1, "testReplayCharSequenceByteToStringOverflow.txt",1); diff --git a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java index 954da636..f6820337 100644 --- a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java +++ b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java @@ -30,6 +30,8 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.archive.format.arc.ARCConstants.*; @@ -51,7 +53,7 @@ public void testARCWriterPool() WriterPoolMember [] writers = new WriterPoolMember[MAX_ACTIVE]; final String CONTENT = "Any old content"; ByteArrayOutputStream baos = new ByteArrayOutputStream(); - baos.write(CONTENT.getBytes()); + baos.write(CONTENT.getBytes(UTF_8)); for (int i = 0; i < MAX_ACTIVE; i++) { writers[i] = pool.borrowFile(); assertEquals(i + 1, pool.getNumActive(), "Number active"); @@ -81,7 +83,7 @@ public void testInvalidate() throws Exception { WriterPoolMember [] writers = new WriterPoolMember[MAX_ACTIVE]; final String CONTENT = "Any old content"; ByteArrayOutputStream baos = new ByteArrayOutputStream(); - baos.write(CONTENT.getBytes()); + baos.write(CONTENT.getBytes(UTF_8)); for (int i = 0; i < MAX_ACTIVE; i++) { writers[i] = pool.borrowFile(); assertEquals(i + 1, pool.getNumActive(), "Number active"); @@ -124,4 +126,4 @@ private WriterPoolSettings getSettings(final boolean isCompressed) { Arrays.asList(files), null); } -} \ No newline at 
end of file +} diff --git a/src/test/java/org/archive/io/arc/ARCWriterTest.java b/src/test/java/org/archive/io/arc/ARCWriterTest.java index ca300697..8b2f7d64 100644 --- a/src/test/java/org/archive/io/arc/ARCWriterTest.java +++ b/src/test/java/org/archive/io/arc/ARCWriterTest.java @@ -47,6 +47,8 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.*; import static org.archive.format.arc.ARCConstants.*; @@ -122,11 +124,11 @@ protected int writeRandomHTTPRecord(ARCWriter arcWriter, int index) // Start the record with an arbitrary 14-digit date per RFC2540 String now = ArchiveUtils.get14DigitDate(); int recordLength = 0; - byte[] record = (getContent(indexStr)).getBytes(); + byte[] record = (getContent(indexStr)).getBytes(UTF_8); recordLength += record.length; baos.write(record); // Add the newline between records back in - baos.write("\n".getBytes()); + baos.write("\n".getBytes(UTF_8)); recordLength += 1; arcWriter.write("http://www.one.net/id=" + indexStr, "text/html", "0.1.2.3", Long.parseLong(now), recordLength, baos); @@ -305,7 +307,7 @@ protected CorruptibleARCWriter createARCWriter(String name, boolean compress) { protected static ByteArrayInputStream getBais(String str) throws IOException { - return new ByteArrayInputStream(str.getBytes()); + return new ByteArrayInputStream(str.getBytes(UTF_8)); } /** @@ -417,7 +419,7 @@ protected void lengthTooShort(String name, boolean compress, boolean strict) ByteArrayInputStream bais = getBais(content+"SOME TRAILING BYTES"); writeRecord(writer, SOME_URL, "text/html", content.length(), bais); - writer.setEndJunk("SOME TRAILING BYTES".getBytes()); + writer.setEndJunk("SOME TRAILING BYTES".getBytes(UTF_8)); writeRecord(writer, SOME_URL, "text/html", content.length(), getBais(content)); } finally { @@ -518,7 +520,7 @@ public void testGapError() throws IOException { String content = getContent(); 
// Make a 'weird' RIS that returns bad 'remaining' length // awhen remaining should be 0 - ReplayInputStream ris = new ReplayInputStream(content.getBytes(), + ReplayInputStream ris = new ReplayInputStream(content.getBytes(UTF_8), content.length(), null) { public long remaining() { return (super.remaining()==0) ? -1 : super.remaining(); diff --git a/src/test/java/org/archive/io/warc/WARCWriterTest.java b/src/test/java/org/archive/io/warc/WARCWriterTest.java index c0ace5f0..d2684fa4 100644 --- a/src/test/java/org/archive/io/warc/WARCWriterTest.java +++ b/src/test/java/org/archive/io/warc/WARCWriterTest.java @@ -42,6 +42,8 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.*; import static org.archive.format.warc.WARCConstants.*; @@ -228,7 +230,7 @@ protected int writeRandomHTTPRecord(WARCWriter w, int index) String indexStr = Integer.toString(index); recordInfo.setUrl("http://www.one.net/id=" + indexStr); - byte[] record = (getContent(indexStr)).getBytes(); + byte[] record = (getContent(indexStr)).getBytes(UTF_8); recordInfo.setContentLength((long) record.length); ByteArrayOutputStream baos = new ByteArrayOutputStream(); @@ -385,7 +387,7 @@ protected WARCWriter createWARCWriter(String name, protected static ByteArrayOutputStream getBaos(String str) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); - baos.write(str.getBytes()); + baos.write(str.getBytes(UTF_8)); return baos; } @@ -524,4 +526,4 @@ public void testArcRecordOffsetReads() throws Exception { assertTrue(totalRead > 0); } } -} \ No newline at end of file +} From 6b0f0f29f8193118396d1cd693dc1a086c63d755 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Wed, 12 Nov 2025 13:20:24 +0100 Subject: [PATCH 123/169] Unit tests: add Locale.ROOT as parameter to all occurrences of PrintStream.format(...) and number formatters. Unify usage charset constants. 
--- .../format/gzip/zipnum/ZipNumWriterTest.java | 10 ++++---- .../org/archive/format/json/JSONViewTest.java | 8 ++++--- .../format/text/html/CDATALexerTest.java | 4 +++- .../archive/io/HeaderedArchiveRecordTest.java | 6 ++--- .../archive/io/RecordingInputStreamTest.java | 9 ++++---- .../archive/io/ReplayCharSequenceTest.java | 23 ++++++++++--------- .../io/RepositionableInputStreamTest.java | 4 +++- .../html/ExtractingParseObserverTest.java | 3 ++- .../resource/html/HTMLMetaDataTest.java | 4 +++- .../url/BasicURLCanonicalizerTest.java | 5 ++-- .../java/org/archive/url/URLParserTest.java | 10 +++++--- .../archive/url/URLRegexTransformerTest.java | 4 +++- .../java/org/archive/util/ByteOpTest.java | 5 ++-- .../org/archive/util/CrossProductTest.java | 8 ++++++- src/test/java/org/archive/util/TestUtils.java | 5 ++-- .../util/binsearch/SortedTextFileTest.java | 5 +++- 16 files changed, 72 insertions(+), 41 deletions(-) diff --git a/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java b/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java index 25a5eaa7..13658bcb 100644 --- a/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java +++ b/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java @@ -10,7 +10,7 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import java.nio.charset.StandardCharsets; +import java.util.Locale; import org.archive.format.gzip.GZIPMemberSeries; import org.archive.format.gzip.GZIPSeriesMember; @@ -18,6 +18,8 @@ import org.junit.jupiter.api.Test; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.assertEquals; public class ZipNumWriterTest { @@ -28,16 +30,16 @@ public void testAddRecord() throws IOException { File summ = File.createTempFile("test-znw",".summ"); main.deleteOnExit(); summ.deleteOnExit(); - System.out.format("Summ: %s\n", summ.getAbsolutePath()); + System.out.format(Locale.ROOT, 
"Summ: %s\n", summ.getAbsolutePath()); int limit = 10; ZipNumWriter znw = new ZipNumWriter(new FileOutputStream(main,false), new FileOutputStream(summ,false), limit); for(int i = 0; i < 1000; i++) { - znw.addRecord(String.format("%06d\n",i).getBytes(StandardCharsets.UTF_8)); + znw.addRecord(String.format(Locale.ROOT,"%06d\n",i).getBytes(UTF_8)); } znw.close(); InputStreamReader isr = - new InputStreamReader(new FileInputStream(summ), StandardCharsets.UTF_8); + new InputStreamReader(new FileInputStream(summ), UTF_8); BufferedReader br = new BufferedReader(isr); String line = null; int count = 0; diff --git a/src/test/java/org/archive/format/json/JSONViewTest.java b/src/test/java/org/archive/format/json/JSONViewTest.java index aabbe7df..6d199025 100644 --- a/src/test/java/org/archive/format/json/JSONViewTest.java +++ b/src/test/java/org/archive/format/json/JSONViewTest.java @@ -1,5 +1,7 @@ package org.archive.format.json; +import java.util.Locale; + import org.archive.util.TestUtils; import org.json.JSONException; import org.json.JSONObject; @@ -17,16 +19,16 @@ public void testBytes() throws JSONException { JSONObject o = new JSONObject(); o.append("name1", "val\\rue1"); String json = o.toString(); - System.out.format("once: (%s)\n",json); + System.out.format(Locale.ROOT, "once: (%s)\n", json); JSONObject o2 = new JSONObject(json); - System.out.format("twice: (%s)\n",o2.toString()); + System.out.format(Locale.ROOT, "twice: (%s)\n", o2.toString()); byte b[] = new byte[2]; for(int i = 0; i < 256; i++) { b[0] = (byte) i; int gi = getInt(b); - System.out.format("I(%d) gi(%d)\n",i,gi); + System.out.format(Locale.ROOT, "I(%d) gi(%d)\n", i, gi); } } diff --git a/src/test/java/org/archive/format/text/html/CDATALexerTest.java b/src/test/java/org/archive/format/text/html/CDATALexerTest.java index 856576ba..7c9f24f3 100644 --- a/src/test/java/org/archive/format/text/html/CDATALexerTest.java +++ b/src/test/java/org/archive/format/text/html/CDATALexerTest.java @@ -10,6 +10,8 @@ 
import static org.junit.jupiter.api.Assertions.*; +import java.util.Locale; + public class CDATALexerTest { CDATALexer l; Node n; @@ -102,7 +104,7 @@ public void testInJSComment() throws ParserException { } private void assertJSContentWorks(String js) throws ParserException { - String html = String.format("",js); + String html = String.format(Locale.ROOT,"",js); l = makeLexer(html); assertFalse(l.inCSS()); assertFalse(l.inJS()); diff --git a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java index 65027395..5d31b890 100644 --- a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java +++ b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java @@ -78,7 +78,7 @@ public void testParseHttpHeadersInWARC() throws IOException { byte[] b = new byte[BODY.length()]; har.read(b); - String bodyRead = new String(b); + String bodyRead = new String(b, UTF_8); assertEquals(BODY, bodyRead); assertHeaderCorrectlyParsed(har.getContentHeaders()); assertEquals(har.getHeader().getUrl(), url, @@ -165,7 +165,7 @@ public String getVersion() { har.skipHttpHeader(); byte[] b = new byte[BODY.length()]; har.read(b); - String bodyRead = new String(b); + String bodyRead = new String(b, UTF_8); assertEquals(BODY, bodyRead); assertHeaderCorrectlyParsed(har.getContentHeaders()); } @@ -184,7 +184,7 @@ public void testEasierParseHttpHeadersInARC() throws IOException { har.skipHttpHeader(); byte[] b = new byte[BODY.length()]; har.read(b); - String bodyRead = new String(b); + String bodyRead = new String(b, UTF_8); assertEquals(BODY, bodyRead); assertHeaderCorrectlyParsed(har.getContentHeaders()); assertEquals(har.getHeader().getUrl(), url, "failed to retrieve Url from metadata"); diff --git a/src/test/java/org/archive/io/RecordingInputStreamTest.java b/src/test/java/org/archive/io/RecordingInputStreamTest.java index 8ccee986..d794d925 100644 --- a/src/test/java/org/archive/io/RecordingInputStreamTest.java +++ 
b/src/test/java/org/archive/io/RecordingInputStreamTest.java @@ -66,8 +66,9 @@ public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException, ReplayInputStream res = ris.getReplayInputStream(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); res.readFullyTo(baos); - assertEquals("abcdefg",new String(baos.toByteArray()),"soft max cutoff"); - // test hard max + assertEquals("abcdefg", new String(baos.toByteArray(), UTF_8), + "soft max cutoff"); + // test hard max bais.reset(); baos.reset(); ris.open(bais); @@ -82,8 +83,8 @@ public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException, ris.close(); res = ris.getReplayInputStream(); res.readFullyTo(baos); - assertEquals("abcdefghijk",new String(baos.toByteArray()), - "hard max cutoff"); + assertEquals("abcdefghijk", new String(baos.toByteArray(), UTF_8), + "hard max cutoff"); // test timeout PipedInputStream pin = new PipedInputStream(); PipedOutputStream pout = new PipedOutputStream(pin); diff --git a/src/test/java/org/archive/io/ReplayCharSequenceTest.java b/src/test/java/org/archive/io/ReplayCharSequenceTest.java index f0b688a9..3935837b 100644 --- a/src/test/java/org/archive/io/ReplayCharSequenceTest.java +++ b/src/test/java/org/archive/io/ReplayCharSequenceTest.java @@ -25,17 +25,19 @@ import java.nio.charset.StandardCharsets; import java.text.NumberFormat; import java.util.Date; +import java.util.Locale; import java.util.Random; import java.util.logging.Logger; import org.archive.util.FileUtils; -import com.google.common.base.Charsets; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import static java.nio.charset.StandardCharsets.US_ASCII; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.jupiter.api.Assertions.*; @@ -135,7 +137,7 @@ public void 
testGetReplayCharSequenceMultiByteZeroOffset() RecordingOutputStream ros = writeTestStream( regularBuffer,MULTIPLIER, "testGetReplayCharSequenceMultiByteZeroOffset",MULTIPLIER); - ReplayCharSequence rcs = getReplayCharSequence(ros,Charsets.UTF_8); + ReplayCharSequence rcs = getReplayCharSequence(ros, UTF_8); for (int i = 0; i < MULTIPLIER; i++) { accessingCharacters(rcs); @@ -181,7 +183,7 @@ public void testSingleByteEncodings() throws IOException { String latin1String = new String(bytes, "latin1"); RecordingOutputStream ros = writeTestStream( bytes, 1, "testSingleByteEncodings-latin1.txt", 0); - ReplayCharSequence rcs = getReplayCharSequence(ros,Charsets.ISO_8859_1); + ReplayCharSequence rcs = getReplayCharSequence(ros, ISO_8859_1); String result = rcs.toString(); logger.fine("latin1[0] " + toHexString(latin1String)); logger.fine("latin1[1] " + toHexString(result)); @@ -219,8 +221,8 @@ public void testReplayCharSequenceByteToStringOverflow() throws IOException { // both encodings because they exercise different code paths. UTF-8 is // decoded to UTF-16 while windows-1252 is memory mapped directly. 
See // GenericReplayCharSequence - ReplayCharSequence rcsUtf8 = getReplayCharSequence(ros,Charsets.UTF_8); - ReplayCharSequence rcs1252 = getReplayCharSequence(ros,Charset.forName("windows-1252")); + ReplayCharSequence rcsUtf8 = getReplayCharSequence(ros, UTF_8); + ReplayCharSequence rcs1252 = getReplayCharSequence(ros, Charset.forName("windows-1252")); String result = rcsUtf8.toString(); assertEquals(expectedContent, result, "Strings don't match"); @@ -244,7 +246,7 @@ public void testReplayCharSequenceByteToStringMulti() throws IOException { buffer,1, "testReplayCharSequenceByteToStringMulti.txt",MULTIPLICAND-1); for (int i = 0; i < 3; i++) { - ReplayCharSequence rcs = getReplayCharSequence(ros,StandardCharsets.UTF_8); + ReplayCharSequence rcs = getReplayCharSequence(ros, UTF_8); String result = rcs.toString(); assertEquals(result, expectedResult, "Strings don't match"); rcs.close(); @@ -257,8 +259,7 @@ public void testReplayCharSequenceByteToStringMulti() throws IOException { @Disabled public void xestHugeReplayCharSequence() throws IOException { String fileContent = "01234567890123456789"; - String characterEncoding = "ascii"; - byte[] buffer = fileContent.getBytes(characterEncoding); + byte[] buffer = fileContent.getBytes(US_ASCII); long reps = (long) Integer.MAX_VALUE / (long) buffer.length + 1000000l; @@ -266,7 +267,7 @@ public void xestHugeReplayCharSequence() throws IOException { + " bytes to testHugeReplayCharSequence.txt"); RecordingOutputStream ros = writeTestStream(buffer, 0, "testHugeReplayCharSequence.txt", reps); - ReplayCharSequence rcs = getReplayCharSequence(ros,Charset.forName(characterEncoding)); + ReplayCharSequence rcs = getReplayCharSequence(ros, US_ASCII); if (reps * fileContent.length() > (long) Integer.MAX_VALUE) { assertEquals(Integer.MAX_VALUE, rcs.length(), "ReplayCharSequence has wrong length (length()=" @@ -285,7 +286,7 @@ public void xestHugeReplayCharSequence() throws IOException { // NumberFormat.getInstance().format(index)); 
assertEquals(fileContent.charAt(index % fileContent.length()), rcs.charAt(index), "Characters don't match (index=" - + NumberFormat.getInstance().format(index) + ")"); + + NumberFormat.getInstance(Locale.ROOT).format(index) + ")"); } // check that out of bounds indices throw exception @@ -309,7 +310,7 @@ public void xestHugeReplayCharSequence() throws IOException { // NumberFormat.getInstance().format(index)); assertEquals(fileContent.charAt(index % fileContent.length()), rcs.charAt(index), "Characters don't match (index=" - + NumberFormat.getInstance().format(index) + ")"); + + NumberFormat.getInstance(Locale.ROOT).format(index) + ")"); } } diff --git a/src/test/java/org/archive/io/RepositionableInputStreamTest.java b/src/test/java/org/archive/io/RepositionableInputStreamTest.java index 228c9042..08143d01 100644 --- a/src/test/java/org/archive/io/RepositionableInputStreamTest.java +++ b/src/test/java/org/archive/io/RepositionableInputStreamTest.java @@ -27,6 +27,8 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.assertEquals; public class RepositionableInputStreamTest { @@ -63,7 +65,7 @@ public void testname() throws Exception { long offset = 0; for (int i = 0; i < 10; i++) { ris.read(bytes, 0, LINE.length()); - assertEquals(LINE, new String(bytes)); + assertEquals(LINE, new String(bytes, UTF_8)); offset += LINE.length(); assertEquals(offset, ris.position()); } diff --git a/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java b/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java index 157499ff..e34d4e6f 100644 --- a/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java +++ b/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import 
java.util.logging.Logger; import org.archive.extract.ExtractingResourceFactoryMapper; @@ -52,7 +53,7 @@ public void testHandleStyleNodeExceptions() throws Exception { TextNode tn = new TextNode(css); epo.handleStyleNode(tn); } catch(Exception e) { - System.err.format("And the winner is....(%s)\n", css); + System.err.format(Locale.ROOT, "And the winner is....(%s)\n", css); e.printStackTrace(); except = true; throw e; diff --git a/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java b/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java index 3b4193b9..a3c8c1c9 100644 --- a/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java +++ b/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java @@ -1,5 +1,7 @@ package org.archive.resource.html; +import java.util.Locale; + import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -59,7 +61,7 @@ private void appendStrArr(JSONObject o, String a[][]) throws JSONException { } private void appendStrArr2(JSONObject o, String k, String... 
a) throws JSONException { - System.out.format("A length(%d)\n", a.length); + System.out.format(Locale.ROOT, "A length(%d)\n", a.length); JSONObject n = new JSONObject(); if((a.length & 1) == 1) { throw new IllegalArgumentException(); diff --git a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java index 19b1984f..45989416 100644 --- a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java @@ -1,6 +1,7 @@ package org.archive.url; import java.net.URISyntaxException; +import java.util.Locale; import org.junit.jupiter.api.Test; @@ -204,12 +205,12 @@ public void testFoo() { String path = "/a/b/c/"; String[] paths = path.split("/",-1); for(String p : paths) { - System.out.format("(%s)",p); + System.out.format(Locale.ROOT, "(%s)", p); } System.out.println(); paths = path.split("/"); for(String p : paths) { - System.out.format("(%s)",p); + System.out.format(Locale.ROOT, "(%s)", p); } System.out.println(); } diff --git a/src/test/java/org/archive/url/URLParserTest.java b/src/test/java/org/archive/url/URLParserTest.java index bc8fc3a5..c942a260 100644 --- a/src/test/java/org/archive/url/URLParserTest.java +++ b/src/test/java/org/archive/url/URLParserTest.java @@ -3,10 +3,14 @@ import java.io.UnsupportedEncodingException; import java.net.URISyntaxException; import java.net.URLDecoder; +import java.util.Locale; import com.google.common.net.InetAddresses; + import org.junit.jupiter.api.Test; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.assertEquals; public class URLParserTest { @@ -15,7 +19,7 @@ public void testGuava() throws URIException, UnsupportedEncodingException { Long l = Long.parseLong("3279880203"); int i2 = l.intValue(); // int i = Integer.decode("3279880203"); - System.err.format("FromNum(%s)\n", InetAddresses.fromInteger(i2).getHostAddress()); + 
System.err.format(Locale.ROOT, "FromNum(%s)\n", InetAddresses.fromInteger(i2).getHostAddress()); } @Test @@ -30,7 +34,7 @@ public void testAddDefaultSchemeIfNeeded() { @Test public void testParse() throws UnsupportedEncodingException, URISyntaxException { - System.out.format("O(%s) E(%s)\n","%66",URLDecoder.decode("%66","UTF-8")); + System.out.format(Locale.ROOT, "O(%s) E(%s)\n","%66", URLDecoder.decode("%66", UTF_8.name())); checkParse("http://www.archive.org/index.html#foo", null, "http", null, null, "www.archive.org", -1, "/index.html", null, "foo", "http://www.archive.org/index.html#foo", "/index.html"); @@ -96,7 +100,7 @@ private void checkParse(String s, String opaque, String scheme, String authUser, String authPass, String host, int port, String path, String query, String fragment, String urlString, String pathQuery) throws URISyntaxException { HandyURL h = URLParser.parse(s); - System.out.format("Input:(%s)\nHandyURL\t%s\n",s,h.toDebugString()); + System.out.format(Locale.ROOT, "Input:(%s)\nHandyURL\t%s\n", s, h.toDebugString()); assertEquals(scheme, h.getScheme()); assertEquals(authUser, h.getAuthUser()); assertEquals(authPass, h.getAuthPass()); diff --git a/src/test/java/org/archive/url/URLRegexTransformerTest.java b/src/test/java/org/archive/url/URLRegexTransformerTest.java index 73c43f96..d5c98f6a 100644 --- a/src/test/java/org/archive/url/URLRegexTransformerTest.java +++ b/src/test/java/org/archive/url/URLRegexTransformerTest.java @@ -5,6 +5,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; +import java.util.Locale; + public class URLRegexTransformerTest { @Test @@ -49,7 +51,7 @@ public void testStripPathSessionID() { private static void checkStripPathSessionID(String orig, String want) { String got = URLRegexTransformer.stripPathSessionID(orig); - assertEquals(want, got, String.format("FAIL Orig(%s) Got(%s) Want(%s)", orig, got, want)); + assertEquals(want, got, String.format(Locale.ROOT, "FAIL Orig(%s) Got(%s) Want(%s)", orig, got, 
want)); } // private static final String BASE = "http://www.archive.org/index.html"; diff --git a/src/test/java/org/archive/util/ByteOpTest.java b/src/test/java/org/archive/util/ByteOpTest.java index 49781c36..eb89353e 100644 --- a/src/test/java/org/archive/util/ByteOpTest.java +++ b/src/test/java/org/archive/util/ByteOpTest.java @@ -4,6 +4,7 @@ import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.IOException; +import java.util.Locale; import com.google.common.io.LittleEndianDataOutputStream; @@ -18,10 +19,10 @@ public void testReadShort() throws IOException { byte a[] = new byte[]{0,1,2,3}; ByteArrayInputStream bais = new ByteArrayInputStream(a); int bos = ByteOp.readShort(bais); - System.out.format("BO.Read short(%d)\n", bos); + System.out.format(Locale.ROOT, "BO.Read short(%d)\n", bos); DataInputStream dis = new DataInputStream(new ByteArrayInputStream(a)); int disv = dis.readUnsignedShort(); - System.out.format("DI.Read short(%d)\n", disv); + System.out.format(Locale.ROOT, "DI.Read short(%d)\n", disv); for(int i = 0; i < 256 * 256; i++) { ByteArrayOutputStream baos = new ByteArrayOutputStream(2); LittleEndianDataOutputStream dos = new LittleEndianDataOutputStream(baos); diff --git a/src/test/java/org/archive/util/CrossProductTest.java b/src/test/java/org/archive/util/CrossProductTest.java index 211fa65e..a487ab15 100644 --- a/src/test/java/org/archive/util/CrossProductTest.java +++ b/src/test/java/org/archive/util/CrossProductTest.java @@ -2,10 +2,12 @@ import java.util.ArrayList; import java.util.List; +import java.util.Locale; import org.junit.jupiter.api.Test; public class CrossProductTest { + private void dumpC(List a) { StringBuilder sb = new StringBuilder(); boolean first = false; @@ -19,16 +21,19 @@ private void dumpC(List a) { } System.out.println("Dump:" + sb.toString()); } + private void dumpLOL(List> coc) { for(List co : coc) { dumpC(co); } } + @Test public void testVersion() { String version = 
IAUtils.loadCommonsVersion(); - System.out.format("Loaded version(%s)\n", version); + System.out.format(Locale.ROOT, "Loaded version(%s)\n", version); } + @Test public void testCrossProduct() { ArrayList> input = new ArrayList>(); @@ -40,6 +45,7 @@ public void testCrossProduct() { List> cross = xp.crossProduct(input); dumpLOL(cross); } + private List AtoL(Object... a) { ArrayList al = new ArrayList(a.length); for(Object s : a) { diff --git a/src/test/java/org/archive/util/TestUtils.java b/src/test/java/org/archive/util/TestUtils.java index 01b0d099..b8fee0f4 100644 --- a/src/test/java/org/archive/util/TestUtils.java +++ b/src/test/java/org/archive/util/TestUtils.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.List; +import java.util.Locale; import com.google.common.io.ByteStreams; @@ -12,9 +13,9 @@ public class TestUtils { public static void dumpMatch(String context, List> res) { - System.out.format("Context(%s) Found (%d) matches\n", context, res.size()); + System.out.format(Locale.ROOT, "Context(%s) Found (%d) matches\n", context, res.size()); for(List r : res) { - System.out.format("Match(%s)\n", StringParse.join(r)); + System.out.format(Locale.ROOT, "Match(%s)\n", StringParse.join(r)); } } diff --git a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java index 5e8889e5..ab8ca627 100644 --- a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java +++ b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java @@ -4,6 +4,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; +import java.util.Locale; import org.archive.util.binsearch.impl.RandomAccessFileSeekableLineReaderFactory; import org.archive.util.iterator.CloseableIterator; @@ -13,9 +14,11 @@ import static org.junit.jupiter.api.Assertions.assertFalse; public class SortedTextFileTest { + private static String formatS(int i) 
{ - return String.format("%07d",i); + return String.format(Locale.ROOT, "%07d", i); } + private void createFile(File target, int max) throws FileNotFoundException { PrintWriter pw = new PrintWriter(target); for(int i = 0; i < max; i++) { From 1c116c7305c36fc24f56f916e9cb3fa87723b1f0 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Wed, 12 Nov 2025 13:27:03 +0100 Subject: [PATCH 124/169] Replace all occurrences of com.google.common.base.Charsets by java.nio.charset.StandardCharsets --- src/main/java/org/archive/io/GenericReplayCharSequence.java | 4 ++-- src/main/java/org/archive/io/ReplayCharSequence.java | 5 ++--- src/main/java/org/archive/url/LaxURLCodec.java | 5 ++--- src/main/java/org/archive/util/Recorder.java | 5 ++--- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/archive/io/GenericReplayCharSequence.java b/src/main/java/org/archive/io/GenericReplayCharSequence.java index 7aacb25a..ff96717c 100644 --- a/src/main/java/org/archive/io/GenericReplayCharSequence.java +++ b/src/main/java/org/archive/io/GenericReplayCharSequence.java @@ -33,6 +33,7 @@ import java.nio.channels.FileChannel; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.text.NumberFormat; import java.util.Locale; import java.util.logging.Level; @@ -41,7 +42,6 @@ import org.apache.commons.io.IOUtils; import org.archive.util.DevUtils; -import com.google.common.base.Charsets; import com.google.common.primitives.Ints; /** @@ -68,7 +68,7 @@ public class GenericReplayCharSequence implements ReplayCharSequence { * *

See Encoding. */ - public static final Charset WRITE_ENCODING = Charsets.UTF_16BE; + public static final Charset WRITE_ENCODING = StandardCharsets.UTF_16BE; private static final long MAP_MAX_BYTES = 64 * 1024 * 1024; // 64M diff --git a/src/main/java/org/archive/io/ReplayCharSequence.java b/src/main/java/org/archive/io/ReplayCharSequence.java index e456e293..bd74f2f8 100644 --- a/src/main/java/org/archive/io/ReplayCharSequence.java +++ b/src/main/java/org/archive/io/ReplayCharSequence.java @@ -23,8 +23,7 @@ import java.io.IOException; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; - -import com.google.common.base.Charsets; +import java.nio.charset.StandardCharsets; /** @@ -40,7 +39,7 @@ public interface ReplayCharSequence extends CharSequence, Closeable { /** charset to use in replay when declared value * is absent/illegal/unavailable */ - public Charset FALLBACK_CHARSET = Charsets.ISO_8859_1; // TODO: should this be UTF-8? + public Charset FALLBACK_CHARSET = StandardCharsets.ISO_8859_1; // TODO: should this be UTF-8? 
/** * Call this method when done so implementation has chance to clean up diff --git a/src/main/java/org/archive/url/LaxURLCodec.java b/src/main/java/org/archive/url/LaxURLCodec.java index e27d9de0..92c7cae6 100644 --- a/src/main/java/org/archive/url/LaxURLCodec.java +++ b/src/main/java/org/archive/url/LaxURLCodec.java @@ -20,12 +20,11 @@ import java.io.ByteArrayOutputStream; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.BitSet; import org.apache.commons.codec.net.URLCodec; -import com.google.common.base.Charsets; - /** * @author gojomo */ @@ -155,6 +154,6 @@ public String encode(BitSet safe, String pString, String cs) if (pString == null) { return null; } - return new String(encodeUrl(safe,pString.getBytes(cs)), Charsets.US_ASCII); + return new String(encodeUrl(safe,pString.getBytes(cs)), StandardCharsets.US_ASCII); } } diff --git a/src/main/java/org/archive/util/Recorder.java b/src/main/java/org/archive/util/Recorder.java index 6f9e0117..9f10ec92 100644 --- a/src/main/java/org/archive/util/Recorder.java +++ b/src/main/java/org/archive/util/Recorder.java @@ -25,6 +25,7 @@ import java.io.InputStreamReader; import java.io.OutputStream; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Locale; import java.util.Set; @@ -42,8 +43,6 @@ import org.archive.io.ReplayCharSequence; import org.archive.io.ReplayInputStream; -import com.google.common.base.Charsets; - /** * Pairs together a RecordingInputStream and RecordingOutputStream @@ -96,7 +95,7 @@ public class Recorder { * (current behavior is for consistency with our prior but perhaps not * optimal behavior) */ - protected Charset charset = Charsets.UTF_8; + protected Charset charset = StandardCharsets.UTF_8; /** whether recording-input (ris) message-body is chunked */ protected boolean inputIsChunked = false; From 4a12fa43ed02512219b089af3708189afdd2b8a2 Mon Sep 17 00:00:00 2001 From: Sebastian 
Nagel Date: Wed, 12 Nov 2025 13:51:40 +0100 Subject: [PATCH 125/169] Unit tests: replace FileReader and FileWriter using classes allowing to configure the charset. Add charset to toString() methods of OutputStreams --- .../org/archive/io/RecordingInputStreamTest.java | 2 +- .../archive/io/RepositionableInputStreamTest.java | 3 ++- .../java/org/archive/io/arc/ARCWriterTest.java | 10 +++++----- src/test/java/org/archive/util/FileUtilsTest.java | 2 -- .../util/binsearch/SortedTextFileTest.java | 7 +++++-- .../util/iterator/FilterStringIteratorTest.java | 1 - .../iterator/SortedCompositeIteratorTest.java | 15 +++++++++------ 7 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/test/java/org/archive/io/RecordingInputStreamTest.java b/src/test/java/org/archive/io/RecordingInputStreamTest.java index d794d925..74e92024 100644 --- a/src/test/java/org/archive/io/RecordingInputStreamTest.java +++ b/src/test/java/org/archive/io/RecordingInputStreamTest.java @@ -140,6 +140,6 @@ public void testAsOutputStream() throws IOException { ris.close(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); ris.getReplayInputStream().readFullyTo(baos); - assertEquals("hello", baos.toString()); + assertEquals("hello", baos.toString(UTF_8.name())); } } diff --git a/src/test/java/org/archive/io/RepositionableInputStreamTest.java b/src/test/java/org/archive/io/RepositionableInputStreamTest.java index 08143d01..4aad11b9 100644 --- a/src/test/java/org/archive/io/RepositionableInputStreamTest.java +++ b/src/test/java/org/archive/io/RepositionableInputStreamTest.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; +import java.io.OutputStreamWriter; import java.io.PrintWriter; import org.junit.jupiter.api.BeforeEach; @@ -40,7 +41,7 @@ public class RepositionableInputStreamTest { @BeforeEach protected void setUp() throws Exception { this.testFile = new File(tempDir, this.getClass().getName()); - PrintWriter pw = new 
PrintWriter(new FileOutputStream(testFile)); + PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(testFile), UTF_8)); for (int i = 0; i < 100; i++) { pw.print(LINE); } diff --git a/src/test/java/org/archive/io/arc/ARCWriterTest.java b/src/test/java/org/archive/io/arc/ARCWriterTest.java index 8b2f7d64..f6c48462 100644 --- a/src/test/java/org/archive/io/arc/ARCWriterTest.java +++ b/src/test/java/org/archive/io/arc/ARCWriterTest.java @@ -262,7 +262,7 @@ public void testWriteRecordCompressed() throws IOException { } public void testWriteGiantRecord() throws IOException { - PrintStream dummyStream = new PrintStream(new NullOutputStream()); + PrintStream dummyStream = new PrintStream(new NullOutputStream(), false, UTF_8.name()); ARCWriter arcWriter = new ARCWriter( SERIAL_NO, @@ -431,7 +431,7 @@ protected void lengthTooShort(String name, boolean compress, boolean strict) PrintStream origErr = System.err; ARCReader r = null; try { - System.setErr(new PrintStream(os)); + System.setErr(new PrintStream(os, false, UTF_8.name())); r = ARCReaderFactory.get(writer.getFile()); r.setStrict(strict); @@ -440,7 +440,7 @@ protected void lengthTooShort(String name, boolean compress, boolean strict) // Make sure we get the warning string which complains about the // trailing bytes. - String err = os.toString(); + String err = os.toString(UTF_8.name()); assertTrue(err.startsWith("WARNING") && (err.indexOf("Record STARTING at") > 0), "No message " + err); r.close(); @@ -496,7 +496,7 @@ protected void lengthTooLong(String name, boolean compress, PrintStream origErr = System.err; ARCReader r = null; try { - System.setErr(new PrintStream(os)); + System.setErr(new PrintStream(os, false, UTF_8.name())); r = ARCReaderFactory.get(writer.getFile()); r.setStrict(strict); @@ -505,7 +505,7 @@ protected void lengthTooLong(String name, boolean compress, // Make sure we get the warning string which complains about the // trailing bytes. 
- String err = os.toString(); + String err = os.toString(UTF_8.name()); assertTrue(err.startsWith("WARNING Premature EOF before end-of-record"), "No message " + err); } finally { diff --git a/src/test/java/org/archive/util/FileUtilsTest.java b/src/test/java/org/archive/util/FileUtilsTest.java index bd58bd09..51c416f0 100644 --- a/src/test/java/org/archive/util/FileUtilsTest.java +++ b/src/test/java/org/archive/util/FileUtilsTest.java @@ -185,7 +185,6 @@ public void testTailLinesNakedWindows() throws IOException { verifyTailLines(nakedLastLineWindows); } - @SuppressWarnings("unchecked") private void verifyTailLines(File file) throws IOException { List lines = org.apache.commons.io.FileUtils.readLines(file); verifyTailLines(file, lines, 1, 80); @@ -263,7 +262,6 @@ public void testHeadLinesNakedWindows() throws IOException { } - @SuppressWarnings("unchecked") private void verifyHeadLines(File file) throws IOException { List lines = org.apache.commons.io.FileUtils.readLines(file); verifyHeadLines(file, lines, 1, 80); diff --git a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java index ab8ca627..26d7a16d 100644 --- a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java +++ b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java @@ -4,12 +4,15 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; import java.util.Locale; import org.archive.util.binsearch.impl.RandomAccessFileSeekableLineReaderFactory; import org.archive.util.iterator.CloseableIterator; import org.junit.jupiter.api.Test; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -19,8 +22,8 @@ private static String formatS(int i) { return String.format(Locale.ROOT, "%07d", i); } - private void 
createFile(File target, int max) throws FileNotFoundException { - PrintWriter pw = new PrintWriter(target); + private void createFile(File target, int max) throws FileNotFoundException, UnsupportedEncodingException { + PrintWriter pw = new PrintWriter(target, UTF_8.name()); for(int i = 0; i < max; i++) { pw.println(formatS(i)); } diff --git a/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java b/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java index 20143289..6d5685ad 100644 --- a/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java +++ b/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java @@ -5,7 +5,6 @@ import java.util.List; import java.util.TreeSet; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*; diff --git a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java index 98de1416..fa1213f7 100644 --- a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java +++ b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java @@ -2,14 +2,17 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.PrintWriter; import java.util.Comparator; import org.junit.jupiter.api.Test; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -21,19 +24,19 @@ public void testHasNext() throws FileNotFoundException, IOException { File a = File.createTempFile("filea", null); File b = File.createTempFile("fileb", null); - PrintWriter apw = new PrintWriter(a); - PrintWriter bpw = new PrintWriter(b); + PrintWriter apw = new 
PrintWriter(a, UTF_8.name()); + PrintWriter bpw = new PrintWriter(b, UTF_8.name()); apw.println("1"); apw.println("3"); bpw.println("2"); bpw.println("4"); apw.close(); bpw.close(); - BufferedReader abr = new BufferedReader(new FileReader(a)); - BufferedReader bbr = new BufferedReader(new FileReader(b)); + BufferedReader abr = new BufferedReader(new InputStreamReader(new FileInputStream(a), UTF_8)); + BufferedReader bbr = new BufferedReader(new InputStreamReader(new FileInputStream(b), UTF_8)); SortedCompositeIterator sci = new SortedCompositeIterator(new Comparator() { - @Override + @Override public int compare(String o1, String o2) { return o1.compareTo(o2); } From 62341dafc3a15ab200e7e5724bf1b9e1f774ce55 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Wed, 12 Nov 2025 13:55:54 +0100 Subject: [PATCH 126/169] Code quality: replace obsolete imports and suppressed warnings --- .../extract/WARCMetadataRecordExtractorOutput.java | 8 -------- src/main/java/org/archive/hadoop/FilenameInputFormat.java | 1 - src/main/java/org/archive/hadoop/PerMapOutputFormat.java | 1 - src/main/java/org/archive/io/HeaderedArchiveRecord.java | 3 --- src/main/java/org/archive/io/arc/ARCReader.java | 1 - src/main/java/org/archive/io/warc/WARCReader.java | 1 - src/main/java/org/archive/io/warc/WARCWriter.java | 1 - src/main/java/org/archive/uid/RecordIDGenerator.java | 1 - src/main/java/org/archive/url/LaxURI.java | 1 - src/main/java/org/archive/util/FileUtils.java | 1 - src/main/java/org/archive/util/IterableLineIterator.java | 1 - src/main/java/org/archive/util/TextUtils.java | 1 - 12 files changed, 21 deletions(-) diff --git a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java index 426acb02..b1050a14 100644 --- a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java +++ b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java @@ -3,24 +3,16 @@ import 
java.io.IOException; import java.io.OutputStream; import java.io.PrintWriter; -import java.net.MalformedURLException; -import java.net.URISyntaxException; -import java.net.URL; import java.util.List; import java.util.Locale; import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.archive.format.gzip.GZIPFormatException; -import org.archive.format.json.JSONUtils; import org.archive.format.json.SimpleJSONPathSpec; import org.archive.resource.MetaData; import org.archive.resource.Resource; -import org.archive.util.IAUtils; import org.archive.util.StreamCopy; import org.json.JSONArray; -import org.json.JSONException; import org.json.JSONObject; import com.google.common.io.ByteStreams; diff --git a/src/main/java/org/archive/hadoop/FilenameInputFormat.java b/src/main/java/org/archive/hadoop/FilenameInputFormat.java index 5893afb1..3f41cdee 100644 --- a/src/main/java/org/archive/hadoop/FilenameInputFormat.java +++ b/src/main/java/org/archive/hadoop/FilenameInputFormat.java @@ -17,7 +17,6 @@ package org.archive.hadoop; import java.io.*; -import java.util.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; diff --git a/src/main/java/org/archive/hadoop/PerMapOutputFormat.java b/src/main/java/org/archive/hadoop/PerMapOutputFormat.java index 28ebca73..684202bb 100644 --- a/src/main/java/org/archive/hadoop/PerMapOutputFormat.java +++ b/src/main/java/org/archive/hadoop/PerMapOutputFormat.java @@ -17,7 +17,6 @@ package org.archive.hadoop; import java.io.*; -import java.util.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; diff --git a/src/main/java/org/archive/io/HeaderedArchiveRecord.java b/src/main/java/org/archive/io/HeaderedArchiveRecord.java index a149acac..858edb4d 100644 --- a/src/main/java/org/archive/io/HeaderedArchiveRecord.java +++ b/src/main/java/org/archive/io/HeaderedArchiveRecord.java @@ -150,9 +150,6 @@ private InputStream readContentHeaders() throws 
IOException { } String statusLine = new String(statusBytes, 0, statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); - if (statusLine == null) { - throw new NullPointerException("Expected status line is null"); - } statusLine = statusLine.trim(); // TODO: Tighten up this test. boolean isHttpResponse = statusLine.startsWith("HTTP"); diff --git a/src/main/java/org/archive/io/arc/ARCReader.java b/src/main/java/org/archive/io/arc/ARCReader.java index ecc742a5..f8935e79 100644 --- a/src/main/java/org/archive/io/arc/ARCReader.java +++ b/src/main/java/org/archive/io/arc/ARCReader.java @@ -448,7 +448,6 @@ public static void createCDXIndexFile(String urlOrPath) * @throws IOException * @throws java.text.ParseException */ - @SuppressWarnings("unchecked") public static void main(String [] args) throws ParseException, IOException, java.text.ParseException { Options options = getOptions(); diff --git a/src/main/java/org/archive/io/warc/WARCReader.java b/src/main/java/org/archive/io/warc/WARCReader.java index 02756cb1..34583e58 100644 --- a/src/main/java/org/archive/io/warc/WARCReader.java +++ b/src/main/java/org/archive/io/warc/WARCReader.java @@ -199,7 +199,6 @@ public static void main(String [] args) Options options = getOptions(); PosixParser parser = new PosixParser(); CommandLine cmdline = parser.parse(options, args, false); - @SuppressWarnings("unchecked") List cmdlineArgs = cmdline.getArgList(); Option [] cmdlineOptions = cmdline.getOptions(); HelpFormatter formatter = new HelpFormatter(); diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java index 8b571fad..65eb3346 100644 --- a/src/main/java/org/archive/io/warc/WARCWriter.java +++ b/src/main/java/org/archive/io/warc/WARCWriter.java @@ -38,7 +38,6 @@ import org.apache.commons.lang3.StringUtils; import org.archive.format.ArchiveFileConstants; -import org.archive.io.UTF8Bytes; import org.archive.io.WriterPoolMember; import 
org.archive.util.ArchiveUtils; import org.archive.util.anvl.Element; diff --git a/src/main/java/org/archive/uid/RecordIDGenerator.java b/src/main/java/org/archive/uid/RecordIDGenerator.java index 4f16c5ab..80cc5565 100644 --- a/src/main/java/org/archive/uid/RecordIDGenerator.java +++ b/src/main/java/org/archive/uid/RecordIDGenerator.java @@ -19,7 +19,6 @@ package org.archive.uid; import java.net.URI; -import java.net.URISyntaxException; import java.util.Map; /** diff --git a/src/main/java/org/archive/url/LaxURI.java b/src/main/java/org/archive/url/LaxURI.java index 3b27e045..9b7485c7 100644 --- a/src/main/java/org/archive/url/LaxURI.java +++ b/src/main/java/org/archive/url/LaxURI.java @@ -18,7 +18,6 @@ */ package org.archive.url; -import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.StandardCharsets; diff --git a/src/main/java/org/archive/util/FileUtils.java b/src/main/java/org/archive/util/FileUtils.java index 6886e08c..271d0212 100644 --- a/src/main/java/org/archive/util/FileUtils.java +++ b/src/main/java/org/archive/util/FileUtils.java @@ -393,7 +393,6 @@ public static boolean moveAsideIfExists(File file) throws IOException { * after the end of the last line returned * @throws IOException */ - @SuppressWarnings("unchecked") public static LongRange pagedLines(File file, long position, int signedDesiredLineCount, List lines, int lineEstimate) throws IOException { diff --git a/src/main/java/org/archive/util/IterableLineIterator.java b/src/main/java/org/archive/util/IterableLineIterator.java index 33efa1fd..c9010031 100644 --- a/src/main/java/org/archive/util/IterableLineIterator.java +++ b/src/main/java/org/archive/util/IterableLineIterator.java @@ -19,7 +19,6 @@ public IterableLineIterator(final Reader reader) super(reader); } - @SuppressWarnings("unchecked") public Iterator iterator() { return this; } diff --git a/src/main/java/org/archive/util/TextUtils.java 
b/src/main/java/org/archive/util/TextUtils.java index df3de58b..627d411a 100644 --- a/src/main/java/org/archive/util/TextUtils.java +++ b/src/main/java/org/archive/util/TextUtils.java @@ -30,7 +30,6 @@ import java.net.URLEncoder; import java.util.HashMap; import java.util.Map; -import java.util.concurrent.ConcurrentMap; import java.util.regex.Matcher; import java.util.regex.Pattern; From 97f7eb0035f921986b9a82441af3a6603c77961d Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Wed, 12 Nov 2025 14:14:37 +0100 Subject: [PATCH 127/169] Use StandardCharsets to replace or initialize String constants defining character sets --- .../archive/extract/WATExtractorOutput.java | 2 +- .../format/arc/FiledescRecordParser.java | 3 ++- .../archive/format/dns/DNSResponseParser.java | 3 ++- .../format/text/charset/CharsetDetector.java | 3 ++- .../org/archive/format/warc/WARCConstants.java | 4 +++- src/main/java/org/archive/io/UTF8Bytes.java | 3 ++- .../java/org/archive/io/WriterPoolMember.java | 3 ++- .../java/org/archive/net/PublicSuffixes.java | 18 ++++++------------ .../archive/resource/gzip/GZIPMetaData.java | 8 ++++---- .../resource/html/HTMLResourceFactory.java | 3 ++- src/main/java/org/archive/url/LaxURLCodec.java | 2 +- src/main/java/org/archive/url/URI.java | 2 +- .../org/archive/util/ChunkedInputStream.java | 3 +-- src/main/java/org/archive/util/HMACSigner.java | 6 ++++-- src/main/java/org/archive/util/IAUtils.java | 6 +----- .../java/org/archive/util/LaxHttpParser.java | 4 ++-- .../format/dns/DNSResponseParserTest.java | 4 +++- 17 files changed, 39 insertions(+), 38 deletions(-) diff --git a/src/main/java/org/archive/extract/WATExtractorOutput.java b/src/main/java/org/archive/extract/WATExtractorOutput.java index bb179fd1..621656b7 100644 --- a/src/main/java/org/archive/extract/WATExtractorOutput.java +++ b/src/main/java/org/archive/extract/WATExtractorOutput.java @@ -176,7 +176,7 @@ private void writeWARCMDRecord(OutputStream recOut, MetaData md, throw new 
IOException(e1); } osw.flush(); -// ByteArrayInputStream bais = new ByteArrayInputStream(md.toString().getBytes("UTF-8")); +// ByteArrayInputStream bais = new ByteArrayInputStream(md.toString().getBytes(UTF_8)); Date capDate; try { capDate = DateUtils.getSecondsSinceEpoch(capDateString); diff --git a/src/main/java/org/archive/format/arc/FiledescRecordParser.java b/src/main/java/org/archive/format/arc/FiledescRecordParser.java index c2d7bb65..6a34eb5d 100644 --- a/src/main/java/org/archive/format/arc/FiledescRecordParser.java +++ b/src/main/java/org/archive/format/arc/FiledescRecordParser.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; public class FiledescRecordParser { public boolean strict = false; @@ -12,7 +13,7 @@ public FiledescRecord parse(InputStream is) throws IOException { FiledescRecord rec = new FiledescRecord(); try { // TODO: count input bytes read... - BufferedReader br = new BufferedReader(new InputStreamReader(is,"UTF-8")); + BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String line = br.readLine(); parseLine1(rec,line); line = br.readLine(); diff --git a/src/main/java/org/archive/format/dns/DNSResponseParser.java b/src/main/java/org/archive/format/dns/DNSResponseParser.java index b5f81633..3e868ccf 100644 --- a/src/main/java/org/archive/format/dns/DNSResponseParser.java +++ b/src/main/java/org/archive/format/dns/DNSResponseParser.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; public class DNSResponseParser { @@ -28,7 +29,7 @@ public void parse(InputStream is, DNSResponse response) throws IOException, DNSP try { // TODO: should we wrap in a CountingInputStream and indicate // observed octet-length? 
- BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8")); + BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String date = br.readLine().trim(); if(isDate(date)) { response.setDate(date); diff --git a/src/main/java/org/archive/format/text/charset/CharsetDetector.java b/src/main/java/org/archive/format/text/charset/CharsetDetector.java index 49286764..08aac469 100644 --- a/src/main/java/org/archive/format/text/charset/CharsetDetector.java +++ b/src/main/java/org/archive/format/text/charset/CharsetDetector.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.StandardCharsets; import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -92,7 +93,7 @@ public abstract class CharsetDetector { // ...and if the chardet library fails, use the Content-Type header protected final static String HTTP_CONTENT_TYPE_HEADER = "CONTENT-TYPE"; /** the default charset name to use when giving up */ - public final static String DEFAULT_CHARSET = "UTF-8"; + public final static String DEFAULT_CHARSET = StandardCharsets.UTF_8.name(); protected boolean isCharsetSupported(String charsetName) { // can you believe that this throws a runtime? Just asking if it's diff --git a/src/main/java/org/archive/format/warc/WARCConstants.java b/src/main/java/org/archive/format/warc/WARCConstants.java index 72dad45a..a6bdb3f4 100644 --- a/src/main/java/org/archive/format/warc/WARCConstants.java +++ b/src/main/java/org/archive/format/warc/WARCConstants.java @@ -19,6 +19,8 @@ package org.archive.format.warc; +import java.nio.charset.StandardCharsets; + import org.archive.format.ArchiveFileConstants; /** @@ -93,7 +95,7 @@ public interface WARCConstants extends ArchiveFileConstants { * till we figure it, DEFAULT_ENCODING is single-byte charset -- same as * ARCs. 
*/ - public static final String DEFAULT_ENCODING = "UTF-8"; + public static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); public static final String HEADER_LINE_ENCODING = DEFAULT_ENCODING; // TODO: Revisit. 8859 isn't correct, especially if we settle on RFC822 diff --git a/src/main/java/org/archive/io/UTF8Bytes.java b/src/main/java/org/archive/io/UTF8Bytes.java index c280b08d..4dc0144b 100644 --- a/src/main/java/org/archive/io/UTF8Bytes.java +++ b/src/main/java/org/archive/io/UTF8Bytes.java @@ -19,6 +19,7 @@ package org.archive.io; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; /** * Marker Interface for instances that can be serialized as UTF8 bytes. @@ -27,7 +28,7 @@ * @version $Date$ $Version$ */ public interface UTF8Bytes { - public static final String UTF8 = "UTF-8"; + public static final String UTF8 = StandardCharsets.UTF_8.name(); /** * @return Instance as UTF-8 bytes. diff --git a/src/main/java/org/archive/io/WriterPoolMember.java b/src/main/java/org/archive/io/WriterPoolMember.java index 4679ea78..5d350534 100644 --- a/src/main/java/org/archive/io/WriterPoolMember.java +++ b/src/main/java/org/archive/io/WriterPoolMember.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.text.NumberFormat; @@ -54,7 +55,7 @@ public abstract class WriterPoolMember { private final Logger logger = Logger.getLogger(this.getClass().getName()); - public static final String UTF8 = "UTF-8"; + public static final String UTF8 = StandardCharsets.UTF_8.name(); /** * Default archival-aggregate filename template. 
diff --git a/src/main/java/org/archive/net/PublicSuffixes.java b/src/main/java/org/archive/net/PublicSuffixes.java index 5b3219d5..79130332 100644 --- a/src/main/java/org/archive/net/PublicSuffixes.java +++ b/src/main/java/org/archive/net/PublicSuffixes.java @@ -28,7 +28,6 @@ import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; @@ -193,7 +192,7 @@ public static void main(String args[]) throws IOException { } else { is = new FileInputStream(args[0]); } - BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); + BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8)); String regex = getTopmostAssignedSurtPrefixRegex(reader); IOUtils.closeQuietly(is); @@ -335,16 +334,11 @@ public static synchronized Pattern getTopmostAssignedSurtPrefixPattern() { public static synchronized String getTopmostAssignedSurtPrefixRegex() { if (topmostAssignedSurtPrefixRegex == null) { // use bundled list - try { - BufferedReader reader = new BufferedReader(new InputStreamReader( - PublicSuffixes.class.getResourceAsStream( - "/org/archive/effective_tld_names.dat"), "UTF-8")); - topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader); - IOUtils.closeQuietly(reader); - } catch (UnsupportedEncodingException ex) { - // should never happen - throw new RuntimeException(ex); - } + BufferedReader reader = new BufferedReader(new InputStreamReader( + PublicSuffixes.class.getResourceAsStream( + "/org/archive/effective_tld_names.dat"), UTF_8)); + topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader); + IOUtils.closeQuietly(reader); } return topmostAssignedSurtPrefixRegex; } diff --git a/src/main/java/org/archive/resource/gzip/GZIPMetaData.java b/src/main/java/org/archive/resource/gzip/GZIPMetaData.java index 0fc18162..1058b01b 100644 --- 
a/src/main/java/org/archive/resource/gzip/GZIPMetaData.java +++ b/src/main/java/org/archive/resource/gzip/GZIPMetaData.java @@ -15,6 +15,8 @@ import org.json.JSONException; import org.json.JSONObject; +import static java.nio.charset.StandardCharsets.UTF_8; + public class GZIPMetaData extends MetaData implements ResourceConstants { private static final Logger LOG = Logger.getLogger(GZIPMetaData.class.getName()); @@ -26,7 +28,7 @@ public void setData(GZIPSeriesMember member) { GZIPHeader header = member.getHeader(); GZIPStaticHeader staticH = header.getStaticHeader(); if(staticH.isFNameSet()) { - putString(GZIP_FILENAME,new String(header.getFileName(),"UTF-8")); + putString(GZIP_FILENAME, new String(header.getFileName(), UTF_8)); } if(staticH.isFCommentSet()) { putLong(GZIP_COMMENT_LENGTH,header.getCommentLength()); @@ -39,7 +41,7 @@ public void setData(GZIPSeriesMember member) { for(int i = 0; i < records; i++) { GZIPFExtraRecord rec = header.getRecord(i); JSONObject recJO = new JSONObject(); - String name = new String(rec.getName(),"UTF-8"); + String name = new String(rec.getName(), UTF_8); recJO.put(GZIP_FEXTRA_NAME, name); if(name.equals("SL") || name.equals("LX")) { recJO.put(GZIP_FEXTRA_VALUE, ByteOp.bytesToInt(rec.getValue())); @@ -55,8 +57,6 @@ public void setData(GZIPSeriesMember member) { putLong(GZIP_INFLATED_CRC,footer.getCRC()); putLong(GZIP_INFLATED_LENGTH,footer.getLength()); - } catch (UnsupportedEncodingException e) { - LOG.warning(e.getMessage()); } catch (JSONException e) { LOG.warning(e.getMessage()); } diff --git a/src/main/java/org/archive/resource/html/HTMLResourceFactory.java b/src/main/java/org/archive/resource/html/HTMLResourceFactory.java index 6e95270c..410449a1 100644 --- a/src/main/java/org/archive/resource/html/HTMLResourceFactory.java +++ b/src/main/java/org/archive/resource/html/HTMLResourceFactory.java @@ -4,6 +4,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; +import 
java.nio.charset.StandardCharsets; import java.util.logging.Logger; import org.archive.format.http.HttpHeaders; @@ -40,7 +41,7 @@ public Resource getResource(InputStream is, MetaData parentMetaData, CDATALexer lex = new CDATALexer(); // guess charset based on HTTP header and sniffed content chunk - String charset = "UTF-8"; + String charset = StandardCharsets.UTF_8.name(); is = new BufferedInputStream(is, CHARSET_GUESS_CHUNK_SIZE); byte[] chunk = new byte[CHARSET_GUESS_CHUNK_SIZE]; is.mark(0); diff --git a/src/main/java/org/archive/url/LaxURLCodec.java b/src/main/java/org/archive/url/LaxURLCodec.java index 92c7cae6..b68a0c19 100644 --- a/src/main/java/org/archive/url/LaxURLCodec.java +++ b/src/main/java/org/archive/url/LaxURLCodec.java @@ -29,7 +29,7 @@ * @author gojomo */ public class LaxURLCodec extends URLCodec { - public static LaxURLCodec DEFAULT = new LaxURLCodec("UTF-8"); + public static LaxURLCodec DEFAULT = new LaxURLCodec(StandardCharsets.UTF_8.name()); // passthrough constructor public LaxURLCodec(String encoding) { diff --git a/src/main/java/org/archive/url/URI.java b/src/main/java/org/archive/url/URI.java index b19151cd..492f7772 100644 --- a/src/main/java/org/archive/url/URI.java +++ b/src/main/java/org/archive/url/URI.java @@ -626,7 +626,7 @@ public URI(URI base, URI relative) throws URIException { /** * The default charset of the protocol. 
RFC 2277, 2396 */ - protected static String defaultProtocolCharset = "UTF-8"; + protected static String defaultProtocolCharset = UTF_8.name(); /** diff --git a/src/main/java/org/archive/util/ChunkedInputStream.java b/src/main/java/org/archive/util/ChunkedInputStream.java index 69b23047..b6a604c8 100644 --- a/src/main/java/org/archive/util/ChunkedInputStream.java +++ b/src/main/java/org/archive/util/ChunkedInputStream.java @@ -280,8 +280,7 @@ private static int getChunkSizeFromInputStream(final InputStream in) * @throws IOException If an IO problem occurs */ private void parseTrailerHeaders() throws IOException { - String charset = "US-ASCII"; - LaxHttpParser.parseHeaders(in, charset); + LaxHttpParser.parseHeaders(in, StandardCharsets.US_ASCII.name()); } /** diff --git a/src/main/java/org/archive/util/HMACSigner.java b/src/main/java/org/archive/util/HMACSigner.java index d7a5208e..b502b4fb 100644 --- a/src/main/java/org/archive/util/HMACSigner.java +++ b/src/main/java/org/archive/util/HMACSigner.java @@ -1,5 +1,7 @@ package org.archive.util; +import java.nio.charset.StandardCharsets; + /** * Generate an HMAC key given a secret sig, key name and optional id and an expiration time * @@ -63,11 +65,11 @@ public static String hmacDigest(String msg, String keyString, String algo) { String digest = null; try { SecretKeySpec key = new SecretKeySpec( - (keyString).getBytes("UTF-8"), algo); + (keyString).getBytes(StandardCharsets.UTF_8), algo); Mac mac = Mac.getInstance(algo); mac.init(key); - byte[] bytes = mac.doFinal(msg.getBytes("ASCII")); + byte[] bytes = mac.doFinal(msg.getBytes(StandardCharsets.US_ASCII)); StringBuilder hash = new StringBuilder(); diff --git a/src/main/java/org/archive/util/IAUtils.java b/src/main/java/org/archive/util/IAUtils.java index 1d15256e..334a31b4 100644 --- a/src/main/java/org/archive/util/IAUtils.java +++ b/src/main/java/org/archive/util/IAUtils.java @@ -73,11 +73,7 @@ public static String loadCommons(String id) { if (input == null) { return 
"UNKNOWN"; } - try { - reader = new InputStreamReader(input, "UTF-8"); - } catch (UnsupportedEncodingException e) { - return "UNKNOWN"; - } + reader = new InputStreamReader(input, UTF_8); Properties prop = new Properties(); try { prop.load(reader); diff --git a/src/main/java/org/archive/util/LaxHttpParser.java b/src/main/java/org/archive/util/LaxHttpParser.java index 05d2469c..434522c8 100644 --- a/src/main/java/org/archive/util/LaxHttpParser.java +++ b/src/main/java/org/archive/util/LaxHttpParser.java @@ -148,7 +148,7 @@ public static String readLine(InputStream inputStream, String charset) throws IO public static String readLine(InputStream inputStream) throws IOException { LOG.finest("enter LaxHttpParser.readLine(InputStream)"); - return readLine(inputStream, "US-ASCII"); + return readLine(inputStream, StandardCharsets.US_ASCII.name()); } /** @@ -238,6 +238,6 @@ public static HttpHeader[] parseHeaders(InputStream is, String charset) throws I */ public static HttpHeader[] parseHeaders(InputStream is) throws IOException { LOG.finest("enter HeaderParser.parseHeaders(InputStream, String)"); - return parseHeaders(is, "US-ASCII"); + return parseHeaders(is, StandardCharsets.US_ASCII.name()); } } diff --git a/src/test/java/org/archive/format/dns/DNSResponseParserTest.java b/src/test/java/org/archive/format/dns/DNSResponseParserTest.java index 7ade0ad5..73e1fda8 100644 --- a/src/test/java/org/archive/format/dns/DNSResponseParserTest.java +++ b/src/test/java/org/archive/format/dns/DNSResponseParserTest.java @@ -5,6 +5,8 @@ import org.junit.jupiter.api.Test; +import static java.nio.charset.StandardCharsets.UTF_8; + import static org.junit.jupiter.api.Assertions.assertEquals; public class DNSResponseParserTest { @@ -20,7 +22,7 @@ public void testParse() throws DNSParseException, IOException { } private void verifyResults(String res, String date, String d[][]) throws DNSParseException, IOException { ByteArrayInputStream is = - new ByteArrayInputStream(res.getBytes("UTF-8")); 
+ new ByteArrayInputStream(res.getBytes(UTF_8)); DNSResponse response = new DNSResponse(); parser.parse(is, response); verifyResults(response,date,d); From 3f58e5a8161892aedac0dc2435a998dd71eb1a85 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Wed, 12 Nov 2025 14:17:48 +0100 Subject: [PATCH 128/169] Github workflow: call `mvn verify` to run package build and forbiddenAPIs checks --- .github/workflows/maven.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 2421cef3..bb63cd56 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -34,4 +34,4 @@ jobs: restore-keys: | ${{ runner.os }}-maven- - name: Build with Maven - run: mvn -B package --file pom.xml + run: mvn -B verify --file pom.xml From 44ec22772a24d5dc916b0b0730625988e20cc865 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Thu, 13 Nov 2025 21:13:58 +0100 Subject: [PATCH 129/169] BasicURLCanonicalizer: more efficient normalization of dots in host name Add unit test and prevent from StringIndexOutOfBoundsException. 
--- .../java/org/archive/url/BasicURLCanonicalizer.java | 4 ++-- .../org/archive/url/BasicURLCanonicalizerTest.java | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/archive/url/BasicURLCanonicalizer.java b/src/main/java/org/archive/url/BasicURLCanonicalizer.java index fe2e0d42..f2cee60f 100644 --- a/src/main/java/org/archive/url/BasicURLCanonicalizer.java +++ b/src/main/java/org/archive/url/BasicURLCanonicalizer.java @@ -88,11 +88,11 @@ private String normalizeDots(String host) { } int start = 0, end = host.length(); boolean changed = false; - while (host.charAt(start) == '.') { + while (start < end && host.charAt(start) == '.') { start++; changed = true; } - while (host.charAt(end - 1) == '.') { + while (end > start && host.charAt(end - 1) == '.') { end--; changed = true; } diff --git a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java index 19b1984f..0ab1e3b9 100644 --- a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java @@ -286,6 +286,16 @@ public void testUnicodeEscaping() throws URISyntaxException { checkCanonicalization("http://example.org/%F0%9F%82%A1", "http://example.org/%F0%9F%82%A1"); } + @Test + public void testHostDots() throws URISyntaxException { + checkCanonicalization("https://foobar.org./", "https://foobar.org/"); + checkCanonicalization("https://.foobar.org/", "https://foobar.org/"); + checkCanonicalization("https://foo...bar.org/", "https://foo.bar.org/"); + checkCanonicalization("https://...foo...bar.org.../", "https://foo.bar.org/"); + checkCanonicalization("https://localhost/path/file.txt", "https://localhost/path/file.txt"); + checkCanonicalization("https://....../path/file.txt", "https:///path/file.txt"); + } + private void checkCanonicalization(String in, String want) throws URISyntaxException { HandyURL h = URLParser.parse(in); 
guc.canonicalize(h); From ad6e62d9c233c71798c39e3626396bf58a97271d Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Thu, 13 Nov 2025 21:25:50 +0100 Subject: [PATCH 130/169] Fix forbiddenAPIs check for Java 8 - explicetly pass Java version - ignore signatures of missing classes --- pom.xml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index c1c17e9b..3dca19e1 100644 --- a/pom.xml +++ b/pom.xml @@ -48,6 +48,7 @@ UTF-8 ${maven.build.timestamp} yyyyMMddhhmmss + 8 @@ -164,8 +165,8 @@ maven-compiler-plugin 3.14.1 - 8 - 8 + ${java.version} + ${java.version} @@ -178,6 +179,8 @@ forbiddenapis 3.10 + ${java.version} + true false From e1e4d749841540fb778ce6e5bd33908a86883353 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 00:55:35 +0000 Subject: [PATCH 131/169] Bump commons-io:commons-io from 2.20.0 to 2.21.0 Bumps [commons-io:commons-io](https://github.com/apache/commons-io) from 2.20.0 to 2.21.0. - [Changelog](https://github.com/apache/commons-io/blob/master/RELEASE-NOTES.txt) - [Commits](https://github.com/apache/commons-io/compare/rel/commons-io-2.20.0...rel/commons-io-2.21.0) --- updated-dependencies: - dependency-name: commons-io:commons-io dependency-version: 2.21.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3dca19e1..939ed535 100644 --- a/pom.xml +++ b/pom.xml @@ -142,7 +142,7 @@ commons-io commons-io - 2.20.0 + 2.21.0 From c43c85e7718cc78c802ef2edf5e5af9c5be3b8d6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 00:55:42 +0000 Subject: [PATCH 132/169] Bump org.junit.jupiter:junit-jupiter from 5.13.3 to 5.14.1 Bumps [org.junit.jupiter:junit-jupiter](https://github.com/junit-team/junit-framework) from 5.13.3 to 5.14.1. - [Release notes](https://github.com/junit-team/junit-framework/releases) - [Commits](https://github.com/junit-team/junit-framework/compare/r5.13.3...r5.14.1) --- updated-dependencies: - dependency-name: org.junit.jupiter:junit-jupiter dependency-version: 5.14.1 dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 939ed535..b86aea71 100644 --- a/pom.xml +++ b/pom.xml @@ -55,7 +55,7 @@ org.junit.jupiter junit-jupiter - 5.13.3 + 5.14.1 test From 3123d2eced7dd70963dd57345acd49c65d676478 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 00:55:45 +0000 Subject: [PATCH 133/169] Bump commons-cli:commons-cli from 1.10.0 to 1.11.0 Bumps [commons-cli:commons-cli](https://github.com/apache/commons-cli) from 1.10.0 to 1.11.0. - [Changelog](https://github.com/apache/commons-cli/blob/master/RELEASE-NOTES.txt) - [Commits](https://github.com/apache/commons-cli/compare/rel/commons-cli-1.10.0...rel/commons-cli-1.11.0) --- updated-dependencies: - dependency-name: commons-cli:commons-cli dependency-version: 1.11.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b86aea71..cafb13c9 100644 --- a/pom.xml +++ b/pom.xml @@ -91,7 +91,7 @@ commons-cli commons-cli - 1.10.0 + 1.11.0 From f8d711c445ee7a1fc6187432435715df86fe137b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 00:55:52 +0000 Subject: [PATCH 134/169] Bump commons-codec:commons-codec from 1.19.0 to 1.20.0 Bumps [commons-codec:commons-codec](https://github.com/apache/commons-codec) from 1.19.0 to 1.20.0. - [Changelog](https://github.com/apache/commons-codec/blob/master/RELEASE-NOTES.txt) - [Commits](https://github.com/apache/commons-codec/compare/rel/commons-codec-1.19.0...rel/commons-codec-1.20.0) --- updated-dependencies: - dependency-name: commons-codec:commons-codec dependency-version: 1.20.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index cafb13c9..9965d195 100644 --- a/pom.xml +++ b/pom.xml @@ -85,7 +85,7 @@ commons-codec commons-codec - 1.19.0 + 1.20.0 From 1da570d56d31b2cd354e2f72f8552f1cd1190de6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 00:56:00 +0000 Subject: [PATCH 135/169] Bump org.apache.maven.plugins:maven-release-plugin from 3.1.1 to 3.2.0 Bumps [org.apache.maven.plugins:maven-release-plugin](https://github.com/apache/maven-release) from 3.1.1 to 3.2.0. 
- [Release notes](https://github.com/apache/maven-release/releases) - [Commits](https://github.com/apache/maven-release/compare/maven-release-3.1.1...maven-release-3.2.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-release-plugin dependency-version: 3.2.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9965d195..27dfeb0a 100644 --- a/pom.xml +++ b/pom.xml @@ -229,7 +229,7 @@ org.apache.maven.plugins maven-release-plugin - 3.1.1 + 3.2.0 org.apache.maven.plugins From cba305ae49e77536abb26d89edbaf3aa7115196c Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 14 Nov 2025 10:30:36 +0900 Subject: [PATCH 136/169] Update CHANGES.md for 3.0.2 release --- CHANGES.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 21b30eb1..0db08eb5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,22 @@ Unreleased ---------- +3.0.2 (2025-11-14) +------------------ + +### Fixes + +- Avoid relying on the default locale or charset. [#128](https://github.com/iipc/webarchive-commons/pull/128) +- BasicURLCanonicalizer: more efficient normalization of dots in host names. 
[#129](https://github.com/iipc/webarchive-commons/pull/129) + +### Dependency upgrades + +* **commons-cli**: 1.10.0 → 1.11.0 +* **commons-codec**: 1.19.0 → 1.20.0 +* **commons-io**: 2.20.0 → 2.21.0 +* **junit-jupiter**: 5.13.3 → 5.14.1 +* **maven-release-plugin**: 3.1.1 → 3.2.0 + 3.0.1 (2025-10-27) ------------------ From 2d8afa3d2123c40d6d4f9fdf225c36e8bf8309d1 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 14 Nov 2025 10:31:43 +0900 Subject: [PATCH 137/169] [maven-release-plugin] prepare release webarchive-commons-3.0.2 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 27dfeb0a..34e00d09 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 3.0.2-SNAPSHOT + 3.0.2 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-2.0.0 + webarchive-commons-3.0.2 From 71b88c3e28e8a74e33988f41324fc124b14aa8d4 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 14 Nov 2025 10:31:48 +0900 Subject: [PATCH 138/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 34e00d09..51ae1fca 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 3.0.2 + 3.0.3-SNAPSHOT jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-3.0.2 + webarchive-commons-2.0.0 From 7d359c5b279cf690e7fee5994a6ee9d2b6c05cb1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Dec 2025 19:07:15 +0000 Subject: [PATCH 139/169] Bump org.apache.commons:commons-lang3 from 3.19.0 to 3.20.0 Bumps 
org.apache.commons:commons-lang3 from 3.19.0 to 3.20.0. --- updated-dependencies: - dependency-name: org.apache.commons:commons-lang3 dependency-version: 3.20.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 51ae1fca..c88cbbf9 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,7 @@ org.apache.commons commons-lang3 - 3.19.0 + 3.20.0 From c7bdf2a2f52eb12eeb284ee17c8afd460370d9dd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Dec 2025 19:07:10 +0000 Subject: [PATCH 140/169] Bump org.apache.maven.plugins:maven-source-plugin from 3.3.1 to 3.4.0 Bumps [org.apache.maven.plugins:maven-source-plugin](https://github.com/apache/maven-source-plugin) from 3.3.1 to 3.4.0. - [Release notes](https://github.com/apache/maven-source-plugin/releases) - [Commits](https://github.com/apache/maven-source-plugin/compare/maven-source-plugin-3.3.1...maven-source-plugin-3.4.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-source-plugin dependency-version: 3.4.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c88cbbf9..052abd24 100644 --- a/pom.xml +++ b/pom.xml @@ -234,7 +234,7 @@ org.apache.maven.plugins maven-source-plugin - 3.3.1 + 3.4.0 attach-sources From 5b91dbb0e89db9b952a10d2207a4978209e10dfe Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Jan 2026 17:00:54 +0000 Subject: [PATCH 141/169] Bump org.json:json from 20250517 to 20251224 Bumps [org.json:json](https://github.com/douglascrockford/JSON-java) from 20250517 to 20251224. 
- [Release notes](https://github.com/douglascrockford/JSON-java/releases) - [Changelog](https://github.com/stleary/JSON-java/blob/master/docs/RELEASES.md) - [Commits](https://github.com/douglascrockford/JSON-java/commits) --- updated-dependencies: - dependency-name: org.json:json dependency-version: '20251224' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 052abd24..913690e4 100644 --- a/pom.xml +++ b/pom.xml @@ -68,7 +68,7 @@ org.json json - 20250517 + 20251224 org.htmlparser From 60b38fd3fc01004d84b5b479613595f71243b630 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Jan 2026 17:00:49 +0000 Subject: [PATCH 142/169] Bump org.apache.maven.plugins:maven-release-plugin from 3.2.0 to 3.3.1 Bumps [org.apache.maven.plugins:maven-release-plugin](https://github.com/apache/maven-release) from 3.2.0 to 3.3.1. - [Release notes](https://github.com/apache/maven-release/releases) - [Commits](https://github.com/apache/maven-release/compare/maven-release-3.2.0...maven-release-3.3.1) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-release-plugin dependency-version: 3.3.1 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 913690e4..dec9c6e8 100644 --- a/pom.xml +++ b/pom.xml @@ -229,7 +229,7 @@ org.apache.maven.plugins maven-release-plugin - 3.2.0 + 3.3.1 org.apache.maven.plugins From d9331f7555e9a781d72f3e0cdde30df340d96f13 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Feb 2026 17:02:13 +0000 Subject: [PATCH 143/169] Bump org.sonatype.central:central-publishing-maven-plugin Bumps [org.sonatype.central:central-publishing-maven-plugin](https://github.com/sonatype/central-publishing-maven-plugin) from 0.9.0 to 0.10.0. - [Commits](https://github.com/sonatype/central-publishing-maven-plugin/commits) --- updated-dependencies: - dependency-name: org.sonatype.central:central-publishing-maven-plugin dependency-version: 0.10.0 dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index dec9c6e8..000e0ab3 100644 --- a/pom.xml +++ b/pom.xml @@ -220,7 +220,7 @@ org.sonatype.central central-publishing-maven-plugin - 0.9.0 + 0.10.0 true central From 7af54a1dfd41e891c00f065b0b420eb9fc0e00d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 00:55:55 +0000 Subject: [PATCH 144/169] Bump org.apache.maven.plugins:maven-surefire-plugin from 3.2.5 to 3.5.4 Bumps [org.apache.maven.plugins:maven-surefire-plugin](https://github.com/apache/maven-surefire) from 3.2.5 to 3.5.4. 
- [Release notes](https://github.com/apache/maven-surefire/releases) - [Commits](https://github.com/apache/maven-surefire/compare/surefire-3.2.5...surefire-3.5.4) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-surefire-plugin dependency-version: 3.5.4 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 000e0ab3..3e8bdbe8 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.2.5 + 3.5.4 de.thetaphi From 3ff84c50982593b05cc4b1eddd412bc80068fef9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Feb 2026 17:02:21 +0000 Subject: [PATCH 145/169] Bump org.junit.jupiter:junit-jupiter from 5.14.1 to 5.14.2 Bumps [org.junit.jupiter:junit-jupiter](https://github.com/junit-team/junit-framework) from 5.14.1 to 5.14.2. - [Release notes](https://github.com/junit-team/junit-framework/releases) - [Commits](https://github.com/junit-team/junit-framework/compare/r5.14.1...r5.14.2) --- updated-dependencies: - dependency-name: org.junit.jupiter:junit-jupiter dependency-version: 5.14.2 dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3e8bdbe8..9cf5474c 100644 --- a/pom.xml +++ b/pom.xml @@ -55,7 +55,7 @@ org.junit.jupiter junit-jupiter - 5.14.1 + 5.14.2 test From 8b1ddde87ee6a3d2c4122bb5ee6b0cbde332eb67 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Feb 2026 17:02:24 +0000 Subject: [PATCH 146/169] Bump commons-codec:commons-codec from 1.20.0 to 1.21.0 Bumps [commons-codec:commons-codec](https://github.com/apache/commons-codec) from 1.20.0 to 1.21.0. 
- [Changelog](https://github.com/apache/commons-codec/blob/master/RELEASE-NOTES.txt) - [Commits](https://github.com/apache/commons-codec/compare/rel/commons-codec-1.20.0...rel/commons-codec-1.21.0) --- updated-dependencies: - dependency-name: commons-codec:commons-codec dependency-version: 1.21.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9cf5474c..0874e9b7 100644 --- a/pom.xml +++ b/pom.xml @@ -85,7 +85,7 @@ commons-codec commons-codec - 1.20.0 + 1.21.0 From 89e9c2880c1d39145351fcedc9760b6fdcc497de Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 6 Feb 2026 23:01:47 +0900 Subject: [PATCH 147/169] Update CHANGES.md for 3.0.3 release --- CHANGES.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 0db08eb5..e7f36e57 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,16 @@ Unreleased ---------- +3.0.3 (2025-02-06) +------------------ + +### Dependency upgrades + +* **commons-codec**: 1.20.0 → 1.21.0 +* **commons-lang3**: 3.19.0 → 3.20.0 +* **json**: 20250517 → 20251224 +* **junit-jupiter**: 5.14.1 → 5.14.2 + 3.0.2 (2025-11-14) ------------------ From 359837f9f22654b642a4a30def1ef43b59addb59 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 6 Feb 2026 23:02:52 +0900 Subject: [PATCH 148/169] [maven-release-plugin] prepare release webarchive-commons-3.0.3 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 0874e9b7..68b3a83a 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 3.0.3-SNAPSHOT + 3.0.3 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-2.0.0 + webarchive-commons-3.0.3 From 
48c4573b2878292aecb1bfa88ae3bb53eff162f6 Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 6 Feb 2026 23:02:58 +0900 Subject: [PATCH 149/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 68b3a83a..9a05c9e7 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 3.0.3 + 3.0.4-SNAPSHOT jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-3.0.3 + webarchive-commons-2.0.0 From c344ebd2d222b4d1ceb7c2a92445fca6efc9c51b Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Fri, 6 Feb 2026 23:06:32 +0900 Subject: [PATCH 150/169] Enable auto-publishing in Maven configuration --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 9a05c9e7..8eed1059 100644 --- a/pom.xml +++ b/pom.xml @@ -224,6 +224,7 @@ true central + true From 870b9a1d3d4b4b797ae899eb8616d22cb6bf01e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Mar 2026 17:02:12 +0000 Subject: [PATCH 151/169] Bump org.apache.hadoop:hadoop-mapreduce-client-core from 3.4.2 to 3.4.3 Bumps org.apache.hadoop:hadoop-mapreduce-client-core from 3.4.2 to 3.4.3. --- updated-dependencies: - dependency-name: org.apache.hadoop:hadoop-mapreduce-client-core dependency-version: 3.4.3 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8eed1059..19ec2241 100644 --- a/pom.xml +++ b/pom.xml @@ -110,7 +110,7 @@ org.apache.hadoop hadoop-mapreduce-client-core - 3.4.2 + 3.4.3 true From e747c3f33a394b0772e18f1c0f528565c2902290 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Mar 2026 17:02:15 +0000 Subject: [PATCH 152/169] Bump org.apache.hadoop:hadoop-common from 3.4.2 to 3.4.3 Bumps org.apache.hadoop:hadoop-common from 3.4.2 to 3.4.3. --- updated-dependencies: - dependency-name: org.apache.hadoop:hadoop-common dependency-version: 3.4.3 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 19ec2241..6da7e42d 100644 --- a/pom.xml +++ b/pom.xml @@ -97,7 +97,7 @@ org.apache.hadoop hadoop-common - 3.4.2 + 3.4.3 true From afc2cf443723a80ad6fa1a693766e45523d3d15c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Mar 2026 17:02:21 +0000 Subject: [PATCH 153/169] Bump org.apache.maven.plugins:maven-surefire-plugin from 3.5.4 to 3.5.5 Bumps [org.apache.maven.plugins:maven-surefire-plugin](https://github.com/apache/maven-surefire) from 3.5.4 to 3.5.5. - [Release notes](https://github.com/apache/maven-surefire/releases) - [Commits](https://github.com/apache/maven-surefire/compare/surefire-3.5.4...surefire-3.5.5) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-surefire-plugin dependency-version: 3.5.5 dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6da7e42d..55d0601e 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.5.4 + 3.5.5 de.thetaphi From cc017a4b729d07d54d082a8b68bd7152e02884bc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Mar 2026 17:02:30 +0000 Subject: [PATCH 154/169] Bump org.junit.jupiter:junit-jupiter from 5.14.2 to 5.14.3 Bumps [org.junit.jupiter:junit-jupiter](https://github.com/junit-team/junit-framework) from 5.14.2 to 5.14.3. - [Release notes](https://github.com/junit-team/junit-framework/releases) - [Commits](https://github.com/junit-team/junit-framework/compare/r5.14.2...r5.14.3) --- updated-dependencies: - dependency-name: org.junit.jupiter:junit-jupiter dependency-version: 5.14.3 dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 55d0601e..3aeda946 100644 --- a/pom.xml +++ b/pom.xml @@ -55,7 +55,7 @@ org.junit.jupiter junit-jupiter - 5.14.2 + 5.14.3 test From 8f344213336f550bc98cb5f0a5f9d60a0ed61dfb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Mar 2026 17:02:33 +0000 Subject: [PATCH 155/169] Bump org.apache.maven.plugins:maven-compiler-plugin Bumps [org.apache.maven.plugins:maven-compiler-plugin](https://github.com/apache/maven-compiler-plugin) from 3.14.1 to 3.15.0. 
- [Release notes](https://github.com/apache/maven-compiler-plugin/releases) - [Commits](https://github.com/apache/maven-compiler-plugin/compare/maven-compiler-plugin-3.14.1...maven-compiler-plugin-3.15.0) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-compiler-plugin dependency-version: 3.15.0 dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 3aeda946..47ca51ee 100644 --- a/pom.xml +++ b/pom.xml @@ -163,7 +163,7 @@ org.apache.maven.plugins maven-compiler-plugin - 3.14.1 + 3.15.0 ${java.version} ${java.version} From 165370774ced7a7ca2faa5982e6307246559fa4d Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Tue, 14 Apr 2026 19:06:57 +0200 Subject: [PATCH 156/169] WAT extractor not to fail on metadata records without WARC-Target-URI The WARC spec does not require a WARC-Target-URI for metadata records. The WAT extractor should not fail if a metadata record has no target URI, but simply not add one to the JSON blob. 
--- src/main/java/org/archive/extract/WATExtractorOutput.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/org/archive/extract/WATExtractorOutput.java b/src/main/java/org/archive/extract/WATExtractorOutput.java index 621656b7..f695796f 100644 --- a/src/main/java/org/archive/extract/WATExtractorOutput.java +++ b/src/main/java/org/archive/extract/WATExtractorOutput.java @@ -152,6 +152,9 @@ private void writeWARC(OutputStream recOut, MetaData md) throws IOException { String targetURI; if(warcType.equals("warcinfo")) { targetURI = JSONUtils.extractSingle(md, "Envelope.WARC-Header-Metadata.WARC-Filename"); + } else if (warcType.equals("metadata")) { + // WARC-Target-URI is optional in metadata records + targetURI = JSONUtils.extractSingle(md, "Envelope.WARC-Header-Metadata.WARC-Target-URI"); } else { targetURI = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Target-URI"); } From 2ecf6f49b53efaf46cc0d248a7a2d5308b859681 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 May 2026 20:22:38 +0000 Subject: [PATCH 157/169] Bump com.google.guava:guava from 33.5.0-jre to 33.6.0-jre Bumps [com.google.guava:guava](https://github.com/google/guava) from 33.5.0-jre to 33.6.0-jre. - [Release notes](https://github.com/google/guava/releases) - [Commits](https://github.com/google/guava/commits) --- updated-dependencies: - dependency-name: com.google.guava:guava dependency-version: 33.6.0-jre dependency-type: direct:production update-type: version-update:semver-minor ...
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 47ca51ee..03a3023d 100644 --- a/pom.xml +++ b/pom.xml @@ -62,7 +62,7 @@ com.google.guava guava - 33.5.0-jre + 33.6.0-jre From 9c1059908a58028e47fe4e3ae666b32cc44b3f31 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 May 2026 20:22:42 +0000 Subject: [PATCH 158/169] Bump commons-codec:commons-codec from 1.21.0 to 1.22.0 Bumps [commons-codec:commons-codec](https://github.com/apache/commons-codec) from 1.21.0 to 1.22.0. - [Changelog](https://github.com/apache/commons-codec/blob/master/RELEASE-NOTES.txt) - [Commits](https://github.com/apache/commons-codec/compare/rel/commons-codec-1.21.0...rel/commons-codec-1.22.0) --- updated-dependencies: - dependency-name: commons-codec:commons-codec dependency-version: 1.22.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 03a3023d..75f618bf 100644 --- a/pom.xml +++ b/pom.xml @@ -85,7 +85,7 @@ commons-codec commons-codec - 1.21.0 + 1.22.0 From e6383f75f1b8c37cd16abd04a9c2627029904104 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 May 2026 20:22:56 +0000 Subject: [PATCH 159/169] Bump org.junit.jupiter:junit-jupiter from 5.14.3 to 5.14.4 Bumps [org.junit.jupiter:junit-jupiter](https://github.com/junit-team/junit-framework) from 5.14.3 to 5.14.4. - [Release notes](https://github.com/junit-team/junit-framework/releases) - [Commits](https://github.com/junit-team/junit-framework/compare/r5.14.3...r5.14.4) --- updated-dependencies: - dependency-name: org.junit.jupiter:junit-jupiter dependency-version: 5.14.4 dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 75f618bf..884a05dc 100644 --- a/pom.xml +++ b/pom.xml @@ -55,7 +55,7 @@ org.junit.jupiter junit-jupiter - 5.14.3 + 5.14.4 test From 6bcd9cfacdfeee5788480bf4b7d835cf49a6f3de Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 May 2026 20:23:01 +0000 Subject: [PATCH 160/169] Bump commons-io:commons-io from 2.21.0 to 2.22.0 Bumps commons-io:commons-io from 2.21.0 to 2.22.0. --- updated-dependencies: - dependency-name: commons-io:commons-io dependency-version: 2.22.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 884a05dc..7414ddcf 100644 --- a/pom.xml +++ b/pom.xml @@ -142,7 +142,7 @@ commons-io commons-io - 2.21.0 + 2.22.0 From e474ba179fa3e199430933ddde6c3dd6ee201b3c Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Sat, 2 May 2026 11:07:26 +0900 Subject: [PATCH 161/169] Release 3.0.4 --- CHANGES.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index e7f36e57..18fb8290 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,20 @@ Unreleased ---------- +3.0.4 (2026-05-02) +------------------ + +### Fixes + +* WAT extractor not to fail on metadata records without WARC-Target-URI [#150](https://github.com/iipc/webarchive-commons/pull/150) + +### Dependency upgrades + +* **commons-codec**: 1.21.0 → 1.22.0 +* **commons-io**: 2.21.0 → 2.22.0 +* **guava**: 33.5.0-jre → 33.6.0-jre +* **hadoop** (hadoop-common, hadoop-mapreduce-client-core): 3.4.2 → 3.4.3 + 3.0.3 (2025-02-06) ------------------ From 249d86fbb97580bd1016c71d827c9c8d8b17cb9c Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Sat, 2 May 2026 11:08:27 +0900 Subject: [PATCH 162/169] [maven-release-plugin] prepare release
webarchive-commons-3.0.4 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 7414ddcf..6d992b70 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 3.0.4-SNAPSHOT + 3.0.4 jar webarchive-commons @@ -41,7 +41,7 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git https://github.com/iipc/webarchive-commons - webarchive-commons-2.0.0 + webarchive-commons-3.0.4 From 562f3414378f2650c1bb9c5a8154d3268bfba79f Mon Sep 17 00:00:00 2001 From: Alex Osborne Date: Sat, 2 May 2026 11:08:33 +0900 Subject: [PATCH 163/169] [maven-release-plugin] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6d992b70..fdc2dd01 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ org.netpreserve.commons webarchive-commons - 3.0.4 + 3.0.5-SNAPSHOT jar webarchive-commons From caf1d008c2a1245688d2bfbb3a0c0ea128345110 Mon Sep 17 00:00:00 2001 From: Sebastian Nagel Date: Fri, 12 Jun 2026 15:53:20 +0200 Subject: [PATCH 164/169] Add GitHub Dependabot configuration to update GitHub workflow actions --- .github/dependabot.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 69a75a8b..209bb31e 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,3 +5,7 @@ updates: open-pull-requests-limit: 10 schedule: interval: "monthly" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" From ceb7fabadb1f0175ec1ab95ac22fc16b654f2742 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 14 Jun 2026 00:22:51 +0000 Subject: [PATCH 165/169] Bump actions/checkout from 4 to 6 Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 6. 
- [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v4...v6) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/maven.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index bb63cd56..9474a060 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -19,7 +19,7 @@ jobs: timeout-minutes: 30 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up JDK ${{ matrix.jdk }} uses: actions/setup-java@v4 with: From 0b99f0a8a14e50818b41d3dfe6d16f14aab54eae Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 14 Jun 2026 00:22:49 +0000 Subject: [PATCH 166/169] Bump actions/cache from 4 to 5 Bumps [actions/cache](https://github.com/actions/cache) from 4 to 5. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/cache dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/maven.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 9474a060..f7df222f 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -27,7 +27,7 @@ jobs: distribution: 'temurin' cache: maven - name: Cache local Maven repository - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} From a80a0b198d672cd4904e8b1d5a3c299d3d0f501e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 14 Jun 2026 00:22:46 +0000 Subject: [PATCH 167/169] Bump actions/setup-java from 4 to 5 Bumps [actions/setup-java](https://github.com/actions/setup-java) from 4 to 5. - [Release notes](https://github.com/actions/setup-java/releases) - [Commits](https://github.com/actions/setup-java/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-java dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/maven.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index f7df222f..64b395c2 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -21,7 +21,7 @@ jobs: steps: - uses: actions/checkout@v6 - name: Set up JDK ${{ matrix.jdk }} - uses: actions/setup-java@v4 + uses: actions/setup-java@v5 with: java-version: ${{ matrix.jdk }} distribution: 'temurin' From e97096212c56a916d751fe707ccdb9c5ec6015ca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Jun 2026 00:40:22 +0000 Subject: [PATCH 168/169] Bump org.apache.maven.plugins:maven-surefire-plugin from 3.5.5 to 3.5.6 Bumps [org.apache.maven.plugins:maven-surefire-plugin](https://github.com/apache/maven-surefire) from 3.5.5 to 3.5.6. - [Release notes](https://github.com/apache/maven-surefire/releases) - [Commits](https://github.com/apache/maven-surefire/compare/surefire-3.5.5...surefire-3.5.6) --- updated-dependencies: - dependency-name: org.apache.maven.plugins:maven-surefire-plugin dependency-version: 3.5.6 dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index fdc2dd01..41119b4e 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.5.5 + 3.5.6 de.thetaphi From e31daca222cff2735f2dc143798379e42720d849 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 3 Jun 2026 00:40:08 +0000 Subject: [PATCH 169/169] Bump org.json:json from 20251224 to 20260522 Bumps [org.json:json](https://github.com/douglascrockford/JSON-java) from 20251224 to 20260522. 
- [Release notes](https://github.com/douglascrockford/JSON-java/releases) - [Changelog](https://github.com/stleary/JSON-java/blob/master/docs/RELEASES.md) - [Commits](https://github.com/douglascrockford/JSON-java/compare/20251224...20260522) --- updated-dependencies: - dependency-name: org.json:json dependency-version: '20260522' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 41119b4e..4c019aae 100644 --- a/pom.xml +++ b/pom.xml @@ -68,7 +68,7 @@ org.json json - 20251224 + 20260522 org.htmlparser