diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..209bb31e --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +version: 2 +updates: + - package-ecosystem: "maven" + directory: "/" + open-pull-requests-limit: 10 + schedule: + interval: "monthly" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml new file mode 100644 index 00000000..64b395c2 --- /dev/null +++ b/.github/workflows/maven.yml @@ -0,0 +1,37 @@ +name: Java CI with Maven + +permissions: + contents: read + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + strategy: + matrix: + jdk: [8, 11, 17, 21, 25] + + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - uses: actions/checkout@v6 + - name: Set up JDK ${{ matrix.jdk }} + uses: actions/setup-java@v5 + with: + java-version: ${{ matrix.jdk }} + distribution: 'temurin' + cache: maven + - name: Cache local Maven repository + uses: actions/cache@v5 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + - name: Build with Maven + run: mvn -B verify --file pom.xml diff --git a/.gitignore b/.gitignore index fc8f67e9..feee77d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea *.pydevproject .project .metadata diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 0dfd3f7f..00000000 --- a/.travis.yml +++ /dev/null @@ -1,26 +0,0 @@ -language: java - -jdk: - - oraclejdk7 - -before_install: - - "git clone https://github.com/iipc/travis.git target/travis" - -before_script: - - "export JAVA_OPTS=-Xmx1024m" - - "export MAVEN_OPTS=-Xmx512m" - - "ulimit -u 2048" - -script: - - "target/travis/deploy-if.sh" - -# whitelist in the master branch only -branches: - only: - - master - -env: - global: - - secure: 
"qDKjVdoe4Qcz4WfXiQydU7tyl51T62FUJrjqu4FUPBcgeQhFQiggwhpaE6xCOzOpxbsuBi2R1c8gMQf5esE5iDL5jZMu+kz++dYbuzMTd13ttvZWMW5wRPH0H8iHk609FP/RDtVKKBr7WO0JvvIAZEhWNHZrLXBrrKgdTey171g=" - - secure: "FXGBKJNP9X7ePJfS4eYTZtoFo4RT1sxor34XxncSJr7uV6ggtZb4B4WNd16IlLcDk6E32sx8YoWdltaOGwQ5Vg/kux5Ko/wKZCoccS018Ln1bRT86dD1KoPY34rGoNJVQxe7J/1MPqpBKwmi2XCKfzpsEh3W7bbIqg8w9MEOOZA=" - - diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 00000000..18fb8290 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,320 @@ +Unreleased +---------- + +3.0.4 (2026-06-02) +------------------ + +### Fixes + +* WAT extractor not to fail on metadata records without WARC-Target-URI [#150](https://github.com/iipc/webarchive-commons/pull/150) + +### Dependency upgrades + +* **commons-codec**: 1.21.0 → 1.22.0 +* **commons-io**: 2.21.0 → 2.22.0 +* **guava**: 33.5.0-jre → 33.6.0-jre +* **hadoop** (hadoop-common, hadoop-mapreduce-client-core): 3.4.2 → 3.4.3 + +3.0.3 (2026-02-06) +------------------ + +### Dependency upgrades + +* **commons-codec**: 1.20.0 → 1.21.0 +* **commons-lang3**: 3.19.0 → 3.20.0 +* **json**: 20250517 → 20251224 +* **junit-jupiter**: 5.14.1 → 5.14.2 + +3.0.2 (2025-11-14) +------------------ + +### Fixes + +- Avoid relying on the default locale or charset. [#128](https://github.com/iipc/webarchive-commons/pull/128) +- BasicURLCanonicalizer: more efficient normalization of dots in host names. [#129](https://github.com/iipc/webarchive-commons/pull/129) + +### Dependency upgrades + +* **commons-cli**: 1.10.0 → 1.11.0 +* **commons-codec**: 1.19.0 → 1.20.0 +* **commons-io**: 2.20.0 → 2.21.0 +* **junit-jupiter**: 5.13.3 → 5.14.1 +* **maven-release-plugin**: 3.1.1 → 3.2.0 + +3.0.1 (2025-10-27) +------------------ + +### Fixes + +* Fixed a file handle leak in `FileUtils.pagedLines()` and `FileUtils.appendTo()` that could occur during I/O errors. 
+ +### Dependency Upgrades + +* **commons-codec**: 1.18.0 → 1.19.0 +* **commons-lang3**: 3.18.0 → 3.19.0 +* **commons-cli**: 1.9.0 → 1.10.0 +* **guava**: 33.4.8-jre → 33.5.0-jre +* **hadoop**: 3.4.1 → 3.4.2 +* **pig**: 0.17.0 → 0.18.0 + +3.0.0 (2025-07-21) +------------------ + +### Changes + +`FileUtils.pagedLines()` and `FileUtils.expandRange()` now return the Apache Commons Lang 3 version of `LongRange`. +Users of these methods may need to make the following changes: + +| Old | New | +|-------------------------------------------------|---------------------------------------------| +| `import org.apache.commons.lang.math.LongRange` | `import org.apache.commons.lang3.LongRange` | +| `new LongRange(min, max)` | `LongRange.of(min, max)` | +| `longRange.getMaximumLong()` | `longRange.getMaximum()` | +| `longRange.getMinimumLong()` | `longRange.getMinimum()` | + +### Dependency upgrades + +- **commons-io**: 2.19.0 → 2.20.0 +- **commons-lang**: 2.6 → 3.18.0 + +2.0.2 (2025-07-15) +------------------ + +### Fixes + +* Fixes for `org.archive.net.PublicSuffixes` [#110](https://github.com/iipc/webarchive-commons/pull/110) + * Updated to the latest version of the public suffix list. + * Fixed parsing failures with newer list versions. + * Moved `effective_tld_names.dat` to `org/archive/effective_tld_names.dat` to prevent conflict with `crawler-commons`. + +2.0.1 (2025-05-21) +------------------ + +### Changes + +* Re-added `Reporter.shortReportLineTo(PrintWriter)` as it turned out to be important to Heritrix. + + +2.0.0 (2025-05-21) +------------------ + +### New features + +- Added `RecordingInputStream.asOutputStream()` for direct writing of recorded data without an input stream. [#108](https://github.com/iipc/webarchive-commons/pull/108) + +### Removals + +#### Removed Apache HttpClient 3.1 + +`HTTPSeekableLineReaderFactory` and `ZipNumBlockLoader` now default to HttpClient 4.3. 
+ +| Removed | Replacement | +|-----------------------------------------------------------|--------------------------------------| +| `org.apache.commons.httpclient.URIException` | `org.archive.url.URIException` | +| `org.apache.commons.httpclient.Header` | `org.archive.format.http.HttpHeader` | +| `org.archive.httpclient.HttpRecorderGetMethod` | | +| `org.archive.httpclient.HttpRecorderMethod` | | +| `org.archive.httpclient.HttpRecorderPostMethod` | | +| `org.archive.httpclient.SingleHttpConnectionManager` | | +| `org.archive.httpclient.ThreadLocalHttpConnectionManager` | | + +#### Removed deprecated versions of renamed classes + +| Removed | Replacement | +|-----------------------------------------------|--------------------------------------------------| +| `org.archive.io.ArchiveFileConstants` | `org.archive.format.ArchiveFileConstants` | +| `org.archive.io.GzipHeader` | `org.archive.util.zip.GzipHeader` | +| `org.archive.io.GZIPMembersInputStream` | `org.archive.util.zip.GZIPMembersInputStream` | +| `org.archive.io.NoGzipMagicException` | `org.archive.util.zip.NoGzipMagicException` | +| `org.archive.io.arc.ARCConstants` | `org.archive.format.arc.ARCConstants` | +| `org.archive.io.warc.WARCConstants` | `org.archive.format.warc.WARCConstants` | +| `org.archive.url.DefaultIACanonicalizerRules` | `org.archive.url.AggressiveIACanonicalizerRules` | +| `org.archive.url.DefaultIAURLCanonicalizer` | `org.archive.url.AggressiveIAURLCanonicalizer` | +| `org.archive.url.GoogleURLCanonicalizer` | `org.archive.url.BasicURLCanonicalizer` | + +#### Removed deprecated methods + +| Removed | Replacement | +|-----------------------------------------------|-------------------------------------------| +| `ANVLRecord(int)` | `ANVLRecord()` | +| `DevUtils.betterPrintStack(RuntimeException)` | `Throwable.printStackTrace()` | +| `Recorder.getReplayCharSequence()` | `Recorder.getContentReplayCharSequence()` | +| `Reporter.shortReportLineTo(PrintWriter)` | 
`Reporter.reportTo(PrintWriter)` | + +##### Removed usages of constant interfaces + +Static imports should be used instead. + +* `ArchiveFileConstants` is no longer implemented by: + * `ArchiveReader` + * `ArchiveReaderFactory` + * `WARCWriter` + * `WriterPool` + * `WriterPoolMember` +* `ARCConstants` is no longer implemented by: + * `ARCReader` + * `ARCReaderFactory` + * `ARCRecord` + * `ARCRecordMetaData` + * `ARCUtils` + * `ARCWriter` +* `WARCConstants` is no longer implemented by: + * `WARCReader` + * `WARCReaderFactory` + * `WARCRecord` + * `WARCWriter` + +### Dependency upgrades + +- **commons-io**: 2.18.0 → 2.19.0 +- **guava**: 33.3.1-jre → 33.4.8-jre +- **json**: 20240303 → 20250517 +- **junit**: 4.13.2 → 5.12.2 + +1.3.0 (2024-12-20) +------------------ + +#### URL Canonicalization Changed + +The output of WaybackURLKeyMaker and other canonicalizers based on BasicURLCanonicalizer has changed for URLs that +contain non-UTF-8 percent-encoded sequences. For example, when a URL contains "%C3%23" it will now be normalised to +"%c3%23" whereas previous releases produced "%25c3%23". This change brings webarchive-commons more in line with pywb, +surt (Python), warcio.js and RFC 3986. While CDX file compatibility with these newer tools should improve, note that CDX +files generated by the new release which contain such URLs may not work correctly with existing versions of +OpenWayback that use the older webarchive-commons. [#102](https://github.com/iipc/webarchive-commons/pull/102) + +#### Bug fixes + +* WAT: Duplicated payload metadata values for "Actual-Content-Length" and "Trailing-Slop-Length" [#103](https://github.com/iipc/webarchive-commons/pull/103) +* ObjectPlusFilesOutputStream.hardlinkOrCopy now uses `Files.createLink()` instead of executing `ln`. This + prevents the potential for security vulnerabilities from command line option injection and improves portability. 
+ +#### Dependency upgrades + +* fastutil removed +* dsiutils removed + +#### Deprecations + +The following classes and enum members have been marked deprecated as a step towards removal of the dependency on +Apache Commons HttpClient 3.1. + +* org.archive.httpclient.HttpRecorderGetMethod +* org.archive.httpclient.HttpRecorderMethod +* org.archive.httpclient.HttpRecorderPostMethod +* org.archive.httpclient.SingleHttpConnectionManager +* org.archive.httpclient.ThreadLocalHttpConnectionManager +* org.archive.util.binsearch.impl.http.ApacheHttp31SLR +* org.archive.util.binsearch.impl.http.ApacheHttp31SLRFactory +* org.archive.util.binsearch.impl.http.HTTPSeekableLineReaderFactory.HttpLibs.APACHE_31 + +1.2.0 (2024-11-29) +------------------ + +#### New features + +* MetaData is now multivalued to support repeated WARC and HTTP headers. [#98](https://github.com/iipc/webarchive-commons/pull/98/files) + +#### Dependency upgrades + +* commons-io 2.18.0 +* commons-lang 2.6 +* guava 33.3.1-jre +* hadoop 3.4.1 +* htmlparser 2.1 +* httpcore 4.4.16 +* json 20240303 +* junit 4.13.2 + +1.1.11 (2024-11-27) +------------------- + +#### Bug fixes + +* Fixed URLParser and WaybackURLKeyMaker failing on URLs with IPv6 address hostnames [#100](https://github.com/iipc/webarchive-commons/pull/100) + +1.1.10 (2024-10-15) +------------------- + +#### Bug fixes + +* [WAT extractor: do not fail on missing WARC-Filename in warcinfo record](https://github.com/iipc/webarchive-commons/pull/89) +* [ExtractingParseObserver: extract rel, hreflang and type attributes](https://github.com/iipc/webarchive-commons/pull/86) +* [ExtractingParseObserver: extract links from onClick attributes](https://github.com/iipc/webarchive-commons/pull/85) + +#### Dependency Upgrades + +* commons-collections 3.2.2 +* commons-io 2.7 +* dsiutils 2.2.8 +* guava 33.3.0-jre +* hadoop 3.4.0 (now optional) +* pig 0.17.0 +* org.json 20231013 + +#### Dependency Removals + +* joda-time (was unused) + +1.1.9 +----- +* [Use 
commons-collections v3.2.2 to avoid v3.2.1 vulnerability](https://github.com/iipc/webarchive-commons/pull/77) +* [Extract `property` attributes of HTML meta elements](https://github.com/iipc/webarchive-commons/pull/75) +* [Do not add value of preceding HTTP header field if there is no value](https://github.com/iipc/webarchive-commons/pull/74) +* [Fix WAT records corresponding to response records of Wget generated WARCs](https://github.com/iipc/webarchive-commons/pull/74) + +1.1.8 +----- +* [Improve HTML link extraction](https://github.com/iipc/webarchive-commons/pull/72) +* [Move unit tests over from heritrix3 to webarchive-commons](https://github.com/iipc/webarchive-commons/issues/25) +* [Strip empty port via URLParser](https://github.com/iipc/webarchive-commons/pull/69/) +* [Use CharsetDetector to guess encoding of HTML documents](https://github.com/iipc/webarchive-commons/pull/68/) +* [Fix last header was lost if LF LF](https://github.com/iipc/webarchive-commons/pull/65/) +* [Make regular expression to extract URLs from CSS more restrictive](https://github.com/iipc/webarchive-commons/pull/63) +* [Remove invalid constant `PROFILE_REVISIT_URI_AGNOSTIC_IDENTICAL_DIGEST`](https://github.com/iipc/webarchive-commons/pull/62) + +1.1.7 +----- +* [Make canonicalizer be able to strip session id params even if they are the first params in the query string](https://github.com/iipc/webarchive-commons/pull/54) +* [Store origin-code of ARC file header](https://github.com/iipc/webarchive-commons/pull/52/) +* [Flush output etc before tallying stats to fix sizeOnDisk calculation](https://github.com/iipc/webarchive-commons/pull/51) +* [Get rid of broken, seemingly unnecessary escapeWhitespace() step of uri fixup](https://github.com/iipc/webarchive-commons/pull/50) + +1.1.6 +----- +* [Handle empty String argument in CharsetDetector.trimAttrValue](https://github.com/iipc/webarchive-commons/pull/49) +* [WAT extractor: adding information in WAT's 
warcinfo](https://github.com/iipc/webarchive-commons/issues/47) +* [WAT extractor: missing WARC format version](https://github.com/iipc/webarchive-commons/issues/45) +* [WAT extractor: envelope structure does not conform to the WAT specification](https://github.com/iipc/webarchive-commons/issues/44) +* [WAT extractor: WARC-Date in all records should be the WAT record generation date](https://github.com/iipc/webarchive-commons/issues/43) +* [WAT extractor: WARC-Filename in the WAT warcinfo record should be the WAT filename itself](https://github.com/iipc/webarchive-commons/issues/42) +* [WAT extractor: Entity-Trailing-Slop-Bytes should be called Entity-Trailing-Slop-Length](https://github.com/iipc/webarchive-commons/issues/48) + +1.1.5 +----- +* [Escape redirect URLs in RealCDXExtractorOutput](https://github.com/iipc/webarchive-commons/pull/36) +* [Tests fail on Windows](https://github.com/iipc/webarchive-commons/issues/2) +* [Test fails on Java 8](https://github.com/iipc/webarchive-commons/issues/31) +* [RecordingOutputStream can affect tcp packets sent in an undesirable way](https://github.com/iipc/webarchive-commons/issues/38) + +1.1.4 +----- +* [All dates should be independent of locale settings](https://github.com/iipc/webarchive-commons/pull/22) +* [Resolved fastutil conflict in dependencies](https://github.com/iipc/webarchive-commons/pull/24) + +1.1.3 +----- +* [Synchronised with IA fork](https://github.com/iipc/webarchive-commons/pull/18) +* [Updated to more recent Guava APIs](https://github.com/iipc/webarchive-commons/pull/17) +* [Fixed handling of uncompressed ARC files #13 and #14](https://github.com/iipc/webarchive-commons/pull/14) +* [Avoid pulling in the logback dependency IA#13](https://github.com/internetarchive/webarchive-commons/pull/13) + +1.1.2 +----- +* [Fixed support for reading uncompressed WARCs, along with some unit testing.](https://github.com/iipc/webarchive-commons/pull/12) + +1.1.1 +----- +* [Renamed from commons-webarchive to 
webarchive-commons](https://github.com/iipc/webarchive-commons/pull/8) +* [Cope with malformed GZip extra fields as produced by wget 1.14](https://github.com/iipc/webarchive-commons/pull/10) +* [Switch to httpcomponents, and add IA deployment information.](https://github.com/iipc/webarchive-commons/pull/11) diff --git a/README.md b/README.md index 72858a52..55be6e68 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ IIPC Web Archive Commons ======================== - -[![Build Status](https://travis-ci.org/iipc/webarchive-commons.png?branch=master)](https://travis-ci.org/iipc/webarchive-commons/) +[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.netpreserve.commons/webarchive-commons/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.netpreserve.commons/webarchive-commons) [![Javadoc](https://javadoc.io/badge2/org.netpreserve.commons/webarchive-commons/javadoc.svg)](https://www.javadoc.io/doc/org.netpreserve.commons/webarchive-commons) This repository contains common utility code for [OpenWayback][1] and other projects. 
diff --git a/pom-cdh4.xml b/pom-cdh4.xml deleted file mode 100644 index de19d8d0..00000000 --- a/pom-cdh4.xml +++ /dev/null @@ -1,229 +0,0 @@ - - 4.0.0 - - org.archive - ia-web-commons - 1.0-SNAPSHOT - jar - - ia-web-commons - http://maven.apache.org - - - UTF-8 - ${maven.build.timestamp} - yyyyMMddhhmmss - - - - - junit - junit - 3.8.1 - test - - - - com.google.guava - guava - 14.0.1 - - - - org.json - json - 20090211 - - - org.htmlparser - htmlparser - 1.6 - - - - org.mozilla - juniversalchardet - 1.0.3 - - - - commons-httpclient - commons-httpclient - 3.1 - - - - org.apache.hadoop - hadoop-core - 2.0.0-mr1-cdh4.2.0 - - - commons-httpclient - commons-httpclient - - - javax.servlet - servlet-api - - - javax.servlet.jsp - jsp-api - - - org.mortbay.jetty - jetty - - - org.mortbay.jetty - jetty-util - - - tomcat - jasper-runtime - - - tomcat - jasper-compiler - - - - - org.apache.hadoop - hadoop-common - 2.0.0-cdh4.2.0 - - - org.apache.hadoop - hadoop-mapreduce-client-common - 2.0.0-cdh4.2.0 - - - org.apache.hadoop - hadoop-mapreduce-client-core - 2.0.0-cdh4.2.0 - - - - org.apache.pig - pig - 0.11.1 - provided - - - - commons-lang - commons-lang - 2.5 - - - - commons-io - commons-io - 2.4 - - - - org.gnu.inet - libidn - 1.15 - - - it.unimi.dsi - mg4j - 1.0.1 - compile - - - org.apache.httpcomponents - httpcore - 4.3 - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 2.3.2 - - 1.6 - 1.6 - - - - maven-assembly-plugin - 2.4 - - - jar-with-dependencies - - ia-web-commons - - - - package - - single - - - - - - - - src/main/resources - true - - - - - - - internetarchive - Internet Archive Maven Repository - http://builds.archive.org:8080/maven2 - default - - - true - daily - warn - - - true - daily - warn - - - - - cloudera - Cloudera Hadoop - https://repository.cloudera.com/artifactory/cloudera-repos/ - default - - - true - daily - warn - - - true - daily - warn - - - - - - - - repository - - ${repository.url} - - - - diff --git a/pom.xml b/pom.xml index 
cfd201b0..4c019aae 100644 --- a/pom.xml +++ b/pom.xml @@ -1,20 +1,14 @@ 4.0.0 - - org.sonatype.oss - oss-parent - 7 - - - org.netpreserve.commons webarchive-commons - 1.1.1-SNAPSHOT + 3.0.5-SNAPSHOT jar webarchive-commons https://github.com/iipc/webarchive-commons + Common web archive utility code The International Internet Preservation Consortium @@ -46,39 +40,40 @@ scm:git:git@github.com:iipc/webarchive-commons.git scm:git:git@github.com:iipc/webarchive-commons.git - git@github.com:iipc/webarchive-commons.git - + https://github.com/iipc/webarchive-commons + webarchive-commons-3.0.4 + - UTF-8 ${maven.build.timestamp} yyyyMMddhhmmss + 8 - junit - junit - 3.8.1 + org.junit.jupiter + junit-jupiter + 5.14.4 test com.google.guava guava - 14.0.1 + 33.6.0-jre - + org.json json - 20090211 + 20260522 org.htmlparser htmlparser - 1.6 + 2.1 @@ -86,70 +81,68 @@ juniversalchardet 1.0.3 - + - commons-httpclient - commons-httpclient - 3.1 - + commons-codec + commons-codec + 1.22.0 + + + + commons-cli + commons-cli + 1.11.0 + org.apache.hadoop - hadoop-core - 0.20.2-cdh3u4 + hadoop-common + 3.4.3 + true - commons-httpclient - commons-httpclient - - - javax.servlet - servlet-api - - - javax.servlet.jsp - jsp-api - - - org.mortbay.jetty - jetty - - - org.mortbay.jetty - jetty-util + * + * + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + 3.4.3 + true + - tomcat - jasper-runtime + * + * - - tomcat - jasper-compiler - - - hsqldb - hsqldb - - + org.apache.pig pig - 0.10.0 + 0.18.0 provided + + + * + * + + - commons-lang - commons-lang - 2.5 + org.apache.commons + commons-lang3 + 3.20.0 commons-io commons-io - 2.4 + 2.22.0 @@ -157,27 +150,11 @@ libidn 1.15 - - it.unimi.dsi - dsiutils - 2.0.12 - compile - + org.apache.httpcomponents httpcore - 4.3 - - - joda-time - joda-time - 1.6 - - - fastutil - fastutil - 5.0.7 - compile + 4.4.16 @@ -186,31 +163,46 @@ org.apache.maven.plugins maven-compiler-plugin - 2.3.2 + 3.15.0 - 1.6 - 1.6 + ${java.version} + ${java.version} - 
maven-assembly-plugin - 2.4 + org.apache.maven.plugins + maven-surefire-plugin + 3.5.6 + + + de.thetaphi + forbiddenapis + 3.10 - - jar-with-dependencies - - webarchive-commons + ${java.version} + true + + false + + jdk-unsafe + jdk-deprecated + jdk-non-portable + + + src/test/resources/forbidden-apis-signatures.txt + - package - single + check + testCheck + src/main/resources @@ -219,25 +211,79 @@ - - - cloudera - Cloudera Hadoop - https://repository.cloudera.com/artifactory/cloudera-repos/ - default - - - true - daily - warn - - - true - daily - warn - - - - + + + + release + + + + org.sonatype.central + central-publishing-maven-plugin + 0.10.0 + true + + central + true + + + + org.apache.maven.plugins + maven-release-plugin + 3.3.1 + + + org.apache.maven.plugins + maven-source-plugin + 3.4.0 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.12.0 + + + attach-javadocs + + jar + + + + + + org.apache.maven.plugins + maven-gpg-plugin + 3.2.8 + + + sign-artifacts + verify + + sign + + + + + + + + + jdk9-plus + + [9,) + + + 8 + + + diff --git a/src/main/java/it/unimi/dsi/fastutil/io/RepositionableStream.java b/src/main/java/it/unimi/dsi/fastutil/io/RepositionableStream.java new file mode 100644 index 00000000..a81645f0 --- /dev/null +++ b/src/main/java/it/unimi/dsi/fastutil/io/RepositionableStream.java @@ -0,0 +1,42 @@ +// copied from fastutil, keeping the original package name to avoid breaking +// compatibility with existing user code that implements this interface +package it.unimi.dsi.fastutil.io; + +/* + * Copyright (C) 2005-2015 Sebastiano Vigna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** A basic interface specifying positioning methods for a byte stream. + * + * @author Sebastiano Vigna + * @since 4.4 + */ + +public interface RepositionableStream { + + /** Sets the current stream position. + * + * @param newPosition the new stream position. + */ + void position( long newPosition ) throws java.io.IOException; + + /** Returns the current stream position. + * + * @return the current stream position. + */ + long position() throws java.io.IOException; + +} diff --git a/src/main/java/org/archive/extract/DumpingExtractorOutput.java b/src/main/java/org/archive/extract/DumpingExtractorOutput.java index a4151076..1ccbf771 100644 --- a/src/main/java/org/archive/extract/DumpingExtractorOutput.java +++ b/src/main/java/org/archive/extract/DumpingExtractorOutput.java @@ -3,14 +3,17 @@ import java.io.IOException; import java.io.OutputStream; import java.io.PrintStream; +import java.io.UnsupportedEncodingException; import java.util.logging.Logger; import org.archive.resource.Resource; import org.archive.util.StreamCopy; import org.json.JSONException; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; + +import static java.nio.charset.StandardCharsets.UTF_8; public class DumpingExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -18,11 +21,14 @@ public class DumpingExtractorOutput implements ExtractorOutput { private PrintStream out; public DumpingExtractorOutput(OutputStream out) { - this.out = new PrintStream(out); + 
try { + this.out = new PrintStream(out, false, UTF_8.name()); + } catch (UnsupportedEncodingException e) { + } } public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); StreamCopy.copy(resource.getInputStream(), co); long bytes = co.getCount(); diff --git a/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java b/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java index ad10be40..567b1cd8 100644 --- a/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java +++ b/src/main/java/org/archive/extract/ExtractingResourceFactoryMapper.java @@ -1,6 +1,7 @@ package org.archive.extract; import java.util.Iterator; +import java.util.Locale; import java.util.logging.Logger; import org.archive.format.arc.ARCConstants; @@ -68,14 +69,14 @@ private boolean childFieldStartsWith(MetaData m, String child, String key, String search) { String val = getChildField(m,child,key); return val == null ? false : - val.toLowerCase().startsWith(search.toLowerCase()); + val.toLowerCase(Locale.ROOT).startsWith(search.toLowerCase(Locale.ROOT)); } private boolean childFieldContains(MetaData m, String child, String key, String search) { String val = getChildField(m,child,key); return val == null ? 
false : - val.toLowerCase().contains(search.toLowerCase()); + val.toLowerCase(Locale.ROOT).contains(search.toLowerCase(Locale.ROOT)); } private boolean childFieldEquals(MetaData m, String child, @@ -88,7 +89,7 @@ private boolean childFieldEquals(MetaData m, String child, private String caseInsensitiveKeyScan(MetaData m, String child, String k) { try { if(m.has(child)) { - String kLC = k.toLowerCase(); + String kLC = k.toLowerCase(Locale.ROOT); JSONObject childJSObj = m.getJSONObject(child); @SuppressWarnings("rawtypes") Iterator i = childJSObj.keys(); @@ -96,7 +97,7 @@ private String caseInsensitiveKeyScan(MetaData m, String child, String k) { Object kObj = i.next(); if(kObj instanceof String) { String kString = (String) kObj; - if(kString.toLowerCase().equals(kLC)) { + if(kString.toLowerCase(Locale.ROOT).equals(kLC)) { return childJSObj.getString(kString); } } @@ -128,7 +129,7 @@ private boolean isHTTPARCResource(MetaData envelope) { private boolean isHTMLHttpResource(MetaData m) { String type = caseInsensitiveKeyScan(m,HTTP_HEADERS_LIST, "Content-Type"); - return type == null ? false : type.toLowerCase().contains("html"); + return type == null ? 
false : type.toLowerCase(Locale.ROOT).contains("html"); } private boolean isWARCType(MetaData envelope, WARCRecordType type) { @@ -153,7 +154,10 @@ private boolean isWARCInfoResource(MetaData envelope) { private boolean isHTTPResponseWARCResource(MetaData envelope) { return childFieldEquals(envelope,WARC_HEADER_METADATA, WARCConstants.CONTENT_TYPE, - WARCConstants.HTTP_RESPONSE_MIMETYPE); + WARCConstants.HTTP_RESPONSE_MIMETYPE) + || childFieldEquals(envelope,WARC_HEADER_METADATA, + WARCConstants.CONTENT_TYPE, + WARCConstants.HTTP_RESPONSE_MIMETYPE_NS); } private boolean isWARCJSONResource(MetaData envelope) { return childFieldEquals(envelope,WARC_HEADER_METADATA, diff --git a/src/main/java/org/archive/extract/ExtractingResourceProducer.java b/src/main/java/org/archive/extract/ExtractingResourceProducer.java index de671bee..07cdb88a 100644 --- a/src/main/java/org/archive/extract/ExtractingResourceProducer.java +++ b/src/main/java/org/archive/extract/ExtractingResourceProducer.java @@ -1,6 +1,7 @@ package org.archive.extract; import java.io.IOException; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; @@ -33,7 +34,7 @@ public Resource getNext() throws ResourceParseException, IOException { return current; } if(LOG.isLoggable(Level.INFO)) { - LOG.info(String.format("Extracting (%s) with (%s)\n", + LOG.info(String.format(Locale.ROOT, "Extracting (%s) with (%s)\n", current.getClass().toString(), f.getClass().toString())); } diff --git a/src/main/java/org/archive/extract/JSONViewExtractorOutput.java b/src/main/java/org/archive/extract/JSONViewExtractorOutput.java index 530dadd0..6cb7c445 100644 --- a/src/main/java/org/archive/extract/JSONViewExtractorOutput.java +++ b/src/main/java/org/archive/extract/JSONViewExtractorOutput.java @@ -3,19 +3,25 @@ import java.io.IOException; import java.io.OutputStream; import java.io.PrintStream; +import java.io.UnsupportedEncodingException; import java.util.List; -import 
org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.archive.format.json.JSONView; import org.archive.resource.Resource; import org.archive.util.StreamCopy; +import static java.nio.charset.StandardCharsets.UTF_8; + public class JSONViewExtractorOutput implements ExtractorOutput { private PrintStream out; private JSONView view; public JSONViewExtractorOutput(OutputStream out, String filterPath) { view = new JSONView(filterPath.split(",")); - this.out = new PrintStream(out); + try { + this.out = new PrintStream(out, false, UTF_8.name()); + } catch (UnsupportedEncodingException e) { + } } public void output(Resource resource) throws IOException { StreamCopy.readToEOF(resource.getInputStream()); diff --git a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java index 306f67a3..ff0b9e83 100644 --- a/src/main/java/org/archive/extract/RealCDXExtractorOutput.java +++ b/src/main/java/org/archive/extract/RealCDXExtractorOutput.java @@ -1,11 +1,14 @@ package org.archive.extract; import java.io.IOException; +import java.io.OutputStream; import java.io.PrintWriter; import java.net.MalformedURLException; +import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.util.List; +import java.util.Locale; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -23,8 +26,8 @@ import org.json.JSONException; import org.json.JSONObject; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class RealCDXExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -72,7 +75,7 @@ public RealCDXExtractorOutput(PrintWriter out) { // SimpleJSONPathSpec gzFooterLengthSpec = new SimpleJSONPathSpec("Container.Gzip-Metadata.Footer-Length"); // SimpleJSONPathSpec gzHeaderLengthSpec = new 
SimpleJSONPathSpec("Container.Gzip-Metadata.Header-Length"); public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); try { StreamCopy.copy(resource.getInputStream(), co); @@ -102,7 +105,7 @@ public void output(Resource resource) throws IOException { String meta = "TBD"; String redir = "TBD"; - if(format.equals("WARC")) { + if(format.startsWith("WARC")) { origUrl = getWARCURL(m); date = getWARCDate(m); String type = getWARCType(m); @@ -129,7 +132,7 @@ public void output(Resource resource) throws IOException { } else { meta = "-"; } - if(mime.toLowerCase().contains("html")) { + if(mime.toLowerCase(Locale.ROOT).contains("html")) { if(redir.equals("-")) { // maybe an obvious meta-refresh? redir = extractHTMLMetaRefresh(origUrl,m); @@ -200,7 +203,7 @@ public void output(Resource resource) throws IOException { } else { meta = "-"; } - if(mime.toLowerCase().contains("html")) { + if(mime.toLowerCase(Locale.ROOT).contains("html")) { if(redir.equals("-")) { // maybe an obvious meta-refresh? 
redir = extractHTMLMetaRefresh(origUrl,m); @@ -220,7 +223,8 @@ public void output(Resource resource) throws IOException { canUrl = keyMaker.makeKey(origUrl); // URL DATE OURL MIME HTTP-CODE SHA1 META REDIR OFFSET LENGTH FILE if(dumpJSON) { - out.format("%s %s %s %s %s %s %s %s %s %s %s %s\n", + out.format(Locale.ROOT, + "%s %s %s %s %s %s %s %s %s %s %s %s\n", canUrl, date, origUrl, @@ -234,7 +238,8 @@ public void output(Resource resource) throws IOException { filename, m.toString(1)); } else { - out.format("%s %s %s %s %s %s %s %s %s %s %s\n", + out.format(Locale.ROOT, + "%s %s %s %s %s %s %s %s %s %s %s\n", canUrl, date, origUrl, @@ -267,7 +272,7 @@ private String extractHTMLRobots(MetaData m) { if(meta != null) { String name = scanHeadersLC(meta, "name", null); if(name != null) { - if(name.toLowerCase().equals("robots")) { + if(name.toLowerCase(Locale.ROOT).equals("robots")) { // alright - some robot instructions: String content = scanHeadersLC(meta, "content", null); if(content != null) { @@ -289,7 +294,7 @@ private String extractHTMLMetaRefresh(String origUrl, MetaData m) { if(meta != null) { String name = scanHeadersLC(meta, "http-equiv", null); if(name != null) { - if(name.toLowerCase().equals("refresh")) { + if(name.toLowerCase(Locale.ROOT).equals("refresh")) { // alright - some robot instructions: String content = scanHeadersLC(meta, "content", null); if(content != null) { @@ -306,12 +311,14 @@ private String extractHTMLMetaRefresh(String origUrl, MetaData m) { return "-"; } - private String resolve(String context, String spec) { + static String resolve(String context, String spec) { // TODO: test! 
try { URL cUrl = new URL(context); - URL resolved = new URL(cUrl,spec); - return resolved.toURI().toASCIIString(); + URL url = new URL(cUrl, spec); + // this constructor escapes its arguments, if necessary + URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), url.getRef()); + return uri.toASCIIString(); } catch (URISyntaxException e) { } catch (MalformedURLException e) { @@ -326,7 +333,7 @@ private String scanHeadersLC(JSONObject o, String match, String defaultVal) { if(o.length() == 0) { return defaultVal; } - String lc = match.toLowerCase().trim(); + String lc = match.toLowerCase(Locale.ROOT).trim(); // try { // System.err.println("REC:" + o.toString(1)); // } catch (JSONException e1) { @@ -334,7 +341,7 @@ private String scanHeadersLC(JSONObject o, String match, String defaultVal) { // e1.printStackTrace(); // } for(String key : JSONObject.getNames(o)) { - if(lc.equals(key.toLowerCase().trim())) { + if(lc.equals(key.toLowerCase(Locale.ROOT).trim())) { try { return o.getString(key).trim(); } catch (JSONException e) { @@ -468,7 +475,7 @@ private String parseRobotInstructions(String input) { if(input == null) { return "-"; } - String up = input.replaceAll("-", "").toUpperCase(); + String up = input.replaceAll("-", "").toUpperCase(Locale.ROOT); StringBuilder sb = new StringBuilder(3); if(up.contains(NO_FOLLOW_MATCH)) { sb.append("F"); diff --git a/src/main/java/org/archive/extract/ResourceExtractor.java b/src/main/java/org/archive/extract/ResourceExtractor.java index 7f4d6e7a..d9b9f396 100644 --- a/src/main/java/org/archive/extract/ResourceExtractor.java +++ b/src/main/java/org/archive/extract/ResourceExtractor.java @@ -1,12 +1,14 @@ package org.archive.extract; import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.net.URISyntaxException; -import java.nio.charset.Charset; +import 
java.nio.charset.StandardCharsets; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; @@ -25,7 +27,6 @@ public class ResourceExtractor implements ResourceConstants, Tool { private final static Logger LOG = Logger.getLogger(ResourceExtractor.class.getName()); - Charset UTF8 = Charset.forName("utf-8"); public final static String TOOL_NAME = "extractor"; public static final String TOOL_DESCRIPTION = "A tool for extracting metadata from WARC, ARC, and WAT files"; @@ -64,7 +65,7 @@ public static void main(String[] args) throws Exception { private PrintWriter makePrintWriter(OutputStream os) { - return new PrintWriter(new OutputStreamWriter(os, Charset.forName("UTF-8"))); + return new PrintWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)); } public int run(String[] args) @@ -74,7 +75,7 @@ public int run(String[] args) if(args.length < 1) { return USAGE(1); } - if(args.length > 3) { + if(args.length > 4) { return USAGE(1); } int max = Integer.MAX_VALUE; @@ -89,7 +90,14 @@ public int run(String[] args) } } String path = args[arg]; - if(args.length == arg + 2) { + String outputFile = null; + if(args.length >= arg + 2) { + //if a output file is specified in the command line + if(args.length == arg + 3) { + outputFile = args[arg+2]; + os.close(); + os = new FileOutputStream(outputFile); + } if(args[arg].equals("-cdx")) { path = args[arg+1]; out = new RealCDXExtractorOutput(makePrintWriter(os)); @@ -100,7 +108,7 @@ public int run(String[] args) } else if(args[arg].equals("-wat")) { path = args[arg+1]; - out = new WATExtractorOutput(os); + out = new WATExtractorOutput(os, outputFile); } else { String filter = args[arg+1]; out = new JSONViewExtractorOutput(os, filter); @@ -130,18 +138,18 @@ public int run(String[] args) out.output(r); } catch(GZIPFormatException e) { - LOG.severe(String.format("%s: %s",exProducer.getContext(),e.getMessage())); + LOG.severe(String.format(Locale.ROOT, "%s: 
%s",exProducer.getContext(),e.getMessage())); //Log is not coming out for some damn reason....needs to be studied - System.err.format("%s: %s",exProducer.getContext(),e.getMessage()); + System.err.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage()); if(ProducerUtils.STRICT_GZ) { throw e; } e.printStackTrace(); } catch(ResourceParseException e) { - LOG.severe(String.format("%s: %s",exProducer.getContext(),e.getMessage())); + LOG.severe(String.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage())); //Log is not coming out for some damn reason....needs to be studied - System.err.format("%s: %s",exProducer.getContext(),e.getMessage()); + System.err.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage()); if(ProducerUtils.STRICT_GZ) { throw e; @@ -149,9 +157,9 @@ public int run(String[] args) e.printStackTrace(); } catch(RecoverableRecordFormatException e) { // this should not get here - ResourceFactory et al should wrap as ResourceParseExceptions... 
- LOG.severe(String.format("RECOVERABLE - %s: %s",exProducer.getContext(),e.getMessage())); + LOG.severe(String.format(Locale.ROOT, "RECOVERABLE - %s: %s",exProducer.getContext(),e.getMessage())); //Log is not coming out for some damn reason....needs to be studied - System.err.format("%s: %s",exProducer.getContext(),e.getMessage()); + System.err.format(Locale.ROOT, "%s: %s",exProducer.getContext(),e.getMessage()); e.printStackTrace(); diff --git a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java index 0d564a6f..b1050a14 100644 --- a/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java +++ b/src/main/java/org/archive/extract/WARCMetadataRecordExtractorOutput.java @@ -1,28 +1,22 @@ package org.archive.extract; import java.io.IOException; +import java.io.OutputStream; import java.io.PrintWriter; -import java.net.MalformedURLException; -import java.net.URISyntaxException; -import java.net.URL; import java.util.List; +import java.util.Locale; import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.archive.format.gzip.GZIPFormatException; -import org.archive.format.json.JSONUtils; import org.archive.format.json.SimpleJSONPathSpec; import org.archive.resource.MetaData; import org.archive.resource.Resource; -import org.archive.util.IAUtils; import org.archive.util.StreamCopy; import org.json.JSONArray; -import org.json.JSONException; import org.json.JSONObject; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingOutputStream; -import com.google.common.io.NullOutputStream; public class WARCMetadataRecordExtractorOutput implements ExtractorOutput { private static final Logger LOG = @@ -47,7 +41,7 @@ public WARCMetadataRecordExtractorOutput(PrintWriter out) { } public void output(Resource resource) throws IOException { - NullOutputStream nullo = new NullOutputStream(); + 
OutputStream nullo = ByteStreams.nullOutputStream(); CountingOutputStream co = new CountingOutputStream(nullo); try { StreamCopy.copy(resource.getInputStream(), co); @@ -67,7 +61,7 @@ public void output(Resource resource) throws IOException { String date = "TBD"; String canUrl = "TBD"; - if(format.equals("WARC")) { + if(format.startsWith("WARC")) { origUrl = getWARCURL(m); date = getWARCDate(m); String type = getWARCType(m); @@ -86,7 +80,7 @@ public void output(Resource resource) throws IOException { String[] linkParts = outLinkValue.split(" "); if(linkParts.length > 2) //'outlinks': 'origUrl date origOutlinkUrl linktype linktext' - out.format("%s\t%s\t%s\t%s\t\n",origUrl,date,linkParts[0],linkParts[2]); + out.format(Locale.ROOT,"%s\t%s\t%s\t%s\t\n",origUrl,date,linkParts[0],linkParts[2]); } } else if(outputType.equals("hopinfo")) { String key = obj.get("Name").toString(); @@ -102,7 +96,7 @@ public void output(Resource resource) throws IOException { } if(outputType.equals("hopinfo")) { //'hopinfo': 'origCrawledUrl date origViaUrl hopPathFromVia sourceTag' - out.format("%s\t%s\t%s\t%s\t%s\n",origUrl,date,viaUrl,viaPath,sourceTag); + out.format(Locale.ROOT,"%s\t%s\t%s\t%s\t%s\n",origUrl,date,viaUrl,viaPath,sourceTag); } } } diff --git a/src/main/java/org/archive/extract/WATExtractorOutput.java b/src/main/java/org/archive/extract/WATExtractorOutput.java index f4d27147..f695796f 100644 --- a/src/main/java/org/archive/extract/WATExtractorOutput.java +++ b/src/main/java/org/archive/extract/WATExtractorOutput.java @@ -1,13 +1,14 @@ package org.archive.extract; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; -import java.nio.charset.Charset; import java.text.ParseException; +import java.net.UnknownHostException; import java.util.Date; +import java.util.Locale; import org.archive.format.gzip.GZIPMemberWriter; import 
org.archive.format.gzip.GZIPMemberWriterCommittedOutputStream; @@ -22,18 +23,29 @@ import org.archive.util.io.CommitedOutputStream; import org.json.JSONException; +import java.net.InetAddress; +import java.text.DateFormat; +import java.text.SimpleDateFormat; + +import java.util.logging.Logger; + +import static java.nio.charset.StandardCharsets.UTF_8; + public class WATExtractorOutput implements ExtractorOutput { WARCRecordWriter recW; private boolean wroteFirst; private GZIPMemberWriter gzW; private static int DEFAULT_BUFFER_RAM = 1024 * 1024; private int bufferRAM = DEFAULT_BUFFER_RAM; - private final static Charset UTF8 = Charset.forName("UTF-8"); + private String outputFile; - public WATExtractorOutput(OutputStream out) { + private static final Logger LOG = Logger.getLogger(WATExtractorOutput.class.getName()); + + public WATExtractorOutput(OutputStream out, String outputFile) { gzW = new GZIPMemberWriter(out); recW = new WARCRecordWriter(); wroteFirst = false; + this.outputFile = outputFile; } private CommitedOutputStream getOutput() { @@ -56,9 +68,9 @@ public void output(Resource resource) throws IOException { throw new IOException("Missing Envelope.Format"); } cos = getOutput(); - if(envelopeFormat.equals("ARC")) { + if(envelopeFormat.startsWith("ARC")) { writeARC(cos,top); - } else if(envelopeFormat.equals("WARC")) { + } else if(envelopeFormat.startsWith("WARC")) { writeWARC(cos,top); } else { // hrm... 
@@ -68,13 +80,51 @@ public void output(Resource resource) throws IOException { } private void writeWARCInfo(OutputStream recOut, MetaData md) throws IOException { - String filename = JSONUtils.extractSingle(md, "Container.Filename"); - if(filename == null) { - throw new IOException("No Container.Filename..."); + // filename is given in the command line + String filename = outputFile; + if (filename == null || filename.length() == 0) { + // if no filename by command line, we construct a default filename base on container filename + filename = JSONUtils.extractSingle(md, "Container.Filename"); + if (filename == null) { + throw new IOException("No Container.Filename..."); + } + if (filename.endsWith(".warc") || filename.endsWith(".warc.gz")) { + filename = filename.replaceFirst("\\.warc$", ".warc.wat.gz"); + filename = filename.replaceFirst("\\.warc\\.gz$", ".warc.wat.gz"); + } else if (filename.endsWith(".arc") || filename.endsWith(".arc.gz")) { + filename = filename.replaceFirst("\\.arc$", ".arc.wat.gz"); + filename = filename.replaceFirst("\\.arc\\.gz$", ".arc.wat.gz"); + } } + // removing path from filename + File tmpFile = new File(filename); + filename = tmpFile.getName(); HttpHeaders headers = new HttpHeaders(); - headers.add("Software-Info", IAUtils.COMMONS_VERSION); - headers.addDateHeader("Extracted-Date", new Date()); + headers.add("software", IAUtils.COMMONS_VERSION); + headers.addDateHeader("extractedDate", new Date()); + + // add ip, hostname + try { + InetAddress host = InetAddress.getLocalHost(); + headers.add("ip", host.getHostAddress()); + headers.add("hostname", host.getCanonicalHostName()); + } catch (UnknownHostException e) { + LOG.warning("unable to obtain local crawl engine host :\n"+e.getMessage()); + } + + headers.add("format", IAUtils.WARC_FORMAT); + headers.add("conformsTo", IAUtils.WARC_FORMAT_CONFORMS_TO); + // optional arguments + if(IAUtils.OPERATOR != null && IAUtils.OPERATOR.length() > 0) { + headers.add("operator", IAUtils.OPERATOR); 
+ } + if(IAUtils.PUBLISHER != null && IAUtils.PUBLISHER.length() > 0) { + headers.add("publisher", IAUtils.PUBLISHER); + } + if(IAUtils.WAT_WARCINFO_DESCRIPTION != null && IAUtils.WAT_WARCINFO_DESCRIPTION.length() > 0) { + headers.add("description", IAUtils.WAT_WARCINFO_DESCRIPTION); + } + ByteArrayOutputStream baos = new ByteArrayOutputStream(); headers.write(baos); recW.writeWARCInfoRecord(recOut,filename,baos.toByteArray()); @@ -93,7 +143,7 @@ private void writeARC(OutputStream recOut, MetaData md) throws IOException { String capDateString = extractOrIO(md, "Envelope.ARC-Header-Metadata.Date"); String filename = extractOrIO(md, "Container.Filename"); String offset = extractOrIO(md, "Container.Offset"); - String recId = String.format("",filename,offset); + String recId = String.format(Locale.ROOT, "",filename,offset); writeWARCMDRecord(recOut,md,targetURI,capDateString,recId); } @@ -101,12 +151,16 @@ private void writeWARC(OutputStream recOut, MetaData md) throws IOException { String warcType = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Type"); String targetURI; if(warcType.equals("warcinfo")) { - targetURI = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Filename"); + targetURI = JSONUtils.extractSingle(md, "Envelope.WARC-Header-Metadata.WARC-Filename"); + } else if (warcType.equals("metadata")) { + // WARC-Target-URI is optional in metadata records + targetURI = JSONUtils.extractSingle(md, "Envelope.Metadata-Header-Metadata.WARC-Target-URI"); } else { targetURI = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Target-URI"); } - String capDateString = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Date"); - capDateString = transformWARCDate(capDateString); + // handle date of generation in WARC format + DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.ROOT); + String capDateString = dateFormat.format(new Date()); String recId = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Record-ID"); 
writeWARCMDRecord(recOut,md,targetURI,capDateString,recId); } @@ -117,7 +171,7 @@ private void writeWARCMDRecord(OutputStream recOut, MetaData md, ByteArrayOutputStream bos = new ByteArrayOutputStream(); - OutputStreamWriter osw = new OutputStreamWriter(bos, UTF8); + OutputStreamWriter osw = new OutputStreamWriter(bos, UTF_8); try { md.write(osw); } catch (JSONException e1) { @@ -125,7 +179,7 @@ private void writeWARCMDRecord(OutputStream recOut, MetaData md, throw new IOException(e1); } osw.flush(); -// ByteArrayInputStream bais = new ByteArrayInputStream(md.toString().getBytes("UTF-8")); +// ByteArrayInputStream bais = new ByteArrayInputStream(md.toString().getBytes(UTF_8)); Date capDate; try { capDate = DateUtils.getSecondsSinceEpoch(capDateString); diff --git a/src/main/java/org/archive/format/ArchiveFileConstants.java b/src/main/java/org/archive/format/ArchiveFileConstants.java index b0b8aa66..89e1308c 100644 --- a/src/main/java/org/archive/format/ArchiveFileConstants.java +++ b/src/main/java/org/archive/format/ArchiveFileConstants.java @@ -44,6 +44,11 @@ public interface ArchiveFileConstants { * Key for the Archive File version field. */ public static final String VERSION_FIELD_KEY = "version"; + + /** + * Key for the Archive File origin-code field. This value is often hard-coded, so use with care. + */ + public static final String ORIGIN_FIELD_KEY = "origin"; /** * Key for the Archive File length field. @@ -80,7 +85,7 @@ public interface ArchiveFileConstants { * Key for the Archive Record absolute offset into Archive file. 
*/ public static final String ABSOLUTE_OFFSET_KEY = "absolute-offset"; - + public static final String READER_IDENTIFIER_FIELD_KEY = "reader-identifier"; diff --git a/src/main/java/org/archive/format/arc/ARCConstants.java b/src/main/java/org/archive/format/arc/ARCConstants.java index a336ddeb..39dbf7ed 100755 --- a/src/main/java/org/archive/format/arc/ARCConstants.java +++ b/src/main/java/org/archive/format/arc/ARCConstants.java @@ -1,6 +1,7 @@ package org.archive.format.arc; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; import java.util.zip.Deflater; @@ -16,7 +17,7 @@ */ public interface ARCConstants extends ArchiveFileConstants { public final static int MAX_META_LENGTH = 1024 * 32; - public final static Charset ARC_META_CHARSET = Charset.forName("utf-8"); + public final static Charset ARC_META_CHARSET = StandardCharsets.UTF_8; public final static int NEW_LINE_ORD = 10; public static final int CARRIAGE_RETURN_ORD = 13; public final static String DELIMITER = " "; @@ -196,7 +197,7 @@ public interface ARCConstants extends ArchiveFileConstants { .asList(new String[] { URL_FIELD_KEY, IP_HEADER_FIELD_KEY, DATE_FIELD_KEY, MIMETYPE_FIELD_KEY, LENGTH_FIELD_KEY, VERSION_FIELD_KEY, - ABSOLUTE_OFFSET_KEY }); + ORIGIN_FIELD_KEY, ABSOLUTE_OFFSET_KEY }); /** * Minimum possible record length. 
diff --git a/src/main/java/org/archive/format/arc/FiledescRecordParser.java b/src/main/java/org/archive/format/arc/FiledescRecordParser.java index c2d7bb65..6a34eb5d 100644 --- a/src/main/java/org/archive/format/arc/FiledescRecordParser.java +++ b/src/main/java/org/archive/format/arc/FiledescRecordParser.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; public class FiledescRecordParser { public boolean strict = false; @@ -12,7 +13,7 @@ public FiledescRecord parse(InputStream is) throws IOException { FiledescRecord rec = new FiledescRecord(); try { // TODO: count input bytes read... - BufferedReader br = new BufferedReader(new InputStreamReader(is,"UTF-8")); + BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String line = br.readLine(); parseLine1(rec,line); line = br.readLine(); diff --git a/src/main/java/org/archive/format/cdx/CDXFile.java b/src/main/java/org/archive/format/cdx/CDXFile.java index 7dca0464..612f7454 100644 --- a/src/main/java/org/archive/format/cdx/CDXFile.java +++ b/src/main/java/org/archive/format/cdx/CDXFile.java @@ -18,6 +18,8 @@ import org.archive.util.iterator.CloseableIterator; import org.archive.util.zip.OpenJDK7GZIPInputStream; +import static java.nio.charset.StandardCharsets.UTF_8; + public class CDXFile extends SortedTextFile implements CDXInputSource { public CDXFile(String uri) throws IOException { @@ -94,7 +96,7 @@ public static BufferedReader createStreamingLineReader(String uri, boolean gzipp input = new OpenJDK7GZIPInputStream(swis); } - BufferedReader reader = new BufferedReader(new InputStreamReader(input)); + BufferedReader reader = new BufferedReader(new InputStreamReader(input, UTF_8)); return reader; } diff --git a/src/main/java/org/archive/format/cdx/FieldSplitLine.java b/src/main/java/org/archive/format/cdx/FieldSplitLine.java index 7e965b2f..2da61808 100644 --- 
a/src/main/java/org/archive/format/cdx/FieldSplitLine.java +++ b/src/main/java/org/archive/format/cdx/FieldSplitLine.java @@ -3,7 +3,7 @@ import java.util.ArrayList; import java.util.List; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * Base class for text lines that are split by a delimiter Some examples will be diff --git a/src/main/java/org/archive/format/dns/DNSResponseParser.java b/src/main/java/org/archive/format/dns/DNSResponseParser.java index b5f81633..3e868ccf 100644 --- a/src/main/java/org/archive/format/dns/DNSResponseParser.java +++ b/src/main/java/org/archive/format/dns/DNSResponseParser.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; public class DNSResponseParser { @@ -28,7 +29,7 @@ public void parse(InputStream is, DNSResponse response) throws IOException, DNSP try { // TODO: should we wrap in a CountingInputStream and indicate // observed octet-length? - BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8")); + BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String date = br.readLine().trim(); if(isDate(date)) { response.setDate(date); diff --git a/src/main/java/org/archive/format/gzip/GZIPDecoder.java b/src/main/java/org/archive/format/gzip/GZIPDecoder.java index ef2998a5..9a8f7451 100644 --- a/src/main/java/org/archive/format/gzip/GZIPDecoder.java +++ b/src/main/java/org/archive/format/gzip/GZIPDecoder.java @@ -75,7 +75,7 @@ public long alignOnMagic3(InputStream is) throws IOException { // !?? // nope. are the next 2 possibilities? 
if((lookahead[1] == GZIP_MAGIC_ONE) && - (lookahead[2] == GZIP_MAGIC_TWO)) { + ((lookahead[2] & 0xff) == GZIP_MAGIC_TWO)) { // !12 keep = 2; } else if(lookahead[2] == GZIP_MAGIC_ONE) { diff --git a/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java b/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java index a4ed6260..0a9a82e0 100644 --- a/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java +++ b/src/main/java/org/archive/format/gzip/GZIPFExtraRecord.java @@ -98,12 +98,17 @@ public void writeTo(OutputStream os) throws IOException { os.write(value); } } - public int read(InputStream is) throws IOException { + public int read(InputStream is, int maxRead) throws IOException { byte tmpName[] = null; byte tmpVal[] = null; int valLen = 0; tmpName = ByteOp.readNBytes(is, GZIP_FEXTRA_NAME_BYTES); valLen = ByteOp.readShort(is); + if (valLen > (maxRead - BYTES_IN_SHORT - GZIP_FEXTRA_NAME_BYTES)) { + /* read in what's left, but throw an exception */ + tmpVal = ByteOp.readNBytes(is, maxRead - BYTES_IN_SHORT - GZIP_FEXTRA_NAME_BYTES); + throw new GZIPFormatException.GZIPExtraFieldShortException(maxRead); + } if(valLen > 0) { tmpVal = ByteOp.readNBytes(is, valLen); } diff --git a/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java b/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java index 7dc0de44..e5920552 100755 --- a/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java +++ b/src/main/java/org/archive/format/gzip/GZIPFExtraRecords.java @@ -53,12 +53,17 @@ public void readRecords(InputStream is) ArrayList tmpList = new ArrayList(); while(bytesRemaining > 0) { GZIPFExtraRecord tmpRecord = new GZIPFExtraRecord(); - int bytesRead = tmpRecord.read(is); - bytesRemaining -= bytesRead; + try { + int bytesRead = tmpRecord.read(is, bytesRemaining); + bytesRemaining -= bytesRead; + tmpList.add(tmpRecord); + } catch (GZIPFormatException.GZIPExtraFieldShortException ex) { + /* not enough bytes for the extra field; move on */ + 
bytesRemaining -= ex.bytesRead; + } if(bytesRemaining < 0) { throw new GZIPFormatException("Invalid FExtra length/records"); } - tmpList.add(tmpRecord); } this.addAll(tmpList); } diff --git a/src/main/java/org/archive/format/gzip/GZIPFormatException.java b/src/main/java/org/archive/format/gzip/GZIPFormatException.java index ca627a88..3916dafa 100644 --- a/src/main/java/org/archive/format/gzip/GZIPFormatException.java +++ b/src/main/java/org/archive/format/gzip/GZIPFormatException.java @@ -21,4 +21,11 @@ public GZIPFormatException(Exception e) { public GZIPFormatException(String message, IOException e) { super(message,e); } + public static class GZIPExtraFieldShortException extends GZIPFormatException { + int bytesRead; + public GZIPExtraFieldShortException(int bytesRead) { + super("Extra Field short."); + this.bytesRead = bytesRead; + } + } } diff --git a/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java b/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java index d70bf394..154cf5f1 100644 --- a/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java +++ b/src/main/java/org/archive/format/gzip/GZIPMemberSeries.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; import java.util.zip.Inflater; @@ -227,7 +228,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException } if(LOG.isLoggable(Level.INFO)) { - LOG.info(String.format( + LOG.info(String.format(Locale.ROOT, "Got EOF after %d bytes before finding magic in %s\n", amtSkipped * -1, streamContext)); } @@ -237,7 +238,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException if(amtSkipped > 0) { if(strict) { if(state == STATE_START) { - LOG.info(String.format( + LOG.info(String.format(Locale.ROOT, "Strict mode Skipped %d bytes in (%s) before finding magic at offset(%d)\n", amtSkipped, streamContext, offset-3)); } else { @@ -248,7 
+249,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException } if(LOG.isLoggable(Level.INFO)) { - LOG.info(String.format( + LOG.info(String.format(Locale.ROOT, "Skipped %d bytes in (%s) before finding magic at offset(%d)\n", amtSkipped, streamContext, offset-3)); } @@ -268,7 +269,7 @@ public GZIPSeriesMember getNextMember() throws GZIPFormatException, IOException } offset = currentMemberStartOffset + 3; stream.setOffset(currentMemberStartOffset + 3); - LOG.warning(String.format( + LOG.warning(String.format(Locale.ROOT, "GZIPFormatException with record around offset(%d) in (%s)\n", offset, streamContext)); } diff --git a/src/main/java/org/archive/format/gzip/GZIPStaticHeader.java b/src/main/java/org/archive/format/gzip/GZIPStaticHeader.java index ca0e8990..21406657 100755 --- a/src/main/java/org/archive/format/gzip/GZIPStaticHeader.java +++ b/src/main/java/org/archive/format/gzip/GZIPStaticHeader.java @@ -13,7 +13,7 @@ * @author Brad * * +---+---+---+---+---+---+---+---+---+---+ - * |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->) + * |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->) * +---+---+---+---+---+---+---+---+---+---+ */ public class GZIPStaticHeader implements GZIPConstants { diff --git a/src/main/java/org/archive/format/gzip/zipnum/MultiBlockIterator.java b/src/main/java/org/archive/format/gzip/zipnum/MultiBlockIterator.java index 05abbe1c..df2abd4d 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/MultiBlockIterator.java +++ b/src/main/java/org/archive/format/gzip/zipnum/MultiBlockIterator.java @@ -18,7 +18,7 @@ public class MultiBlockIterator extends AbstractPeekableIterator { private CloseableIterator> blockItr = null; /** - * @param blocks which should be fetched and unzipped, one after another + * @param blockItr blocks which should be fetched and unzipped, one after another */ public MultiBlockIterator(CloseableIterator> blockItr) { this.blockItr = blockItr; diff --git 
a/src/main/java/org/archive/format/gzip/zipnum/TimestampBestPickDedupIterator.java b/src/main/java/org/archive/format/gzip/zipnum/TimestampBestPickDedupIterator.java index 4afb58c4..8c4616a3 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/TimestampBestPickDedupIterator.java +++ b/src/main/java/org/archive/format/gzip/zipnum/TimestampBestPickDedupIterator.java @@ -1,6 +1,6 @@ package org.archive.format.gzip.zipnum; -import org.apache.commons.lang.math.NumberUtils; +import org.apache.commons.lang3.math.NumberUtils; import org.archive.util.iterator.CloseableIterator; public class TimestampBestPickDedupIterator extends TimestampDedupIterator { diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumBlockLoader.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumBlockLoader.java index 2247eda4..c28ee536 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumBlockLoader.java +++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumBlockLoader.java @@ -31,7 +31,7 @@ public class ZipNumBlockLoader { protected int signDurationSecs = DEFAULT_SIG_DURATION_SECS; protected boolean useNio = false; - protected String httpLib = HttpLibs.APACHE_31.name(); + protected String httpLib = HttpLibs.APACHE_43.name(); protected boolean bufferFully = true; protected boolean noKeepAlive = true; diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java index bc773a58..0a3fa1bf 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java +++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumCluster.java @@ -11,8 +11,9 @@ */ import java.io.BufferedReader; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStreamReader; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -21,6 +22,7 @@ import java.util.Date; import java.util.HashMap; import java.util.List; 
+import java.util.Locale; import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; import java.util.logging.Level; @@ -34,6 +36,8 @@ import org.archive.util.binsearch.impl.HTTPSeekableLineReader; import org.archive.util.iterator.CloseableIterator; +import static java.nio.charset.StandardCharsets.UTF_8; + public class ZipNumCluster extends ZipNumIndex { final static Logger LOGGER = Logger.getLogger(ZipNumCluster.class.getName()); @@ -102,7 +106,7 @@ public void run() { public final static String LATEST_TIMESTAMP = "_LATEST"; public final static String OFF = "OFF"; - protected SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + protected SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH); protected Date startDate, endDate; class BlockSize @@ -366,7 +370,7 @@ protected void loadLastBlockSizes(String filename) totalAdjustment = 0; try { - reader = new BufferedReader(new FileReader(filename)); + reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), UTF_8)); while ((line = reader.readLine()) != null) { String[] splits = line.split("\t"); diff --git a/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java b/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java index a104244a..c0e4e01d 100644 --- a/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java +++ b/src/main/java/org/archive/format/gzip/zipnum/ZipNumWriter.java @@ -3,18 +3,18 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.Charset; import org.archive.format.gzip.GZIPMemberWriter; import org.archive.format.gzip.GZIPMemberWriterCommittedOutputStream; +import static java.nio.charset.StandardCharsets.UTF_8; + public class ZipNumWriter extends GZIPMemberWriterCommittedOutputStream { int limit; int count; OutputStream manifestOut; ByteArrayOutputStream manifestBuffer; char delimiter = '\t'; - private static final 
Charset UTF8 = Charset.forName("utf-8"); public ZipNumWriter(OutputStream main, OutputStream manifest, int limit) { super(new GZIPMemberWriter(main)); manifestOut = manifest; @@ -51,7 +51,7 @@ private void finishCurrent() throws IOException { sb.append(delimiter); sb.append(len); sb.append(delimiter); - manifestOut.write(sb.toString().getBytes(UTF8)); + manifestOut.write(sb.toString().getBytes(UTF_8)); manifestBuffer.writeTo(manifestOut); manifestOut.flush(); count = 0; diff --git a/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java b/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java index ed5dfcb2..f1ac16c6 100755 --- a/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java +++ b/src/main/java/org/archive/format/http/DumpingHTTPParseObserver.java @@ -1,10 +1,10 @@ package org.archive.format.http; import java.io.PrintStream; -import java.nio.charset.Charset; +import java.util.Locale; + public class DumpingHTTPParseObserver implements HttpHeaderObserver { - private static final Charset UTF8 = Charset.forName("UTF-8"); private PrintStream ps = null; public DumpingHTTPParseObserver() { ps = System.out; @@ -15,13 +15,13 @@ public DumpingHTTPParseObserver(PrintStream ps) { public void headerParsed(byte[] name, int ns, int nl, byte[] value, int vs, int vl) { - ps.format("headerParsed:(%d:%d)(%s)(%d:%d)(%s)\n", + ps.format(Locale.ROOT,"headerParsed:(%d:%d)(%s)(%d:%d)(%s)\n", ns,nl,new String(name,0,nl,UTF8), vs,vl,new String(value,0,vl,UTF8)); } public void headersComplete(int bytesRead) { - ps.format("headersComplete(%d)\n",bytesRead); + ps.format(Locale.ROOT,"headersComplete(%d)\n",bytesRead); } public void headersCorrupt() { ps.println("headersCorrupted\n"); diff --git a/src/main/java/org/archive/format/http/HttpConstants.java b/src/main/java/org/archive/format/http/HttpConstants.java index fa0a7e10..8ae4d4db 100755 --- a/src/main/java/org/archive/format/http/HttpConstants.java +++ 
b/src/main/java/org/archive/format/http/HttpConstants.java @@ -1,9 +1,10 @@ package org.archive.format.http; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; public interface HttpConstants { - public static final Charset UTF8 = Charset.forName("UTF-8"); + public static final Charset UTF8 = StandardCharsets.UTF_8; public static final byte CR = 13; public static final byte LF = 10; public static final byte SP = 32; diff --git a/src/main/java/org/archive/format/http/HttpHeader.java b/src/main/java/org/archive/format/http/HttpHeader.java index 57b70e1f..9ebe860f 100755 --- a/src/main/java/org/archive/format/http/HttpHeader.java +++ b/src/main/java/org/archive/format/http/HttpHeader.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.OutputStream; +import java.util.Locale; public class HttpHeader implements HttpConstants { private String name = null; @@ -27,7 +28,7 @@ public void write(OutputStream out) throws IOException { public String toString() { StringBuilder sb = new StringBuilder(name.length() + value.length()+20); - sb.append(String.format("HttpHeader(%s)(%s)",name,value)); + sb.append(String.format(Locale.ROOT, "HttpHeader(%s)(%s)",name,value)); return sb.toString(); } } diff --git a/src/main/java/org/archive/format/http/HttpHeaderParser.java b/src/main/java/org/archive/format/http/HttpHeaderParser.java index fdec62f2..ddbb6e47 100755 --- a/src/main/java/org/archive/format/http/HttpHeaderParser.java +++ b/src/main/java/org/archive/format/http/HttpHeaderParser.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; public class HttpHeaderParser implements HttpConstants { private static final int DEFAULT_MAX_NAME_LENGTH = 1024 * 100; @@ -231,6 +232,7 @@ public ParseState handleByte(byte b, HttpHeaderParser parser) if(b == LF) { // TODO: this is lax, is LFLF an OK terminator? // that's all folks! 
+ parser.headerFinished(); parser.parseFinished(); return parser.endState; } @@ -287,7 +289,8 @@ public ParseState handleByte(byte b, HttpHeaderParser parser) return parser.postColonState; } if(parser.isStrict) { - throw new HttpParseException("Illegal char after name("+new String(name,0,nameLength)+")"); + throw new HttpParseException("Illegal char after name(" + + new String(name, 0, nameLength, StandardCharsets.ISO_8859_1) + ")"); } parser.headersCorrupted(); return parser.laxLineEatParseState; @@ -300,8 +303,9 @@ public ParseState handleByte(byte b, HttpHeaderParser parser) throws HttpParseEx if(isLWSP(b)) { return parser.postColonState; } + // reset previous value also in case the header value is empty + parser.setValueStartIdx(); if(b == CR) { - // TODO: THINK more... parser.valuePreCRState = parser.postColonState; return parser.valuePostCRState; } @@ -309,7 +313,6 @@ public ParseState handleByte(byte b, HttpHeaderParser parser) throws HttpParseEx // TODO: this is lax, is LFLF an OK terminator? 
return parser.lineStartState; } - parser.setValueStartIdx(); parser.addValueByte(b); return parser.valueState; } diff --git a/src/main/java/org/archive/format/http/HttpHeaders.java b/src/main/java/org/archive/format/http/HttpHeaders.java index ed8061d7..a65dd8fb 100755 --- a/src/main/java/org/archive/format/http/HttpHeaders.java +++ b/src/main/java/org/archive/format/http/HttpHeaders.java @@ -4,6 +4,7 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.Date; +import java.util.Locale; import java.util.logging.Logger; import org.archive.util.ByteOp; @@ -54,9 +55,9 @@ public String getValue(String name) { } public String getValueCaseInsensitive(String name) { - String lc = name.toLowerCase(); + String lc = name.toLowerCase(Locale.ROOT); for(HttpHeader h : this) { - if(h.getName().toLowerCase().equals(lc)) { + if(h.getName().toLowerCase(Locale.ROOT).equals(lc)) { return h.getValue(); } } diff --git a/src/main/java/org/archive/format/http/HttpMessageParser.java b/src/main/java/org/archive/format/http/HttpMessageParser.java index c4fcdf92..24e59e03 100644 --- a/src/main/java/org/archive/format/http/HttpMessageParser.java +++ b/src/main/java/org/archive/format/http/HttpMessageParser.java @@ -1,5 +1,6 @@ package org.archive.format.http; +import java.util.Locale; public class HttpMessageParser implements HttpConstants { @@ -22,11 +23,11 @@ protected int parseVersionLax(byte buf[], int start, int len) throws HttpParseException { String v = new String(buf,start,len,UTF8); - if(v.toLowerCase().compareTo(VERSION_0_STATUS.toLowerCase()) == 0) { + if(v.toLowerCase(Locale.ROOT).compareTo(VERSION_0_STATUS.toLowerCase(Locale.ROOT)) == 0) { return VERSION_0; - } else if(v.toLowerCase().compareTo(VERSION_1_STATUS.toLowerCase()) == 0) { + } else if(v.toLowerCase(Locale.ROOT).compareTo(VERSION_1_STATUS.toLowerCase(Locale.ROOT)) == 0) { return VERSION_1; - } else if(v.toLowerCase().compareTo(VERSION_9_STATUS.toLowerCase()) == 0) { + } else 
if(v.toLowerCase(Locale.ROOT).compareTo(VERSION_9_STATUS.toLowerCase(Locale.ROOT)) == 0) { return VERSION_9; } return VERSION_0; diff --git a/src/main/java/org/archive/format/http/HttpRequestMessageParser.java b/src/main/java/org/archive/format/http/HttpRequestMessageParser.java index f7bc43c7..759bbe5d 100644 --- a/src/main/java/org/archive/format/http/HttpRequestMessageParser.java +++ b/src/main/java/org/archive/format/http/HttpRequestMessageParser.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.Locale; public class HttpRequestMessageParser extends HttpMessageParser { public int maxBytes = 1024 * 1024; @@ -223,7 +224,7 @@ protected int parseMethodStrict(byte buf[], int start, int len) protected int parseMethodLax(byte buf[], int start, int len) throws HttpParseException { - String v = new String(buf,start,len,UTF8).toUpperCase(); + String v = new String(buf,start,len,UTF8).toUpperCase(Locale.ROOT); if(v.compareTo(METHOD_GET_STRING) == 0) { return METHOD_GET; } else if(v.compareTo(METHOD_HEAD_STRING) == 0) { diff --git a/src/main/java/org/archive/format/http/HttpResponseMessage.java b/src/main/java/org/archive/format/http/HttpResponseMessage.java index 0cb7b7e5..6d3f5c35 100755 --- a/src/main/java/org/archive/format/http/HttpResponseMessage.java +++ b/src/main/java/org/archive/format/http/HttpResponseMessage.java @@ -1,5 +1,7 @@ package org.archive.format.http; +import java.util.Locale; + public class HttpResponseMessage extends HttpMessage implements HttpResponseMessageObserver { private int status = 0; private String reason = null; @@ -20,10 +22,10 @@ public String getReason() { return reason; } public String toString() { - return String.format("%s %d %s%s", getVersionString(), status, reason, CRLF); + return String.format(Locale.ROOT, "%s %d %s%s", getVersionString(), status, reason, CRLF); } public String toDebugString() { - return String.format("Message(%d):(%s) (%d) (%s)\n", + return String.format(Locale.ROOT, 
"Message(%d):(%s) (%d) (%s)\n", reason.length(),getVersionString(),status,reason,CRLF); } diff --git a/src/main/java/org/archive/format/http/HttpResponseMessageParser.java b/src/main/java/org/archive/format/http/HttpResponseMessageParser.java index 3aee7c48..4ddef2ad 100755 --- a/src/main/java/org/archive/format/http/HttpResponseMessageParser.java +++ b/src/main/java/org/archive/format/http/HttpResponseMessageParser.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; public class HttpResponseMessageParser extends HttpMessageParser { public int maxBytes = 1024 * 128; @@ -97,7 +98,7 @@ public int parseStrict(byte buf[], int len, HttpResponseMessageObserver obs) version = parseVersionStrict(buf, vs, vl); status = parseStatusStrict(buf,ss,sl); - reason = new String(buf,idx+1,(len - idx)-1); + reason = new String(buf,idx+1,(len - idx)-1,StandardCharsets.ISO_8859_1); obs.messageParsed(version, status, reason, len); @@ -155,7 +156,7 @@ private int parseLax(byte buf[], int len, HttpResponseMessageObserver obs) idx++; int reasonLen = bufferEnd - idx; if(reasonLen > 0) { - reason = new String(buf,idx,reasonLen); + reason = new String(buf,idx,reasonLen,StandardCharsets.ISO_8859_1); } } else { // missed some: diff --git a/src/main/java/org/archive/format/json/CrossProductOfLists.java b/src/main/java/org/archive/format/json/CrossProductOfLists.java index 7be11fda..69cdae33 100644 --- a/src/main/java/org/archive/format/json/CrossProductOfLists.java +++ b/src/main/java/org/archive/format/json/CrossProductOfLists.java @@ -4,11 +4,12 @@ import java.util.ArrayList; import java.util.Deque; import java.util.List; +import java.util.Locale; import java.util.Stack; import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; public class CrossProductOfLists { private static final Logger LOG = @@ -18,12 +19,12 @@ public List> 
crossProduct(List>> listOfLists) { if(LOG.isLoggable(Level.INFO)) { int count = listOfLists.size(); - LOG.info(String.format("Total of (%d) lists to cross product",count)); + LOG.info(String.format(Locale.ROOT, "Total of (%d) lists to cross product",count)); for(int i = 0; i < count; i++) { - LOG.info(String.format("Field (%d) is (%d) deep",i,listOfLists.get(i).size())); + LOG.info(String.format(Locale.ROOT, "Field (%d) is (%d) deep",i,listOfLists.get(i).size())); for(List inner : listOfLists.get(i)) { LOG.info( - String.format("----(%d):(%s)" + String.format(Locale.ROOT, "----(%d):(%s)" ,i,StringUtils.join(inner.toArray(),",") ) ); } } diff --git a/src/main/java/org/archive/format/json/JSONView.java b/src/main/java/org/archive/format/json/JSONView.java index b73c0666..444ea7e6 100644 --- a/src/main/java/org/archive/format/json/JSONView.java +++ b/src/main/java/org/archive/format/json/JSONView.java @@ -2,10 +2,11 @@ import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.json.JSONObject; /** @@ -28,7 +29,7 @@ public class JSONView { public JSONView(String... 
pathSpecs) { this.pathSpecs = new ArrayList(pathSpecs.length); if(LOG.isLoggable(Level.INFO)) { - LOG.info(String.format("Creating JSONView with(%s)", + LOG.info(String.format(Locale.ROOT, "Creating JSONView with(%s)", StringUtils.join(pathSpecs,","))); } for(String pathSpec : pathSpecs) { diff --git a/src/main/java/org/archive/format/text/charset/CharsetDetector.java b/src/main/java/org/archive/format/text/charset/CharsetDetector.java index d391aac3..08aac469 100644 --- a/src/main/java/org/archive/format/text/charset/CharsetDetector.java +++ b/src/main/java/org/archive/format/text/charset/CharsetDetector.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.StandardCharsets; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -60,7 +62,8 @@ public abstract class CharsetDetector { private final static String META_CONTENT_ATTR_PATTERN_STRING = "\\b" + META_CONTENT_ATTRIBUTE + "\\s*=\\s*(" + ANY_ATTR_VALUE + ")(?:\\s|>)?"; private final static String META_HTTP_EQUIV_ATTR_PATTERN_STRING = "\\b" + - META_HTTP_EQUIV_ATTRIBUTE + "\\s*=\\s*(" + ANY_ATTR_VALUE + ")(?:\\s|>)?"; + META_HTTP_EQUIV_ATTRIBUTE + "\\s*=\\s*(" + META_CONTENT_TYPE + "|" + + ANY_ATTR_VALUE + ")(?:\\s|>)?"; @@ -90,7 +93,7 @@ public abstract class CharsetDetector { // ...and if the chardet library fails, use the Content-Type header protected final static String HTTP_CONTENT_TYPE_HEADER = "CONTENT-TYPE"; /** the default charset name to use when giving up */ - public final static String DEFAULT_CHARSET = "UTF-8"; + public final static String DEFAULT_CHARSET = StandardCharsets.UTF_8.name(); protected boolean isCharsetSupported(String charsetName) { // can you believe that this throws a runtime? 
Just asking if it's @@ -105,7 +108,7 @@ protected boolean isCharsetSupported(String charsetName) { } } protected String mapCharset(String orig) { - String lc = orig.toLowerCase(); + String lc = orig.toLowerCase(Locale.ROOT); if(lc.contains("iso8859-1") || lc.contains("iso-8859-1")) { return "cp1252"; } @@ -113,7 +116,7 @@ protected String mapCharset(String orig) { } protected String contentTypeToCharset(final String contentType) { int offset = - contentType.toUpperCase().indexOf(CHARSET_TOKEN.toUpperCase()); + contentType.toUpperCase(Locale.ROOT).indexOf(CHARSET_TOKEN.toUpperCase(Locale.ROOT)); if (offset != -1) { String cs = contentType.substring(offset + CHARSET_TOKEN.length()); @@ -138,7 +141,6 @@ protected String contentTypeToCharset(final String contentType) { * Attempt to divine the character encoding of the document from the * Content-Type HTTP header (with a "charset=") * - * @param resource * @return String character set found or null if the header was not present * @throws IOException */ @@ -148,7 +150,7 @@ protected String getCharsetFromHeaders(HttpHeaders headers) return null; } for(HttpHeader header : headers) { - if(header.getName().toUpperCase().trim().equals( + if(header.getName().toUpperCase(Locale.ROOT).trim().equals( HTTP_CONTENT_TYPE_HEADER)) { return contentTypeToCharset(header.getValue()); } @@ -160,7 +162,6 @@ protected String getCharsetFromHeaders(HttpHeaders headers) * Attempt to find a META tag in the HTML that hints at the character set * used to write the document. 
* - * @param resource * @return String character set found from META tags in the HTML * @throws IOException */ @@ -178,6 +179,9 @@ protected String getCharsetFromMeta(byte buffer[],int len) throws IOException { } private static String trimAttrValue(String value) { + if (value.isEmpty()) { + return value; + } String result = value; if (result.charAt(0) == '"') { result = result.substring(1, result.length() - 1); @@ -220,14 +224,11 @@ public static String findMetaContentType(String pageSample) { * Attempts to figure out the character set of the document using * the excellent juniversalchardet library. * - * @param resource * @return String character encoding found, or null if nothing looked good. - * @throws IOException */ protected String getCharsetFromBytes(byte buffer[], int len) throws IOException { String charsetName = null; - UniversalDetector detector = new UniversalDetector(null); detector.handleData(buffer, 0, len); detector.dataEnd(); @@ -239,9 +240,6 @@ protected String getCharsetFromBytes(byte buffer[], int len) return null; } /** - * @param resource (presumably text) Resource to determine the charset - * @param request WaybackRequest which may contain additional hints to - * processing * @return String charset name for the Resource * @throws IOException if there are problems reading the Resource */ diff --git a/src/main/java/org/archive/format/text/html/NodeUtils.java b/src/main/java/org/archive/format/text/html/NodeUtils.java index 625d9099..f231b91a 100644 --- a/src/main/java/org/archive/format/text/html/NodeUtils.java +++ b/src/main/java/org/archive/format/text/html/NodeUtils.java @@ -19,6 +19,8 @@ */ package org.archive.format.text.html; +import java.util.Locale; + import org.htmlparser.Node; import org.htmlparser.nodes.RemarkNode; import org.htmlparser.nodes.TagNode; @@ -41,7 +43,7 @@ public static boolean isTagNodeNamed(Node node, String name) { if(isTagNode(node)) { TagNode tagNode = (TagNode) node; String nodeName = tagNode.getTagName(); - return 
nodeName.equals(name.toUpperCase()); + return nodeName.equals(name.toUpperCase(Locale.ROOT)); } return false; } @@ -50,7 +52,7 @@ public static boolean isOpenTagNodeNamed(Node node, String name) { TagNode tagNode = (TagNode) node; if(!tagNode.isEndTag()) { String nodeName = tagNode.getTagName(); - return nodeName.equals(name.toUpperCase()); + return nodeName.equals(name.toUpperCase(Locale.ROOT)); } } return false; @@ -60,7 +62,7 @@ public static boolean isNonEmptyOpenTagNodeNamed(Node node, String name) { TagNode tagNode = (TagNode) node; if(!tagNode.isEndTag() && !tagNode.isEmptyXmlTag()) { String nodeName = tagNode.getTagName(); - return nodeName.equals(name.toUpperCase()); + return nodeName.equals(name.toUpperCase(Locale.ROOT)); } } return false; @@ -70,7 +72,7 @@ public static boolean isCloseTagNodeNamed(Node node, String name) { TagNode tagNode = (TagNode) node; if(tagNode.isEndTag()) { String nodeName = tagNode.getTagName(); - return nodeName.equals(name.toUpperCase()); + return nodeName.equals(name.toUpperCase(Locale.ROOT)); } } return false; diff --git a/src/main/java/org/archive/format/warc/WARCConstants.java b/src/main/java/org/archive/format/warc/WARCConstants.java index c9f6cbf3..a6bdb3f4 100644 --- a/src/main/java/org/archive/format/warc/WARCConstants.java +++ b/src/main/java/org/archive/format/warc/WARCConstants.java @@ -19,12 +19,14 @@ package org.archive.format.warc; +import java.nio.charset.StandardCharsets; + import org.archive.format.ArchiveFileConstants; /** * WARC Constants used by WARC readers and writers. * - * @contributor stack + * @author stack */ public interface WARCConstants extends ArchiveFileConstants { /** @@ -93,7 +95,7 @@ public interface WARCConstants extends ArchiveFileConstants { * till we figure it, DEFAULT_ENCODING is single-byte charset -- same as * ARCs. 
*/ - public static final String DEFAULT_ENCODING = "UTF-8"; + public static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name(); public static final String HEADER_LINE_ENCODING = DEFAULT_ENCODING; // TODO: Revisit. 8859 isn't correct, especially if we settle on RFC822 @@ -175,16 +177,14 @@ enum WARCRecordType { /** * These fields help a consumer of the warc to locate the warc record that * {@value #HEADER_KEY_REFERS_TO} refers to. - * - * @see WARCWriterProcessor + *

+ * See WARCWriterProcessor */ public static final String HEADER_KEY_REFERS_TO_TARGET_URI = "WARC-Refers-To-Target-URI"; public static final String HEADER_KEY_REFERS_TO_DATE = "WARC-Refers-To-Date"; public static final String HEADER_KEY_REFERS_TO_FILENAME = "WARC-Refers-To-Filename"; public static final String HEADER_KEY_REFERS_TO_FILE_OFFSET = "WARC-Refers-To-File-Offset"; - public static final String PROFILE_REVISIT_URI_AGNOSTIC_IDENTICAL_DIGEST = - "http://netpreserve.org/warc/1.0/revisit/uri-agnostic-identical-payload-digest"; public static final String PROFILE_REVISIT_IDENTICAL_DIGEST = "http://netpreserve.org/warc/1.0/revisit/identical-payload-digest"; public static final String PROFILE_REVISIT_NOT_MODIFIED = @@ -211,7 +211,9 @@ enum WARCRecordType { "application/http; msgtype=request"; public static final String HTTP_RESPONSE_MIMETYPE = "application/http; msgtype=response"; - + public static final String HTTP_RESPONSE_MIMETYPE_NS = + "application/http;msgtype=response"; // wget does this + public static final String FTP_CONTROL_CONVERSATION_MIMETYPE = "text/x-ftp-control-conversation"; diff --git a/src/main/java/org/archive/format/warc/WARCRecordWriter.java b/src/main/java/org/archive/format/warc/WARCRecordWriter.java index 0aab83b7..3278b289 100644 --- a/src/main/java/org/archive/format/warc/WARCRecordWriter.java +++ b/src/main/java/org/archive/format/warc/WARCRecordWriter.java @@ -88,7 +88,10 @@ public void writeJSONMetadataRecord( OutputStream out, { HttpHeaders headers = new HttpHeaders(); headers.add(HEADER_KEY_TYPE, WARCRecordType.metadata.name()); - headers.add(HEADER_KEY_URI, targetURI); + if (targetURI != null) { + // WARC-Target-URI is optional in metadata records + headers.add(HEADER_KEY_URI, targetURI); + } headers.add(HEADER_KEY_DATE, DateUtils.getLog14Date(originalDate)); headers.add(HEADER_KEY_ID, makeRecordId()); headers.add(HEADER_KEY_REFERS_TO, origRecordId); diff --git a/src/main/java/org/archive/hadoop/ArchiveJSONViewLoader.java 
b/src/main/java/org/archive/hadoop/ArchiveJSONViewLoader.java index e92ed7e1..d31e31c9 100644 --- a/src/main/java/org/archive/hadoop/ArchiveJSONViewLoader.java +++ b/src/main/java/org/archive/hadoop/ArchiveJSONViewLoader.java @@ -6,7 +6,7 @@ import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; diff --git a/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java b/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java index 37c8af99..a3cbb26c 100644 --- a/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java +++ b/src/main/java/org/archive/hadoop/ArchiveMetadataLoader.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Locale; import java.util.logging.Logger; import org.apache.hadoop.mapreduce.InputFormat; @@ -54,7 +55,7 @@ public Tuple getNext() throws IOException { try { key = reader.getCurrentKey(); - LOG.info(String.format("Loaded key-offset %d\n", key.offset)); + LOG.info(String.format(Locale.ROOT, "Loaded key-offset %d\n", key.offset)); value = reader.getCurrentValue(); } catch (InterruptedException e) { // is this needed and the right way? 
diff --git a/src/main/java/org/archive/hadoop/FilenameInputFormat.java b/src/main/java/org/archive/hadoop/FilenameInputFormat.java index 5893afb1..3f41cdee 100644 --- a/src/main/java/org/archive/hadoop/FilenameInputFormat.java +++ b/src/main/java/org/archive/hadoop/FilenameInputFormat.java @@ -17,7 +17,6 @@ package org.archive.hadoop; import java.io.*; -import java.util.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; diff --git a/src/main/java/org/archive/hadoop/PerMapOutputFormat.java b/src/main/java/org/archive/hadoop/PerMapOutputFormat.java index 28ebca73..684202bb 100644 --- a/src/main/java/org/archive/hadoop/PerMapOutputFormat.java +++ b/src/main/java/org/archive/hadoop/PerMapOutputFormat.java @@ -17,7 +17,6 @@ package org.archive.hadoop; import java.io.*; -import java.util.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; diff --git a/src/main/java/org/archive/hadoop/ResourceRecordReader.java b/src/main/java/org/archive/hadoop/ResourceRecordReader.java index 06d3ce2e..88b93dd2 100644 --- a/src/main/java/org/archive/hadoop/ResourceRecordReader.java +++ b/src/main/java/org/archive/hadoop/ResourceRecordReader.java @@ -1,6 +1,7 @@ package org.archive.hadoop; import java.io.IOException; +import java.util.Locale; import java.util.logging.Logger; import org.apache.hadoop.fs.FSDataInputStream; @@ -111,7 +112,7 @@ public boolean nextKeyValue() throws IOException, InterruptedException { if(r != null) { StreamCopy.readToEOF(r.getInputStream()); - LOG.info(String.format("Extracted offset %d\n", + LOG.info(String.format(Locale.ROOT, "Extracted offset %d\n", series.getCurrentMemberStartOffset())); cachedK = new ResourceContext(name, series.getCurrentMemberStartOffset()); @@ -121,7 +122,7 @@ public boolean nextKeyValue() throws IOException, InterruptedException { } catch (ResourceParseException e) { e.printStackTrace(); throw new IOException( - String.format("ResourceParseException at(%s)(%d)", + 
String.format(Locale.ROOT, "ResourceParseException at(%s)(%d)", name,series.getCurrentMemberStartOffset()), e); } diff --git a/src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java b/src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java deleted file mode 100644 index 105c4f7e..00000000 --- a/src/main/java/org/archive/httpclient/HttpRecorderGetMethod.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.httpclient; - -import java.io.IOException; -import java.util.logging.Logger; - -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpState; -import org.apache.commons.httpclient.methods.GetMethod; -import org.archive.util.Recorder; - - -/** - * Override of GetMethod that marks the passed HttpRecorder w/ the transition - * from HTTP head to body and that forces a close on the http connection. - * - * The actions done in this subclass used to be done by copying - * org.apache.commons.HttpMethodBase, overlaying our version in place of the - * one that came w/ httpclient. Here is the patch of the difference between - * shipped httpclient code and our mods: - *

- *    -- -1338,6 +1346,12 --
- *
- *        public void releaseConnection() {
- *
- *   +        // HERITRIX always ants the streams closed.
- *   +        if (responseConnection != null)
- *   +        {
- *   +            responseConnection.close();
- *   +        }
- *   +
- *            if (responseStream != null) {
- *                try {
- *                    // FYI - this may indirectly invoke responseBodyConsumed.
- *   -- -1959,6 +1973,11 --
- *                        this.statusLine = null;
- *                    }
- *                }
- *   +            // HERITRIX mark transition from header to content.
- *   +            if (this.httpRecorder != null)
- *   +            {
- *   +                this.httpRecorder.markContentBegin();
- *   +            }
- *                readResponseBody(state, conn);
- *                processResponseBody(state, conn);
- *            } catch (IOException e) {
- * 
- * - *

We're not supposed to have access to the underlying connection object; - * am only violating contract because see cases where httpclient is skipping - * out w/o cleaning up after itself. - * - * @author stack - * @version $Revision$, $Date$ - */ -public class HttpRecorderGetMethod extends GetMethod { - - protected static Logger logger = - Logger.getLogger(HttpRecorderGetMethod.class.getName()); - - /** - * Instance of http recorder method. - */ - protected HttpRecorderMethod httpRecorderMethod = null; - - - public HttpRecorderGetMethod(String uri, Recorder recorder) { - super(uri); - this.httpRecorderMethod = new HttpRecorderMethod(recorder); - } - - protected void readResponseBody(HttpState state, HttpConnection connection) - throws IOException, HttpException { - // We're about to read the body. Mark transition in http recorder. - this.httpRecorderMethod.markContentBegin(connection); - super.readResponseBody(state, connection); - } - - protected boolean shouldCloseConnection(HttpConnection conn) { - // Always close connection after each request. As best I can tell, this - // is superfluous -- we've set our client to be HTTP/1.0. Doing this - // out of paranoia. - return true; - } - - public int execute(HttpState state, HttpConnection conn) - throws HttpException, IOException { - // Save off the connection so we can close it on our way out in case - // httpclient fails to (We're not supposed to have access to the - // underlying connection object; am only violating contract because - // see cases where httpclient is skipping out w/o cleaning up - // after itself). 
- this.httpRecorderMethod.setConnection(conn); - return super.execute(state, conn); - } - - protected void addProxyConnectionHeader(HttpState state, HttpConnection conn) - throws IOException, HttpException { - super.addProxyConnectionHeader(state, conn); - this.httpRecorderMethod.handleAddProxyConnectionHeader(this); - } -} diff --git a/src/main/java/org/archive/httpclient/HttpRecorderMethod.java b/src/main/java/org/archive/httpclient/HttpRecorderMethod.java deleted file mode 100644 index 932e7e98..00000000 --- a/src/main/java/org/archive/httpclient/HttpRecorderMethod.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.httpclient; - -import java.util.logging.Logger; - -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.HttpMethod; -import org.archive.util.Recorder; - - -/** - * This class encapsulates the specializations supplied by the - * overrides {@link HttpRecorderGetMethod} and {@link HttpRecorderPostMethod}. - * - * It keeps instance of HttpRecorder and HttpConnection. 
- * - * @author stack - * @version $Revision$, $Date$ - */ -public class HttpRecorderMethod { - protected static Logger logger = - Logger.getLogger(HttpRecorderMethod.class.getName()); - - /** - * Instance of http recorder we're using recording this http get. - */ - private Recorder httpRecorder = null; - - /** - * Save around so can force close. - * - * See [ 922080 ] IllegalArgumentException (size is wrong). - * https://sourceforge.net/tracker/?func=detail&aid=922080&group_id=73833&atid=539099 - */ - private HttpConnection connection = null; - - - public HttpRecorderMethod(Recorder recorder) { - this.httpRecorder = recorder; - } - - public void markContentBegin(HttpConnection c) { - if (c != this.connection) { - // We're checking that we're not being asked to work on - // a connection that is other than the one we started - // this method#execute with. - throw new IllegalArgumentException("Connections differ: " + - this.connection + " " + c + " " + - Thread.currentThread().getName()); - } - this.httpRecorder.markContentBegin(); - } - - /** - * @return Returns the connection. - */ - public HttpConnection getConnection() { - return this.connection; - } - - /** - * @param connection The connection to set. - */ - public void setConnection(HttpConnection connection) { - this.connection = connection; - } - /** - * @return Returns the httpRecorder. - */ - public Recorder getHttpRecorder() { - return httpRecorder; - } - - /** - * If a 'Proxy-Connection' header has been added to the request, - * it'll be of a 'keep-alive' type. Until we support 'keep-alives', - * override the Proxy-Connection setting and instead pass a 'close' - * (Otherwise every request has to timeout before we notice - * end-of-document). - * @param method Method to find proxy-connection header in. 
- */ - public void handleAddProxyConnectionHeader(HttpMethod method) { - Header h = method.getRequestHeader("Proxy-Connection"); - if (h != null) { - h.setValue("close"); - method.setRequestHeader(h); - } - } -} diff --git a/src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java b/src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java deleted file mode 100644 index 20f1bfd1..00000000 --- a/src/main/java/org/archive/httpclient/HttpRecorderPostMethod.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.httpclient; - -import java.io.IOException; - -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpState; -import org.apache.commons.httpclient.methods.PostMethod; -import org.archive.util.Recorder; - - -/** - * Override of PostMethod that marks the passed HttpRecorder w/ the transition - * from HTTP head to body and that forces a close on the responseConnection. - * - * This is a copy of {@link HttpRecorderGetMethod}. Only difference is the - * parent subclass. 
- * - * @author stack - * @version $Date$ $Revision$ - */ -public class HttpRecorderPostMethod extends PostMethod { - /** - * Instance of http recorder method. - */ - protected HttpRecorderMethod httpRecorderMethod = null; - - - public HttpRecorderPostMethod(String uri, Recorder recorder) { - super(uri); - this.httpRecorderMethod = new HttpRecorderMethod(recorder); - } - - protected void readResponseBody(HttpState state, HttpConnection connection) - throws IOException, HttpException { - // We're about to read the body. Mark transition in http recorder. - this.httpRecorderMethod.markContentBegin(connection); - super.readResponseBody(state, connection); - } - - protected boolean shouldCloseConnection(HttpConnection conn) { - // Always close connection after each request. As best I can tell, this - // is superfluous -- we've set our client to be HTTP/1.0. Doing this - // out of paranoia. - return true; - } - - public int execute(HttpState state, HttpConnection conn) - throws HttpException, IOException { - // Save off the connection so we can close it on our way out in case - // httpclient fails to (We're not supposed to have access to the - // underlying connection object; am only violating contract because - // see cases where httpclient is skipping out w/o cleaning up - // after itself). 
- this.httpRecorderMethod.setConnection(conn); - return super.execute(state, conn); - } - - protected void addProxyConnectionHeader(HttpState state, HttpConnection conn) - throws IOException, HttpException { - super.addProxyConnectionHeader(state, conn); - this.httpRecorderMethod.handleAddProxyConnectionHeader(this); - } -} diff --git a/src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java b/src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java deleted file mode 100644 index 4ba6a837..00000000 --- a/src/main/java/org/archive/httpclient/SingleHttpConnectionManager.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.httpclient; - -import java.io.IOException; -import java.io.InputStream; - -import org.apache.commons.httpclient.HostConfiguration; -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.SimpleHttpConnectionManager; - -/** - * An HttpClient-compatible HttpConnection "manager" that actually - * just gives out a new connection each time -- skipping the overhead - * of connection management, since we already throttle our crawler - * with external mechanisms. 
- * - * @author gojomo - */ -public class SingleHttpConnectionManager extends SimpleHttpConnectionManager { - - public SingleHttpConnectionManager() { - super(); - } - - public HttpConnection getConnectionWithTimeout( - HostConfiguration hostConfiguration, long timeout) { - - HttpConnection conn = new HttpConnection(hostConfiguration); - conn.setHttpConnectionManager(this); - conn.getParams().setDefaults(this.getParams()); - return conn; - } - - public void releaseConnection(HttpConnection conn) { - // ensure connection is closed - conn.close(); - finishLast(conn); - } - - protected static void finishLast(HttpConnection conn) { - // copied from superclass because it wasn't made available to subclasses - InputStream lastResponse = conn.getLastResponseInputStream(); - if (lastResponse != null) { - conn.setLastResponseInputStream(null); - try { - lastResponse.close(); - } catch (IOException ioe) { - //FIXME: badness - close to force reconnect. - conn.close(); - } - } - } -} diff --git a/src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java b/src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java deleted file mode 100644 index 91e850ea..00000000 --- a/src/main/java/org/archive/httpclient/ThreadLocalHttpConnectionManager.java +++ /dev/null @@ -1,291 +0,0 @@ -/** - * ==================================================================== - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ==================================================================== - * - */ -package org.archive.httpclient; - -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.httpclient.HostConfiguration; -import org.apache.commons.httpclient.HttpConnection; -import org.apache.commons.httpclient.HttpConnectionManager; -import org.apache.commons.httpclient.params.HttpConnectionManagerParams; - -/** - * A simple, but thread-safe HttpClient {@link HttpConnectionManager}. - * Based on {@link org.apache.commons.httpclient.SimpleHttpConnectionManager}. - * - * Java >= 1.4 is recommended. - * - * @author Christian Kohlschuetter - */ -public final class ThreadLocalHttpConnectionManager implements - HttpConnectionManager { - - private static final CloserThread closer = new CloserThread(); - private static final Logger logger = Logger - .getLogger(ThreadLocalHttpConnectionManager.class.getName()); - - private final ThreadLocal tl = new ThreadLocal() { - protected synchronized ConnectionInfo initialValue() { - return new ConnectionInfo(); - } - }; - - private ConnectionInfo getConnectionInfo() { - return (ConnectionInfo) tl.get(); - } - - private static final class ConnectionInfo { - /** The http connection */ - private HttpConnection conn = null; - - /** - * The time the connection was made idle. - */ - private long idleStartTime = Long.MAX_VALUE; - } - - public ThreadLocalHttpConnectionManager() { - } - - /** - * Since the same connection is about to be reused, make sure the - * previous request was completely processed, and if not - * consume it now. 
- * @param conn The connection - * @return true, if the connection is reusable - */ - private static boolean finishLastResponse(final HttpConnection conn) { - InputStream lastResponse = conn.getLastResponseInputStream(); - if(lastResponse != null) { - conn.setLastResponseInputStream(null); - try { - lastResponse.close(); - return true; - } catch (IOException ioe) { - // force reconnect. - return false; - } - } else { - return false; - } - } - - /** - * Collection of parameters associated with this connection manager. - */ - private HttpConnectionManagerParams params = new HttpConnectionManagerParams(); - - /** - * @see HttpConnectionManager#getConnection(HostConfiguration) - */ - public HttpConnection getConnection( - final HostConfiguration hostConfiguration) { - return getConnection(hostConfiguration, 0); - } - - /** - * Gets the staleCheckingEnabled value to be set on HttpConnections that are created. - * - * @return true if stale checking will be enabled on HttpConections - * - * @see HttpConnection#isStaleCheckingEnabled() - * - * @deprecated Use {@link HttpConnectionManagerParams#isStaleCheckingEnabled()}, - * {@link HttpConnectionManager#getParams()}. - */ - public boolean isConnectionStaleCheckingEnabled() { - return this.params.isStaleCheckingEnabled(); - } - - /** - * Sets the staleCheckingEnabled value to be set on HttpConnections that are created. - * - * @param connectionStaleCheckingEnabled true if stale checking will be enabled - * on HttpConections - * - * @see HttpConnection#setStaleCheckingEnabled(boolean) - * - * @deprecated Use {@link HttpConnectionManagerParams#setStaleCheckingEnabled(boolean)}, - * {@link HttpConnectionManager#getParams()}. 
- */ - public void setConnectionStaleCheckingEnabled( - final boolean connectionStaleCheckingEnabled) { - this.params.setStaleCheckingEnabled(connectionStaleCheckingEnabled); - } - - /** - * @see HttpConnectionManager#getConnectionWithTimeout(HostConfiguration, long) - * - * @since 3.0 - */ - public HttpConnection getConnectionWithTimeout( - final HostConfiguration hostConfiguration, final long timeout) { - - final ConnectionInfo ci = getConnectionInfo(); - HttpConnection httpConnection = ci.conn; - - // make sure the host and proxy are correct for this connection - // close it and set the values if they are not - if(httpConnection == null || !finishLastResponse(httpConnection) - || !hostConfiguration.hostEquals(httpConnection) - || !hostConfiguration.proxyEquals(httpConnection)) { - - if(httpConnection != null && httpConnection.isOpen()) { - closer.closeConnection(httpConnection); - } - - httpConnection = new HttpConnection(hostConfiguration); - httpConnection.setHttpConnectionManager(this); - httpConnection.getParams().setDefaults(this.params); - ci.conn = httpConnection; - - httpConnection.setHost(hostConfiguration.getHost()); - httpConnection.setPort(hostConfiguration.getPort()); - httpConnection.setProtocol(hostConfiguration.getProtocol()); - httpConnection.setLocalAddress(hostConfiguration.getLocalAddress()); - - httpConnection.setProxyHost(hostConfiguration.getProxyHost()); - httpConnection.setProxyPort(hostConfiguration.getProxyPort()); - } - - // remove the connection from the timeout handler - ci.idleStartTime = Long.MAX_VALUE; - - return httpConnection; - } - - /** - * @see HttpConnectionManager#getConnection(HostConfiguration, long) - * - * @deprecated Use #getConnectionWithTimeout(HostConfiguration, long) - */ - public HttpConnection getConnection( - final HostConfiguration hostConfiguration, final long timeout) { - return getConnectionWithTimeout(hostConfiguration, timeout); - } - - /** - * @see 
HttpConnectionManager#releaseConnection(org.apache.commons.httpclient.HttpConnection) - */ - public void releaseConnection(final HttpConnection conn) { - final ConnectionInfo ci = getConnectionInfo(); - HttpConnection httpConnection = ci.conn; - - if(conn != httpConnection) { - throw new IllegalStateException( - "Unexpected release of an unknown connection."); - } - - finishLastResponse(httpConnection); - - // track the time the connection was made idle - ci.idleStartTime = System.currentTimeMillis(); - } - - /** - * Returns {@link HttpConnectionManagerParams parameters} associated - * with this connection manager. - * - * @since 2.1 - * - * @see HttpConnectionManagerParams - */ - public HttpConnectionManagerParams getParams() { - return this.params; - } - - /** - * Assigns {@link HttpConnectionManagerParams parameters} for this - * connection manager. - * - * @since 2.1 - * - * @see HttpConnectionManagerParams - */ - public void setParams(final HttpConnectionManagerParams p) { - if(p == null) { - throw new IllegalArgumentException("Parameters may not be null"); - } - this.params = p; - } - - /** - * @since 3.0 - */ - public void closeIdleConnections(final long idleTimeout) { - long maxIdleTime = System.currentTimeMillis() - idleTimeout; - - final ConnectionInfo ci = getConnectionInfo(); - - if(ci.idleStartTime <= maxIdleTime) { - ci.conn.close(); - } - } - - private static final class CloserThread extends Thread { - private List connections - = new ArrayList(); - - private static final int SLEEP_INTERVAL = 5000; - - public CloserThread() { - super("HttpConnection closer"); - // Make this a daemon thread so it can't be responsible for the JVM - // not shutting down. 
- setDaemon(true); - start(); - } - - public void closeConnection(final HttpConnection conn) { - synchronized (connections) { - connections.add(conn); - } - } - - public void run() { - try { - while (!Thread.interrupted()) { - Thread.sleep(SLEEP_INTERVAL); - - List s; - synchronized (connections) { - s = connections; - connections = new ArrayList(); - } - logger.log(Level.INFO, "Closing " + s.size() - + " HttpConnections"); - for(final Iterator it = s.iterator(); - it.hasNext();) { - HttpConnection conn = it.next(); - conn.close(); - conn.setHttpConnectionManager(null); - it.remove(); - } - } - } catch (InterruptedException e) { - return; - } - } - } -} diff --git a/src/main/java/org/archive/httpclient/package.html b/src/main/java/org/archive/httpclient/package.html index 87ae77ed..adca4891 100644 --- a/src/main/java/org/archive/httpclient/package.html +++ b/src/main/java/org/archive/httpclient/package.html @@ -7,16 +7,16 @@ apache jakarta commons httpclient. -

HttpRecorderGetMethod

+

HttpRecorderGetMethod

Class that the passed HttpRecorder w/ boundary between HTTP header and content. Also forces a close on the response on call to releaseConnection.

-

ConfigurableTrustManagerProtocolSocketFactory

+

ConfigurableTrustManagerProtocolSocketFactory

A protocol socket factory that allows setting of trust level on construction.

-

References

+

References

JavaTM Secure Socket Extension (JSSE): Reference Guide

diff --git a/src/main/java/org/archive/io/ArchiveFileConstants.java b/src/main/java/org/archive/io/ArchiveFileConstants.java deleted file mode 100644 index b1a39194..00000000 --- a/src/main/java/org/archive/io/ArchiveFileConstants.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.archive.io; - -@Deprecated -public interface ArchiveFileConstants extends org.archive.format.ArchiveFileConstants { -} diff --git a/src/main/java/org/archive/io/ArchiveReader.java b/src/main/java/org/archive/io/ArchiveReader.java index 66056d33..070455a5 100644 --- a/src/main/java/org/archive/io/ArchiveReader.java +++ b/src/main/java/org/archive/io/ArchiveReader.java @@ -26,12 +26,14 @@ import java.io.EOFException; import java.io.File; import java.io.FileInputStream; -import java.io.FileWriter; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStreamWriter; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; @@ -42,13 +44,17 @@ import com.google.common.io.CountingInputStream; +import static org.archive.format.ArchiveFileConstants.*; + +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Reader for an Archive file of Archive {@link ArchiveRecord}s. * @author stack * @version $Date$ $Version$ */ -public abstract class ArchiveReader implements ArchiveFileConstants, Iterable, Closeable { +public abstract class ArchiveReader implements Iterable, Closeable { /** * Is this Archive file compressed? 
*/ @@ -601,8 +607,7 @@ public String getStrippedFileName() { */ public static String getStrippedFileName(String name, final String dotFileExtension) { - name = stripExtension(name, - ArchiveFileConstants.DOT_COMPRESSED_FILE_EXTENSION); + name = stripExtension(name, DOT_COMPRESSED_FILE_EXTENSION); return stripExtension(name, dotFileExtension); } @@ -614,7 +619,7 @@ protected static boolean getTrueOrFalse(final String value) { if (value == null || value.length() <= 0) { return false; } - return Boolean.TRUE.toString().equals(value.toLowerCase()); + return Boolean.TRUE.toString().equals(value.toLowerCase(Locale.ROOT)); } /** @@ -658,7 +663,7 @@ protected void cdxOutput(boolean toFile) DOT_COMPRESSED_FILE_EXTENSION); cdxFilename = stripExtension(cdxFilename, getDotFileExtension()); cdxFilename += ('.' + CDX); - cdxWriter = new BufferedWriter(new FileWriter(cdxFilename)); + cdxWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(cdxFilename), UTF_8)); } String header = "CDX b e a m s c " + ((isCompressed()) ? "V" : "v") @@ -699,7 +704,7 @@ public boolean outputRecord(final String format) boolean result = true; if (format.equals(CDX)) { System.out.println(get().outputCdx(getStrippedFileName())); - } else if(format.equals(ArchiveFileConstants.DUMP)) { + } else if(format.equals(DUMP)) { // No point digesting if dumping content. setDigest(false); get().dump(); @@ -711,9 +716,7 @@ public boolean outputRecord(final String format) /** * Dump this file on STDOUT - * @throws compress True if dumped output is compressed. - * @throws IOException - * @throws java.text.ParseException + * @param compress True if dumped output is compressed. */ public abstract void dump(final boolean compress) throws IOException, java.text.ParseException; @@ -758,4 +761,4 @@ protected static Options getOptions() { "'or 'nohead'. 
Default: 'cdx'.")); return options; } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/ArchiveReaderFactory.java b/src/main/java/org/archive/io/ArchiveReaderFactory.java index 17f14d3a..fe72236b 100644 --- a/src/main/java/org/archive/io/ArchiveReaderFactory.java +++ b/src/main/java/org/archive/io/ArchiveReaderFactory.java @@ -25,6 +25,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; +import java.util.Locale; import org.archive.io.arc.ARCReaderFactory; import org.archive.io.warc.WARCReaderFactory; @@ -33,6 +34,7 @@ import org.archive.url.UsableURI; import org.archive.util.FileUtils; +import static org.archive.format.ArchiveFileConstants.*; /** * Factory that returns an Archive file Reader. @@ -40,7 +42,7 @@ * @author stack * @version $Date$ $Revision$ */ -public class ArchiveReaderFactory implements ArchiveFileConstants { +public class ArchiveReaderFactory { // Static block to enable S3 URLs static { if (System.getProperty("java.protocol.handler.pkgs") != null) { @@ -295,7 +297,7 @@ protected void addUserAgent(final HttpURLConnection connection) { * @throws IOException */ protected boolean isCompressed(final File f) throws IOException { - return f.getName().toLowerCase(). + return f.getName().toLowerCase(Locale.ROOT). 
endsWith(DOT_COMPRESSED_FILE_EXTENSION); } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/ArchiveRecord.java b/src/main/java/org/archive/io/ArchiveRecord.java index 63bfe628..01e8d5ec 100644 --- a/src/main/java/org/archive/io/ArchiveRecord.java +++ b/src/main/java/org/archive/io/ArchiveRecord.java @@ -23,8 +23,10 @@ import java.io.OutputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.Locale; import java.util.logging.Level; +import org.archive.format.ArchiveFileConstants; import org.archive.util.Base32; /** @@ -392,7 +394,7 @@ public boolean hasContentHeaders() { return false; } - if (!url.toLowerCase().startsWith("http")) { + if (!url.toLowerCase(Locale.ROOT).startsWith("http")) { return false; } diff --git a/src/main/java/org/archive/io/CompositeFileReader.java b/src/main/java/org/archive/io/CompositeFileReader.java index 14b56219..6e331565 100644 --- a/src/main/java/org/archive/io/CompositeFileReader.java +++ b/src/main/java/org/archive/io/CompositeFileReader.java @@ -23,6 +23,8 @@ import java.io.InputStreamReader; import java.util.List; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * @author gojomo @@ -34,7 +36,7 @@ public class CompositeFileReader extends InputStreamReader { * @throws IOException */ public CompositeFileReader(List filenames) throws IOException { - super(new CompositeFileInputStream(filenames)); + super(new CompositeFileInputStream(filenames), UTF_8); } } diff --git a/src/main/java/org/archive/io/GenericReplayCharSequence.java b/src/main/java/org/archive/io/GenericReplayCharSequence.java index 1af3922b..ff96717c 100644 --- a/src/main/java/org/archive/io/GenericReplayCharSequence.java +++ b/src/main/java/org/archive/io/GenericReplayCharSequence.java @@ -33,25 +33,24 @@ import java.nio.channels.FileChannel; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import 
java.text.NumberFormat; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.io.IOUtils; import org.archive.util.DevUtils; -import com.google.common.base.Charsets; import com.google.common.primitives.Ints; /** * (Replay)CharSequence view on recorded streams. + * + *

Call {@link #close()} on this class when done to clean up resources. * - * For small streams, use {@link InMemoryReplayCharSequence}. - * - *

Call {@link close()} on this class when done to clean up resources. - * - * @contributor stack - * @contributor nlevitt + * @author stack + * @author nlevitt * @version $Revision$, $Date$ */ public class GenericReplayCharSequence implements ReplayCharSequence { @@ -67,9 +66,9 @@ public class GenericReplayCharSequence implements ReplayCharSequence { * decodings. The name of the file that holds the decoding is the name * of the backing file w/ this encoding for a suffix. * - *

See Encoding. + *

See Encoding. */ - public static final Charset WRITE_ENCODING = Charsets.UTF_16BE; + public static final Charset WRITE_ENCODING = StandardCharsets.UTF_16BE; private static final long MAP_MAX_BYTES = 64 * 1024 * 1024; // 64M @@ -170,8 +169,8 @@ private void updateMemoryMappedBuffer() { long charLength = (long) this.length() - (long) prefixBuffer.limit(); // in characters long mapSize = Math.min((charLength * bytesPerChar) - mapByteOffset, MAP_MAX_BYTES); logger.fine("updateMemoryMappedBuffer: mapOffset=" - + NumberFormat.getInstance().format(mapByteOffset) - + " mapSize=" + NumberFormat.getInstance().format(mapSize)); + + NumberFormat.getInstance(Locale.ROOT).format(mapByteOffset) + + " mapSize=" + NumberFormat.getInstance(Locale.ROOT).format(mapSize)); try { // TODO: stress-test without these possibly-costly requests! // System.gc(); @@ -257,9 +256,9 @@ protected void decode(InputStream inStream, int prefixMax, this.length = Ints.saturatedCast(count); if(count>Integer.MAX_VALUE) { logger.warning("input stream is longer than Integer.MAX_VALUE=" - + NumberFormat.getInstance().format(Integer.MAX_VALUE) + + NumberFormat.getInstance(Locale.ROOT).format(Integer.MAX_VALUE) + " characters -- only first " - + NumberFormat.getInstance().format(Integer.MAX_VALUE) + + NumberFormat.getInstance(Locale.ROOT).format(Integer.MAX_VALUE) + " are accessible through this GenericReplayCharSequence"); } diff --git a/src/main/java/org/archive/io/GzipHeader.java b/src/main/java/org/archive/io/GzipHeader.java deleted file mode 100644 index 6b8263bc..00000000 --- a/src/main/java/org/archive/io/GzipHeader.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.io; - -/** - * @deprecated use {@link org.archive.util.zip.GzipHeader} - */ -@Deprecated -public class GzipHeader extends org.archive.util.zip.GzipHeader { -} diff --git a/src/main/java/org/archive/io/HeaderedArchiveRecord.java b/src/main/java/org/archive/io/HeaderedArchiveRecord.java index 3cce595b..858edb4d 100644 --- a/src/main/java/org/archive/io/HeaderedArchiveRecord.java +++ b/src/main/java/org/archive/io/HeaderedArchiveRecord.java @@ -25,12 +25,11 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.Locale; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpParser; -import org.apache.commons.httpclient.StatusLine; -import org.apache.commons.httpclient.util.EncodingUtil; -import org.archive.io.arc.ARCConstants; +import org.archive.format.http.HttpHeader; +import org.archive.format.arc.ARCConstants; import org.archive.util.LaxHttpParser; /** @@ -59,7 +58,7 @@ public class HeaderedArchiveRecord extends ArchiveRecord { * * Only available after the reading of headers. 
*/ - private Header [] contentHeaders = null; + private HttpHeader[] contentHeaders = null; public HeaderedArchiveRecord(final ArchiveRecord ar) throws IOException { @@ -147,27 +146,29 @@ private InputStream readContentHeaders() throws IOException { int eolCharCount = getEolCharsCount(statusBytes); if (eolCharCount <= 0) { throw new IOException("Failed to read raw lie where one " + - " was expected: " + new String(statusBytes)); + " was expected: " + new String(statusBytes, ARCConstants.DEFAULT_ENCODING)); } - String statusLine = EncodingUtil.getString(statusBytes, 0, + String statusLine = new String(statusBytes, 0, statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); - if (statusLine == null) { - throw new NullPointerException("Expected status line is null"); - } + statusLine = statusLine.trim(); // TODO: Tighten up this test. - boolean isHttpResponse = StatusLine.startsWithHTTP(statusLine); + boolean isHttpResponse = statusLine.startsWith("HTTP"); boolean isHttpRequest = false; if (!isHttpResponse) { - isHttpRequest = statusLine.toUpperCase().startsWith("GET") || - !statusLine.toUpperCase().startsWith("POST"); + isHttpRequest = statusLine.toUpperCase(Locale.ROOT).startsWith("GET") || + !statusLine.toUpperCase(Locale.ROOT).startsWith("POST"); } if (!isHttpResponse && !isHttpRequest) { throw new UnexpectedStartLineIOException("Failed parse of " + "status line: " + statusLine); } - this.statusCode = isHttpResponse? - (new StatusLine(statusLine)).getStatusCode(): -1; - + + if (isHttpResponse) { + this.statusCode = parseStatusCode(statusLine); + } else { + this.statusCode = -1; + } + // Save off all bytes read. 
Keep them as bytes rather than // convert to strings so we don't have to worry about encodings // though this should never be a problem doing http headers since @@ -183,7 +184,7 @@ private InputStream readContentHeaders() throws IOException { eolCharCount = getEolCharsCount(lineBytes); if (eolCharCount <= 0) { throw new IOException("Failed reading headers: " + - ((lineBytes != null)? new String(lineBytes): null)); + ((lineBytes != null)? new String(lineBytes, StandardCharsets.ISO_8859_1): null)); } // Save the bytes read. baos.write(lineBytes); @@ -210,7 +211,19 @@ private InputStream readContentHeaders() throws IOException { bais.reset(); return bais; } - + + public static int parseStatusCode(String statusLine) { + int i = statusLine.indexOf(' '); + if (i < 0) return -1; + int j = statusLine.indexOf(' ', i + 1); + if (j < 0) j = statusLine.length(); + try { + return Integer.parseInt(statusLine.substring(i + 1, j)); + } catch (NumberFormatException e) { + return -1; + } + } + public static class UnexpectedStartLineIOException extends RecoverableIOException { private static final long serialVersionUID = 1L; @@ -252,7 +265,7 @@ public int getContentHeadersLength() { return this.contentHeadersLength; } - public Header[] getContentHeaders() { + public HttpHeader[] getContentHeaders() { return contentHeaders; } diff --git a/src/main/java/org/archive/io/MiserOutputStream.java b/src/main/java/org/archive/io/MiserOutputStream.java index f10ac9ca..f29256fd 100644 --- a/src/main/java/org/archive/io/MiserOutputStream.java +++ b/src/main/java/org/archive/io/MiserOutputStream.java @@ -27,7 +27,7 @@ * A filter stream that both counts bytes written, and optionally swallows * flush() requests. 
* - * @contributor gojomo + * @author gojomo */ public class MiserOutputStream extends FilterOutputStream { protected long count; diff --git a/src/main/java/org/archive/io/NoGzipMagicException.java b/src/main/java/org/archive/io/NoGzipMagicException.java deleted file mode 100644 index 27d1058a..00000000 --- a/src/main/java/org/archive/io/NoGzipMagicException.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.archive.io; - -/** - * @deprecated use {@link org.archive.util.zip.NoGzipMagicException} - */ -@Deprecated -public class NoGzipMagicException extends org.archive.util.zip.NoGzipMagicException { -} diff --git a/src/main/java/org/archive/io/ObjectPlusFilesOutputStream.java b/src/main/java/org/archive/io/ObjectPlusFilesOutputStream.java index 224f24e7..bd5c1eea 100644 --- a/src/main/java/org/archive/io/ObjectPlusFilesOutputStream.java +++ b/src/main/java/org/archive/io/ObjectPlusFilesOutputStream.java @@ -18,10 +18,8 @@ */ package org.archive.io; -import java.io.File; -import java.io.IOException; -import java.io.ObjectOutputStream; -import java.io.OutputStream; +import java.io.*; +import java.nio.file.Files; import java.util.LinkedList; import org.archive.util.FileUtils; @@ -116,19 +114,10 @@ public void snapshotAppendOnlyFile(File file) throws IOException { * @throws IOException */ private void hardlinkOrCopy(File file, File destination) throws IOException { - // For Linux/UNIX, try a hard link first. 
- Process link = Runtime.getRuntime().exec("ln "+file.getAbsolutePath()+" "+destination.getAbsolutePath()); - // TODO NTFS also supports hard links; add appropriate try try { - link.waitFor(); - } catch (InterruptedException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - if(link.exitValue()!=0) { - // hard link failed + Files.createLink(destination.toPath(), file.toPath()); + } catch (UnsupportedOperationException | IOException e) { FileUtils.copyFile(file,destination); } } - } diff --git a/src/main/java/org/archive/io/Preformatter.java b/src/main/java/org/archive/io/Preformatter.java index dcd31bb6..c7099c51 100644 --- a/src/main/java/org/archive/io/Preformatter.java +++ b/src/main/java/org/archive/io/Preformatter.java @@ -24,7 +24,7 @@ * Interface indicating a logging Formatter can preformat a record (outside * the standard-implementation synchronized block) and cache it, returning it * for the next request for formatting from the same thread. - * @contributor gojomo + * @author gojomo */ public interface Preformatter { public void preformat(LogRecord record); diff --git a/src/main/java/org/archive/io/RecordingInputStream.java b/src/main/java/org/archive/io/RecordingInputStream.java index b46905ed..3c9db61f 100644 --- a/src/main/java/org/archive/io/RecordingInputStream.java +++ b/src/main/java/org/archive/io/RecordingInputStream.java @@ -74,8 +74,10 @@ public RecordingInputStream(int bufferSize, String backingFilename) } public void open(InputStream wrappedStream) throws IOException { - logger.fine(Thread.currentThread().getName() + " opening " + - wrappedStream + ", " + Thread.currentThread().getName()); + if (logger.isLoggable(Level.FINE)) { + logger.fine("wrapping " + wrappedStream + " in thread " + + Thread.currentThread().getName()); + } if(isOpen()) { // error; should not be opening/wrapping in an unclosed // stream remains open @@ -135,11 +137,11 @@ public int read(byte[] b) throws IOException { public void close() throws IOException { if
(logger.isLoggable(Level.FINE)) { - logger.fine(Thread.currentThread().getName() + " closing " + - this.in + ", " + Thread.currentThread().getName()); + logger.fine("closing " + this.in + " in thread " + + Thread.currentThread().getName()); } IOUtils.closeQuietly(this.in); - this.in = null; + this.in = null; IOUtils.closeQuietly(this.recordingOutputStream); } @@ -159,20 +161,77 @@ public long readFully() throws IOException { return this.recordingOutputStream.getSize(); } + public void readToEndOfContent(long contentLength) + throws IOException, InterruptedException { + // Check we're open before proceeding. + if (!isOpen()) { + // TODO: should this be a noisier exception-raising error? + return; + } + + long totalBytes = recordingOutputStream.position - recordingOutputStream.getMessageBodyBegin(); + long bytesRead = -1L; + long maxToRead = -1; + while (contentLength <= 0 || totalBytes < contentLength) { + try { + // read no more than soft max + maxToRead = (contentLength <= 0) + ? drainBuffer.length + : Math.min(drainBuffer.length, contentLength - totalBytes); + // nor more than hard max + maxToRead = Math.min(maxToRead, recordingOutputStream.getRemainingLength()); + // but always at least 1 (to trigger hard max exception) XXX wtf is this? + maxToRead = Math.max(maxToRead, 1); + + bytesRead = read(drainBuffer,0,(int)maxToRead); + if (bytesRead == -1) { + break; + } + totalBytes += bytesRead; + + if (Thread.interrupted()) { + throw new InterruptedException("Interrupted during IO"); + } + } catch (SocketTimeoutException e) { + // A socket timeout is just a transient problem, meaning + // nothing was available in the configured timeout period, + // but something else might become available later. + // Take this opportunity to check the overall + // timeout (below). One reason for this timeout is + // servers that keep up the connection, 'keep-alive', even + // though we asked them to not keep the connection open. 
+ if (logger.isLoggable(Level.FINE)) { + logger.log(Level.FINE, "socket timeout", e); + } + // check for interrupt + if (Thread.interrupted()) { + throw new InterruptedException("Interrupted during IO"); + } + // check for overall timeout + recordingOutputStream.checkLimits(); + } catch (SocketException se) { + throw se; + } catch (NullPointerException e) { + // [ 896757 ] NPEs in Andy's Th-Fri Crawl. + // A crawl was showing NPE's in this part of the code but can + // not reproduce. Adding this rethrowing catch block w/ + // diagnostics to help should we come across the problem in the + // future. + throw new NullPointerException("Stream " + this.in + ", " + + e.getMessage() + " " + Thread.currentThread().getName()); + } + } + } + /** * Read all of a stream (Or read until we timeout or have read to the max). - * @param softMaxLength Maximum length to read; if zero or < 0, then no + * @param softMaxLength Maximum length to read; if zero or < 0, then no * limit. If met, return normally. - * @param hardMaxLength Maximum length to read; if zero or < 0, then no - * limit. If exceeded, throw RecorderLengthExceededException - * @param timeout Timeout in milliseconds for total read; if zero or - * negative, timeout is Long.MAX_VALUE. If exceeded, throw - * RecorderTimeoutException - * @param maxBytesPerMs How many bytes per millisecond. * @throws IOException failed read. 
* @throws RecorderLengthExceededException * @throws RecorderTimeoutException * @throws InterruptedException + * @deprecated */ public void readFullyOrUntil(long softMaxLength) throws IOException, RecorderLengthExceededException, @@ -324,12 +383,12 @@ public synchronized void mark(int readlimit) { @Override public boolean markSupported() { - return this.in.markSupported(); + return in != null && this.in.markSupported(); } @Override public synchronized void reset() throws IOException { - this.in.reset(); + if (in != null) this.in.reset(); this.recordingOutputStream.reset(); } @@ -349,7 +408,23 @@ public int getRecordedBufferLength() { return recordingOutputStream.getBufferLength(); } + /** + * See doc on {@link RecordingOutputStream#chopAtMessageBodyBegin()} + */ + public void chopAtMessageBodyBegin() { + recordingOutputStream.chopAtMessageBodyBegin(); + } + public void clearForReuse() throws IOException { recordingOutputStream.clearForReuse(); } + + /** + * Returns an OutputStream that can be used for recording input data. This is useful if the input comes in some + * form other than an InputStream. For example, if the input is provided by a callback periodically called with + * a chunk of data. 
+ */ + public RecordingOutputStream asOutputStream() { + return this.recordingOutputStream; + } } diff --git a/src/main/java/org/archive/io/RecordingOutputStream.java b/src/main/java/org/archive/io/RecordingOutputStream.java index 4d0713da..6c77997b 100644 --- a/src/main/java/org/archive/io/RecordingOutputStream.java +++ b/src/main/java/org/archive/io/RecordingOutputStream.java @@ -19,8 +19,8 @@ package org.archive.io; -import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; - +import java.io.BufferedOutputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -84,17 +84,11 @@ public class RecordingOutputStream extends OutputStream { private byte[] buffer; /** current virtual position in the recording */ - private long position; + long position; /** flag to disable recording */ private boolean recording; - /** - * Reusable buffer for FastBufferedOutputStream - */ - protected byte[] bufStreamBuf = - new byte [ FastBufferedOutputStream.DEFAULT_BUFFER_SIZE ]; - /** * True if we're to digest content. */ @@ -132,6 +126,29 @@ public class RecordingOutputStream extends OutputStream { */ protected long messageBodyBeginMark; + /** + * While messageBodyBeginMark is not set, the last two bytes seen. + * + *

+ * This class does automatic detection of http message body begin (i.e. end + * of http headers). Unfortunately httpcomponents did not want to add + * functionality to help us with this, see + * https://issues.apache.org/jira/browse/HTTPCORE-325 + * + *

+ * It works like this: while messageBodyBeginMark is not set, we remember + * the last two bytes seen, and look at each byte we write. If the + * lastTwoBytes+currentByte is "\n\r\n", or lastTwoBytes[1]+currentByte is + * "\n\n" then we call markMessageBodyBegin() at the position after + * currentByte. + * + *

+ * An assumption here is that protocols other than http don't have headers, + * and for those protocols the user of this class will call + * markMessageBodyBegin() at position 0 before writing anything. + */ + protected int[] lastTwoBytes = new int[] {-1, -1}; + /** * Stream to record. */ @@ -183,15 +200,18 @@ public void open(OutputStream wrappedStream) throws IOException { } clearForReuse(); this.out = wrappedStream; + startTime = System.currentTimeMillis(); + } + + protected OutputStream ensureDiskStream() throws FileNotFoundException { if (this.diskStream == null) { - // TODO: Fix so we only make file when its actually needed. FileOutputStream fis = new FileOutputStream(this.backingFilename); - - this.diskStream = new RecyclingFastBufferedOutputStream(fis, bufStreamBuf); + this.diskStream = new BufferedOutputStream(fis); } - startTime = System.currentTimeMillis(); + return this.diskStream; } + public void write(int b) throws IOException { if(position< 0l) { + // looking for "\n\n" or "\n\r\n" + if (b == '\n' + && (lastTwoBytes[1] == '\n' + || (lastTwoBytes[0] == '\n' && lastTwoBytes[1] == '\r'))) { + markMessageBodyBegin(); + } else { + lastTwoBytes[0] = lastTwoBytes[1]; + lastTwoBytes[1] = b; + } + } + checkLimits(); } + private int findMessageBodyBeginMark(byte[] b, int off, int len) { + if ((lastTwoBytes[1] == '\n' || lastTwoBytes[0] == '\n' && lastTwoBytes[1] == '\r') + && len >= 1 && b[off] == '\n') { + return off + 1; + } else if (lastTwoBytes[1] == '\n' && len >= 2 && b[off] == '\r' && b[off+1] == '\n') { + return off + 2; + } + + for (int i = off; i < off + len - 1; i++) { + if (b[i] == '\n' && b[i+1] == '\n') { + return i + 2; + } else if (b[i] == '\n' && b[i+1] == '\r' + && i + 2 < off + len && b[i+2] == '\n') { + return i + 3; + } + } + + return -1; + } + public void write(byte[] b, int off, int len) throws IOException { if(position < maxPosition) { if(position+len<=maxPosition) { @@ -220,12 +274,35 @@ public void write(byte[] b, int off, int len) throws
IOException { off += consumeRange; len -= consumeRange; } + + if (messageBodyBeginMark < 0) { + // see comment on int[] lastTwoBytes + int mark = findMessageBodyBeginMark(b, off, len); + if (mark > 0) { + if(recording) { + record(b, off, mark - off); + } + if (this.out != null) { + this.out.write(b, off, mark - off); + } + markMessageBodyBegin(); + len = len - (mark - off); + off = mark; + } + } + if(recording) { record(b, off, len); } if (this.out != null) { this.out.write(b, off, len); } + if (len >= 1) { + lastTwoBytes[1] = b[off + len - 1]; + if (len >= 2) { + lastTwoBytes[0] = b[off + len - 2]; + } + } checkLimits(); } @@ -251,7 +328,7 @@ protected void checkLimits() throws RecorderIOException { throw new RecorderTimeoutException(); } // need to throttle reading to hit max configured rate? - if(position/duration > maxRateBytesPerMs) { + if(position/duration >= maxRateBytesPerMs) { long desiredDuration = position / maxRateBytesPerMs; try { Thread.sleep(desiredDuration-duration); @@ -274,10 +351,7 @@ private void record(int b) throws IOException { this.digest.update((byte)b); } if (this.position >= this.buffer.length) { - // TODO: Its possible to call write w/o having first opened a - // stream. Protect ourselves against this. - assert this.diskStream != null: "Diskstream is null"; - this.diskStream.write(b); + this.ensureDiskStream().write(b); } else { this.buffer[(int) this.position] = (byte) b; } @@ -312,12 +386,7 @@ private void record(byte[] b, int off, int len) throws IOException { */ private void tailRecord(byte[] b, int off, int len) throws IOException { if(this.position >= this.buffer.length){ - // TODO: Its possible to call write w/o having first opened a - // stream. Lets protect ourselves against this. 
- if (this.diskStream == null) { - throw new IOException("diskstream is null"); - } - this.diskStream.write(b, off, len); + this.ensureDiskStream().write(b, off, len); this.position += len; } else { assert this.buffer != null: "Buffer is null"; @@ -557,6 +626,18 @@ public long getRemainingLength() { return maxLength - position; } + /** + * Forget about anything past the point where the content-body starts. This + * is needed to support FetchHTTP's shouldFetchBody setting. See also the + * docs on {@link #lastTwoBytes} + */ + public void chopAtMessageBodyBegin() { + if (messageBodyBeginMark >= 0) { + this.size = messageBodyBeginMark; + this.position = messageBodyBeginMark; + } + } + public void clearForReuse() throws IOException { this.out = null; this.position = 0; diff --git a/src/main/java/org/archive/io/RecyclingFastBufferedOutputStream.java b/src/main/java/org/archive/io/RecyclingFastBufferedOutputStream.java deleted file mode 100644 index a3b76e46..00000000 --- a/src/main/java/org/archive/io/RecyclingFastBufferedOutputStream.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.archive.io; - -import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; - -import java.io.OutputStream; - -/** - * FastBufferedOutputStream that accepts a passed-in buffer (avoiding - * reallocation). - */ -public class RecyclingFastBufferedOutputStream extends FastBufferedOutputStream { - public RecyclingFastBufferedOutputStream( final OutputStream os, final byte[] buffer ) { - super(os); - this.buffer = buffer; - avail = buffer.length; - } -} - - diff --git a/src/main/java/org/archive/io/ReplayCharSequence.java b/src/main/java/org/archive/io/ReplayCharSequence.java index aa9b9587..bd74f2f8 100644 --- a/src/main/java/org/archive/io/ReplayCharSequence.java +++ b/src/main/java/org/archive/io/ReplayCharSequence.java @@ -23,8 +23,7 @@ import java.io.IOException; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; - -import com.google.common.base.Charsets; +import java.nio.charset.StandardCharsets; /** @@ -40,7 +39,7 @@ public interface ReplayCharSequence extends CharSequence, Closeable { /** charset to use in replay when declared value * is absent/illegal/unavailable */ - public Charset FALLBACK_CHARSET = Charsets.ISO_8859_1; // TODO: should this be UTF-8? + public Charset FALLBACK_CHARSET = StandardCharsets.ISO_8859_1; // TODO: should this be UTF-8? /** * Call this method when done so implementation has chance to clean up @@ -59,7 +58,7 @@ public interface ReplayCharSequence extends CharSequence, Closeable { public long getDecodeExceptionCount(); /** - * Return the first coding-exception encountered, if the count > 0. + * Return the first coding-exception encountered, if the count > 0. 
* @return CharacterCodingException */ public CharacterCodingException getCodingException(); diff --git a/src/main/java/org/archive/io/ReplayInputStream.java b/src/main/java/org/archive/io/ReplayInputStream.java index fccf5fd3..60b0dc85 100644 --- a/src/main/java/org/archive/io/ReplayInputStream.java +++ b/src/main/java/org/archive/io/ReplayInputStream.java @@ -64,7 +64,7 @@ public class ReplayInputStream extends SeekInputStream * @param size Size of data to replay. * @param responseBodyStart Start of the response body. * @param backingFilename Backing file that sits behind the buffer. If - * size > than buffer then we go to backing file to read + * size > than buffer then we go to backing file to read * data that is beyond buffer.length. * * @throws IOException If we fail to open an input stream on @@ -84,7 +84,7 @@ public ReplayInputStream(byte[] buffer, long size, long responseBodyStart, * @param buffer Buffer to read from. * @param size Size of data to replay. * @param backingFilename Backing file that sits behind the buffer. If - * size > than buffer then we go to backing file to read + * size > than buffer then we go to backing file to read * data that is beyond buffer.length. * @throws IOException If we fail to open an input stream on * backing file. @@ -130,7 +130,7 @@ public ReplayInputStream(InputStream fillStream) throws IOException { } /** - * Close & destroy any internally-generated temporary files. + * Close & destroy any internally-generated temporary files. 
*/ public void destroy() { IOUtils.closeQuietly(this); @@ -192,11 +192,15 @@ public int read(byte[] b, int off, int len) throws IOException { } public void readFullyTo(OutputStream os) throws IOException { + readFullyTo(this, os); + } + + public static void readFullyTo(InputStream in, OutputStream os) throws IOException { byte[] buf = new byte[4096]; - int c = read(buf); + int c = in.read(buf); while (c != -1) { os.write(buf,0,c); - c = read(buf); + c = in.read(buf); } } @@ -218,12 +222,7 @@ public void readHeaderTo(OutputStream os) throws IOException { */ public void readContentTo(OutputStream os) throws IOException { setToResponseBodyStart(); - byte[] buf = new byte[4096]; - int c = read(buf); - while (c != -1) { - os.write(buf,0,c); - c = read(buf); - } + readFullyTo(os); } /** diff --git a/src/main/java/org/archive/io/RepositionableInputStream.java b/src/main/java/org/archive/io/RepositionableInputStream.java index 6f885130..838b5952 100644 --- a/src/main/java/org/archive/io/RepositionableInputStream.java +++ b/src/main/java/org/archive/io/RepositionableInputStream.java @@ -29,7 +29,7 @@ * stream. Uses a {@link BufferedInputStream}. Calls mark on every read so * we'll remember at least the last thing read (You can only backup on the * last thing read -- not last 2 or 3 things read). Used by - * {@link GzippedInputStream} when reading streams over a network. Wraps a + * GzippedInputStream when reading streams over a network. Wraps a * HTTP, etc., stream so we can back it up if needs be after the * GZIP inflater has done a fill of its full buffer though it only needed * the first few bytes to finish decompressing the current GZIP member. 
diff --git a/src/main/java/org/archive/io/UTF8Bytes.java b/src/main/java/org/archive/io/UTF8Bytes.java index c280b08d..4dc0144b 100644 --- a/src/main/java/org/archive/io/UTF8Bytes.java +++ b/src/main/java/org/archive/io/UTF8Bytes.java @@ -19,6 +19,7 @@ package org.archive.io; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; /** * Marker Interface for instances that can be serialized as UTF8 bytes. @@ -27,7 +28,7 @@ * @version $Date$ $Version$ */ public interface UTF8Bytes { - public static final String UTF8 = "UTF-8"; + public static final String UTF8 = StandardCharsets.UTF_8.name(); /** * @return Instance as UTF-8 bytes. diff --git a/src/main/java/org/archive/io/WriterPool.java b/src/main/java/org/archive/io/WriterPool.java index 2dc385a1..79da16c0 100644 --- a/src/main/java/org/archive/io/WriterPool.java +++ b/src/main/java/org/archive/io/WriterPool.java @@ -30,6 +30,7 @@ import java.util.logging.Level; import java.util.logging.Logger; +import org.archive.format.ArchiveFileConstants; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -88,10 +89,7 @@ public abstract class WriterPool { /** * Constructor * @param serial Used to generate unique filename sequences - * @param factory Factory that knows how to make a {@link WriterPoolMember}. * @param settings Settings for this pool. - * @param poolMaximumActive - * @param poolMaximumWait */ public WriterPool(final AtomicInteger serial, final WriterPoolSettings settings, @@ -218,7 +216,7 @@ public synchronized void invalidateFile(WriterPoolMember f) // gets attention. 
File file = f.getFile(); file.renameTo(new File(file.getAbsoluteFile() + - WriterPoolMember.INVALID_SUFFIX)); + ArchiveFileConstants.INVALID_SUFFIX)); } /** diff --git a/src/main/java/org/archive/io/WriterPoolMember.java b/src/main/java/org/archive/io/WriterPoolMember.java index 6ea6b295..5d350534 100644 --- a/src/main/java/org/archive/io/WriterPoolMember.java +++ b/src/main/java/org/archive/io/WriterPoolMember.java @@ -19,15 +19,19 @@ package org.archive.io; +import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; import java.text.NumberFormat; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.Properties; import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Logger; @@ -37,6 +41,7 @@ import org.archive.util.FileUtils; import org.archive.util.PropertyUtils; +import static org.archive.format.ArchiveFileConstants.*; /** @@ -47,10 +52,10 @@ * @author stack * @version $Date$ $Revision$ */ -public abstract class WriterPoolMember implements ArchiveFileConstants { +public abstract class WriterPoolMember { private final Logger logger = Logger.getLogger(this.getClass().getName()); - public static final String UTF8 = "UTF-8"; + public static final String UTF8 = StandardCharsets.UTF_8.name(); /** * Default archival-aggregate filename template. 
@@ -80,9 +85,6 @@ public abstract class WriterPoolMember implements ArchiveFileConstants { /** Counting stream for metering */ protected MiserOutputStream countOut = null; - /** reusable buffer for recycling scenarios */ - protected byte[] rebuf; - protected WriterPoolSettings settings; private final String extension; @@ -104,12 +106,17 @@ public abstract class WriterPoolMember implements ArchiveFileConstants { */ protected static int roundRobinIndex = 0; + /** + * Symbol set for serial number formatter. + */ + protected static DecimalFormatSymbols serialNoFormatterSymbols = new DecimalFormatSymbols(Locale.ROOT); + /** * NumberFormat instance for formatting serial number. * * Pads serial number with zeros. */ - protected static NumberFormat serialNoFormatter = new DecimalFormat("00000"); + protected static NumberFormat serialNoFormatter = new DecimalFormat("00000", serialNoFormatterSymbols); /** @@ -126,9 +133,6 @@ public abstract class WriterPoolMember implements ArchiveFileConstants { * @param serialNo used to create unique filename sequences * @param out Where to write. * @param file File the out is connected to. - * @param cmprs Compress the content written. - * @param a14DigitDate If null, we'll write current time. - * @throws IOException */ protected WriterPoolMember(AtomicInteger serialNo, final OutputStream out, final File file, @@ -146,11 +150,6 @@ protected WriterPoolMember(AtomicInteger serialNo, * Constructor. * * @param serialNo used to create unique filename sequences - * @param dirs Where to drop files. - * @param prefix File prefix to use. - * @param cmprs Compress the records written. - * @param maxSize Maximum size for ARC files written. - * @param template filenaming template to use * @param extension Extension to give file. 
*/ public WriterPoolMember(AtomicInteger serialNo, @@ -209,10 +208,7 @@ protected String createFile(final File file) throws IOException { close(); this.f = file; FileOutputStream fos = new FileOutputStream(this.f); - if(rebuf==null) { - rebuf = new byte[settings.getWriteBufferSize()]; - } - this.countOut = new MiserOutputStream(new RecyclingFastBufferedOutputStream(fos,rebuf),settings.getFrequentFlushes()); + this.countOut = new MiserOutputStream(new BufferedOutputStream(fos),settings.getFrequentFlushes()); this.out = this.countOut; logger.fine("Opened " + this.f.getAbsolutePath()); return this.f.getName(); @@ -365,7 +361,6 @@ protected void postWriteRecordTasks() * Position in raw output (typically, physical file). * Used making accounting of bytes written. * @return Position in final media (assuming all flushing completes) - * @throws IOException */ public long getPosition() { return (countOut==null)? 0L : this.countOut.getCount(); diff --git a/src/main/java/org/archive/io/arc/ARC2WCDX.java b/src/main/java/org/archive/io/arc/ARC2WCDX.java index 19010131..aec571e9 100644 --- a/src/main/java/org/archive/io/arc/ARC2WCDX.java +++ b/src/main/java/org/archive/io/arc/ARC2WCDX.java @@ -22,18 +22,18 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; -import java.util.Date; -import java.util.Iterator; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.*; import java.util.zip.GZIPOutputStream; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HeaderGroup; -import org.apache.commons.httpclient.util.DateParseException; -import org.apache.commons.httpclient.util.DateUtil; +import org.archive.format.http.HttpHeader; import org.archive.io.ArchiveRecord; import org.archive.util.ArchiveUtils; import org.archive.util.SURT; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Create a 'Wide' CDX from an ARC. Takes one argument, the path to the ARC. 
* Writes .wcdx.gz in same directory. @@ -63,7 +63,7 @@ public static Object[] createWcdx(ARCReader reader) { PrintStream writer = null; long count = 0; try { - writer = new PrintStream(new GZIPOutputStream(new FileOutputStream(wcdxFile))); + writer = new PrintStream(new GZIPOutputStream(new FileOutputStream(wcdxFile)), false, UTF_8.name()); // write header: legend + timestamp StringBuilder legend = new StringBuilder(); @@ -95,12 +95,15 @@ public static Object[] createWcdx(ARCReader reader) { ARCRecord record = (ARCRecord) iter.next(); record.close(); ARCRecordMetaData h = (ARCRecordMetaData) record.getHeader(); - Header[] httpHeaders = record.getHttpHeaders(); + HttpHeader[] httpHeaders = record.getHttpHeaders(); if(httpHeaders==null) { - httpHeaders = new Header[0]; + httpHeaders = new HttpHeader[0]; + } + Map<String, HttpHeader> headerMap = new HashMap<>(); + for (HttpHeader header : httpHeaders) { + headerMap.putIfAbsent(header.getName().toLowerCase(Locale.ROOT), header); } - HeaderGroup hg = new HeaderGroup(); - hg.setHeaders(httpHeaders); + StringBuilder builder = new StringBuilder(); // SURT-form URI @@ -108,7 +111,7 @@ public static Object[] createWcdx(ARCReader reader) { // record timestamp ('b') appendField(builder,h.getDate()); // http header date - appendTimeField(builder,hg.getFirstHeader("Date")); + appendTimeField(builder, headerMap.get("date")); // response code ('s') appendField(builder,h.getStatusCode()); // media type ('m') @@ -131,17 +134,17 @@ public static Object[] createWcdx(ARCReader reader) { // uncompressed (declared in ARC headerline) record length appendField(builder,h.getLength()); // http header content-length - appendField(builder,hg.getFirstHeader("Content-Length")); + appendField(builder, headerMap.get("content-length")); // http header mod-date - appendTimeField(builder,hg.getFirstHeader("Last-Modified")); + appendTimeField(builder, headerMap.get("last-modified")); // http header expires - appendTimeField(builder,hg.getFirstHeader("Expires")); +
appendTimeField(builder, headerMap.get("expires")); // http header etag - appendField(builder,hg.getFirstHeader("ETag")); + appendField(builder, headerMap.get("etag")); // http header redirect ('Location' header?) - appendField(builder,hg.getFirstHeader("Location")); + appendField(builder, headerMap.get("location")); // ip ('e') appendField(builder,h.getIp()); // original URI @@ -186,8 +189,8 @@ protected static void appendField(StringBuilder builder, Object obj) { // prepend with delimiter builder.append(' '); } - if(obj instanceof Header) { - obj = ((Header)obj).getValue().trim(); + if(obj instanceof HttpHeader) { + obj = ((HttpHeader)obj).getValue().trim(); } builder.append((obj==null||obj.toString().length()==0)?"-":obj); @@ -202,16 +205,16 @@ protected static void appendTimeField(StringBuilder builder, Object obj) { builder.append("-"); return; } - if(obj instanceof Header) { - String s = ((Header)obj).getValue().trim(); + if(obj instanceof HttpHeader) { + String s = ((HttpHeader)obj).getValue().trim(); try { - Date date = DateUtil.parseDate(s); + Date date = parseDate(s); String d = ArchiveUtils.get14DigitDate(date); if(d.startsWith("209")) { d = "199"+d.substring(3); } obj = d; - } catch (DateParseException e) { + } catch (ParseException e) { builder.append('e'); return; } @@ -219,6 +222,23 @@ protected static void appendTimeField(StringBuilder builder, Object obj) { } builder.append(obj); } + + private static Date parseDate(String s) throws ParseException { + SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US); + format.setTimeZone(TimeZone.getTimeZone("GMT")); + format.set2DigitYearStart(new Date(946684800000L)); // year 2000 + try { + return format.parse(s); + } catch (ParseException e) { + try { + format.applyPattern("EEEE, dd-MMM-yy HH:mm:ss zzz"); + return format.parse(s); + } catch (ParseException e1) { + format.applyPattern("EEE MMM d HH:mm:ss yyyy"); + return format.parse(s); + } + } + } } //'wide' CDX diff --git
a/src/main/java/org/archive/io/arc/ARCReader.java b/src/main/java/org/archive/io/arc/ARCReader.java index 7f85cc2a..f8935e79 100644 --- a/src/main/java/org/archive/io/arc/ARCReader.java +++ b/src/main/java/org/archive/io/arc/ARCReader.java @@ -27,6 +27,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Logger; @@ -43,6 +44,7 @@ import org.archive.io.WriterPoolMember; import org.archive.util.ArchiveUtils; +import static org.archive.format.arc.ARCConstants.*; /** * Get an iterator on an ARC file or get a record by absolute position. @@ -66,7 +68,7 @@ * @version $Date$ $Revision$ */ public abstract class ARCReader extends ArchiveReader -implements ARCConstants, Closeable { +implements Closeable { private final Logger logger = Logger.getLogger(ARCReader.class.getName()); /** @@ -446,7 +448,6 @@ public static void createCDXIndexFile(String urlOrPath) * @throws IOException * @throws java.text.ParseException */ - @SuppressWarnings("unchecked") public static void main(String [] args) throws ParseException, IOException, java.text.ParseException { Options options = getOptions(); @@ -492,7 +493,7 @@ public static void main(String [] args) break; case 'f': - format = cmdlineOptions[i].getValue().toLowerCase(); + format = cmdlineOptions[i].getValue().toLowerCase(Locale.ROOT); boolean match = false; // List of supported formats. 
final String [] supportedFormats = diff --git a/src/main/java/org/archive/io/arc/ARCReaderFactory.java b/src/main/java/org/archive/io/arc/ARCReaderFactory.java index e7dc1625..bbcc8b6f 100644 --- a/src/main/java/org/archive/io/arc/ARCReaderFactory.java +++ b/src/main/java/org/archive/io/arc/ARCReaderFactory.java @@ -27,6 +27,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; +import java.util.Locale; import java.util.logging.Level; import org.archive.io.ArchiveReader; @@ -40,6 +41,7 @@ import com.google.common.io.CountingInputStream; +import static org.archive.format.arc.ARCConstants.*; /** * Factory that returns an ARCReader. @@ -48,8 +50,7 @@ * * @author stack */ -public class ARCReaderFactory extends ArchiveReaderFactory -implements ARCConstants { +public class ARCReaderFactory extends ArchiveReaderFactory { /** * This factory instance. */ @@ -147,11 +148,11 @@ protected ArchiveReader getArchiveReader(final String arc, possiblyWrapped.mark(100); boolean compressed = testCompressedARCStream(possiblyWrapped); possiblyWrapped.reset(); - + if (compressed) { return new CompressedARCReader(arc, possiblyWrapped, atFirstRecord); } else { - return new UncompressedARCReader(arc, possiblyWrapped); + return new UncompressedARCReader(arc, possiblyWrapped, atFirstRecord); } } @@ -230,7 +231,7 @@ public static boolean testCompressedARCFile(File arcFile, throws IOException { boolean compressedARCFile = false; FileUtils.assertReadable(arcFile); - if(!skipSuffixCheck && !arcFile.getName().toLowerCase() + if(!skipSuffixCheck && !arcFile.getName().toLowerCase(Locale.ROOT) .endsWith(COMPRESSED_ARC_FILE_EXTENSION)) { return compressedARCFile; } @@ -247,9 +248,9 @@ public static boolean testCompressedARCFile(File arcFile, public static boolean isARCSuffix(final String arcName) { return (arcName == null)? false: - (arcName.toLowerCase().endsWith(DOT_COMPRESSED_ARC_FILE_EXTENSION))? 
+ (arcName.toLowerCase(Locale.ROOT).endsWith(DOT_COMPRESSED_ARC_FILE_EXTENSION))? true: - (arcName.toLowerCase().endsWith(DOT_ARC_FILE_EXTENSION))? + (arcName.toLowerCase(Locale.ROOT).endsWith(DOT_ARC_FILE_EXTENSION))? true: false; } @@ -330,10 +331,11 @@ public UncompressedARCReader(final File f, final long offset) * @param f Uncompressed arc to read. * @param is InputStream. */ - public UncompressedARCReader(final String f, final InputStream is) { + public UncompressedARCReader(final String f, final InputStream is, boolean atFirstRecord) { // Arc file has been tested for existence by time it has come // to here. setIn(new CountingInputStream(is)); + setAlignedOnFirstRecord(atFirstRecord); initialize(f); } } @@ -451,4 +453,4 @@ protected void gotoEOR(ArchiveRecord rec) throws IOException { logStdErr(Level.WARNING, message); } } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java index 21bea07c..c14426a5 100644 --- a/src/main/java/org/archive/io/arc/ARCRecord.java +++ b/src/main/java/org/archive/io/arc/ARCRecord.java @@ -27,34 +27,36 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.StatusLine; -import org.apache.commons.httpclient.util.EncodingUtil; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; +import org.archive.format.http.HttpHeader; import org.archive.io.ArchiveRecord; import org.archive.io.ArchiveRecordHeader; +import org.archive.io.HeaderedArchiveRecord; import org.archive.io.RecoverableIOException; import org.archive.util.InetAddressUtil; import org.archive.util.LaxHttpParser; import org.archive.util.TextUtils; +import static org.archive.format.arc.ARCConstants.*; + /** 
* An ARC file record. * Does not compass the ARCRecord metadata line, just the record content. * @author stack */ -public class ARCRecord extends ArchiveRecord implements ARCConstants { +public class ARCRecord extends ArchiveRecord { /** - * Http status line object. + * Http status code. * - * May be null if record is not http. + * May be -1 if record is not http. */ - private StatusLine httpStatus = null; + private int statusCode = -1; /** * Http header bytes. @@ -69,7 +71,7 @@ public class ARCRecord extends ArchiveRecord implements ARCConstants { * * Only populated after reading of headers. */ - private Header [] httpHeaders = null; + private HttpHeader[] httpHeaders = null; /** * Array of field names. @@ -190,17 +192,15 @@ public ARCRecord(InputStream in, ArchiveRecordHeader metaData, * formatted). * @param parseHttpHeaders True if we are to parse HTTP headers. Costs * about ~20% of CPU during an ARC parse. - * @param isAllignedOnFirstRecord True if this is the first record to be + * @param isAlignedOnFirstRecord True if this is the first record to be * read from an archive - * @param String version Version information to be returned to the + * @param version Version information to be returned to the * ARCReader constructing this record - * - * @throws IOException */ public ARCRecord(InputStream in, final String identifier, final long offset, boolean digest, boolean strict, final boolean parseHttpHeaders, - final boolean isAlignedOnFirstRecord, String version) + final boolean isAlignedOnFirstRecord, String version) throws IOException { super(in, null, 0, digest, strict); setHeader(parseHeaders(in, identifier, offset, strict, isAlignedOnFirstRecord, version)); @@ -243,6 +243,7 @@ private ArchiveRecordHeader parseHeaders(final InputStream in, getTokenizedHeaderLine(in, firstLineValues); int bodyOffset = 0; + String origin = ""; if (offset == 0 && isAlignedOnFirstRecord) { // If offset is zero and we were aligned at first record on // creation (See 
#alignedOnFirstRecord for more on this), then no @@ -263,6 +264,7 @@ private ArchiveRecordHeader parseHeaders(final InputStream in, bodyOffset += getTokenizedHeaderLine(in, secondLineValues); version = ((String)secondLineValues.get(0) + "." + (String)secondLineValues.get(1)); + origin = (String)secondLineValues.get(2); // Just read over the 3rd line. We used to parse it and use // values found here but now we just hardcode them to avoid // having to read this 3rd line even for random arc file accesses. @@ -271,7 +273,8 @@ private ArchiveRecordHeader parseHeaders(final InputStream in, } setBodyOffset(bodyOffset); - return computeMetaData(this.headerFieldNameKeys, firstLineValues, version, offset, identifier); + return computeMetaData(this.headerFieldNameKeys, firstLineValues, + version, origin, offset, identifier); } /** @@ -362,7 +365,8 @@ private int getTokenizedHeaderLine(final InputStream stream, * @exception IOException If no. of keys doesn't match no. of values. */ private ARCRecordMetaData computeMetaData(List keys, - List values, String v, long offset, final String identifier) + List values, String v, String origin, + long offset, final String identifier) throws IOException { if (keys.size() != values.size()) { List originalValues = values; @@ -373,7 +377,7 @@ private ARCRecordMetaData computeMetaData(List keys, if (keys.size() != values.size()) { // Early ARCs had a space in mimetype. 
if (values.size() == (keys.size() + 1) && - values.get(4).toLowerCase().startsWith("charset=")) { + values.get(4).toLowerCase(Locale.ROOT).startsWith("charset=")) { List nuvalues = new ArrayList(keys.size()); nuvalues.add(0, values.get(0)); @@ -423,6 +427,7 @@ private ARCRecordMetaData computeMetaData(List keys, } headerFields.put(VERSION_FIELD_KEY, v); + headerFields.put(ORIGIN_FIELD_KEY, origin); headerFields.put(ABSOLUTE_OFFSET_KEY, new Long(offset)); return new ARCRecordMetaData(identifier, headerFields); @@ -584,11 +589,11 @@ private InputStream readHttpHeader() throws IOException { if (eolCharCount <= 0) { throw new RecoverableIOException( "Failed to read http status where one was expected: " - + ((statusBytes == null) ? "" : new String(statusBytes))); + + ((statusBytes == null) ? "" : new String(statusBytes, DEFAULT_ENCODING))); } - - statusLine = EncodingUtil.getString(statusBytes, 0, - statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); + + statusLine = new String(statusBytes, 0, + statusBytes.length - eolCharCount, DEFAULT_ENCODING); // If a null or DELETED break immediately if ((statusLine == null) || statusLine.startsWith("DELETED")) { @@ -597,7 +602,7 @@ private InputStream readHttpHeader() throws IOException { // If it's actually the status line, break, otherwise continue skipping any // previous header values - if (!statusLine.contains(":") && StatusLine.startsWithHTTP(statusLine)) { + if (!statusLine.contains(":") && statusLine.trim().startsWith("HTTP")) { break; } @@ -610,7 +615,7 @@ private InputStream readHttpHeader() throws IOException { } if ((statusLine == null) || - !StatusLine.startsWithHTTP(statusLine)) { + !statusLine.trim().startsWith("HTTP")) { if (statusLine.startsWith("DELETED")) { // Some old ARCs have deleted records like following: // http://vireo.gatech.edu:80/ebt-bin/nph-dweb/dynaweb/SGI_Developer/SGITCL_PG/@Generic__BookTocView/11108%3Btd%3D2 130.207.168.42 19991010131803 text/html 29202 @@ -626,13 +631,12 @@ 
private InputStream readHttpHeader() throws IOException { } } - try { - this.httpStatus = new StatusLine(statusLine); - } catch(IOException e) { - logger.warning(e.getMessage() + " at offset: " + h.getOffset()); - this.errors.add(ArcRecordErrors.HTTP_STATUS_LINE_EXCEPTION); + this.statusCode = HeaderedArchiveRecord.parseStatusCode(statusLine.trim()); + if (statusCode == -1) { + logger.warning("Bad status line at offset: " + h.getOffset()); + this.errors.add(ArcRecordErrors.HTTP_STATUS_LINE_EXCEPTION); } - + // Save off all bytes read. Keep them as bytes rather than // convert to strings so we don't have to worry about encodings // though this should never be a problem doing http headers since @@ -655,7 +659,7 @@ private InputStream readHttpHeader() throws IOException { break; } else { throw new IOException("Failed reading http headers: " + - ((lineBytes != null)? new String(lineBytes): null)); + ((lineBytes != null)? new String(lineBytes, DEFAULT_ENCODING): null)); } } else { httpHeaderBytesRead += lineBytes.length; @@ -680,8 +684,7 @@ private InputStream readHttpHeader() throws IOException { // Read the status line. Don't let it into the parseHeaders function. // It doesn't know what to do with it. bais.read(statusBytes, 0, statusBytes.length); - this.httpHeaders = LaxHttpParser.parseHeaders(bais, - ARCConstants.DEFAULT_ENCODING); + this.httpHeaders = LaxHttpParser.parseHeaders(bais, DEFAULT_ENCODING); this.getMetaData().setStatusCode(Integer.toString(getStatusCode())); bais.reset(); return bais; @@ -703,7 +706,7 @@ public DeletedARCRecordIOException(final String reason) { * @return Status code. */ public int getStatusCode() { - return (this.httpStatus == null)? -1: this.httpStatus.getStatusCode(); + return statusCode; } /** @@ -732,7 +735,7 @@ public ARCRecordMetaData getMetaData() { /** * @return http headers (Only available after header has been read). 
*/ - public Header [] getHttpHeaders() { + public HttpHeader[] getHttpHeaders() { return this.httpHeaders; } @@ -832,4 +835,4 @@ protected String getDigest4Cdx(ArchiveRecordHeader h) { } return (result != null) ? result: super.getDigest4Cdx(h); } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/arc/ARCRecordMetaData.java b/src/main/java/org/archive/io/arc/ARCRecordMetaData.java index 3f617041..2a187477 100644 --- a/src/main/java/org/archive/io/arc/ARCRecordMetaData.java +++ b/src/main/java/org/archive/io/arc/ARCRecordMetaData.java @@ -27,13 +27,14 @@ import org.archive.io.ArchiveRecordHeader; +import static org.archive.format.arc.ARCConstants.*; /** * An immutable class to hold an ARC record meta data. * * @author stack */ -public class ARCRecordMetaData implements ArchiveRecordHeader, ARCConstants { +public class ARCRecordMetaData implements ArchiveRecordHeader { /** * Map of record header fields. * @@ -168,6 +169,13 @@ public String getVersion() { return (String)this.headerFields.get(VERSION_FIELD_KEY); } + /** + * @return Arcfile origin code. + */ + public String getOrigin() { + return (String)this.headerFields.get(ORIGIN_FIELD_KEY); + } + /** * @return Offset into arcfile at which this record begins. 
*/ @@ -264,4 +272,4 @@ public int getContentBegin() { protected void setContentBegin(final int offset) { this.contentBegin = offset; } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/arc/ARCUtils.java b/src/main/java/org/archive/io/arc/ARCUtils.java index 985457e2..05c15abb 100644 --- a/src/main/java/org/archive/io/arc/ARCUtils.java +++ b/src/main/java/org/archive/io/arc/ARCUtils.java @@ -27,12 +27,15 @@ import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; +import java.util.Locale; import org.archive.url.UsableURI; import org.archive.util.zip.GzipHeader; import org.archive.util.zip.NoGzipMagicException; -public class ARCUtils implements ARCConstants { +import static org.archive.format.arc.ARCConstants.*; + +public class ARCUtils { /** * @param pathOrUri Path or URI to extract arc filename from. * @return Extracted arc file name. @@ -92,7 +95,7 @@ public static boolean testCompressedARCFile(File arcFile, throws IOException { boolean compressedARCFile = false; isReadable(arcFile); - if(!skipSuffixCheck && !arcFile.getName().toLowerCase() + if(!skipSuffixCheck && !arcFile.getName().toLowerCase(Locale.ROOT) .endsWith(COMPRESSED_ARC_FILE_EXTENSION)) { return compressedARCFile; } @@ -195,7 +198,7 @@ public static boolean testUncompressedARCFile(File arcFile) throws IOException { boolean uncompressedARCFile = false; isReadable(arcFile); - if(arcFile.getName().toLowerCase().endsWith(ARC_FILE_EXTENSION)) { + if(arcFile.getName().toLowerCase(Locale.ROOT).endsWith(ARC_FILE_EXTENSION)) { FileInputStream fis = new FileInputStream(arcFile); try { byte [] b = new byte[ARC_MAGIC_NUMBER.length()]; diff --git a/src/main/java/org/archive/io/arc/ARCWriter.java b/src/main/java/org/archive/io/arc/ARCWriter.java index b5825d50..82d13e9f 100644 --- a/src/main/java/org/archive/io/arc/ARCWriter.java +++ b/src/main/java/org/archive/io/arc/ARCWriter.java @@ -42,6 +42,7 @@ import org.archive.util.DevUtils; import 
org.archive.util.MimetypeUtils; +import static org.archive.format.arc.ARCConstants.*; /** * Write ARC files. @@ -86,7 +87,7 @@ * write our own GZIP*Streams, ones that resettable and consious of gzip * members. * - *

This class will write until we hit >= maxSize. The check is done at + *

This class will write until we hit >= maxSize. The check is done at * record boundary. Records do not span ARC files. We will then close current * file and open another and then continue writing. * @@ -95,9 +96,9 @@ * alexa * ARC c-tools: *

- * % av_procarc hx20040109230030-0.arc.gz | av_ziparc > \
+ * % av_procarc hx20040109230030-0.arc.gz | av_ziparc > \
  *     /tmp/hx20040109230030-0.dat.gz
- * % av_ripdat /tmp/hx20040109230030-0.dat.gz > /tmp/hx20040109230030-0.cdx
+ * % av_ripdat /tmp/hx20040109230030-0.dat.gz > /tmp/hx20040109230030-0.cdx
  * 
Examine the produced cdx file to make sure it makes sense. Search * for 'no-type 0'. If found, then we're opening a gzip record w/o data to @@ -110,7 +111,7 @@ * * @author stack */ -public class ARCWriter extends WriterPoolMember implements ARCConstants, Closeable { +public class ARCWriter extends WriterPoolMember implements Closeable { private static final Logger logger = Logger.getLogger(ARCWriter.class.getName()); @@ -129,12 +130,7 @@ public class ARCWriter extends WriterPoolMember implements ARCConstants, Closeab * @param serialNo used to generate unique file name sequences * @param out Where to write. * @param arc File the out is connected to. - * @param cmprs Compress the content written. - * @param metadata File meta data. Can be null. Is list of File and/or - * String objects. - * @param a14DigitDate If null, we'll write current time. - * @throws IOException - */ + */ public ARCWriter(final AtomicInteger serialNo, final PrintStream out, final File arc, final WriterPoolSettings settings) throws IOException { diff --git a/src/main/java/org/archive/io/warc/WARCConstants.java b/src/main/java/org/archive/io/warc/WARCConstants.java deleted file mode 100644 index 83cc8a6d..00000000 --- a/src/main/java/org/archive/io/warc/WARCConstants.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.io.warc; - -@Deprecated -public interface WARCConstants extends org.archive.format.warc.WARCConstants { -} diff --git a/src/main/java/org/archive/io/warc/WARCReader.java b/src/main/java/org/archive/io/warc/WARCReader.java index a34854ef..34583e58 100644 --- a/src/main/java/org/archive/io/warc/WARCReader.java +++ b/src/main/java/org/archive/io/warc/WARCReader.java @@ -24,6 +24,7 @@ import java.io.InputStream; import java.util.Iterator; import java.util.List; +import java.util.Locale; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.HelpFormatter; @@ -31,17 +32,19 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; -import org.apache.commons.lang.NotImplementedException; +import org.apache.commons.lang3.NotImplementedException; import org.archive.io.ArchiveReader; import org.archive.io.ArchiveRecord; +import static org.archive.format.warc.WARCConstants.*; + /** * WARCReader. * Go via {@link WARCReaderFactory} to get instance. 
* @author stack * @version $Date: 2006-11-27 18:03:03 -0800 (Mon, 27 Nov 2006) $ $Version$ */ -public class WARCReader extends ArchiveReader implements WARCConstants { +public class WARCReader extends ArchiveReader { protected WARCReader() { super(); } @@ -196,7 +199,6 @@ public static void main(String [] args) Options options = getOptions(); PosixParser parser = new PosixParser(); CommandLine cmdline = parser.parse(options, args, false); - @SuppressWarnings("unchecked") List cmdlineArgs = cmdline.getArgList(); Option [] cmdlineOptions = cmdline.getOptions(); HelpFormatter formatter = new HelpFormatter(); @@ -231,7 +233,7 @@ public static void main(String [] args) break; case 'f': - format = cmdlineOptions[i].getValue().toLowerCase(); + format = cmdlineOptions[i].getValue().toLowerCase(Locale.ROOT); boolean match = false; // List of supported formats. final String [] supportedFormats = @@ -284,4 +286,4 @@ public static void main(String [] args) } } } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/warc/WARCReaderFactory.java b/src/main/java/org/archive/io/warc/WARCReaderFactory.java index 9c6c7e77..70b80340 100644 --- a/src/main/java/org/archive/io/warc/WARCReaderFactory.java +++ b/src/main/java/org/archive/io/warc/WARCReaderFactory.java @@ -26,17 +26,19 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; +import java.util.Locale; import org.archive.io.ArchiveReader; import org.archive.io.ArchiveReaderFactory; import org.archive.io.ArchiveRecord; -import org.archive.io.warc.WARCConstants; import org.archive.util.ArchiveUtils; import org.archive.util.FileUtils; import org.archive.util.zip.GZIPMembersInputStream; import com.google.common.io.CountingInputStream; +import static org.archive.format.warc.WARCConstants.*; + /** * Factory for WARC Readers. 
* Figures whether to give out a compressed file Reader or an uncompressed @@ -44,8 +46,7 @@ * @author stack * @version $Date: 2006-08-23 17:59:04 -0700 (Wed, 23 Aug 2006) $ $Version$ */ -public class WARCReaderFactory extends ArchiveReaderFactory -implements WARCConstants { +public class WARCReaderFactory extends ArchiveReaderFactory { private static final WARCReaderFactory factory = new WARCReaderFactory(); /** @@ -100,12 +101,20 @@ public static ArchiveReader get(final String s, final InputStream is, atFirstRecord); } + /* + * Note that the ARC companion does this differently, with quite a lot of duplication. + * + * @see org.archive.io.arc.ARCReaderFactory.getArchiveReader(String, InputStream, boolean) + */ protected ArchiveReader getArchiveReader(final String f, final InputStream is, final boolean atFirstRecord) throws IOException { - // For now, assume stream is compressed. Later add test of input - // stream or handle exception thrown when figure not compressed stream. - return new CompressedWARCReader(f, is, atFirstRecord); + // Check if it's compressed, based on file extension. + if( f.endsWith(".gz") ) { + return new CompressedWARCReader(f, is, atFirstRecord); + } else { + return new UncompressedWARCReader(f, is); + } } public static WARCReader get(final URL arcUrl, final long offset) @@ -299,9 +308,9 @@ protected void gotoEOR(ArchiveRecord rec) throws IOException { public static boolean isWARCSuffix(final String f) { return (f == null)? false: - (f.toLowerCase().endsWith(DOT_COMPRESSED_WARC_FILE_EXTENSION))? + (f.toLowerCase(Locale.ROOT).endsWith(DOT_COMPRESSED_WARC_FILE_EXTENSION))? true: - (f.toLowerCase().endsWith(DOT_WARC_FILE_EXTENSION))? + (f.toLowerCase(Locale.ROOT).endsWith(DOT_WARC_FILE_EXTENSION))? 
true: false; } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/warc/WARCRecord.java b/src/main/java/org/archive/io/warc/WARCRecord.java index 635d1c3b..21f662ea 100644 --- a/src/main/java/org/archive/io/warc/WARCRecord.java +++ b/src/main/java/org/archive/io/warc/WARCRecord.java @@ -29,19 +29,22 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpParser; +import org.archive.format.http.HttpHeader; import org.archive.io.ArchiveRecord; import org.archive.io.ArchiveRecordHeader; import org.archive.util.LaxHttpParser; +import static org.archive.format.ArchiveFileConstants.ABSOLUTE_OFFSET_KEY; +import static org.archive.format.ArchiveFileConstants.READER_IDENTIFIER_FIELD_KEY; +import static org.archive.format.warc.WARCConstants.*; + /** * A WARC file Record. * * @author stack */ -public class WARCRecord extends ArchiveRecord implements WARCConstants { +public class WARCRecord extends ArchiveRecord { private Pattern WHITESPACE = Pattern.compile("\\s"); /** @@ -123,7 +126,7 @@ protected ArchiveRecordHeader parseHeaders(final InputStream in, // keep count of bytes read, digest and fail properly if EOR too soon... // We don't want digesting while reading Headers. 
// - Header [] h = LaxHttpParser.parseHeaders(in, WARC_HEADER_ENCODING); + HttpHeader[] h = LaxHttpParser.parseHeaders(in, WARC_HEADER_ENCODING); for (int i = 0; i < h.length; i++) { m.put(h[i].getName(), h[i].getValue()); } diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java index b9558263..65eb3346 100644 --- a/src/main/java/org/archive/io/warc/WARCWriter.java +++ b/src/main/java/org/archive/io/warc/WARCWriter.java @@ -36,13 +36,16 @@ import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang.StringUtils; -import org.archive.io.ArchiveFileConstants; -import org.archive.io.UTF8Bytes; +import org.apache.commons.lang3.StringUtils; +import org.archive.format.ArchiveFileConstants; import org.archive.io.WriterPoolMember; import org.archive.util.ArchiveUtils; import org.archive.util.anvl.Element; +import static org.archive.format.warc.WARCConstants.*; + +import static java.nio.charset.StandardCharsets.UTF_8; + /** * WARC implementation. @@ -53,11 +56,10 @@ * *

While being written, WARCs have a '.open' suffix appended. * - * @contributor stack + * @author stack * @version $Revision: 4604 $ $Date: 2006-09-05 22:38:18 -0700 (Tue, 05 Sep 2006) $ */ -public class WARCWriter extends WriterPoolMember -implements WARCConstants { +public class WARCWriter extends WriterPoolMember { public static final String TOTALS = "totals"; public static final String SIZE_ON_DISK = "sizeOnDisk"; public static final String TOTAL_BYTES = "totalBytes"; @@ -81,7 +83,7 @@ public class WARCWriter extends WriterPoolMember /** * Temporarily accumulates stats managed externally by - * {@link WARCWriterProcessor}. WARCWriterProcessor will call + * WARCWriterProcessor. WARCWriterProcessor will call * {@link #resetTmpStats()}, write some records, then add * {@link #getTmpStats()} into its long-term running totals. */ @@ -97,9 +99,6 @@ public class WARCWriter extends WriterPoolMember * @param serialNo used to generate unique file name sequences * @param out Where to write. * @param f File the out is connected to. - * @param cmprs Compress the content written. - * @param a14DigitDate If null, we'll write current time. - * @throws IOException */ public WARCWriter(final AtomicInteger serialNo, final OutputStream out, final File f, @@ -110,13 +109,6 @@ public WARCWriter(final AtomicInteger serialNo, /** * Constructor. - * - * @param dirs Where to drop files. - * @param prefix File prefix to use. - * @param cmprs Compress the records written. - * @param maxSize Maximum size for ARC files written. - * @param suffix File tail to use. If null, unused. - * @param warcinfoData File metadata for warcinfo record. */ public WARCWriter(final AtomicInteger serialNo, final WARCWriterPoolSettings settings) { @@ -236,8 +228,8 @@ public void writeRecord(WARCRecordInfo recordInfo) long totalBytes = 0; long startPosition; - try { - startPosition = getPosition(); + startPosition = getPosition(); + try { preWriteRecordTasks(); // TODO: Revisit encoding of header. 
@@ -245,10 +237,11 @@ public void writeRecord(WARCRecordInfo recordInfo) write(bytes); totalBytes += bytes.length; + // Write out the header/body separator. + write(CRLF_BYTES); + totalBytes += CRLF_BYTES.length; + if (recordInfo.getContentStream() != null && recordInfo.getContentLength() > 0) { - // Write out the header/body separator. - write(CRLF_BYTES); // TODO: should this be written even for zero-length? - totalBytes += CRLF_BYTES.length; contentBytes += copyFrom(recordInfo.getContentStream(), recordInfo.getContentLength(), recordInfo.getEnforceLength()); @@ -260,13 +253,12 @@ public void writeRecord(WARCRecordInfo recordInfo) write(CRLF_BYTES); totalBytes += 2 * CRLF_BYTES.length; - tally(recordInfo.getType(), contentBytes, totalBytes, getPosition() - startPosition); - recordInfo.setWARCFilename(getFilenameWithoutOccupiedSuffix()); recordInfo.setWARCFileOffset(startPosition); tmpRecordLog.add(recordInfo); } finally { postWriteRecordTasks(); + tally(recordInfo.getType(), contentBytes, totalBytes, getPosition() - startPosition); } } @@ -353,9 +345,9 @@ public URI writeWarcinfoRecord(String filename, final String description) recordInfo.setMimetype("application/warc-fields"); // Strip .open suffix if present. - if (filename.endsWith(WriterPoolMember.OCCUPIED_SUFFIX)) { + if (filename.endsWith(ArchiveFileConstants.OCCUPIED_SUFFIX)) { filename = filename.substring(0, - filename.length() - WriterPoolMember.OCCUPIED_SUFFIX.length()); + filename.length() - ArchiveFileConstants.OCCUPIED_SUFFIX.length()); } recordInfo.addExtraHeader(HEADER_KEY_FILENAME, filename); if (description != null && description.length() > 0) { @@ -366,12 +358,12 @@ public URI writeWarcinfoRecord(String filename, final String description) byte [] warcinfoBody = null; if (settings.getMetadata() == null) { // TODO: What to write into a warcinfo? What to associate? 
- warcinfoBody = "TODO: Unimplemented".getBytes(); + warcinfoBody = "TODO: Unimplemented".getBytes(UTF_8); } else { ByteArrayOutputStream baos = new ByteArrayOutputStream(); for (final Iterator i = settings.getMetadata().iterator(); i.hasNext();) { - baos.write(i.next().toString().getBytes(UTF8Bytes.UTF8)); + baos.write(i.next().toString().getBytes(UTF_8)); } warcinfoBody = baos.toByteArray(); } diff --git a/src/main/java/org/archive/io/warc/WARCWriterPool.java b/src/main/java/org/archive/io/warc/WARCWriterPool.java index fdc97162..b94025df 100644 --- a/src/main/java/org/archive/io/warc/WARCWriterPool.java +++ b/src/main/java/org/archive/io/warc/WARCWriterPool.java @@ -26,8 +26,8 @@ /** * A pool of WARCWriters. - * @contributor stack - * @contributor gojomo + * @author stack + * @author gojomo * @version $Revision: 4566 $ $Date: 2006-08-31 09:51:41 -0700 (Thu, 31 Aug 2006) $ */ public class WARCWriterPool extends WriterPool { diff --git a/src/main/java/org/archive/io/warc/package.html b/src/main/java/org/archive/io/warc/package.html index f52aa95b..d3631d54 100644 --- a/src/main/java/org/archive/io/warc/package.html +++ b/src/main/java/org/archive/io/warc/package.html @@ -16,8 +16,8 @@

Implementation Notes

Tools

Initial implementations of Arc2Warc and Warc2Arc -tools can be found in the package above this one, at -{@link org.archive.io.Arc2Warc} and {@link org.archive.io.Warc2Arc} +tools can be found in Heritrix, at +org.archive.io.Arc2Warc and org.archive.io.Warc2Arc respectively. Pass --help to learn how to use each tool.

diff --git a/src/main/java/org/archive/net/PublicSuffixes.java b/src/main/java/org/archive/net/PublicSuffixes.java index eab8081a..79130332 100644 --- a/src/main/java/org/archive/net/PublicSuffixes.java +++ b/src/main/java/org/archive/net/PublicSuffixes.java @@ -22,21 +22,24 @@ import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileInputStream; -import java.io.FileWriter; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.IOUtils; import org.archive.util.TextUtils; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Utility class for making use of the information about 'public suffixes' at * http://publicsuffix.org. @@ -74,7 +77,7 @@ public class PublicSuffixes { * prefix tree node. each Node represents sequence of letters (prefix) * and alternative sequences following it (list of Node's). Nodes in * {@code branches} are sorted for skip list like lookup and for generating - * effective regular expression (see {@link #compareTo(Node)} and {@link #compareTo(char).) + * effective regular expression (see {@link #compareTo(Node)} and {@link #compareTo(char)}). * * as is intended for internal use only, there's no access methods. procedures for updating * prefix tree with new input are defined within this class ({@link #addBranch(CharSequence)}). @@ -121,6 +124,7 @@ public boolean add(CharSequence s) { i++; // zero-length match holds only when both cs and s are empty.
if (i == 0) return cs.length() == 0 && s.length() == 0; + // cs is longer than s, so we need to replace cs with a prefix, and add a branch if (i < cs.length()) { CharSequence cs0 = cs.subSequence(0, i); CharSequence cs1 = cs.subSequence(i, cs.length()); @@ -128,10 +132,21 @@ public boolean add(CharSequence s) { cs = cs0; Node alt1 = new Node(cs1, branches); (branches = new ArrayList()).add(alt1); - addBranch(cs2); + if(cs2.length() == 0) { + // if cs2 is empty, we have a terminal node. + branches.add(new Node("", null)); + } else { + // otherwise, we have a new branch. + addBranch(cs2); + } + } else { - assert i == cs.length(); - addBranch(s.subSequence(i, s.length())); + // s is longer than cs, so we need to add a branch + if(i != s.length()) { + // but not if they are equal. + assert i == cs.length(); + addBranch(s.subSequence(i, s.length())); + } } return true; } @@ -172,12 +187,12 @@ public static void main(String args[]) throws IOException { InputStream is; if (args.length == 0 || "=".equals(args[0])) { // use bundled list - is = PublicSuffixes.class.getClassLoader().getResourceAsStream( - "effective_tld_names.dat"); + is = PublicSuffixes.class.getResourceAsStream( + "/org/archive/effective_tld_names.dat"); } else { is = new FileInputStream(args[0]); } - BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8")); + BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8)); String regex = getTopmostAssignedSurtPrefixRegex(reader); IOUtils.closeQuietly(is); @@ -185,11 +200,11 @@ public static void main(String args[]) throws IOException { BufferedWriter writer; if (args.length >= 2) { // write to specified file - writer = new BufferedWriter(new FileWriter(args[1])); + writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]), UTF_8)); needsClose = true; } else { // write to stdout - writer = new BufferedWriter(new OutputStreamWriter(System.out)); + writer = new BufferedWriter(new 
OutputStreamWriter(System.out, Charset.defaultCharset())); } writer.append(regex); writer.flush(); @@ -219,7 +234,7 @@ protected static Node readPublishedFileToSurtTrie(BufferedReader reader) throws // discard utf8 notation after entry line = line.split("\\s+")[0]; // TODO: maybe we don't need to create lower-cased String - line = line.toLowerCase(); + line = line.toLowerCase(Locale.ROOT); // SURT-order domain segments String[] segs = line.split("\\."); StringBuilder sb = new StringBuilder(); @@ -265,7 +280,7 @@ protected static void buildRegex(Node alt, StringBuilder sb) { sb.append("(?="); close = ")"; } else if (c == '*') { - sb.append("[-\\w]+"); + sb.append("[-\\w\\u00C0-\\u017F]+"); } else { sb.append(c); } @@ -304,7 +319,7 @@ private static String surtPrefixRegexFromTrie(Node trie) { regex.append("(?ix)^\n"); trie.addBranch("*,"); // for new/unknown TLDs buildRegex(trie, regex); - regex.append("\n([-\\w]+,)"); + regex.append("\n([-\\w\\u00C0-\\u017F]+,)"); return regex.toString(); } @@ -319,16 +334,11 @@ public static synchronized Pattern getTopmostAssignedSurtPrefixPattern() { public static synchronized String getTopmostAssignedSurtPrefixRegex() { if (topmostAssignedSurtPrefixRegex == null) { // use bundled list - try { - BufferedReader reader = new BufferedReader(new InputStreamReader( - PublicSuffixes.class.getClassLoader().getResourceAsStream( - "effective_tld_names.dat"), "UTF-8")); - topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader); - IOUtils.closeQuietly(reader); - } catch (UnsupportedEncodingException ex) { - // should never happen - throw new RuntimeException(ex); - } + BufferedReader reader = new BufferedReader(new InputStreamReader( + PublicSuffixes.class.getResourceAsStream( + "/org/archive/effective_tld_names.dat"), UTF_8)); + topmostAssignedSurtPrefixRegex = getTopmostAssignedSurtPrefixRegex(reader); + IOUtils.closeQuietly(reader); } return topmostAssignedSurtPrefixRegex; } diff --git 
a/src/main/java/org/archive/resource/AbstractResource.java b/src/main/java/org/archive/resource/AbstractResource.java index 409e7408..301c53d4 100755 --- a/src/main/java/org/archive/resource/AbstractResource.java +++ b/src/main/java/org/archive/resource/AbstractResource.java @@ -5,7 +5,7 @@ import org.archive.util.StreamCopy; -import com.google.common.io.NullOutputStream; +import com.google.common.io.ByteStreams; public abstract class AbstractResource implements Resource { protected ResourceContainer container; @@ -44,7 +44,7 @@ public static void dumpShort(PrintStream out, Resource resource) throws IOExcept // out.println("Headers Before"); // out.print(m.toString()); - long bytes = StreamCopy.copy(resource.getInputStream(), new NullOutputStream()); + long bytes = StreamCopy.copy(resource.getInputStream(), ByteStreams.nullOutputStream()); out.println("Resource Was:"+bytes+" Long"); out.println("[\n]Headers After"); diff --git a/src/main/java/org/archive/resource/MetaData.java b/src/main/java/org/archive/resource/MetaData.java index a975b0d4..9b7747d8 100755 --- a/src/main/java/org/archive/resource/MetaData.java +++ b/src/main/java/org/archive/resource/MetaData.java @@ -7,6 +7,15 @@ import org.json.JSONObject; import org.json.JSONTokener; +/** + * A nested structure of {@linkplain JSONObject}s to hold the metadata of + * content in nested containers, e.g. a HTML page as payload of a HTTP response + * in a WARC record stored as gzip "member". + * + * MetaData is multi-valued: if a second value is added under the same "key" + * ("name"), both values are stored in a {@linkplain JSONArray} as value. This + * allows to hold all values of repeating WARC or HTTP headers. 
+ */ public class MetaData extends JSONObject { private static final Logger LOG = @@ -19,8 +28,8 @@ public MetaData(MetaData parentMetaData, String name) { this.topMetaData = this; } else { topMetaData = parentMetaData.topMetaData; + parentMetaData.putChild(name, this); } - parentMetaData.putChild(name, this); } public MetaData(String jsonString) throws JSONException { @@ -67,6 +76,18 @@ public int getInt(String key) { } } + @Override + public int optInt(String key, int defaultValue) { + if (has(key)) { + try { + return super.getInt(key); + } catch(JSONException e) { + LOG.severe(e.getMessage()); + } + } + return defaultValue; + } + @Override public long getLong(String key) { try { @@ -77,6 +98,18 @@ public long getLong(String key) { } } + @Override + public long optLong(String key, long defaultValue) { + if (has(key)) { + try { + return super.getLong(key); + } catch(JSONException e) { + LOG.severe(e.getMessage()); + } + } + return defaultValue; + } + @Override public String getString(String key) { try { @@ -102,9 +135,49 @@ public void setTopMetaData(MetaData topMetaData) { this.topMetaData = topMetaData; } + @Override + public JSONObject put(String name, boolean value) throws JSONException { + return super.accumulate(name, value); + } + + @Override + public JSONObject put(String name, double value) throws JSONException { + return super.accumulate(name, value); + } + + @Override + public JSONObject put(String name, int value) throws JSONException { + return super.accumulate(name, value); + } + + @Override + public JSONObject put(String name, long value) throws JSONException { + return super.accumulate(name, value); + } + + @Override + public JSONObject put(String key, Object value) { + if (value instanceof JSONArray) { + super.remove(key); + super.put(key, value); + } else if (has(key)) { + if (super.get(key) instanceof JSONArray) { + ((JSONArray) super.get(key)).put(value); + return this; + } else { + JSONArray array = new JSONArray(); + 
array.put(super.get(key)); + array.put(value); + super.put(key, array); + } + return super.accumulate(key, value); + } + return super.put(key, value); + } + public JSONObject putString(String key, String val) { try { - return super.put(key,val); + return super.accumulate(key,val); } catch(JSONException e) { LOG.severe(e.getMessage()); return null; @@ -113,7 +186,7 @@ public JSONObject putString(String key, String val) { public JSONObject putLong(String key, long val) { try { - return super.put(key,String.valueOf(val)); + return super.accumulate(key,String.valueOf(val)); } catch(JSONException e) { LOG.severe(e.getMessage()); return null; @@ -122,7 +195,7 @@ public JSONObject putLong(String key, long val) { public JSONObject putBoolean(String key, boolean val) { try { - return super.put(key,val); + return super.accumulate(key,val); } catch(JSONException e) { LOG.severe(e.getMessage()); return null; diff --git a/src/main/java/org/archive/resource/ResourceConstants.java b/src/main/java/org/archive/resource/ResourceConstants.java index dd04fcfe..3b8bea1c 100644 --- a/src/main/java/org/archive/resource/ResourceConstants.java +++ b/src/main/java/org/archive/resource/ResourceConstants.java @@ -31,6 +31,7 @@ public interface ResourceConstants { public static final String ENVELOPE_FORMAT = "Format"; public static final String ENVELOPE_FORMAT_ARC = "ARC"; public static final String ENVELOPE_FORMAT_WARC = "WARC"; + public static final String ENVELOPE_FORMAT_WARC_1_0 = "WARC/1.0"; public static final String WARC_HEADER_LENGTH = "WARC-Header-Length"; public static final String WARC_HEADER_METADATA = "WARC-Header-Metadata"; @@ -104,7 +105,7 @@ public interface ResourceConstants { public static final String HTTP_ENTITY_LENGTH = "Entity-Length"; public static final String HTTP_ENTITY_DIGEST = "Entity-Digest"; - public static final String HTTP_ENTITY_TRAILING_SLOP = "Entity-Trailing-Slop-Bytes"; + public static final String HTTP_ENTITY_TRAILING_SLOP = "Entity-Trailing-Slop-Length"; 
public static final String HTML_METADATA = "HTML-Metadata"; public static final String HTML_HEAD = "Head"; diff --git a/src/main/java/org/archive/resource/ResourceFactory.java b/src/main/java/org/archive/resource/ResourceFactory.java index 64b4eb4e..cddc8934 100755 --- a/src/main/java/org/archive/resource/ResourceFactory.java +++ b/src/main/java/org/archive/resource/ResourceFactory.java @@ -12,12 +12,6 @@ public interface ResourceFactory { /** * Attempts to create a Resource from the InputStream - * @param is - * @param metaData - * @param container - * @return - * @throws ResourceParseException - * @throws IOException */ public Resource getResource(InputStream is, MetaData parentMetaData, ResourceContainer container) diff --git a/src/main/java/org/archive/resource/arc/ARCResource.java b/src/main/java/org/archive/resource/arc/ARCResource.java index 5d63fd4d..b0195f08 100644 --- a/src/main/java/org/archive/resource/arc/ARCResource.java +++ b/src/main/java/org/archive/resource/arc/ARCResource.java @@ -18,8 +18,8 @@ import org.archive.util.io.EOFObserver; import org.archive.util.io.PushBackOneByteInputStream; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; public class ARCResource extends AbstractResource @@ -54,7 +54,7 @@ public ARCResource(MetaData metaData, ResourceContainer container, fields.putLong(DECLARED_LENGTH_KEY, arcMetaData.getLength()); countingIS = new CountingInputStream( - new LimitInputStream(raw, arcMetaData.getLength())); + ByteStreams.limit(raw, arcMetaData.getLength())); try { digIS = new DigestInputStream(countingIS, @@ -64,10 +64,12 @@ public ARCResource(MetaData metaData, ResourceContainer container, } } + @Override public InputStream getInputStream() { return new EOFNotifyingInputStream(digIS, this); } + @Override public void notifyEOF() throws IOException { metaData.putLong(PAYLOAD_LENGTH, countingIS.getCount()); String digString = 
Base32.encode(digIS.getMessageDigest().digest()); diff --git a/src/main/java/org/archive/resource/generic/GenericResourceProducer.java b/src/main/java/org/archive/resource/generic/GenericResourceProducer.java index 812a3f0d..b111dc1e 100644 --- a/src/main/java/org/archive/resource/generic/GenericResourceProducer.java +++ b/src/main/java/org/archive/resource/generic/GenericResourceProducer.java @@ -1,6 +1,7 @@ package org.archive.resource.generic; import java.io.IOException; +import java.util.Locale; import org.archive.resource.MetaData; import org.archive.resource.Resource; @@ -45,6 +46,6 @@ public void close() throws IOException { stream.close(); } public String getContext() { - return String.format("Context(%s)(%d)", name, stream.getOffset()); + return String.format(Locale.ROOT, "Context(%s)(%d)", name, stream.getOffset()); } } diff --git a/src/main/java/org/archive/resource/gzip/GZIPMetaData.java b/src/main/java/org/archive/resource/gzip/GZIPMetaData.java index 0fc18162..1058b01b 100644 --- a/src/main/java/org/archive/resource/gzip/GZIPMetaData.java +++ b/src/main/java/org/archive/resource/gzip/GZIPMetaData.java @@ -15,6 +15,8 @@ import org.json.JSONException; import org.json.JSONObject; +import static java.nio.charset.StandardCharsets.UTF_8; + public class GZIPMetaData extends MetaData implements ResourceConstants { private static final Logger LOG = Logger.getLogger(GZIPMetaData.class.getName()); @@ -26,7 +28,7 @@ public void setData(GZIPSeriesMember member) { GZIPHeader header = member.getHeader(); GZIPStaticHeader staticH = header.getStaticHeader(); if(staticH.isFNameSet()) { - putString(GZIP_FILENAME,new String(header.getFileName(),"UTF-8")); + putString(GZIP_FILENAME, new String(header.getFileName(), UTF_8)); } if(staticH.isFCommentSet()) { putLong(GZIP_COMMENT_LENGTH,header.getCommentLength()); @@ -39,7 +41,7 @@ public void setData(GZIPSeriesMember member) { for(int i = 0; i < records; i++) { GZIPFExtraRecord rec = header.getRecord(i); JSONObject recJO = 
new JSONObject(); - String name = new String(rec.getName(),"UTF-8"); + String name = new String(rec.getName(), UTF_8); recJO.put(GZIP_FEXTRA_NAME, name); if(name.equals("SL") || name.equals("LX")) { recJO.put(GZIP_FEXTRA_VALUE, ByteOp.bytesToInt(rec.getValue())); @@ -55,8 +57,6 @@ public void setData(GZIPSeriesMember member) { putLong(GZIP_INFLATED_CRC,footer.getCRC()); putLong(GZIP_INFLATED_LENGTH,footer.getLength()); - } catch (UnsupportedEncodingException e) { - LOG.warning(e.getMessage()); } catch (JSONException e) { LOG.warning(e.getMessage()); } diff --git a/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java b/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java index 39611ab8..5267a0f9 100644 --- a/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java +++ b/src/main/java/org/archive/resource/gzip/GZIPResourceContainer.java @@ -1,6 +1,7 @@ package org.archive.resource.gzip; import java.io.IOException; +import java.util.Locale; import org.archive.format.gzip.GZIPMemberSeries; import org.archive.format.gzip.GZIPSeriesMember; @@ -54,6 +55,6 @@ public void close() throws IOException { series.close(); } public String getContext() { - return String.format("Context(%s)(%d)", series.getStreamContext(), series.getCurrentMemberStartOffset()); + return String.format(Locale.ROOT, "Context(%s)(%d)", series.getStreamContext(), series.getCurrentMemberStartOffset()); } } diff --git a/src/main/java/org/archive/resource/html/ExtractingParseObserver.java b/src/main/java/org/archive/resource/html/ExtractingParseObserver.java index e1f57b55..ab439d5c 100644 --- a/src/main/java/org/archive/resource/html/ExtractingParseObserver.java +++ b/src/main/java/org/archive/resource/html/ExtractingParseObserver.java @@ -2,12 +2,17 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; +import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.Stack; +import java.util.Vector; import 
java.util.regex.Matcher; import java.util.regex.Pattern; import org.archive.format.text.html.ParseObserver; +import org.htmlparser.Attribute; import org.htmlparser.nodes.RemarkNode; import org.htmlparser.nodes.TagNode; import org.htmlparser.nodes.TextNode; @@ -21,27 +26,41 @@ public class ExtractingParseObserver implements ParseObserver { boolean inTitle = false; protected static String cssUrlPatString = - "url\\s*\\(\\s*([\\\\\"']*.+?[\\\\\"']*)\\s*\\)"; + "url\\s*\\(\\s*((?:\\\\?[\"'])?.+?(?:\\\\?[\"'])?)\\s*\\)"; + protected static String cssUrlTrimPatString = + "^(?:\\\\?[\"'])+|(?:\\\\?[\"'])+$"; protected static String cssImportNoUrlPatString = - "@import\\s+(('[^']+')|(\"[^\"]+\")|(\\('[^']+'\\))|(\\(\"[^\"]+\"\\))|(\\([^)]+\\))|([a-z0-9_.:/\\\\-]+))\\s*;"; + "@import\\s+((?:'[^']+')|(?:\"[^\"]+\")|(?:\\('[^']+'\\))|(?:\\(\"[^\"]+\"\\))|(?:\\([^)]+\\))|(?:[a-z0-9_.:/\\\\-]+))\\s*;"; protected static Pattern cssImportNoUrlPattern = Pattern .compile(cssImportNoUrlPatString); protected static Pattern cssUrlPattern = Pattern.compile(cssUrlPatString); + + protected static Pattern cssUrlTrimPattern = Pattern.compile(cssUrlTrimPatString); + + protected static String jsOnClickUrl1PatString = + "(?i)^(?:javascript:)?(?:(?:window|top|document|self|parent)\\.)?location(?:\\.href)?\\s*=\\s*('|')([^'\"]{3,256})\\1$"; + protected static String jsOnClickUrl2PatString = + "(?i)^(?:javascript:)?(?:window|parent)\\.open\\((['\"]|')([^\"']{3,256}?)\\1[,)]"; + protected static Pattern[] jsOnClickUrlPatterns = { + Pattern.compile(jsOnClickUrl1PatString), + Pattern.compile(jsOnClickUrl2PatString) + }; + private final static int MAX_TEXT_LEN = 100; -// private static String GLOBAL_ATTR[] = {"background"}; - private static final String PATH = "path"; private static final String PATH_SEPARATOR = "@/"; - private final static Map extractors; + private static final Map extractors; + private static final Set globalHrefAttributes; static { extractors = new HashMap(); extractors.put("A", 
new AnchorTagExtractor()); extractors.put("APPLET", new AppletTagExtractor()); extractors.put("AREA", new AreaTagExtractor()); extractors.put("BASE", new BaseTagExtractor()); + extractors.put("DIV", new DivTagExtractor()); extractors.put("EMBED", new EmbedTagExtractor()); extractors.put("FORM", new FormTagExtractor()); extractors.put("FRAME", new FrameTagExtractor()); @@ -52,6 +71,22 @@ public class ExtractingParseObserver implements ParseObserver { extractors.put("META", new MetaTagExtractor()); extractors.put("OBJECT", new ObjectTagExtractor()); extractors.put("SCRIPT", new ScriptTagExtractor()); + extractors.put("Q", new QuotationLinkTagExtractor()); + extractors.put("BLOCKQUOTE", new QuotationLinkTagExtractor()); + extractors.put("DEL", new QuotationLinkTagExtractor()); + extractors.put("INS", new QuotationLinkTagExtractor()); + // HTML5: + extractors.put("BUTTON", new ButtonTagExtractor()); + extractors.put("MENUITEM", new MenuitemTagExtractor()); + extractors.put("VIDEO", new EmbedVideoTagExtractor()); + extractors.put("AUDIO", new EmbedTagExtractor()); + extractors.put("TRACK", new EmbedTagExtractor()); + extractors.put("SOURCE", new EmbedTagExtractor()); + + globalHrefAttributes = new HashSet(); + globalHrefAttributes.add("background"); + globalHrefAttributes.add("data-href"); + globalHrefAttributes.add("data-uri"); } @@ -79,11 +114,19 @@ public void handleTagOpen(TagNode tag) { inTitle = !tag.isEmptyXmlTag(); return; } + // first the global attributes: - // background - String v = tag.getAttribute("background"); - if(v != null) { - data.addHref(PATH,makePath(name,"background"),"url",v); + Vector attributes = tag.getAttributesEx(); + for (Attribute a : attributes) { + String attrName = a.getName(); + String attrValue = a.getValue(); + if (attrName == null || attrValue == null) { + continue; + } + attrName = attrName.toLowerCase(Locale.ROOT); + if (globalHrefAttributes.contains(attrName)) { + data.addHref(PATH,makePath(name,attrName),"url",attrValue); + } } 
// TODO: style attribute, BASE(href) tag, Resolve URLs @@ -235,7 +278,20 @@ private static void addHrefWithAttrs(HTMLMetaData data, TagNode node, if(l != null) { data.addHref(l); } - } + } + + private static void addHrefsOnclick(HTMLMetaData data, TagNode node) { + String onclick = node.getAttribute("onclick"); + if (onclick != null) { + String path = makePath(node.getTagName(), "onclick"); + for (Pattern pattern : jsOnClickUrlPatterns) { + String url = patternJSExtract(pattern, onclick); + if (url != null) { + data.addHref(PATH, path, "url", url); + } + } + } + } private interface TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs); @@ -251,7 +307,7 @@ public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs l.add(makePath("A","href")); l.add("url"); l.add(url); - for(String a : new String[] {"target","alt","title"}) { + for(String a : new String[] {"target","alt","title","rel","hreflang","type"}) { String v = node.getAttribute(a); if(v != null) { l.add(a); @@ -278,7 +334,22 @@ public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs private static class AreaTagExtractor implements TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { - addBasicHrefs(data,node,"href"); + String url = node.getAttribute("href"); + if(url != null) { + ArrayList l = new ArrayList(); + l.add(PATH); + l.add(makePath("AREA","href")); + l.add("url"); + l.add(url); + for(String a : new String[] {"rel"}) { + String v = node.getAttribute(a); + if(v != null) { + l.add(a); + l.add(v); + } + } + data.addHref(l); + } } } @@ -291,12 +362,30 @@ public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs } } + private static class ButtonTagExtractor implements TagExtractor { + public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { + addBasicHrefs(data,node,"formaction"); + } + } + + private static class 
DivTagExtractor implements TagExtractor { + public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { + addHrefsOnclick(data,node); + } + } + private static class EmbedTagExtractor implements TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { addBasicHrefs(data,node,"src"); } } + private static class EmbedVideoTagExtractor implements TagExtractor { + public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { + addBasicHrefs(data,node,"src","poster"); + } + } + private static class FormTagExtractor implements TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { ArrayList l = new ArrayList(); @@ -324,21 +413,27 @@ public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs addBasicHrefs(data,node,"src"); } } + private static class IFrameTagExtractor implements TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { addBasicHrefs(data,node,"src"); } } + private static class ImgTagExtractor implements TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { addHrefWithAttrs(data,node,"src","alt","title"); + addBasicHrefs(data,node,"longdesc"); } } + private static class InputTagExtractor implements TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { - addBasicHrefs(data,node,"src"); + addBasicHrefs(data,node,"src","formaction"); + addHrefsOnclick(data,node); } } + private static class LinkTagExtractor implements TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { ArrayList l = getAttrListUrl(node,"href","rel","type"); @@ -347,19 +442,34 @@ public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs } } } + + private static class MenuitemTagExtractor implements TagExtractor { + public void extract(HTMLMetaData 
data, TagNode node, ExtractingParseObserver obs) { + addBasicHrefs(data,node,"icon"); + } + } + private static class MetaTagExtractor implements TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { - ArrayList l = getAttrList(node,"name","rel","content","http-equiv"); + ArrayList l = getAttrList(node,"name","rel","content","http-equiv","property"); if(l != null) { data.addMeta(l); } } } + private static class ObjectTagExtractor implements TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { - addBasicHrefs(data,node,"codebase","cdata"); + addBasicHrefs(data,node,"codebase","cdata","data"); + } + } + + private static class QuotationLinkTagExtractor implements TagExtractor { + public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { + addBasicHrefs(data,node,"cite"); } } + private static class ScriptTagExtractor implements TagExtractor { public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs) { ArrayList l = getAttrListUrl(node,"src","type"); @@ -368,40 +478,29 @@ public void extract(HTMLMetaData data, TagNode node, ExtractingParseObserver obs } } } + private void patternCSSExtract(HTMLMetaData data, Pattern pattern, String content) { Matcher m = pattern.matcher(content); int idx = 0; int contentLen = content.length(); - while((idx < contentLen) && m.find(idx)) { + if (contentLen > 100000) + // extract URLs only from the first 100 kB + contentLen = 100000; + while((idx < contentLen) && m.find()) { + idx = m.end(); String url = m.group(1); - int origUrlLength = url.length(); - int urlStart = m.start(1); - int urlEnd = m.end(1); - idx = urlEnd; - if(url.length() < 2) { - continue; - } - if ((url.charAt(0) == '(') - && (url.charAt(origUrlLength-1) == ')')) { - url = url.substring(1, origUrlLength - 1); - urlStart += 1; - origUrlLength -= 2; - } - if (url.charAt(0) == '"') { - url = url.substring(1, origUrlLength - 1); - 
urlStart += 1; - } else if (url.charAt(0) == '\'') { - url = url.substring(1, origUrlLength - 1); - urlStart += 1; - } else if (url.charAt(0) == '\\') { - if(url.length() == 2) - continue; - url = url.substring(2, origUrlLength - 2); - urlStart += 2; + url = cssUrlTrimPattern.matcher(url).replaceAll(""); + if (!url.isEmpty()) { + data.addHref("path","STYLE/#text","href", url); } - int urlLength = url.length(); - data.addHref("path","STYLE/#text","href",url); - idx += urlLength; } } + + private static String patternJSExtract(Pattern pattern, String content) { + Matcher m = pattern.matcher(content); + if (m.find()) { + return m.group(2); + } + return null; + } } diff --git a/src/main/java/org/archive/resource/html/HTMLMetaData.java b/src/main/java/org/archive/resource/html/HTMLMetaData.java index 024d9677..d995cf65 100644 --- a/src/main/java/org/archive/resource/html/HTMLMetaData.java +++ b/src/main/java/org/archive/resource/html/HTMLMetaData.java @@ -1,6 +1,7 @@ package org.archive.resource.html; import java.util.List; +import java.util.Locale; import java.util.logging.Logger; import org.archive.resource.MetaData; @@ -98,7 +99,7 @@ private void appendObj2(JSONObject o, String arr, String... 
a) { } catch(JSONException e) { try { - System.err.format("GotErr(%s) JSON(%s)(%s)", e.getMessage(), + System.err.format(Locale.ROOT, "GotErr(%s) JSON(%s)(%s)", e.getMessage(), o.toString(1),a.toString()); } catch (JSONException e1) { // TODO Auto-generated catch block diff --git a/src/main/java/org/archive/resource/html/HTMLResourceFactory.java b/src/main/java/org/archive/resource/html/HTMLResourceFactory.java index 935843f1..410449a1 100644 --- a/src/main/java/org/archive/resource/html/HTMLResourceFactory.java +++ b/src/main/java/org/archive/resource/html/HTMLResourceFactory.java @@ -1,9 +1,16 @@ package org.archive.resource.html; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; +import java.util.logging.Logger; +import org.archive.format.http.HttpHeaders; +import org.archive.format.json.JSONUtils; +import org.archive.format.text.charset.CharsetDetector; +import org.archive.format.text.charset.StandardCharsetDetector; import org.archive.format.text.html.CDATALexer; import org.archive.format.text.html.LexParser; import org.archive.resource.MetaData; @@ -13,17 +20,48 @@ import org.archive.resource.ResourceParseException; import org.htmlparser.lexer.Page; import org.htmlparser.util.ParserException; +import org.json.JSONException; +import org.json.JSONObject; public class HTMLResourceFactory implements ResourceFactory { + private static final Logger LOG = Logger.getLogger(HTMLResourceFactory.class.getName()); + + protected static final int CHARSET_GUESS_CHUNK_SIZE = 8192; + protected static final String HTTP_HEADER_PATH = "Envelope.Payload-Metadata.HTTP-Response-Metadata.Headers"; + + protected CharsetDetector charSetDetector = new StandardCharsetDetector(); + + public Resource getResource(InputStream is, MetaData parentMetaData, ResourceContainer container) throws ResourceParseException, IOException { HTMLMetaData hmd = new 
HTMLMetaData(parentMetaData); ExtractingParseObserver epo = new ExtractingParseObserver(hmd); LexParser parser = new LexParser(epo); CDATALexer lex = new CDATALexer(); - // TODO: figure out charset: - String charset = "UTF-8"; + + // guess charset based on HTTP header and sniffed content chunk + String charset = StandardCharsets.UTF_8.name(); + is = new BufferedInputStream(is, CHARSET_GUESS_CHUNK_SIZE); + byte[] chunk = new byte[CHARSET_GUESS_CHUNK_SIZE]; + is.mark(0); + int chunkSize = is.read(chunk, 0, CHARSET_GUESS_CHUNK_SIZE); + is.reset(); + if (chunkSize > 0) { + JSONObject headers = JSONUtils.extractObject(hmd.getTopMetaData(), HTTP_HEADER_PATH); + HttpHeaders httpHeaders = new HttpHeaders(); + if (headers.has("Content-Type")) { + try { + httpHeaders.add("Content-Type", headers.getString("Content-Type")); + } catch (JSONException e) { } + } + try { + charset = charSetDetector.getCharset(chunk, chunkSize, httpHeaders); + } catch (Exception e) { + LOG.severe("Failed to guess charset: " + e.getMessage()); + } + } + Page page; try { page = new Page(is, charset); diff --git a/src/main/java/org/archive/resource/http/HTTPHeadersResourceFactory.java b/src/main/java/org/archive/resource/http/HTTPHeadersResourceFactory.java index 79805090..eb25d821 100644 --- a/src/main/java/org/archive/resource/http/HTTPHeadersResourceFactory.java +++ b/src/main/java/org/archive/resource/http/HTTPHeadersResourceFactory.java @@ -31,6 +31,7 @@ public HTTPHeadersResourceFactory(String name, String type) { parser = new HttpHeaderParser(); } + @Override public Resource getResource(InputStream is, MetaData parentMetaData, ResourceContainer container) throws ResourceParseException, IOException { @@ -40,9 +41,13 @@ public Resource getResource(InputStream is, MetaData parentMetaData, if(headers.isCorrupt()) { parentMetaData.putBoolean(HTTP_HEADERS_CORRUPT, true); } - parentMetaData.putLong(PAYLOAD_LENGTH, bytes); - - parentMetaData.putLong(PAYLOAD_SLOP_BYTES, StreamCopy.readToEOF(is)); + if 
(!parentMetaData.has(PAYLOAD_LENGTH) || bytes != parentMetaData.getLong(PAYLOAD_LENGTH)) { + parentMetaData.putLong(PAYLOAD_LENGTH, bytes); + } + long trailingSlopBytes = StreamCopy.readToEOF(is); + if (!parentMetaData.has(PAYLOAD_SLOP_BYTES) || trailingSlopBytes > 0) { + parentMetaData.putLong(PAYLOAD_SLOP_BYTES, trailingSlopBytes); + } if(type != null) { parentMetaData.putString(PAYLOAD_CONTENT_TYPE, type); } diff --git a/src/main/java/org/archive/resource/http/HTTPResponseResource.java b/src/main/java/org/archive/resource/http/HTTPResponseResource.java index b5d189bc..cc325427 100644 --- a/src/main/java/org/archive/resource/http/HTTPResponseResource.java +++ b/src/main/java/org/archive/resource/http/HTTPResponseResource.java @@ -7,7 +7,6 @@ import java.security.NoSuchAlgorithmException; import java.util.logging.Logger; - import org.archive.format.http.HttpHeader; import org.archive.format.http.HttpResponse; import org.archive.format.http.HttpResponseMessage; @@ -20,8 +19,8 @@ import org.archive.util.io.EOFNotifyingInputStream; import org.archive.util.io.EOFObserver; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; @@ -65,7 +64,7 @@ public HTTPResponseResource(MetaData metaData, headers.putString(h.getName(),h.getValue()); } if(forceCheck && (length >= 0)) { - LimitInputStream lis = new LimitInputStream(response, length); + InputStream lis = ByteStreams.limit(response, length); countingIS = new CountingInputStream(lis); } else { countingIS = new CountingInputStream(response); diff --git a/src/main/java/org/archive/resource/warc/WARCResource.java b/src/main/java/org/archive/resource/warc/WARCResource.java index ab9b6900..a5e5ac35 100644 --- a/src/main/java/org/archive/resource/warc/WARCResource.java +++ b/src/main/java/org/archive/resource/warc/WARCResource.java @@ -5,6 +5,7 @@ import java.security.DigestInputStream; import java.security.MessageDigest; import 
java.security.NoSuchAlgorithmException; +import java.util.Locale; import org.archive.format.http.HttpHeader; import org.archive.format.http.HttpResponse; @@ -19,8 +20,8 @@ import org.archive.util.io.EOFObserver; import org.archive.util.io.PushBackOneByteInputStream; +import com.google.common.io.ByteStreams; import com.google.common.io.CountingInputStream; -import com.google.common.io.LimitInputStream; public class WARCResource extends AbstractResource implements EOFObserver, ResourceConstants { CountingInputStream countingIS; @@ -36,14 +37,14 @@ public WARCResource(MetaData metaData, ResourceContainer container, this.response = response; long length = -1; - metaData.putString(ENVELOPE_FORMAT, ENVELOPE_FORMAT_WARC); + metaData.putString(ENVELOPE_FORMAT, ENVELOPE_FORMAT_WARC_1_0); metaData.putLong(WARC_HEADER_LENGTH, response.getHeaderBytes()); MetaData fields = metaData.createChild(WARC_HEADER_METADATA); for(HttpHeader h : response.getHeaders()) { String name = h.getName(); String value = h.getValue(); fields.putString(name,value); - if(name.toLowerCase().equals("content-length")) { + if(name.toLowerCase(Locale.ROOT).equals("content-length")) { // TODO: catch formatexception length = Long.parseLong(value); } @@ -51,9 +52,9 @@ public WARCResource(MetaData metaData, ResourceContainer container, if(length >= 0) { countingIS = new CountingInputStream( - new LimitInputStream(response, length)); + ByteStreams.limit(response, length)); } else { - throw new ResourceParseException(null); + throw new ResourceParseException(new Exception("Zero or negative length: " + length)); } try { digIS = new DigestInputStream(countingIS, @@ -63,16 +64,20 @@ public WARCResource(MetaData metaData, ResourceContainer container, } } + @Override public InputStream getInputStream() { return new EOFNotifyingInputStream(digIS, this); } + @Override public void notifyEOF() throws IOException { - envelope.putLong(PAYLOAD_LENGTH, countingIS.getCount()); String digString = 
Base32.encode(digIS.getMessageDigest().digest()); - envelope.putString(PAYLOAD_DIGEST, "sha1:"+digString); if(container.isCompressed()) { + if (!metaData.has(PAYLOAD_LENGTH) || countingIS.getCount() != metaData.getLong(PAYLOAD_LENGTH)) { + metaData.putLong(PAYLOAD_LENGTH, countingIS.getCount()); + } metaData.putLong(PAYLOAD_SLOP_BYTES, StreamCopy.readToEOF(response)); + metaData.putString(PAYLOAD_DIGEST, "sha1:"+digString); } else { // consume trailing bytes if we can... InputStream raw = response.getInner(); @@ -81,11 +86,17 @@ public void notifyEOF() throws IOException { (PushBackOneByteInputStream) raw; long numNewlines = StreamCopy.skipChars(pb1bis, CR_NL_CHARS); if(numNewlines > 0) { + long payloadLength = countingIS.getCount(); + if (!metaData.has(PAYLOAD_LENGTH) || payloadLength != metaData.getLong(PAYLOAD_LENGTH)) { + metaData.putLong(PAYLOAD_LENGTH, payloadLength); + } metaData.putLong(PAYLOAD_SLOP_BYTES, numNewlines); + metaData.putString(PAYLOAD_DIGEST, "sha1:"+digString); } } } } + public MetaData getEnvelopeMetaData() { return envelope; } diff --git a/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java b/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java index 43041efb..8cc8c146 100644 --- a/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java +++ b/src/main/java/org/archive/resource/warc/record/WARCJSONMetaDataResourceFactory.java @@ -3,7 +3,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.nio.charset.Charset; import org.archive.resource.MetaData; import org.archive.resource.Resource; @@ -14,9 +13,9 @@ import org.json.JSONException; import org.json.JSONTokener; -public class WARCJSONMetaDataResourceFactory implements ResourceFactory, ResourceConstants { - private static final Charset UTF8 = Charset.forName("UTF-8"); +import static java.nio.charset.StandardCharsets.UTF_8; +public class 
WARCJSONMetaDataResourceFactory implements ResourceFactory, ResourceConstants { public WARCJSONMetaDataResourceFactory() { } @@ -27,7 +26,7 @@ public Resource getResource(InputStream is, MetaData parentMetaData, MetaData md; try { - md = new MetaData(new JSONTokener(new InputStreamReader(is, UTF8))); + md = new MetaData(new JSONTokener(new InputStreamReader(is, UTF_8))); } catch (JSONException e) { throw new ResourceParseException(e); } diff --git a/src/main/java/org/archive/resource/warc/record/WARCMetaDataResourceFactory.java b/src/main/java/org/archive/resource/warc/record/WARCMetaDataResourceFactory.java index 3f502665..ba8a35da 100644 --- a/src/main/java/org/archive/resource/warc/record/WARCMetaDataResourceFactory.java +++ b/src/main/java/org/archive/resource/warc/record/WARCMetaDataResourceFactory.java @@ -21,6 +21,7 @@ public WARCMetaDataResourceFactory() { parser = new HttpHeaderParser(); } + @Override public Resource getResource(InputStream is, MetaData parentMetaData, ResourceContainer container) throws ResourceParseException, IOException { @@ -33,8 +34,13 @@ public Resource getResource(InputStream is, MetaData parentMetaData, if(headers.isCorrupt()) { md.putBoolean(WARC_META_FIELDS_CORRUPT, true); } - md.putLong(PAYLOAD_SLOP_BYTES, StreamCopy.readToEOF(is)); - md.putLong(PAYLOAD_LENGTH, bytes); + long trailingSlopBytes = StreamCopy.readToEOF(is); + if (!parentMetaData.has(PAYLOAD_SLOP_BYTES) || trailingSlopBytes > 0) { + parentMetaData.putLong(PAYLOAD_SLOP_BYTES, trailingSlopBytes); + } + if (!parentMetaData.has(PAYLOAD_LENGTH) || bytes != parentMetaData.getLong(PAYLOAD_LENGTH)) { + parentMetaData.putLong(PAYLOAD_LENGTH, bytes); + } return new WARCMetaDataResource(md,container, headers); } catch (HttpParseException e) { diff --git a/src/main/java/org/archive/streamcontext/HTTP11Stream.java b/src/main/java/org/archive/streamcontext/HTTP11Stream.java index 06f51409..995dc53e 100755 --- a/src/main/java/org/archive/streamcontext/HTTP11Stream.java +++ 
b/src/main/java/org/archive/streamcontext/HTTP11Stream.java @@ -5,6 +5,7 @@ import java.io.InputStream; import java.net.URL; import java.net.URLConnection; +import java.util.Locale; public class HTTP11Stream extends AbstractBufferingStream { private URL url; @@ -42,7 +43,7 @@ public int doRead(byte[] b, int off, int len) throws IOException { public void doSeek(long offset) throws IOException { doClose(); conn = url.openConnection(); - conn.setRequestProperty("Range", String.format("bytes=%d-", offset)); + conn.setRequestProperty("Range", String.format(Locale.ROOT, "bytes=%d-", offset)); conn.connect(); is = conn.getInputStream(); } diff --git a/src/main/java/org/archive/uid/RecordIDGenerator.java b/src/main/java/org/archive/uid/RecordIDGenerator.java index 97f1a022..80cc5565 100644 --- a/src/main/java/org/archive/uid/RecordIDGenerator.java +++ b/src/main/java/org/archive/uid/RecordIDGenerator.java @@ -19,20 +19,18 @@ package org.archive.uid; import java.net.URI; -import java.net.URISyntaxException; import java.util.Map; /** * A record-id generator. * - * @contributor stack - * @contributor gojomo + * @author stack + * @author gojomo * @version $Revision$ $Date$ */ public interface RecordIDGenerator { /** * @return A URI that can serve as a record-id. - * @throws URISyntaxException */ public URI getRecordID(); diff --git a/src/main/java/org/archive/uid/package.html b/src/main/java/org/archive/uid/package.html index dc49f07b..bc69c9e3 100644 --- a/src/main/java/org/archive/uid/package.html +++ b/src/main/java/org/archive/uid/package.html @@ -8,13 +8,13 @@ Default is {@link org.archive.uid.UUIDGenerator}. To use another ID Generator, set the System Property org.archive.uid.GeneratorFactory.generator to point -at an alternate implementation of {@link org.archive.uid.Generator}. +at an alternate implementation of {@link org.archive.uid.RecordIDGenerator}.

TODO

  • MIME boundaries have upper-bound of 70 characters total including 'blank line' (CRLFCRLF) and two leading hyphens. Add to - {@link org.archive.uid.Generator} + {@link org.archive.uid.RecordIDGenerator} interface an upper-bound on generated ID length.
  • Add example of an actionable uid generator: e.g. http://archive.org/UID-SCHEME/ID diff --git a/src/main/java/org/archive/url/BasicURLCanonicalizer.java b/src/main/java/org/archive/url/BasicURLCanonicalizer.java index c09ad6e6..3957c9ef 100644 --- a/src/main/java/org/archive/url/BasicURLCanonicalizer.java +++ b/src/main/java/org/archive/url/BasicURLCanonicalizer.java @@ -6,7 +6,9 @@ import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CoderResult; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -15,18 +17,18 @@ /** * Canonicalizer that does more or less basic fixup. Based initially on rules * specified at https://developers.google.com/safe-browsing/developers_guide_v2# - * Canonicalization. These rules are designed for clients of google's + * Canonicalization. These rules are designed for clients of Google's * "experimental" Safe Browsing API to "check URLs against Google's * constantly-updated blacklists of suspected phishing and malware pages". * *

    - * This class differs from google in treatment of non-ascii input. Google's + * This class differs from Google in treatment of non-ascii input. Google's * rules don't really address this except with one example test case, which * seems to suggest taking raw input bytes and pct-encoding them byte for byte. * Since the input to this class consists of java strings, not raw bytes, that - * wouldn't be possible, even if deemed preferable. Instead + * wouldn't be possible, even if deemed preferable. Instead, * BasicURLCanonicalizer expresses non-ascii characters pct-encoded UTF-8. */ public class BasicURLCanonicalizer implements URLCanonicalizer { @@ -34,7 +36,9 @@ public class BasicURLCanonicalizer implements URLCanonicalizer { .compile("^(0[0-7]*)(\\.[0-7]+)?(\\.[0-7]+)?(\\.[0-7]+)?$"); Pattern DECIMAL_IP = Pattern .compile("^([1-9][0-9]*)(\\.[0-9]+)?(\\.[0-9]+)?(\\.[0-9]+)?$"); + Pattern MULTIDOT = Pattern.compile("\\.{2,}"); + @Override public void canonicalize(HandyURL url) { url.setHash(null); url.setAuthUser(minimalEscape(url.getAuthUser())); @@ -55,8 +59,7 @@ public void canonicalize(HandyURL url) { host = hostE; } - host = host.replaceAll("^\\.+", "").replaceAll("\\.\\.+", ".") - .replaceAll("\\.$", ""); + host = normalizeDots(host); } String ip = null; @@ -64,7 +67,7 @@ public void canonicalize(HandyURL url) { if (ip != null) { host = ip; } else if (host != null) { - host = escapeOnce(host.toLowerCase()); + host = escapeOnce(host.toLowerCase(Locale.ROOT)); } url.setHost(host); // now the path: @@ -74,6 +77,36 @@ public void canonicalize(HandyURL url) { url.setPath(escapeOnce(normalizePath(path))); } + /** + * Normalize dots in the host name. 
+ * + * @param host + * @return host name with all sequences of dots replaced with a single dot, + * and all leading and trailing dots removed + */ + private String normalizeDots(String host) { + if (host.indexOf('.') == -1) { + return host; + } + int start = 0, end = host.length(); + boolean changed = false; + while (start < end && host.charAt(start) == '.') { + start++; + changed = true; + } + while (end > start && host.charAt(end - 1) == '.') { + end--; + changed = true; + } + if (changed) { + host = host.substring(start, end); + } + if (host.contains("..")) { + host = MULTIDOT.matcher(host).replaceAll("."); + } + return host; + } + private static final Pattern SINGLE_FORWARDSLASH_PATTERN = Pattern .compile("/"); @@ -159,7 +192,7 @@ public String attemptIPFormats(String host) { // throws URIException { } ip[i] = octet; } - return String.format("%d.%d.%d.%d", ip[0], ip[1], ip[2], ip[3]); + return String.format(Locale.ROOT, "%d.%d.%d.%d", ip[0], ip[1], ip[2], ip[3]); } else { Matcher m2 = DECIMAL_IP.matcher(host); if (m2.matches()) { @@ -190,7 +223,7 @@ public String attemptIPFormats(String host) { // throws URIException { } ip[i] = octet; } - return String.format("%d.%d.%d.%d", ip[0], ip[1], ip[2], + return String.format(Locale.ROOT, "%d.%d.%d.%d", ip[0], ip[1], ip[2], ip[3]); } @@ -203,15 +236,16 @@ public String minimalEscape(String input) { return escapeOnce(unescapeRepeatedly(input)); } - protected static Charset _UTF8 = null; + protected static Charset _UTF8 = StandardCharsets.UTF_8; protected static Charset UTF8() { - if (_UTF8 == null) { - _UTF8 = Charset.forName("UTF-8"); - } return _UTF8; } + /** + * @param input String to be percent-encoded. Assumed to be fully unescaped. 
+ * @return percent-encoded string + */ public String escapeOnce(String input) { if (input == null) { return null; @@ -243,8 +277,21 @@ public String escapeOnce(String input) { */ sb = new StringBuilder(input.substring(0, i)); } + if (b == '%' && i < utf8bytes.length - 2) { + // Any hex escapes left at this point represent non-UTF-8 encoded characters + // Unescape them, so they don't get double escaped + int hex1 = getHex(utf8bytes[i + 1]); + if (hex1 >= 0) { + int hex2 = getHex(utf8bytes[i + 2]); + if (hex2 >= 0) { + i = i+2; + b = hex1 * 16 + hex2; + } + } + + } sb.append("%"); - String hex = Integer.toHexString(b).toUpperCase(); + String hex = Integer.toHexString(b).toUpperCase(Locale.ROOT); if (hex.length() == 1) { sb.append('0'); } @@ -337,7 +384,7 @@ public String decode(String input) { * Decodes bytes in bbuf as utf-8 and appends decoded characters to sb. If * decoding of any portion fails, appends the un-decodable %xx%xx sequence * extracted from inputStr instead of decoded characters. See "bad unicode" - * tests in GoogleCanonicalizerTest#testDecode(). Variables only make sense + * tests in BasicURLCanonicalizerTest#testDecode(). Variables only make sense * within context of {@link #decode(String)}. 
* * @param sb diff --git a/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java b/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java deleted file mode 100644 index 3d4d8581..00000000 --- a/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java +++ /dev/null @@ -1,7 +0,0 @@ -package org.archive.url; - -/** - * @deprecated use AggressiveIACanonicalizerRules - */ -public class DefaultIACanonicalizerRules extends AggressiveIACanonicalizerRules { -} diff --git a/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java b/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java deleted file mode 100644 index 3d1f985d..00000000 --- a/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java +++ /dev/null @@ -1,7 +0,0 @@ -package org.archive.url; - -/** - * @deprecated use AggressiveIAURLCanonicalizer - */ -public class DefaultIAURLCanonicalizer extends AggressiveIAURLCanonicalizer { -} diff --git a/src/main/java/org/archive/url/GoogleURLCanonicalizer.java b/src/main/java/org/archive/url/GoogleURLCanonicalizer.java deleted file mode 100644 index 388db8aa..00000000 --- a/src/main/java/org/archive/url/GoogleURLCanonicalizer.java +++ /dev/null @@ -1,7 +0,0 @@ -package org.archive.url; - -/** - * @deprecated use {@link BasicURLCanonicalizer} - */ -public class GoogleURLCanonicalizer extends BasicURLCanonicalizer { -} diff --git a/src/main/java/org/archive/url/HandyURL.java b/src/main/java/org/archive/url/HandyURL.java index 91539b3f..0c2c81f7 100644 --- a/src/main/java/org/archive/url/HandyURL.java +++ b/src/main/java/org/archive/url/HandyURL.java @@ -2,6 +2,7 @@ import java.net.MalformedURLException; import java.net.URL; +import java.util.Locale; public class HandyURL { public final static int DEFAULT_PORT = -1; @@ -277,7 +278,7 @@ public void setOpaque(String opaque) { } public String toDebugString() { - return String.format("Scheme(%s) UserName(%s) UserPass(%s) Host(%s) port(%d) Path(%s) Query(%s) Frag(%s)", + return 
String.format(Locale.ROOT, "Scheme(%s) UserName(%s) UserPass(%s) Host(%s) port(%d) Path(%s) Query(%s) Frag(%s)", scheme, authUser, authPass, host, port, path, query, hash); } diff --git a/src/main/java/org/archive/url/IAURLCanonicalizer.java b/src/main/java/org/archive/url/IAURLCanonicalizer.java index 029598f6..e964cd00 100644 --- a/src/main/java/org/archive/url/IAURLCanonicalizer.java +++ b/src/main/java/org/archive/url/IAURLCanonicalizer.java @@ -2,6 +2,7 @@ import java.util.Arrays; import java.util.Comparator; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -20,11 +21,11 @@ public void canonicalize(HandyURL url) { } if (rules.isSet(SCHEME_SETTINGS, SCHEME_LOWERCASE)) { if (url.getScheme() != null) { - url.setScheme(url.getScheme().toLowerCase()); + url.setScheme(url.getScheme().toLowerCase(Locale.ROOT)); } } if(rules.isSet(HOST_SETTINGS, HOST_LOWERCASE)) { - url.setHost(url.getHost().toLowerCase()); + url.setHost(url.getHost().toLowerCase(Locale.ROOT)); } if(rules.isSet(HOST_SETTINGS, HOST_MASSAGE)) { url.setHost(massageHost(url.getHost())); @@ -46,7 +47,7 @@ public void canonicalize(HandyURL url) { url.setPath(null); } else { if(rules.isSet(PATH_SETTINGS, PATH_LOWERCASE)) { - path = path.toLowerCase(); + path = path.toLowerCase(Locale.ROOT); } if(rules.isSet(PATH_SETTINGS, PATH_STRIP_SESSION_ID)) { path = URLRegexTransformer.stripPathSessionID(path); @@ -63,25 +64,24 @@ public void canonicalize(HandyURL url) { String query = url.getQuery(); if(query != null) { - if(query.equals("")) { - if(rules.isSet(QUERY_SETTINGS, QUERY_STRIP_EMPTY)) { - query = null; - } - } else { - // we have a query... what to do with it? + // we have a query... what to do with it? 
- // first remove uneeded: - if(rules.isSet(QUERY_SETTINGS, QUERY_STRIP_SESSION_ID)) { - query = URLRegexTransformer.stripQuerySessionID(query); - } - // lower-case: - if(rules.isSet(QUERY_SETTINGS, QUERY_LOWERCASE)) { - query = query.toLowerCase(); - } - // re-order? - if(rules.isSet(QUERY_SETTINGS, QUERY_ALPHA_REORDER)) { - query = alphaReorderQuery(query); - } + // first remove uneeded: + if(rules.isSet(QUERY_SETTINGS, QUERY_STRIP_SESSION_ID)) { + query = URLRegexTransformer.stripQuerySessionID(query); + } + // lower-case: + if(rules.isSet(QUERY_SETTINGS, QUERY_LOWERCASE)) { + query = query.toLowerCase(Locale.ROOT); + } + // re-order? + if(rules.isSet(QUERY_SETTINGS, QUERY_ALPHA_REORDER)) { + query = alphaReorderQuery(query); + } + if(query.equals("")) { + if(rules.isSet(QUERY_SETTINGS, QUERY_STRIP_EMPTY)) { + query = null; + } } url.setQuery(query); } @@ -156,7 +156,7 @@ public static String massageHost(String host) { return host; } public static int getDefaultPort(String scheme) { - String lcScheme = scheme.toLowerCase(); + String lcScheme = scheme.toLowerCase(Locale.ROOT); if(lcScheme.equals("http")) { return 80; } else if(lcScheme.equals("https")) { diff --git a/src/main/java/org/archive/url/LaxURI.java b/src/main/java/org/archive/url/LaxURI.java index 807333d3..9b7485c7 100644 --- a/src/main/java/org/archive/url/LaxURI.java +++ b/src/main/java/org/archive/url/LaxURI.java @@ -18,12 +18,12 @@ */ package org.archive.url; +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.BitSet; - -import org.apache.commons.httpclient.URI; -import org.apache.commons.httpclient.URIException; -import org.apache.commons.httpclient.util.EncodingUtil; +import java.util.Locale; /** * URI subclass which allows partial/inconsistent encoding, matching @@ -121,13 +121,13 @@ protected static String decode(String component, String charset) "Component array of chars may 
not be null"); } byte[] rawdata = null; - // try { - rawdata = LaxURLCodec.decodeUrlLoose(EncodingUtil - .getAsciiBytes(component)); - // } catch (DecoderException e) { - // throw new URIException(e.getMessage()); - // } - return EncodingUtil.getString(rawdata, charset); + rawdata = LaxURLCodec.decodeUrlLoose(component.getBytes(StandardCharsets.US_ASCII)); + try { + Charset cs = Charset.forName(charset); + return new String(rawdata, cs); + } catch (IllegalCharsetNameException e) { + return new String(rawdata, StandardCharsets.US_ASCII); + } } // overidden to lax() the acceptable-char BitSet passed in @@ -183,7 +183,7 @@ protected BitSet lax(BitSet generous) { * two instances to one where possible, slimming * instances. * - * @see org.apache.commons.httpclient.URI#parseAuthority(java.lang.String, boolean) + * @see URI#parseAuthority(java.lang.String, boolean) */ protected void parseAuthority(String original, boolean escaped) throws URIException { @@ -204,14 +204,14 @@ protected void parseAuthority(String original, boolean escaped) * long-lived instance from a static field, saving 12-14 bytes * per instance. * - * @see org.apache.commons.httpclient.URI#setURI() + * @see URI#setURI() */ protected void setURI() { if (_scheme != null) { if (_scheme.length == 4 && Arrays.equals(_scheme, HTTP_SCHEME)) { _scheme = HTTP_SCHEME; } else if (_scheme.length == 5 - && Arrays.equals(_scheme, HTTP_SCHEME)) { + && Arrays.equals(_scheme, HTTPS_SCHEME)) { _scheme = HTTPS_SCHEME; } } @@ -243,8 +243,8 @@ protected void setURI() { * $3 = //jakarta.apache.org * authority = $4 = jakarta.apache.org * path = $5 = /ietf/uri/ - * $6 = - * query = $7 = + * $6 = <undefined> + * query = $7 = <undefined> * $8 = #Related * fragment = $9 = Related *

    @@ -324,7 +324,7 @@ protected void parseUriReference(String original, boolean escaped) *

    */ if (at > 0 && at < length && tmp.charAt(at) == ':') { - char[] target = tmp.substring(0, at).toLowerCase().toCharArray(); + char[] target = tmp.substring(0, at).toLowerCase(Locale.ROOT).toCharArray(); if (validate(target, scheme)) { _scheme = target; from = ++at; diff --git a/src/main/java/org/archive/url/LaxURLCodec.java b/src/main/java/org/archive/url/LaxURLCodec.java index e27d9de0..b68a0c19 100644 --- a/src/main/java/org/archive/url/LaxURLCodec.java +++ b/src/main/java/org/archive/url/LaxURLCodec.java @@ -20,17 +20,16 @@ import java.io.ByteArrayOutputStream; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.BitSet; import org.apache.commons.codec.net.URLCodec; -import com.google.common.base.Charsets; - /** * @author gojomo */ public class LaxURLCodec extends URLCodec { - public static LaxURLCodec DEFAULT = new LaxURLCodec("UTF-8"); + public static LaxURLCodec DEFAULT = new LaxURLCodec(StandardCharsets.UTF_8.name()); // passthrough constructor public LaxURLCodec(String encoding) { @@ -155,6 +154,6 @@ public String encode(BitSet safe, String pString, String cs) if (pString == null) { return null; } - return new String(encodeUrl(safe,pString.getBytes(cs)), Charsets.US_ASCII); + return new String(encodeUrl(safe,pString.getBytes(cs)), StandardCharsets.US_ASCII); } } diff --git a/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java b/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java index cd579eb0..830b7b92 100644 --- a/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java +++ b/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java @@ -1,10 +1,10 @@ package org.archive.url; public class NonMassagingIAURLCanonicalizer implements URLCanonicalizer { - private static final GoogleURLCanonicalizer google = - new GoogleURLCanonicalizer(); + private static final BasicURLCanonicalizer basic = + new BasicURLCanonicalizer(); private static CanonicalizeRules 
nonMassagingRules = - new DefaultIACanonicalizerRules(); + new AggressiveIACanonicalizerRules(); static { nonMassagingRules.setRule(CanonicalizeRules.HOST_SETTINGS, CanonicalizeRules.HOST_LOWERCASE); @@ -14,7 +14,7 @@ public class NonMassagingIAURLCanonicalizer implements URLCanonicalizer { public void canonicalize(HandyURL url) { // just google's stuff, followed by the IA default stuff: - google.canonicalize(url); + basic.canonicalize(url); ia.canonicalize(url); } } diff --git a/src/main/java/org/archive/url/SURT.java b/src/main/java/org/archive/url/SURT.java index 2c8e1b02..9598f458 100644 --- a/src/main/java/org/archive/url/SURT.java +++ b/src/main/java/org/archive/url/SURT.java @@ -2,11 +2,10 @@ import java.io.BufferedReader; import java.io.InputStreamReader; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.logging.Logger; -import org.apache.commons.httpclient.URIException; import org.archive.util.iterator.AbstractPeekableIterator; public class SURT { @@ -34,7 +33,7 @@ public static String toSURT(String input) { } public static void main(String[] args) { String line; - InputStreamReader isr = new InputStreamReader(System.in,Charset.forName("UTF-8")); + InputStreamReader isr = new InputStreamReader(System.in, StandardCharsets.UTF_8); BufferedReader br = new BufferedReader(isr); Iterator i = AbstractPeekableIterator.wrapReader(br); while(i.hasNext()) { diff --git a/src/main/java/org/archive/url/SURTTokenizer.java b/src/main/java/org/archive/url/SURTTokenizer.java index da8f58f2..52b80a03 100644 --- a/src/main/java/org/archive/url/SURTTokenizer.java +++ b/src/main/java/org/archive/url/SURTTokenizer.java @@ -19,7 +19,6 @@ */ package org.archive.url; -import org.apache.commons.httpclient.URIException; import org.archive.util.SURT; /** diff --git a/src/main/java/org/archive/url/URI.java b/src/main/java/org/archive/url/URI.java new file mode 100644 index 00000000..492f7772 --- /dev/null +++ 
b/src/main/java/org/archive/url/URI.java @@ -0,0 +1,3984 @@ +/* + * $HeadURL: https://svn.apache.org/repos/asf/jakarta/httpcomponents/oac.hc3x/tags/HTTPCLIENT_3_1/src/java/org/apache/commons/httpclient/URI.java $ + * $Revision: 564973 $ + * $Date: 2007-08-11 22:51:47 +0200 (Sat, 11 Aug 2007) $ + * + * ==================================================================== + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . 
+ * + */ + +package org.archive.url; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.net.URLCodec; + +import java.io.*; +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Hashtable; +import java.util.Locale; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396. + * This class has the purpose of supporting the parsing of a URI reference to + * extend any specific protocols, the character encoding of the protocol to + * be transported and the charset of the document. + *

    + * A URI is always in an "escaped" form, since escaping or unescaping a + * completed URI might change its semantics. + *

    + * Implementers should be careful not to escape or unescape the same string + * more than once, since unescaping an already unescaped string might lead to + * misinterpreting a percent data character as another escaped character, + * or vice versa in the case of escaping an already escaped string. + *

    + * In order to avoid these problems, data types are used as follows: + *

    + *   URI character sequence: char
    + *   octet sequence: byte
    + *   original character sequence: String
    + * 

    + * + * So, a URI is a sequence of characters as an array of a char type, which + * is not always represented as a sequence of octets as an array of byte. + *

    + * + * URI Syntactic Components + *

    + * - In general, written as follows:
    + *   Absolute URI = <scheme>:<scheme-specific-part>
    + *   Generic URI = <scheme>://<authority><path>?<query>
    + *
    + * - Syntax
    + *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
    + *   hier_part     = ( net_path | abs_path ) [ "?" query ]
    + *   net_path      = "//" authority [ abs_path ]
    + *   abs_path      = "/"  path_segments
    + * 

    + * + * The following examples illustrate URI that are in common use. + *

    + * ftp://ftp.is.co.za/rfc/rfc1808.txt
    + *    -- ftp scheme for File Transfer Protocol services
    + * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
    + *    -- gopher scheme for Gopher and Gopher+ Protocol services
    + * http://www.math.uio.no/faq/compression-faq/part1.html
    + *    -- http scheme for Hypertext Transfer Protocol services
    + * mailto:mduerst@ifi.unizh.ch
    + *    -- mailto scheme for electronic mail addresses
    + * news:comp.infosystems.www.servers.unix
    + *    -- news scheme for USENET news groups and articles
    + * telnet://melvyl.ucop.edu/
    + *    -- telnet scheme for interactive services via the TELNET Protocol
    + * 
    + * Please note that there are many modifications from URL (RFC 1738) and + * relative URL (RFC 1808). + *

    + * The expressions for a URI + *

    + * For escaped URI forms
    + *  - URI(char[]) // constructor
    + *  - char[] getRawXxx() // method
    + *  - String getEscapedXxx() // method
    + *  - String toString() // method
    + *
    + * For unescaped URI forms
    + *  - URI(String) // constructor
    + *  - String getXXX() // method
    + * 

    + * + * @author Sung-Gu + * @author Mike Bowler + * @version $Revision: 564973 $ $Date: 2002/03/14 15:14:01 + */ +class URI implements Cloneable, Comparable, Serializable { + + + // ----------------------------------------------------------- Constructors + + /** Create an instance as an internal use */ + protected URI() { + } + + /** + * Construct a URI from a string with the given charset. The input string can + * be either in escaped or unescaped form. + * + * @param s URI character sequence + * @param escaped true if URI character sequence is in escaped form. + * false otherwise. + * @param charset the charset string to do escape encoding, if required + * + * @throws URIException If the URI cannot be created. + * @throws NullPointerException if input string is null + * + * @see #getProtocolCharset + * + * @since 3.0 + */ + public URI(String s, boolean escaped, String charset) + throws URIException, NullPointerException { + protocolCharset = charset; + parseUriReference(s, escaped); + } + + /** + * Construct a URI from a string with the given charset. The input string can + * be either in escaped or unescaped form. + * + * @param s URI character sequence + * @param escaped true if URI character sequence is in escaped form. + * false otherwise. + * + * @throws URIException If the URI cannot be created. + * @throws NullPointerException if input string is null + * + * @see #getProtocolCharset + * + * @since 3.0 + */ + public URI(String s, boolean escaped) + throws URIException, NullPointerException { + parseUriReference(s, escaped); + } + + /** + * Construct a URI as an escaped form of a character array with the given + * charset. + * + * @param escaped the URI character sequence + * @param charset the charset string to do escape encoding + * @throws URIException If the URI cannot be created. 
+ * @throws NullPointerException if escaped is null + * @see #getProtocolCharset + * + * @deprecated Use #URI(String, boolean, String) + */ + public URI(char[] escaped, String charset) + throws URIException, NullPointerException { + protocolCharset = charset; + parseUriReference(new String(escaped), true); + } + + + /** + * Construct a URI as an escaped form of a character array. + * An URI can be placed within double-quotes or angle brackets like + * "http://test.com/" and <http://test.com/> + * + * @param escaped the URI character sequence + * @throws URIException If the URI cannot be created. + * @throws NullPointerException if escaped is null + * @see #getDefaultProtocolCharset + * + * @deprecated Use #URI(String, boolean) + */ + public URI(char[] escaped) + throws URIException, NullPointerException { + parseUriReference(new String(escaped), true); + } + + + /** + * Construct a URI from the given string with the given charset. + * + * @param original the string to be represented to URI character sequence + * It is one of absoluteURI and relativeURI. + * @param charset the charset string to do escape encoding + * @throws URIException If the URI cannot be created. + * @see #getProtocolCharset + * + * @deprecated Use #URI(String, boolean, String) + */ + public URI(String original, String charset) throws URIException { + protocolCharset = charset; + parseUriReference(original, false); + } + + + /** + * Construct a URI from the given string. + *

    +     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    +     * 

    + * An URI can be placed within double-quotes or angle brackets like + * "http://test.com/" and <http://test.com/> + * + * @param original the string to be represented to URI character sequence + * It is one of absoluteURI and relativeURI. + * @throws URIException If the URI cannot be created. + * @see #getDefaultProtocolCharset + * + * @deprecated Use #URI(String, boolean) + */ + public URI(String original) throws URIException { + parseUriReference(original, false); + } + + + /** + * Construct a general URI from the given components. + *

    +     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    +     *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
    +     *   opaque_part   = uric_no_slash *uric
    +     * 

    + * It's for absolute URI = <scheme>:<scheme-specific-part># + * <fragment>. + * + * @param scheme the scheme string + * @param schemeSpecificPart scheme_specific_part + * @param fragment the fragment string + * @throws URIException If the URI cannot be created. + * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String schemeSpecificPart, String fragment) + throws URIException { + + // validate and construct the URI character sequence + if (scheme == null) { + throw new URIException(URIException.PARSING, "scheme required"); + } + char[] s = scheme.toLowerCase(Locale.ROOT).toCharArray(); + if (validate(s, URI.scheme)) { + _scheme = s; // is_absoluteURI + } else { + throw new URIException(URIException.PARSING, "incorrect scheme"); + } + _opaque = encode(schemeSpecificPart, allowed_opaque_part, + getProtocolCharset()); + // Set flag: the scheme-specific part is stored as an opaque part + _is_opaque_part = true; + _fragment = fragment == null ? null : fragment.toCharArray(); + setURI(); + } + + + /** + * Construct a general URI from the given components. + *

    +     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    +     *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
    +     *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    +     *   hier_part     = ( net_path | abs_path ) [ "?" query ]
    +     * 

    + * It's for absolute URI = <scheme>:<path>?<query>#< + * fragment> and relative URI = <path>?<query>#<fragment + * >. + * + * @param scheme the scheme string + * @param authority the authority string + * @param path the path string + * @param query the query string + * @param fragment the fragment string + * @throws URIException If the new URI cannot be created. + * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String authority, String path, String query, + String fragment) throws URIException { + + // validate and construct the URI character sequence + StringBuilder buff = new StringBuilder(); // method-local, so no synchronization is needed + if (scheme != null) { + buff.append(scheme); + buff.append(':'); + } + if (authority != null) { + buff.append("//"); + buff.append(authority); + } + if (path != null) { // accept empty path + if ((scheme != null || authority != null) + && !path.startsWith("/")) { + throw new URIException(URIException.PARSING, + "abs_path requested"); + } + buff.append(path); + } + if (query != null) { + buff.append('?'); + buff.append(query); + } + if (fragment != null) { + buff.append('#'); + buff.append(fragment); + } + parseUriReference(buff.toString(), false); + } + + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @throws URIException If the new URI cannot be created. + * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String userinfo, String host, int port) + throws URIException { + + this(scheme, userinfo, host, port, null, null, null); + } + + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @param path the path string + * @throws URIException If the new URI cannot be created.
+ * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String userinfo, String host, int port, + String path) throws URIException { + + this(scheme, userinfo, host, port, path, null, null); + } + + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @param path the path string + * @param query the query string + * @throws URIException If the new URI cannot be created. + * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String userinfo, String host, int port, + String path, String query) throws URIException { + + this(scheme, userinfo, host, port, path, query, null); + } + + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @param path the path string + * @param query the query string + * @param fragment the fragment string + * @throws URIException If the new URI cannot be created. + * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String userinfo, String host, int port, + String path, String query, String fragment) throws URIException { + + this(scheme, (host == null) ? null + : ((userinfo != null) ? userinfo + '@' : "") + host + + ((port != -1) ? ":" + port : ""), path, query, fragment); + } + + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param host the host string + * @param path the path string + * @param fragment the fragment string + * @throws URIException If the new URI cannot be created. 
+ * @see #getDefaultProtocolCharset + */ + public URI(String scheme, String host, String path, String fragment) + throws URIException { + + this(scheme, host, path, null, fragment); + } + + + /** + * Construct a general URI with the given relative URI string. + * + * @param base the base URI + * @param relative the relative URI string + * @throws URIException If the new URI cannot be created. + * + * @deprecated Use #URI(URI, String, boolean) + */ + public URI(URI base, String relative) throws URIException { + this(base, new URI(relative)); + } + + + /** + * Construct a general URI with the given relative URI string. + * + * @param base the base URI + * @param relative the relative URI string + * @param escaped true if URI character sequence is in escaped form. + * false otherwise. + * + * @throws URIException If the new URI cannot be created. + * + * @since 3.0 + */ + public URI(URI base, String relative, boolean escaped) throws URIException { + this(base, new URI(relative, escaped)); + } + + + /** + * Construct a general URI with the given relative URI. + *

    +     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    +     *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    +     * 

    + * Resolving Relative References to Absolute Form. + * + * Examples of Resolving Relative URI References + * + * Within an object with a well-defined base URI of + *

    +     *   http://a/b/c/d;p?q
    +     * 

    + * the relative URI would be resolved as follows: + * + * Normal Examples + * + *

    +     *   g:h           =  g:h
    +     *   g             =  http://a/b/c/g
    +     *   ./g           =  http://a/b/c/g
    +     *   g/            =  http://a/b/c/g/
    +     *   /g            =  http://a/g
    +     *   //g           =  http://g
    +     *   ?y            =  http://a/b/c/?y
    +     *   g?y           =  http://a/b/c/g?y
    +     *   #s            =  (current document)#s
    +     *   g#s           =  http://a/b/c/g#s
    +     *   g?y#s         =  http://a/b/c/g?y#s
    +     *   ;x            =  http://a/b/c/;x
    +     *   g;x           =  http://a/b/c/g;x
    +     *   g;x?y#s       =  http://a/b/c/g;x?y#s
    +     *   .             =  http://a/b/c/
    +     *   ./            =  http://a/b/c/
    +     *   ..            =  http://a/b/
    +     *   ../           =  http://a/b/
    +     *   ../g          =  http://a/b/g
    +     *   ../..         =  http://a/
    +     *   ../../        =  http://a/ 
    +     *   ../../g       =  http://a/g
    +     * 

    + * + * Some URI schemes do not allow a hierarchical syntax matching the + * syntax, and thus cannot use relative references. + * + * @param base the base URI + * @param relative the relative URI + * @throws URIException If the new URI cannot be created. + */ + public URI(URI base, URI relative) throws URIException { + + if (base._scheme == null) { + throw new URIException(URIException.PARSING, "base URI required"); + } + if (base._scheme != null) { + this._scheme = base._scheme; + this._authority = base._authority; + this._is_net_path = base._is_net_path; + } + if (base._is_opaque_part || relative._is_opaque_part) { + this._scheme = base._scheme; + this._is_opaque_part = base._is_opaque_part + || relative._is_opaque_part; + this._opaque = relative._opaque; + this._fragment = relative._fragment; + this.setURI(); + return; + } + boolean schemesEqual = Arrays.equals(base._scheme,relative._scheme); + if (relative._scheme != null + && (!schemesEqual || relative._authority != null)) { + this._scheme = relative._scheme; + this._is_net_path = relative._is_net_path; + this._authority = relative._authority; + if (relative._is_server) { + this._is_server = relative._is_server; + this._userinfo = relative._userinfo; + this._host = relative._host; + this._port = relative._port; + } else if (relative._is_reg_name) { + this._is_reg_name = relative._is_reg_name; + } + this._is_abs_path = relative._is_abs_path; + this._is_rel_path = relative._is_rel_path; + this._path = relative._path; + } else if (base._authority != null && relative._scheme == null) { + this._is_net_path = base._is_net_path; + this._authority = base._authority; + if (base._is_server) { + this._is_server = base._is_server; + this._userinfo = base._userinfo; + this._host = base._host; + this._port = base._port; + } else if (base._is_reg_name) { + this._is_reg_name = base._is_reg_name; + } + } + if (relative._authority != null) { + this._is_net_path = relative._is_net_path; + this._authority = 
relative._authority; + if (relative._is_server) { + this._is_server = relative._is_server; + this._userinfo = relative._userinfo; + this._host = relative._host; + this._port = relative._port; + } else if (relative._is_reg_name) { + this._is_reg_name = relative._is_reg_name; + } + this._is_abs_path = relative._is_abs_path; + this._is_rel_path = relative._is_rel_path; + this._path = relative._path; + } + // resolve the path and query if necessary + if (relative._authority == null + && (relative._scheme == null || schemesEqual)) { + if ((relative._path == null || relative._path.length == 0) + && relative._query == null) { + // handle a reference to the current document, see RFC 2396 + // section 5.2 step 2 + this._path = base._path; + this._query = base._query; + } else { + this._path = resolvePath(base._path, relative._path); + } + } + // base._query removed + if (relative._query != null) { + this._query = relative._query; + } + // base._fragment removed + if (relative._fragment != null) { + this._fragment = relative._fragment; + } + this.setURI(); + // reparse the newly built URI, this will ensure that all flags are set correctly. + // TODO there must be a better way to do this + parseUriReference(new String(_uri), true); + } + + // --------------------------------------------------- Instance Variables + + /** Version ID for serialization */ + static final long serialVersionUID = 604752400577948726L; + + + /** + * Cache the hash code for this URI. + */ + protected int hash = 0; + + + /** + * This Uniform Resource Identifier (URI). + * The URI is always in an "escaped" form, since escaping or unescaping + * a completed URI might change its semantics. + */ + protected char[] _uri = null; + + + /** + * The charset of the protocol used by this URI instance. + */ + protected String protocolCharset = null; + + + /** + * The default charset of the protocol. 
RFC 2277, 2396 + */ + protected static String defaultProtocolCharset = UTF_8.name(); + + + /** + * The default charset of the document. RFC 2277, 2396 + * The platform's charset is used for the document by default. + */ + protected static String defaultDocumentCharset = null; + protected static String defaultDocumentCharsetByLocale = null; + protected static String defaultDocumentCharsetByPlatform = null; + // Static initializer for defaultDocumentCharset + static { + Locale locale = Locale.getDefault(); + // in order to support backward compatiblity + if (locale != null) { + defaultDocumentCharsetByLocale = + LocaleToCharsetMap.getCharset(locale); + // set the default document charset + defaultDocumentCharset = defaultDocumentCharsetByLocale; + } + // in order to support platform encoding + try { + defaultDocumentCharsetByPlatform = System.getProperty("file.encoding"); + } catch (SecurityException ignore) { + } + if (defaultDocumentCharset == null) { + // set the default document charset + defaultDocumentCharset = defaultDocumentCharsetByPlatform; + } + } + + + /** + * The scheme. + */ + protected char[] _scheme = null; + + + /** + * The opaque. + */ + protected char[] _opaque = null; + + + /** + * The authority. + */ + protected char[] _authority = null; + + + /** + * The userinfo. + */ + protected char[] _userinfo = null; + + + /** + * The host. + */ + protected char[] _host = null; + + + /** + * The port. + */ + protected int _port = -1; + + + /** + * The path. + */ + protected char[] _path = null; + + + /** + * The query. + */ + protected char[] _query = null; + + + /** + * The fragment. + */ + protected char[] _fragment = null; + + + /** + * The root path. 
+ */ + protected static final char[] rootPath = { '/' }; + + // ---------------------- Generous characters for each component validation + + /** + * The percent "%" character always has the reserved purpose of being the + * escape indicator, it must be escaped as "%25" in order to be used as + * data within a URI. + */ + protected static final BitSet percent = new BitSet(256); + // Static initializer for percent + static { + percent.set('%'); + } + + + /** + * BitSet for digit. + *

    +     * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
    +     *            "8" | "9"
    +     * 

    + */ + protected static final BitSet digit = new BitSet(256); + // Static initializer for digit + static { + for (int i = '0'; i <= '9'; i++) { + digit.set(i); + } + } + + + /** + * BitSet for alpha. + *

    +     * alpha         = lowalpha | upalpha
    +     * 

    + */ + protected static final BitSet alpha = new BitSet(256); + // Static initializer for alpha + static { + for (int i = 'a'; i <= 'z'; i++) { + alpha.set(i); + } + for (int i = 'A'; i <= 'Z'; i++) { + alpha.set(i); + } + } + + + /** + * BitSet for alphanum (join of alpha & digit). + *

    +     *  alphanum      = alpha | digit
    +     * 

    + */ + protected static final BitSet alphanum = new BitSet(256); + // Static initializer for alphanum + static { + alphanum.or(alpha); + alphanum.or(digit); + } + + + /** + * BitSet for hex. + *

    +     * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
    +     *                         "a" | "b" | "c" | "d" | "e" | "f"
    +     * 

    + */ + protected static final BitSet hex = new BitSet(256); + // Static initializer for hex + static { + hex.or(digit); + for (int i = 'a'; i <= 'f'; i++) { + hex.set(i); + } + for (int i = 'A'; i <= 'F'; i++) { + hex.set(i); + } + } + + + /** + * BitSet for escaped. + *

    +     * escaped       = "%" hex hex
    +     * 

    + */ + protected static final BitSet escaped = new BitSet(256); + // Static initializer for escaped + static { + escaped.or(percent); + escaped.or(hex); + } + + + /** + * BitSet for mark. + *

    +     * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
    +     *                 "(" | ")"
    +     * 

    + */ + protected static final BitSet mark = new BitSet(256); + // Static initializer for mark + static { + mark.set('-'); + mark.set('_'); + mark.set('.'); + mark.set('!'); + mark.set('~'); + mark.set('*'); + mark.set('\''); + mark.set('('); + mark.set(')'); + } + + + /** + * Data characters that are allowed in a URI but do not have a reserved + * purpose are called unreserved. + *

    +     * unreserved    = alphanum | mark
    +     * 

    + */ + protected static final BitSet unreserved = new BitSet(256); + // Static initializer for unreserved + static { + unreserved.or(alphanum); + unreserved.or(mark); + } + + + /** + * BitSet for reserved. + *

    +     * reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
    +     *                 "$" | ","
    +     * 

    + */ + protected static final BitSet reserved = new BitSet(256); + // Static initializer for reserved + static { + reserved.set(';'); + reserved.set('/'); + reserved.set('?'); + reserved.set(':'); + reserved.set('@'); + reserved.set('&'); + reserved.set('='); + reserved.set('+'); + reserved.set('$'); + reserved.set(','); + } + + + /** + * BitSet for uric. + *

    +     * uric          = reserved | unreserved | escaped
    +     * 

    + */ + protected static final BitSet uric = new BitSet(256); + // Static initializer for uric + static { + uric.or(reserved); + uric.or(unreserved); + uric.or(escaped); + } + + + /** + * BitSet for fragment (alias for uric). + *

    +     * fragment      = *uric
    +     * 

    + */ + protected static final BitSet fragment = uric; + + + /** + * BitSet for query (alias for uric). + *

    +     * query         = *uric
    +     * 

    + */ + protected static final BitSet query = uric; + + + /** + * BitSet for pchar. + *

    +     * pchar         = unreserved | escaped |
    +     *                 ":" | "@" | "&" | "=" | "+" | "$" | ","
    +     * 

    + */ + protected static final BitSet pchar = new BitSet(256); + // Static initializer for pchar + static { + pchar.or(unreserved); + pchar.or(escaped); + pchar.set(':'); + pchar.set('@'); + pchar.set('&'); + pchar.set('='); + pchar.set('+'); + pchar.set('$'); + pchar.set(','); + } + + + /** + * BitSet for param (alias for pchar). + *

    +     * param         = *pchar
    +     * 

    + */ + protected static final BitSet param = pchar; + + + /** + * BitSet for segment. + *

    +     * segment       = *pchar *( ";" param )
    +     * 

    + */ + protected static final BitSet segment = new BitSet(256); + // Static initializer for segment + static { + segment.or(pchar); + segment.set(';'); + segment.or(param); + } + + + /** + * BitSet for path segments. + *

    +     * path_segments = segment *( "/" segment )
    +     * 

    + */ + protected static final BitSet path_segments = new BitSet(256); + // Static initializer for path_segments + static { + path_segments.set('/'); + path_segments.or(segment); + } + + + /** + * URI absolute path. + *

    +     * abs_path      = "/"  path_segments
    +     * 

    + */ + protected static final BitSet abs_path = new BitSet(256); + // Static initializer for abs_path + static { + abs_path.set('/'); + abs_path.or(path_segments); + } + + + /** + * URI bitset for encoding typical non-slash characters. + *

    +     * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
    +     *                 "&" | "=" | "+" | "$" | ","
    +     * 

    + */ + protected static final BitSet uric_no_slash = new BitSet(256); + // Static initializer for uric_no_slash + static { + uric_no_slash.or(unreserved); + uric_no_slash.or(escaped); + uric_no_slash.set(';'); + uric_no_slash.set('?'); + uric_no_slash.set(':'); + uric_no_slash.set('@'); + uric_no_slash.set('&'); + uric_no_slash.set('='); + uric_no_slash.set('+'); + uric_no_slash.set('$'); + uric_no_slash.set(','); + } + + + /** + * URI bitset that combines uric_no_slash and uric. + *

    +     * opaque_part   = uric_no_slash *uric
    +     * 

    + */ + protected static final BitSet opaque_part = new BitSet(256); + // Static initializer for opaque_part + static { + // it's generous. because first character must not include a slash + opaque_part.or(uric_no_slash); + opaque_part.or(uric); + } + + + /** + * URI bitset that combines absolute path and opaque part. + *

    +     * path          = [ abs_path | opaque_part ]
    +     * 

    + */ + protected static final BitSet path = new BitSet(256); + // Static initializer for path + static { + path.or(abs_path); + path.or(opaque_part); + } + + + /** + * Port, a logical alias for digit. + */ + protected static final BitSet port = digit; + + + /** + * Bitset that combines digit and dot for IPv4address. + *

    +     * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
    +     * 

    + */ + protected static final BitSet IPv4address = new BitSet(256); + // Static initializer for IPv4address + static { + IPv4address.or(digit); + IPv4address.set('.'); + } + + + /** + * RFC 2373. + *

    +     * IPv6address = hexpart [ ":" IPv4address ]
    +     * 

    + */ + protected static final BitSet IPv6address = new BitSet(256); + // Static initializer for IPv6address reference + static { + IPv6address.or(hex); // hexpart + IPv6address.set(':'); + IPv6address.or(IPv4address); + } + + + /** + * RFC 2732, 2373. + *

    +     * IPv6reference   = "[" IPv6address "]"
    +     * 

    + */ + protected static final BitSet IPv6reference = new BitSet(256); + // Static initializer for IPv6reference + static { + IPv6reference.set('['); + IPv6reference.or(IPv6address); + IPv6reference.set(']'); + } + + + /** + * BitSet for toplabel. + *

    +     * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
    +     * 

    + */ + protected static final BitSet toplabel = new BitSet(256); + // Static initializer for toplabel + static { + toplabel.or(alphanum); + toplabel.set('-'); + } + + + /** + * BitSet for domainlabel. + *

    +     * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
    +     * 

    + */ + protected static final BitSet domainlabel = toplabel; + + + /** + * BitSet for hostname. + *

    +     * hostname      = *( domainlabel "." ) toplabel [ "." ]
    +     * 

    + */ + protected static final BitSet hostname = new BitSet(256); + // Static initializer for hostname + static { + hostname.or(toplabel); + // hostname.or(domainlabel); + hostname.set('.'); + } + + + /** + * BitSet for host. + *

    +     * host          = hostname | IPv4address | IPv6reference
    +     * 

    + */ + protected static final BitSet host = new BitSet(256); + // Static initializer for host + static { + host.or(hostname); + // host.or(IPv4address); + host.or(IPv6reference); // IPv4address + } + + + /** + * BitSet for hostport. + *

    +     * hostport      = host [ ":" port ]
    +     * 

    + */ + protected static final BitSet hostport = new BitSet(256); + // Static initializer for hostport + static { + hostport.or(host); + hostport.set(':'); + hostport.or(port); + } + + + /** + * Bitset for userinfo. + *

    +     * userinfo      = *( unreserved | escaped |
    +     *                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
    +     * 

    + */ + protected static final BitSet userinfo = new BitSet(256); + // Static initializer for userinfo + static { + userinfo.or(unreserved); + userinfo.or(escaped); + userinfo.set(';'); + userinfo.set(':'); + userinfo.set('&'); + userinfo.set('='); + userinfo.set('+'); + userinfo.set('$'); + userinfo.set(','); + } + + + /** + * BitSet for within the userinfo component like user and password. + */ + public static final BitSet within_userinfo = new BitSet(256); + // Static initializer for within_userinfo + static { + within_userinfo.or(userinfo); + within_userinfo.clear(';'); // reserved within authority + within_userinfo.clear(':'); + within_userinfo.clear('@'); + within_userinfo.clear('?'); + within_userinfo.clear('/'); + } + + + /** + * Bitset for server. + *

    +     * server        = [ [ userinfo "@" ] hostport ]
    +     * 

    + */ + protected static final BitSet server = new BitSet(256); + // Static initializer for server + static { + server.or(userinfo); + server.set('@'); + server.or(hostport); + } + + + /** + * BitSet for reg_name. + *

    +     * reg_name      = 1*( unreserved | escaped | "$" | "," |
    +     *                     ";" | ":" | "@" | "&" | "=" | "+" )
    +     * 

    + */ + protected static final BitSet reg_name = new BitSet(256); + // Static initializer for reg_name + static { + reg_name.or(unreserved); + reg_name.or(escaped); + reg_name.set('$'); + reg_name.set(','); + reg_name.set(';'); + reg_name.set(':'); + reg_name.set('@'); + reg_name.set('&'); + reg_name.set('='); + reg_name.set('+'); + } + + + /** + * BitSet for authority. + *

    +     * authority     = server | reg_name
    +     * 

    + */ + protected static final BitSet authority = new BitSet(256); + // Static initializer for authority + static { + authority.or(server); + authority.or(reg_name); + } + + + /** + * BitSet for scheme. + *

    +     * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
    +     * 

    + */ + protected static final BitSet scheme = new BitSet(256); + // Static initializer for scheme + static { + scheme.or(alpha); + scheme.or(digit); + scheme.set('+'); + scheme.set('-'); + scheme.set('.'); + } + + + /** + * BitSet for rel_segment. + *

    +     * rel_segment   = 1*( unreserved | escaped |
    +     *                     ";" | "@" | "&" | "=" | "+" | "$" | "," )
    +     * 

    + */ + protected static final BitSet rel_segment = new BitSet(256); + // Static initializer for rel_segment + static { + rel_segment.or(unreserved); + rel_segment.or(escaped); + rel_segment.set(';'); + rel_segment.set('@'); + rel_segment.set('&'); + rel_segment.set('='); + rel_segment.set('+'); + rel_segment.set('$'); + rel_segment.set(','); + } + + + /** + * BitSet for rel_path. + *

    +     * rel_path      = rel_segment [ abs_path ]
    +     * 

    + */ + protected static final BitSet rel_path = new BitSet(256); + // Static initializer for rel_path + static { + rel_path.or(rel_segment); + rel_path.or(abs_path); + } + + + /** + * BitSet for net_path. + *

    +     * net_path      = "//" authority [ abs_path ]
    +     * 

    + */ + protected static final BitSet net_path = new BitSet(256); + // Static initializer for net_path + static { + net_path.set('/'); + net_path.or(authority); + net_path.or(abs_path); + } + + + /** + * BitSet for hier_part. + *

    +     * hier_part     = ( net_path | abs_path ) [ "?" query ]
    +     * 

    + */ + protected static final BitSet hier_part = new BitSet(256); + // Static initializer for hier_part + static { + hier_part.or(net_path); + hier_part.or(abs_path); + // hier_part.set('?'); already included + hier_part.or(query); + } + + + /** + * BitSet for relativeURI. + *

    +     * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    +     * 

    + */ + protected static final BitSet relativeURI = new BitSet(256); + // Static initializer for relativeURI + static { + relativeURI.or(net_path); + relativeURI.or(abs_path); + relativeURI.or(rel_path); + // relativeURI.set('?'); already included + relativeURI.or(query); + } + + + /** + * BitSet for absoluteURI. + *

    +     * absoluteURI   = scheme ":" ( hier_part | opaque_part )
    +     * 

    + */ + protected static final BitSet absoluteURI = new BitSet(256); + // Static initializer for absoluteURI + static { + absoluteURI.or(scheme); + absoluteURI.set(':'); + absoluteURI.or(hier_part); + absoluteURI.or(opaque_part); + } + + + /** + * BitSet for URI-reference. + *

    +     * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    +     * 

    + */ + protected static final BitSet URI_reference = new BitSet(256); + // Static initializer for URI_reference + static { + URI_reference.or(absoluteURI); + URI_reference.or(relativeURI); + URI_reference.set('#'); + URI_reference.or(fragment); + } + + // ---------------------------- Characters disallowed within the URI syntax + // Excluded US-ASCII Characters are like control, space, delims and unwise + + /** + * BitSet for control. + */ + public static final BitSet control = new BitSet(256); + // Static initializer for control + static { + for (int i = 0; i <= 0x1F; i++) { + control.set(i); + } + control.set(0x7F); + } + + /** + * BitSet for space. + */ + public static final BitSet space = new BitSet(256); + // Static initializer for space + static { + space.set(0x20); + } + + + /** + * BitSet for delims. + */ + public static final BitSet delims = new BitSet(256); + // Static initializer for delims + static { + delims.set('<'); + delims.set('>'); + delims.set('#'); + delims.set('%'); + delims.set('"'); + } + + + /** + * BitSet for unwise. + */ + public static final BitSet unwise = new BitSet(256); + // Static initializer for unwise + static { + unwise.set('{'); + unwise.set('}'); + unwise.set('|'); + unwise.set('\\'); + unwise.set('^'); + unwise.set('['); + unwise.set(']'); + unwise.set('`'); + } + + + /** + * Disallowed rel_path before escaping. + */ + public static final BitSet disallowed_rel_path = new BitSet(256); + // Static initializer for disallowed_rel_path + static { + disallowed_rel_path.or(uric); + disallowed_rel_path.andNot(rel_path); + } + + + /** + * Disallowed opaque_part before escaping. 
+ */ + public static final BitSet disallowed_opaque_part = new BitSet(256); + // Static initializer for disallowed_opaque_part + static { + disallowed_opaque_part.or(uric); + disallowed_opaque_part.andNot(opaque_part); + } + + // ----------------------- Characters allowed within and for each component + + /** + * Those characters that are allowed for the authority component. + */ + public static final BitSet allowed_authority = new BitSet(256); + // Static initializer for allowed_authority + static { + allowed_authority.or(authority); + allowed_authority.clear('%'); + } + + + /** + * Those characters that are allowed for the opaque_part. + */ + public static final BitSet allowed_opaque_part = new BitSet(256); + // Static initializer for allowed_opaque_part + static { + allowed_opaque_part.or(opaque_part); + allowed_opaque_part.clear('%'); + } + + + /** + * Those characters that are allowed for the reg_name. + */ + public static final BitSet allowed_reg_name = new BitSet(256); + // Static initializer for allowed_reg_name + static { + allowed_reg_name.or(reg_name); + // allowed_reg_name.andNot(percent); + allowed_reg_name.clear('%'); + } + + + /** + * Those characters that are allowed for the userinfo component. + */ + public static final BitSet allowed_userinfo = new BitSet(256); + // Static initializer for allowed_userinfo + static { + allowed_userinfo.or(userinfo); + // allowed_userinfo.andNot(percent); + allowed_userinfo.clear('%'); + } + + + /** + * Those characters that are allowed for within the userinfo component. + */ + public static final BitSet allowed_within_userinfo = new BitSet(256); + // Static initializer for allowed_within_userinfo + static { + allowed_within_userinfo.or(within_userinfo); + allowed_within_userinfo.clear('%'); + } + + + /** + * Those characters that are allowed for the IPv6reference component. + * The characters '[', ']' in IPv6reference should be excluded. 
+ */ + public static final BitSet allowed_IPv6reference = new BitSet(256); + // Static initializer for allowed_IPv6reference + static { + allowed_IPv6reference.or(IPv6reference); + // allowed_IPv6reference.andNot(unwise); + allowed_IPv6reference.clear('['); + allowed_IPv6reference.clear(']'); + } + + + /** + * Those characters that are allowed for the host component. + * The characters '[', ']' in IPv6reference should be excluded. + */ + public static final BitSet allowed_host = new BitSet(256); + // Static initializer for allowed_host + static { + allowed_host.or(hostname); + allowed_host.or(allowed_IPv6reference); + } + + + /** + * Those characters that are allowed within the authority component + * (server and reg_name minus the delimiters ';', ':', '@', '?' and '/'). + */ + public static final BitSet allowed_within_authority = new BitSet(256); + // Static initializer for allowed_within_authority + static { + allowed_within_authority.or(server); + allowed_within_authority.or(reg_name); + allowed_within_authority.clear(';'); + allowed_within_authority.clear(':'); + allowed_within_authority.clear('@'); + allowed_within_authority.clear('?'); + allowed_within_authority.clear('/'); + } + + + /** + * Those characters that are allowed for the abs_path. + */ + public static final BitSet allowed_abs_path = new BitSet(256); + // Static initializer for allowed_abs_path + static { + allowed_abs_path.or(abs_path); + // allowed_abs_path.set('/'); // already included + allowed_abs_path.andNot(percent); + allowed_abs_path.clear('+'); + } + + + /** + * Those characters that are allowed for the rel_path. + */ + public static final BitSet allowed_rel_path = new BitSet(256); + // Static initializer for allowed_rel_path + static { + allowed_rel_path.or(rel_path); + allowed_rel_path.clear('%'); + allowed_rel_path.clear('+'); + } + + + /** + * Those characters that are allowed within the path. 
+ */ + public static final BitSet allowed_within_path = new BitSet(256); + // Static initializer for allowed_within_path + static { + allowed_within_path.or(abs_path); + allowed_within_path.clear('/'); + allowed_within_path.clear(';'); + allowed_within_path.clear('='); + allowed_within_path.clear('?'); + } + + + /** + * Those characters that are allowed for the query component. + */ + public static final BitSet allowed_query = new BitSet(256); + // Static initializer for allowed_query + static { + allowed_query.or(uric); + allowed_query.clear('%'); + } + + + /** + * Those characters that are allowed within the query component. + */ + public static final BitSet allowed_within_query = new BitSet(256); + // Static initializer for allowed_within_query + static { + allowed_within_query.or(allowed_query); + allowed_within_query.andNot(reserved); // excluded 'reserved' + } + + + /** + * Those characters that are allowed for the fragment component. + */ + public static final BitSet allowed_fragment = new BitSet(256); + // Static initializer for allowed_fragment + static { + allowed_fragment.or(uric); + allowed_fragment.clear('%'); + } + + // ------------------------------------------- Flags for this URI-reference + + // TODO: Figure out what all these variables are for and provide javadoc + + // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + // absoluteURI = scheme ":" ( hier_part | opaque_part ) + protected boolean _is_hier_part; + protected boolean _is_opaque_part; + // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] + // hier_part = ( net_path | abs_path ) [ "?" 
query ] + protected boolean _is_net_path; + protected boolean _is_abs_path; + protected boolean _is_rel_path; + // net_path = "//" authority [ abs_path ] + // authority = server | reg_name + protected boolean _is_reg_name; + protected boolean _is_server; // = _has_server + // server = [ [ userinfo "@" ] hostport ] + // host = hostname | IPv4address | IPv6reference + protected boolean _is_hostname; + protected boolean _is_IPv4address; + protected boolean _is_IPv6reference; + + // ------------------------------------------ Character and escape encoding + + /** + * Encodes URI string. + * + * This is a two mapping, one from original characters to octets, and + * subsequently a second from octets to URI characters: + *

    +     *   original character sequence->octet sequence->URI character sequence
    +     * 

    + * + * An escaped octet is encoded as a character triplet, consisting of the + * percent character "%" followed by the two hexadecimal digits + * representing the octet code. For example, "%20" is the escaped + * encoding for the US-ASCII space character. + *

    + * Conversion from the local filesystem character set to UTF-8 will + * normally involve a two step process. First convert the local character + * set to the UCS; then convert the UCS to UTF-8. + * The first step in the process can be performed by maintaining a mapping + * table that includes the local character set code and the corresponding + * UCS code. + * The next step is to convert the UCS character code to the UTF-8 encoding. + *

    + * Mapping between vendor codepages can be done in a very similar manner + * as described above. + *

    + * The only time escape encodings can allowedly be made is when a URI is + * being created from its component parts. The escape and validate methods + * are internally performed within this method. + * + * @param original the original character sequence + * @param allowed those characters that are allowed within a component + * @param charset the protocol charset + * @return URI character sequence + * @throws URIException null component or unsupported character encoding + */ + + protected static char[] encode(String original, BitSet allowed, + String charset) throws URIException { + if (original == null) { + throw new IllegalArgumentException("Original string may not be null"); + } + if (allowed == null) { + throw new IllegalArgumentException("Allowed bitset may not be null"); + } + byte[] rawdata = URLCodec.encodeUrl(allowed, getBytes(original, charset)); + return new String(rawdata, StandardCharsets.US_ASCII).toCharArray(); + } + + private static byte[] getBytes(String original, String charset) { + try { + return original.getBytes(charset); + } catch (UnsupportedEncodingException e) { + return original.getBytes(UTF_8); + } + } + + /** + * Decodes URI encoded string. + * + * This is a two mapping, one from URI characters to octets, and + * subsequently a second from octets to original characters: + *

    +     *   URI character sequence->octet sequence->original character sequence
    +     * 

    + * + * A URI must be separated into its components before the escaped + * characters within those components can be allowedly decoded. + *

    + * Notice that there is a chance that URI characters that are non UTF-8 + * may be parsed as valid UTF-8. A recent non-scientific analysis found + * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a + * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0% + * false reading. + *

    + * The percent "%" character always has the reserved purpose of being + * the escape indicator, it must be escaped as "%25" in order to be used + * as data within a URI. + *

    + * The unescape method is internally performed within this method. + * + * @param component the URI character sequence + * @param charset the protocol charset + * @return original character sequence + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + */ + protected static String decode(char[] component, String charset) + throws URIException { + if (component == null) { + throw new IllegalArgumentException("Component array of chars may not be null"); + } + return decode(new String(component), charset); + } + + /** + * Decodes URI encoded string. + * + * This is a two mapping, one from URI characters to octets, and + * subsequently a second from octets to original characters: + *

    +     *   URI character sequence->octet sequence->original character sequence
    +     * 

    + * + * A URI must be separated into its components before the escaped + * characters within those components can be allowedly decoded. + *

    + * Notice that there is a chance that URI characters that are non UTF-8 + * may be parsed as valid UTF-8. A recent non-scientific analysis found + * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a + * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0% + * false reading. + *

    + * The percent "%" character always has the reserved purpose of being + * the escape indicator, it must be escaped as "%25" in order to be used + * as data within a URI. + *

    + * The unescape method is internally performed within this method. + * + * @param component the URI character sequence + * @param charset the protocol charset + * @return original character sequence + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * + * @since 3.0 + */ + protected static String decode(String component, String charset) + throws URIException { + if (component == null) { + throw new IllegalArgumentException("Component array of chars may not be null"); + } + byte[] rawdata = null; + try { + rawdata = URLCodec.decodeUrl(component.getBytes(StandardCharsets.US_ASCII)); + } catch (DecoderException e) { + throw new URIException(e.getMessage()); + } + try { + Charset cs = Charset.forName(charset); + return new String(rawdata, cs); + } catch (IllegalCharsetNameException e) { + return new String(rawdata, StandardCharsets.US_ASCII); + } + } + + /** + * Pre-validate the unescaped URI string within a specific component. + * + * @param component the component string within the component + * @param disallowed those characters disallowed within the component + * @return if true, it doesn't have the disallowed characters + * if false, the component is undefined or an incorrect one + */ + protected boolean prevalidate(String component, BitSet disallowed) { + // prevalidate the given component by disallowed characters + if (component == null) { + return false; // undefined + } + char[] target = component.toCharArray(); + for (int i = 0; i < target.length; i++) { + if (disallowed.get(target[i])) { + return false; + } + } + return true; + } + + + /** + * Validate the URI characters within a specific component. + * The component must be performed after escape encoding. Or it doesn't + * include escaped characters. 
+ * + * @param component the characters sequence within the component + * @param generous those characters that are allowed within a component + * @return if true, it's the correct URI character sequence + */ + protected boolean validate(char[] component, BitSet generous) { + // validate each component by generous characters + return validate(component, 0, -1, generous); + } + + + /** + * Validate the URI characters within a specific component. + * The component must be performed after escape encoding. Or it doesn't + * include escaped characters. + *

    + * This check is generous rather than strict; any strict validation + * should be performed before this method is called. + * + * @param component the characters sequence within the component + * @param soffset the starting offset of the given component + * @param eoffset the ending offset (inclusive) of the given component; + * if -1, the last index of the component is used + * @param generous those characters that are allowed within a component + * @return true if every character in the range is in generous, + * false otherwise + */ + protected boolean validate(char[] component, int soffset, int eoffset, + BitSet generous) { + // validate each component by generous characters + if (eoffset == -1) { + eoffset = component.length - 1; + } + for (int i = soffset; i <= eoffset; i++) { + if (!generous.get(component[i])) { + return false; + } + } + return true; + } + + + /** + * In order to avoid any possibility of conflict with non-ASCII characters, + * parse a URI reference as a String with the character + * encoding of the local system or the document. + *

    + * The following line is the regular expression for breaking-down a URI + * reference into its components. + *

    +     *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
    +     *    12            3  4          5       6  7        8 9
    +     * 

    + * For example, matching the above expression to + * http://jakarta.apache.org/ietf/uri/#Related + * results in the following subexpression matches: + *

    +     *               $1 = http:
    +     *  scheme    =  $2 = http
    +     *               $3 = //jakarta.apache.org
    +     *  authority =  $4 = jakarta.apache.org
    +     *  path      =  $5 = /ietf/uri/
    +     *               $6 = [undefined]
    +     *  query     =  $7 = [undefined]
    +     *               $8 = #Related
    +     *  fragment  =  $9 = Related
    +     * 

    + * + * @param original the original character sequence + * @param escaped true if original is escaped + * @throws URIException If an error occurs. + */ + protected void parseUriReference(String original, boolean escaped) + throws URIException { + + // validate and contruct the URI character sequence + if (original == null) { + throw new URIException("URI-Reference required"); + } + + /* @ + * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? + */ + String tmp = original.trim(); + + /* + * The length of the string sequence of characters. + * It may not be equal to the length of the byte array. + */ + int length = tmp.length(); + + /* + * Remove the delimiters like angle brackets around an URI. + */ + if (length > 0) { + char[] firstDelimiter = { tmp.charAt(0) }; + if (validate(firstDelimiter, delims)) { + if (length >= 2) { + char[] lastDelimiter = { tmp.charAt(length - 1) }; + if (validate(lastDelimiter, delims)) { + tmp = tmp.substring(1, length - 1); + length = length - 2; + } + } + } + } + + /* + * The starting index + */ + int from = 0; + + /* + * The test flag whether the URI is started from the path component. + */ + boolean isStartedFromPath = false; + int atColon = tmp.indexOf(':'); + int atSlash = tmp.indexOf('/'); + if ((atColon <= 0 && !tmp.startsWith("//")) + || (atSlash >= 0 && atSlash < atColon)) { + isStartedFromPath = true; + } + + /* + *

    +         *     @@@@@@@@
    +         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
    +         * 

    + */ + int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); + if (at == -1) { + at = 0; + } + + /* + * Parse the scheme. + *

    +         *  scheme    =  $2 = http
    +         *              @
    +         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
    +         * 

    + */ + if (at > 0 && at < length && tmp.charAt(at) == ':') { + char[] target = tmp.substring(0, at).toLowerCase(Locale.ROOT).toCharArray(); + if (validate(target, scheme)) { + _scheme = target; + } else { + throw new URIException("incorrect scheme"); + } + from = ++at; + } + + /* + * Parse the authority component. + *

    +         *  authority =  $4 = jakarta.apache.org
    +         *                  @@
    +         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
    +         * 

    + */ + // Reset flags + _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false; + if (0 <= at && at < length && tmp.charAt(at) == '/') { + // Set flag + _is_hier_part = true; + if (at + 2 < length && tmp.charAt(at + 1) == '/' + && !isStartedFromPath) { + // the temporary index to start the search from + int next = indexFirstOf(tmp, "/?#", at + 2); + if (next == -1) { + next = (tmp.substring(at + 2).length() == 0) ? at + 2 + : tmp.length(); + } + parseAuthority(tmp.substring(at + 2, next), escaped); + from = at = next; + // Set flag + _is_net_path = true; + } + if (from == at) { + // Set flag + _is_abs_path = true; + } + } + + /* + * Parse the path component. + *

    +         *  path      =  $5 = /ietf/uri/
    +         *                                @@@@@@
    +         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
    +         * 

    + */ + if (from < length) { + // rel_path = rel_segment [ abs_path ] + int next = indexFirstOf(tmp, "?#", from); + if (next == -1) { + next = tmp.length(); + } + if (!_is_abs_path) { + if (!escaped + && prevalidate(tmp.substring(from, next), disallowed_rel_path) + || escaped + && validate(tmp.substring(from, next).toCharArray(), rel_path)) { + // Set flag + _is_rel_path = true; + } else if (!escaped + && prevalidate(tmp.substring(from, next), disallowed_opaque_part) + || escaped + && validate(tmp.substring(from, next).toCharArray(), opaque_part)) { + // Set flag + _is_opaque_part = true; + } else { + // the path component may be empty + _path = null; + } + } + String s = tmp.substring(from, next); + if (escaped) { + setRawPath(s.toCharArray()); + } else { + setPath(s); + } + at = next; + } + + // set the charset to do escape encoding + String charset = getProtocolCharset(); + + /* + * Parse the query component. + *

    +         *  query     =  $7 = 
    +         *                                        @@@@@@@@@
    +         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
    +         * 

    + */ + if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') { + int next = tmp.indexOf('#', at + 1); + if (next == -1) { + next = tmp.length(); + } + if (escaped) { + _query = tmp.substring(at + 1, next).toCharArray(); + if (!validate(_query, uric)) { + throw new URIException("Invalid query"); + } + } else { + _query = encode(tmp.substring(at + 1, next), allowed_query, charset); + } + at = next; + } + + /* + * Parse the fragment component. + *

    +         *  fragment  =  $9 = Related
    +         *                                                   @@@@@@@@
    +         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
    +         * 

    + */ + if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') { + if (at + 1 == length) { // empty fragment + _fragment = "".toCharArray(); + } else { + _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() + : encode(tmp.substring(at + 1), allowed_fragment, charset); + } + } + + // set this URI. + setURI(); + } + + + /** + * Get the smallest index at which any one of the given delimiter + * characters first occurs in the given string. + * + * @param s the string to be indexed + * @param delims the delimiters used to index + * @return the smallest such index, or -1 if no delimiter occurs or if + * s or delims is null or empty + */ + protected int indexFirstOf(String s, String delims) { + return indexFirstOf(s, delims, -1); + } + + + /** + * Get the smallest index, at or after the given offset, at which any one + * of the given delimiter characters first occurs in the given string. + * + * @param s the string to be indexed + * @param delims the delimiters used to index + * @param offset the from index; a negative value is treated as 0 + * @return the smallest such index, or -1 if no delimiter occurs, if + * s or delims is null or empty, or if offset is past the end of s + */ + protected int indexFirstOf(String s, String delims, int offset) { + if (s == null || s.length() == 0) { + return -1; + } + if (delims == null || delims.length() == 0) { + return -1; + } + // check boundaries + if (offset < 0) { + offset = 0; + } else if (offset > s.length()) { + return -1; + } + // s is never null + int min = s.length(); + char[] delim = delims.toCharArray(); + for (int i = 0; i < delim.length; i++) { + int at = s.indexOf(delim[i], offset); + if (at >= 0 && at < min) { + min = at; + } + } + return (min == s.length()) ? -1 : min; + } + + + /** + * Get the index of the first occurrence of the given delimiter in + * the given array. 
+ * + * @param s the character array to be indexed + * @param delim the delimiter used to index + * @return the ealier index if there are a delimiter + */ + protected int indexFirstOf(char[] s, char delim) { + return indexFirstOf(s, delim, 0); + } + + + /** + * Get the earlier index that to be searched for the first occurrance in + * one of any of the given array. + * + * @param s the character array to be indexed + * @param delim the delimiter used to index + * @param offset The offset. + * @return the ealier index if there is a delimiter + */ + protected int indexFirstOf(char[] s, char delim, int offset) { + if (s == null || s.length == 0) { + return -1; + } + // check boundaries + if (offset < 0) { + offset = 0; + } else if (offset > s.length) { + return -1; + } + for (int i = offset; i < s.length; i++) { + if (s[i] == delim) { + return i; + } + } + return -1; + } + + + /** + * Parse the authority component. + * + * @param original the original character sequence of authority component + * @param escaped true if original is escaped + * @throws URIException If an error occurs. + */ + protected void parseAuthority(String original, boolean escaped) + throws URIException { + + // Reset flags + _is_reg_name = _is_server = + _is_hostname = _is_IPv4address = _is_IPv6reference = false; + + // set the charset to do escape encoding + String charset = getProtocolCharset(); + + boolean hasPort = true; + int from = 0; + int next = original.indexOf('@'); + if (next != -1) { // neither -1 and 0 + // each protocol extented from URI supports the specific userinfo + _userinfo = (escaped) ? 
original.substring(0, next).toCharArray() + : encode(original.substring(0, next), allowed_userinfo, + charset); + from = next + 1; + } + next = original.indexOf('[', from); + if (next >= from) { + next = original.indexOf(']', from); + if (next == -1) { + throw new URIException(URIException.PARSING, "IPv6reference"); + } else { + next++; + } + // In IPv6reference, '[', ']' should be excluded + _host = (escaped) ? original.substring(from, next).toCharArray() + : encode(original.substring(from, next), allowed_IPv6reference, + charset); + // Set flag + _is_IPv6reference = true; + } else { // only for !_is_IPv6reference + next = original.indexOf(':', from); + if (next == -1) { + next = original.length(); + hasPort = false; + } + // REMINDME: it doesn't need the pre-validation + _host = original.substring(from, next).toCharArray(); + if (validate(_host, IPv4address)) { + // Set flag + _is_IPv4address = true; + } else if (validate(_host, hostname)) { + // Set flag + _is_hostname = true; + } else { + // Set flag + _is_reg_name = true; + } + } + if (_is_reg_name) { + // Reset flags for a server-based naming authority + _is_server = _is_hostname = _is_IPv4address = + _is_IPv6reference = false; + // set a registry-based naming authority + if (escaped) { + _authority = original.toCharArray(); + if (!validate(_authority, reg_name)) { + throw new URIException("Invalid authority"); + } + } else { + _authority = encode(original, allowed_reg_name, charset); + } + } else { + if (original.length() - 1 > next && hasPort + && original.charAt(next) == ':') { // not empty + from = next + 1; + try { + _port = Integer.parseInt(original.substring(from)); + } catch (NumberFormatException error) { + throw new URIException(URIException.PARSING, + "invalid port number"); + } + } + // set a server-based naming authority + StringBuffer buf = new StringBuffer(); + if (_userinfo != null) { // has_userinfo + buf.append(_userinfo); + buf.append('@'); + } + if (_host != null) { + buf.append(_host); + 
if (_port != -1) { + buf.append(':'); + buf.append(_port); + } + } + _authority = buf.toString().toCharArray(); + // Set flag + _is_server = true; + } + } + + + /** + * Once it's parsed successfully, set this URI. + * + * @see #getRawURI + */ + protected void setURI() { + // set _uri + StringBuffer buf = new StringBuffer(); + // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? + if (_scheme != null) { + buf.append(_scheme); + buf.append(':'); + } + if (_is_net_path) { + buf.append("//"); + if (_authority != null) { // has_authority + buf.append(_authority); + } + } + if (_opaque != null && _is_opaque_part) { + buf.append(_opaque); + } else if (_path != null) { + // _is_hier_part or _is_relativeURI + if (_path.length != 0) { + buf.append(_path); + } + } + if (_query != null) { // has_query + buf.append('?'); + buf.append(_query); + } + // ignore the fragment identifier + _uri = buf.toString().toCharArray(); + hash = 0; + } + + // ----------------------------------------------------------- Test methods + + + /** + * Tell whether or not this URI is absolute. + * + * @return true iif this URI is absoluteURI + */ + public boolean isAbsoluteURI() { + return (_scheme != null); + } + + + /** + * Tell whether or not this URI is relative. + * + * @return true iif this URI is relativeURI + */ + public boolean isRelativeURI() { + return (_scheme == null); + } + + + /** + * Tell whether or not the absoluteURI of this URI is hier_part. + * + * @return true iif the absoluteURI is hier_part + */ + public boolean isHierPart() { + return _is_hier_part; + } + + + /** + * Tell whether or not the absoluteURI of this URI is opaque_part. + * + * @return true iif the absoluteURI is opaque_part + */ + public boolean isOpaquePart() { + return _is_opaque_part; + } + + + /** + * Tell whether or not the relativeURI or heir_part of this URI is net_path. + * It's the same function as the has_authority() method. 
+ * + * @return true iff the relativeURI or hier_part is net_path + * @see #hasAuthority + */ + public boolean isNetPath() { + return _is_net_path || (_authority != null); + } + + + /** + * Tell whether or not the relativeURI or hier_part of this URI is abs_path. + * + * @return true iff the relativeURI or hier_part is abs_path + */ + public boolean isAbsPath() { + return _is_abs_path; + } + + + /** + * Tell whether or not the relativeURI of this URI is rel_path. + * + * @return true iff the relativeURI is rel_path + */ + public boolean isRelPath() { + return _is_rel_path; + } + + + /** + * Tell whether or not this URI has authority. + * Equivalent to {@link #isNetPath()}. + * + * @return true iff this URI has authority + * @see #isNetPath + */ + public boolean hasAuthority() { + return (_authority != null) || _is_net_path; + } + + /** + * Tell whether or not the authority component of this URI is reg_name. + * + * @return true iff the authority component is reg_name + */ + public boolean isRegName() { + return _is_reg_name; + } + + + /** + * Tell whether or not the authority component of this URI is server. + * + * @return true iff the authority component is server + */ + public boolean isServer() { + return _is_server; + } + + + /** + * Tell whether or not this URI has userinfo. + * + * @return true iff this URI has userinfo + */ + public boolean hasUserinfo() { + return (_userinfo != null); + } + + + /** + * Tell whether or not the host part of this URI is hostname. + * + * @return true iff the host part is hostname + */ + public boolean isHostname() { + return _is_hostname; + } + + + /** + * Tell whether or not the host part of this URI is IPv4address. + * + * @return true iff the host part is IPv4address + */ + public boolean isIPv4address() { + return _is_IPv4address; + } + + + /** + * Tell whether or not the host part of this URI is IPv6reference.
+ * + * @return true iff the host part is IPv6reference + */ + public boolean isIPv6reference() { + return _is_IPv6reference; + } + + + /** + * Tell whether or not this URI has a query component (an empty query counts). + * + * @return true iff this URI has a query + */ + public boolean hasQuery() { + return (_query != null); + } + + + /** + * Tell whether or not this URI has a fragment (an empty fragment counts). + * + * @return true iff this URI has a fragment + */ + public boolean hasFragment() { + return (_fragment != null); + } + + + // ---------------------------------------------------------------- Charset + + + /** + * Set the default charset of the protocol. + *

    + * The character set used to store files SHALL remain a local decision and + * MAY depend on the capability of local operating systems. Prior to the + * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format + * and UTF-8 encoded. This approach, while allowing international exchange + * of URIs, will still allow backward compatibility with older systems + * because the code set positions for ASCII characters are identical to the + * one byte sequence in UTF-8. + *

    + * An individual URI scheme may require a single charset, define a default + * charset, or provide a way to indicate the charset used. + * + *

    + * The setter always succeeds in changing the default and then always throws + * a DefaultCharsetChanged exception. + * + * API programmers must therefore call it in the following way: + *

    +     *  import org.apache.util.URI$DefaultCharsetChanged;
    +     *      .
    +     *      .
    +     *      .
    +     *  try {
    +     *      URI.setDefaultProtocolCharset("UTF-8");
    +     *  } catch (DefaultCharsetChanged cc) {
    +     *      // CASE 1: the exception could be ignored, when it is set by user
    +     *      if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
    +     *      // CASE 2: let user know the default protocol charset changed
    +     *      } else {
    +     *      // CASE 3: let user know the default document charset changed
    +     *      }
    +     *  }
    +     *  
    + * + * The API programmer is responsible for setting the correct charset, + * and each application should keep track of the charset it supports. + * + * @param charset the default charset for each protocol + * @throws DefaultCharsetChanged always thrown, after the default has been updated + */ + public static void setDefaultProtocolCharset(String charset) + throws DefaultCharsetChanged { + + defaultProtocolCharset = charset; + throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET, + "the default protocol charset changed"); + } + + + /** + * Get the default charset of the protocol. + *

    + * An individual URI scheme may require a single charset, define a default + * charset, or provide a way to indicate the charset used. + *

    + * To work globally either requires support of a number of character sets + * and to be able to convert between them, or the use of a single preferred + * character set. + * For support of global compatibility it is STRONGLY RECOMMENDED that + * clients and servers use UTF-8 encoding when exchanging URIs. + * + * @return the default charset string + */ + public static String getDefaultProtocolCharset() { + return defaultProtocolCharset; + } + + + /** + * Get the protocol charset used by this current URI instance. + * It was set by the constructor for this instance. If it was not set by + * the constructor, the default protocol charset is returned. + * + * @return the protocol charset string + * @see #getDefaultProtocolCharset + */ + public String getProtocolCharset() { + return (protocolCharset != null) + ? protocolCharset + : defaultProtocolCharset; + } + + + /** + * Set the default charset of the document. + *

    + * Note that a URI may contain characters from mixed character sets (e.g. + * ftp://host/KoreanNamespace/ChineseResource). To handle the bi-directional + * display of these character sets, the protocol charset can simply be + * used again, because handling of BIDI control characters inserted at + * different points during composition is not yet implemented. + *

    + * + * The setter always succeeds in changing the default and then always throws + * a DefaultCharsetChanged exception. + * + * API programmers must therefore call it in the following way: + *

    +     *  import org.apache.util.URI$DefaultCharsetChanged;
    +     *      .
    +     *      .
    +     *      .
    +     *  try {
    +     *      URI.setDefaultDocumentCharset("EUC-KR");
    +     *  } catch (DefaultCharsetChanged cc) {
    +     *      // CASE 1: the exception could be ignored, when it is set by user
    +     *      if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
    +     *      // CASE 2: let user know the default document charset changed
    +     *      } else {
    +     *      // CASE 3: let user know the default protocol charset changed
    +     *      }
    +     *  }
    +     *  
    + * + * The API programmer is responsible for setting the correct charset, + * and each application should keep track of the charset it supports. + * + * @param charset the default charset for the document + * @throws DefaultCharsetChanged always thrown, after the default has been updated + */ + public static void setDefaultDocumentCharset(String charset) + throws DefaultCharsetChanged { + + defaultDocumentCharset = charset; + throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET, + "the default document charset changed"); + } + + + /** + * Get the recommended default charset of the document. + * + * @return the default charset string + */ + public static String getDefaultDocumentCharset() { + return defaultDocumentCharset; + } + + + /** + * Get the default charset of the document by locale. + * + * @return the default charset string by locale + */ + public static String getDefaultDocumentCharsetByLocale() { + return defaultDocumentCharsetByLocale; + } + + + /** + * Get the default charset of the document by platform. + * + * @return the default charset string by platform + */ + public static String getDefaultDocumentCharsetByPlatform() { + return defaultDocumentCharsetByPlatform; + } + + // ------------------------------------------------------------- The scheme + + /** + * Get the raw scheme. + * + * @return the scheme as the internal character array, or null if undefined + */ + public char[] getRawScheme() { + return _scheme; + } + + + /** + * Get the scheme. + * + * @return the scheme, or + * null if the scheme is undefined + */ + public String getScheme() { + return (_scheme == null) ? null : new String(_scheme); + } + + // ---------------------------------------------------------- The authority + + /** + * Set the authority. It can be one type of server, hostport, hostname, + * IPv4address, IPv6reference and reg_name. + *

    +     *   authority     = server | reg_name
    +     * 

    + * + * @param escapedAuthority the raw escaped authority + * @throws URIException If {@link + * #parseAuthority(String,boolean)} fails + * @throws NullPointerException if escapedAuthority is null + */ + public void setRawAuthority(char[] escapedAuthority) + throws URIException, NullPointerException { + + parseAuthority(new String(escapedAuthority), true); + setURI(); + } + + + /** + * Set the authority. It can be any one of server, hostport, hostname, + * IPv4address, IPv6reference and reg_name. + * Note that there is no unescaped setAuthority method, because of escape encoding. + * + * @param escapedAuthority the escaped authority string + * @throws URIException If {@link + * #parseAuthority(String,boolean)} fails + */ + public void setEscapedAuthority(String escapedAuthority) + throws URIException { + + parseAuthority(escapedAuthority, true); + setURI(); + } + + + /** + * Get the raw-escaped authority. + * + * @return the raw-escaped authority (the internal array), or null if there is none + */ + public char[] getRawAuthority() { + return _authority; + } + + + /** + * Get the escaped authority. + * + * @return the escaped authority, or null if there is none + */ + public String getEscapedAuthority() { + return (_authority == null) ? null : new String(_authority); + } + + + /** + * Get the authority, decoded with the protocol charset. + * + * @return the authority, or null if there is none + * @throws URIException If {@link #decode} fails + */ + public String getAuthority() throws URIException { + return (_authority == null) ? null : decode(_authority, + getProtocolCharset()); + } + + // ----------------------------------------------------------- The userinfo + + /** + * Get the raw-escaped userinfo. + * + * @return the raw-escaped userinfo (the internal array), or null if there is none + * @see #getAuthority + */ + public char[] getRawUserinfo() { + return _userinfo; + } + + + /** + * Get the escaped userinfo. + * + * @return the escaped userinfo, or null if there is none + * @see #getAuthority + */ + public String getEscapedUserinfo() { + return (_userinfo == null) ? null : new String(_userinfo); + } + + + /** + * Get the userinfo.
+ * + * @return the userinfo decoded with the protocol charset, or null if there is none + * @throws URIException If {@link #decode} fails + * @see #getAuthority + */ + public String getUserinfo() throws URIException { + return (_userinfo == null) ? null : decode(_userinfo, + getProtocolCharset()); + } + + // --------------------------------------------------------------- The host + + /** + * Get the host. + *

    +     *   host          = hostname | IPv4address | IPv6reference
    +     * 

    + * + * @return the raw host characters (the internal array, not a copy) + * @see #getAuthority + */ + public char[] getRawHost() { + return _host; + } + + + /** + * Get the host. + *

    +     *   host          = hostname | IPv4address | IPv6reference
    +     * 

    + * + * @return the host + * @throws URIException If {@link #decode} fails + * @see #getAuthority + */ + public String getHost() throws URIException { + if (_host != null) { + return decode(_host, getProtocolCharset()); + } else { + return null; + } + } + + // --------------------------------------------------------------- The port + + /** + * Get the port. In order to get the specfic default port, the specific + * protocol-supported class extended from the URI class should be used. + * It has the server-based naming authority. + * + * @return the port + * if -1, it has the default port for the scheme or the server-based + * naming authority is not supported in the specific URI. + */ + public int getPort() { + return _port; + } + + // --------------------------------------------------------------- The path + + /** + * Set the raw-escaped path. + * + * @param escapedPath the path character sequence + * @throws URIException encoding error or not proper for initial instance + * @see #encode + */ + public void setRawPath(char[] escapedPath) throws URIException { + if (escapedPath == null || escapedPath.length == 0) { + _path = _opaque = escapedPath; + setURI(); + return; + } + // remove the fragment identifier + escapedPath = removeFragmentIdentifier(escapedPath); + if (_is_net_path || _is_abs_path) { + if (escapedPath[0] != '/') { + throw new URIException(URIException.PARSING, + "not absolute path"); + } + if (!validate(escapedPath, abs_path)) { + throw new URIException(URIException.ESCAPING, + "escaped absolute path not valid"); + } + _path = escapedPath; + } else if (_is_rel_path) { + int at = indexFirstOf(escapedPath, '/'); + if (at == 0) { + throw new URIException(URIException.PARSING, "incorrect path"); + } + if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) + && !validate(escapedPath, at, -1, abs_path) + || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) { + + throw new URIException(URIException.ESCAPING, + "escaped relative path not 
valid"); + } + _path = escapedPath; + } else if (_is_opaque_part) { + if (!uric_no_slash.get(escapedPath[0]) + && !validate(escapedPath, 1, -1, uric)) { + throw new URIException(URIException.ESCAPING, + "escaped opaque part not valid"); + } + _opaque = escapedPath; + } else { + throw new URIException(URIException.PARSING, "incorrect path"); + } + setURI(); + } + + + /** + * Set the escaped path. + * + * @param escapedPath the escaped path string + * @throws URIException encoding error or not proper for initial instance + * @see #encode + */ + public void setEscapedPath(String escapedPath) throws URIException { + if (escapedPath == null) { + _path = _opaque = null; + setURI(); + return; + } + setRawPath(escapedPath.toCharArray()); + } + + + /** + * Set the path. + * + * @param path the path string + * @throws URIException set incorrectly or fragment only + * @see #encode + */ + public void setPath(String path) throws URIException { + + if (path == null || path.length() == 0) { + _path = _opaque = (path == null) ? 
null : path.toCharArray(); + setURI(); + return; + } + // set the charset to do escape encoding + String charset = getProtocolCharset(); + + if (_is_net_path || _is_abs_path) { + _path = encode(path, allowed_abs_path, charset); + } else if (_is_rel_path) { + StringBuffer buff = new StringBuffer(path.length()); + int at = path.indexOf('/'); + if (at == 0) { // never 0 + throw new URIException(URIException.PARSING, + "incorrect relative path"); + } + if (at > 0) { + buff.append(encode(path.substring(0, at), allowed_rel_path, + charset)); + buff.append(encode(path.substring(at), allowed_abs_path, + charset)); + } else { + buff.append(encode(path, allowed_rel_path, charset)); + } + _path = buff.toString().toCharArray(); + } else if (_is_opaque_part) { + StringBuffer buf = new StringBuffer(); + buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset)); + buf.insert(1, encode(path.substring(1), uric, charset)); + _opaque = buf.toString().toCharArray(); + } else { + throw new URIException(URIException.PARSING, "incorrect path"); + } + setURI(); + } + + + /** + * Resolve the base and relative path. + * + * @param basePath a character array of the basePath + * @param relPath a character array of the relPath + * @return the resolved path + * @throws URIException no more higher path level to be resolved + */ + protected char[] resolvePath(char[] basePath, char[] relPath) + throws URIException { + + // REMINDME: paths are never null + String base = (basePath == null) ? "" : new String(basePath); + + // _path could be empty + if (relPath == null || relPath.length == 0) { + return normalize(basePath); + } else if (relPath[0] == '/') { + return normalize(relPath); + } else { + int at = base.lastIndexOf('/'); + if (at != -1) { + basePath = base.substring(0, at + 1).toCharArray(); + } + StringBuffer buff = new StringBuffer(base.length() + + relPath.length); + buff.append((at != -1) ? 
base.substring(0, at + 1) : "/"); + buff.append(relPath); + return normalize(buff.toString().toCharArray()); + } + } + + + /** + * Get the raw-escaped current hierarchy level in the given path. + * If the last namespace is a collection, the slash mark ('/') should be + * ended with at the last character of the path string. + * + * @param path the path + * @return the current hierarchy level + * @throws URIException no hierarchy level + */ + protected char[] getRawCurrentHierPath(char[] path) throws URIException { + + if (_is_opaque_part) { + throw new URIException(URIException.PARSING, "no hierarchy level"); + } + if (path == null) { + throw new URIException(URIException.PARSING, "empty path"); + } + String buff = new String(path); + int first = buff.indexOf('/'); + int last = buff.lastIndexOf('/'); + if (last == 0) { + return rootPath; + } else if (first != last && last != -1) { + return buff.substring(0, last).toCharArray(); + } + // FIXME: it could be a document on the server side + return path; + } + + + /** + * Get the raw-escaped current hierarchy level. + * + * @return the raw-escaped current hierarchy level + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. + */ + public char[] getRawCurrentHierPath() throws URIException { + return (_path == null) ? null : getRawCurrentHierPath(_path); + } + + + /** + * Get the escaped current hierarchy level. + * + * @return the escaped current hierarchy level + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. + */ + public String getEscapedCurrentHierPath() throws URIException { + char[] path = getRawCurrentHierPath(); + return (path == null) ? null : new String(path); + } + + + /** + * Get the current hierarchy level. + * + * @return the current hierarchy level + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 
+ * @see #decode + */ + public String getCurrentHierPath() throws URIException { + char[] path = getRawCurrentHierPath(); + return (path == null) ? null : decode(path, getProtocolCharset()); + } + + + /** + * Get the raw-escaped level above this hierarchy level. + * + * @return the raw above hierarchy level, or null if the current level is null + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. + */ + public char[] getRawAboveHierPath() throws URIException { + char[] path = getRawCurrentHierPath(); + return (path == null) ? null : getRawCurrentHierPath(path); + } + + + /** + * Get the escaped level above this hierarchy level. + * + * @return the escaped above hierarchy level, or null if there is none + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. + */ + public String getEscapedAboveHierPath() throws URIException { + char[] path = getRawAboveHierPath(); + return (path == null) ? null : new String(path); + } + + + /** + * Get the level above this hierarchy level, decoded with the protocol charset. + * + * @return the above hierarchy level, or null if there is none + * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. + * @see #decode + */ + public String getAboveHierPath() throws URIException { + char[] path = getRawAboveHierPath(); + return (path == null) ? null : decode(path, getProtocolCharset()); + } + + + /** + * Get the raw-escaped path. + *

    +     *   path          = [ abs_path | opaque_part ]
    +     * 

    + * + * @return the raw-escaped opaque part if this URI is an opaque_part, otherwise the raw-escaped path + */ + public char[] getRawPath() { + return _is_opaque_part ? _opaque : _path; + } + + + /** + * Get the escaped path. + *

    +     *   path          = [ abs_path | opaque_part ]
    +     *   abs_path      = "/"  path_segments 
    +     *   opaque_part   = uric_no_slash *uric
    +     * 

    + * + * @return the escaped path string, or null if {@link #getRawPath()} returns null + */ + public String getEscapedPath() { + char[] path = getRawPath(); + return (path == null) ? null : new String(path); + } + + + /** + * Get the path. + *

    +     *   path          = [ abs_path | opaque_part ]
    +     * 

    + * @return the path string + * @throws URIException If {@link #decode} fails. + * @see #decode + */ + public String getPath() throws URIException { + char[] path = getRawPath(); + return (path == null) ? null : decode(path, getProtocolCharset()); + } + + + /** + * Get the raw-escaped basename of the path. + * + * @return the raw-escaped basename + */ + public char[] getRawName() { + if (_path == null) { + return null; + } + + int at = 0; + for (int i = _path.length - 1; i >= 0; i--) { + if (_path[i] == '/') { + at = i + 1; + break; + } + } + int len = _path.length - at; + char[] basename = new char[len]; + System.arraycopy(_path, at, basename, 0, len); + return basename; + } + + + /** + * Get the escaped basename of the path. + * + * @return the escaped basename string + */ + public String getEscapedName() { + char[] basename = getRawName(); + return (basename == null) ? null : new String(basename); + } + + + /** + * Get the basename of the path. + * + * @return the basename string + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #decode + */ + public String getName() throws URIException { + char[] basename = getRawName(); + return (basename == null) ? null : decode(getRawName(), + getProtocolCharset()); + } + + // ----------------------------------------------------- The path and query + + /** + * Get the raw-escaped path and query. + * + * @return the raw-escaped path and query + */ + public char[] getRawPathQuery() { + + if (_path == null && _query == null) { + return null; + } + StringBuffer buff = new StringBuffer(); + if (_path != null) { + buff.append(_path); + } + if (_query != null) { + buff.append('?'); + buff.append(_query); + } + return buff.toString().toCharArray(); + } + + + /** + * Get the escaped query. + * + * @return the escaped path and query string + */ + public String getEscapedPathQuery() { + char[] rawPathQuery = getRawPathQuery(); + return (rawPathQuery == null) ? 
null : new String(rawPathQuery); + } + + + /** + * Get the path and query. + * + * @return the path and query string. + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #decode + */ + public String getPathQuery() throws URIException { + char[] rawPathQuery = getRawPathQuery(); + return (rawPathQuery == null) ? null : decode(rawPathQuery, + getProtocolCharset()); + } + + // -------------------------------------------------------------- The query + + /** + * Set the raw-escaped query. + * + * @param escapedQuery the raw-escaped query + * @throws URIException escaped query not valid + */ + public void setRawQuery(char[] escapedQuery) throws URIException { + if (escapedQuery == null || escapedQuery.length == 0) { + _query = escapedQuery; + setURI(); + return; + } + // remove the fragment identifier + escapedQuery = removeFragmentIdentifier(escapedQuery); + if (!validate(escapedQuery, query)) { + throw new URIException(URIException.ESCAPING, + "escaped query not valid"); + } + _query = escapedQuery; + setURI(); + } + + + /** + * Set the escaped query string. + * + * @param escapedQuery the escaped query string + * @throws URIException escaped query not valid + */ + public void setEscapedQuery(String escapedQuery) throws URIException { + if (escapedQuery == null) { + _query = null; + setURI(); + return; + } + setRawQuery(escapedQuery.toCharArray()); + } + + + /** + * Set the query. + *

    + * When the reserved special characters + * ("&", "=", "+", ",", and "$") within a query component cannot be misinterpreted, it is + * recommended to encode the whole query with this method. + *

    + * Additional APIs that give the reserved special characters a + * protocol-specific meaning are implemented in the protocol + * classes that inherit from URI; refer to the same-named APIs + * implemented in each specific protocol class. + * + * @param query the unescaped query string; null clears the query, an empty string sets an empty query + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #encode + */ + public void setQuery(String query) throws URIException { + if (query == null || query.length() == 0) { + _query = (query == null) ? null : query.toCharArray(); + setURI(); + return; + } + setRawQuery(encode(query, allowed_query, getProtocolCharset())); + } + + + /** + * Get the raw-escaped query. + * + * @return the raw-escaped query (the internal array), or null if there is none + */ + public char[] getRawQuery() { + return _query; + } + + + /** + * Get the escaped query. + * + * @return the escaped query string, or null if there is none + */ + public String getEscapedQuery() { + return (_query == null) ? null : new String(_query); + } + + + /** + * Get the query, decoded with the protocol charset. + * + * @return the query string, or null if there is none. + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #decode + */ + public String getQuery() throws URIException { + return (_query == null) ? null : decode(_query, getProtocolCharset()); + } + + // ----------------------------------------------------------- The fragment + + /** + * Set the raw-escaped fragment. A null or empty array is stored without validation. + * + * @param escapedFragment the raw-escaped fragment + * @throws URIException escaped fragment not valid + */ + public void setRawFragment(char[] escapedFragment) throws URIException { + if (escapedFragment == null || escapedFragment.length == 0) { + _fragment = escapedFragment; + hash = 0; + return; + } + if (!validate(escapedFragment, fragment)) { + throw new URIException(URIException.ESCAPING, + "escaped fragment not valid"); + } + _fragment = escapedFragment; + hash = 0; + } + + + /** + * Set the escaped fragment string.
+ * + * @param escapedFragment the escaped fragment string; null removes the fragment + * @throws URIException escaped fragment not valid + */ + public void setEscapedFragment(String escapedFragment) throws URIException { + if (escapedFragment == null) { + _fragment = null; + hash = 0; + return; + } + setRawFragment(escapedFragment.toCharArray()); + } + + + /** + * Set the fragment, escape-encoding it with the protocol charset. + * + * @param fragment the unescaped fragment string; null removes the fragment, an empty string sets an empty one. + * @throws URIException If encoding the fragment fails. + */ + public void setFragment(String fragment) throws URIException { + if (fragment == null || fragment.length() == 0) { + _fragment = (fragment == null) ? null : fragment.toCharArray(); + hash = 0; + return; + } + _fragment = encode(fragment, allowed_fragment, getProtocolCharset()); + hash = 0; + } + + + /** + * Get the raw-escaped fragment. + *

    + * The optional fragment identifier is not part of a URI, but is often used + * in conjunction with a URI. + *

    + * The format and interpretation of fragment identifiers is dependent on + * the media type [RFC2046] of the retrieval result. + *

    + * A fragment identifier is only meaningful when a URI reference is + * intended for retrieval and the result of that retrieval is a document + * for which the identified fragment is consistently defined. + * + * @return the raw-escaped fragment + */ + public char[] getRawFragment() { + return _fragment; + } + + + /** + * Get the escaped fragment. + * + * @return the escaped fragment string + */ + public String getEscapedFragment() { + return (_fragment == null) ? null : new String(_fragment); + } + + + /** + * Get the fragment. + * + * @return the fragment string + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #decode + */ + public String getFragment() throws URIException { + return (_fragment == null) ? null : decode(_fragment, + getProtocolCharset()); + } + + // ------------------------------------------------------------- Utilities + + /** + * Remove the fragment identifier of the given component. + * + * @param component the component that a fragment may be included + * @return the component that the fragment identifier is removed + */ + protected char[] removeFragmentIdentifier(char[] component) { + if (component == null) { + return null; + } + int lastIndex = new String(component).indexOf('#'); + if (lastIndex != -1) { + component = new String(component).substring(0, + lastIndex).toCharArray(); + } + return component; + } + + + /** + * Normalize the given hier path part. + * + *

    Algorithm taken from URI reference parser at + * http://www.apache.org/~fielding/uri/rev-2002/issues.html. + * + * @param path the path to normalize + * @return the normalized path + * @throws URIException no more higher path level to be normalized + */ + protected char[] normalize(char[] path) throws URIException { + + if (path == null) { + return null; + } + + String normalized = new String(path); + + // If the buffer begins with "./" or "../", the "." or ".." is removed. + if (normalized.startsWith("./")) { + normalized = normalized.substring(1); + } else if (normalized.startsWith("../")) { + normalized = normalized.substring(2); + } else if (normalized.startsWith("..")) { + normalized = normalized.substring(2); + } + + // All occurrences of "/./" in the buffer are replaced with "/" + int index = -1; + while ((index = normalized.indexOf("/./")) != -1) { + normalized = normalized.substring(0, index) + normalized.substring(index + 2); + } + + // If the buffer ends with "/.", the "." is removed. + if (normalized.endsWith("/.")) { + normalized = normalized.substring(0, normalized.length() - 1); + } + + int startIndex = 0; + + // All occurrences of "/<segment>/../" in the buffer, where ".." + // and <segment> are complete path segments, are iteratively replaced + // with "/" in order from left to right until no matching pattern remains. + // If the buffer ends with "/<segment>/..", that is also replaced + // with "/". Note that <segment> may be empty. + while ((index = normalized.indexOf("/../", startIndex)) != -1) { + int slashIndex = normalized.lastIndexOf('/', index - 1); + if (slashIndex >= 0) { + normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3); + } else { + startIndex = index + 3; + } + } + if (normalized.endsWith("/..")) { + int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4); + if (slashIndex >= 0) { + normalized = normalized.substring(0, slashIndex + 1); + } + } + + // All prefixes of "<segment>/../" in the buffer, where ".." 
+ // and <segment> are complete path segments, are iteratively replaced + // with "/" in order from left to right until no matching pattern remains. + // If the buffer ends with "<segment>/..", that is also replaced + // with "/". Note that <segment> may be empty. + while ((index = normalized.indexOf("/../")) != -1) { + int slashIndex = normalized.lastIndexOf('/', index - 1); + if (slashIndex >= 0) { + break; + } else { + normalized = normalized.substring(index + 3); + } + } + if (normalized.endsWith("/..")) { + int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4); + if (slashIndex < 0) { + normalized = "/"; + } + } + + return normalized.toCharArray(); + } + + + /** + * Normalizes the path part of this URI. Normalization is only meant to be performed on + * URIs with an absolute path. Calling this method on a relative path URI will have no + * effect. + * + * @throws URIException no more higher path level to be normalized + * + * @see #isAbsPath() + */ + public void normalize() throws URIException { + if (isAbsPath()) { + _path = normalize(_path); + setURI(); + } + } + + + /** + * Test if the first array is equal to the second array. + * + * @param first the first character array + * @param second the second character array + * @return true if they're equal + */ + protected boolean equals(char[] first, char[] second) { + + if (first == null && second == null) { + return true; + } + if (first == null || second == null) { + return false; + } + if (first.length != second.length) { + return false; + } + for (int i = 0; i < first.length; i++) { + if (first[i] != second[i]) { + return false; + } + } + return true; + } + + + /** + * Test an object if this URI is equal to another. 
+ * + * @param obj an object to compare + * @return true if two URI objects are equal + */ + public boolean equals(Object obj) { + + // normalize and test each components + if (obj == this) { + return true; + } + if (!(obj instanceof URI)) { + return false; + } + URI another = (URI) obj; + // scheme + if (!equals(_scheme, another._scheme)) { + return false; + } + // is_opaque_part or is_hier_part? and opaque + if (!equals(_opaque, another._opaque)) { + return false; + } + // is_hier_part + // has_authority + if (!equals(_authority, another._authority)) { + return false; + } + // path + if (!equals(_path, another._path)) { + return false; + } + // has_query + if (!equals(_query, another._query)) { + return false; + } + // has_fragment? should be careful of the only fragment case. + if (!equals(_fragment, another._fragment)) { + return false; + } + return true; + } + + // ---------------------------------------------------------- Serialization + + /** + * Write the content of this URI. + * + * @param oos the object-output stream + * @throws IOException If an IO problem occurs. + */ + private void writeObject(ObjectOutputStream oos) + throws IOException { + + oos.defaultWriteObject(); + } + + + /** + * Read a URI. + * + * @param ois the object-input stream + * @throws ClassNotFoundException If one of the classes specified in the + * input stream cannot be found. + * @throws IOException If an IO problem occurs. + */ + private void readObject(ObjectInputStream ois) + throws ClassNotFoundException, IOException { + + ois.defaultReadObject(); + } + + // -------------------------------------------------------------- Hash code + + /** + * Return a hash code for this URI. 
+ * + * @return a hash code value for this URI + */ + public int hashCode() { + if (hash == 0) { + char[] c = _uri; + if (c != null) { + for (int i = 0, len = c.length; i < len; i++) { + hash = 31 * hash + c[i]; + } + } + c = _fragment; + if (c != null) { + for (int i = 0, len = c.length; i < len; i++) { + hash = 31 * hash + c[i]; + } + } + } + return hash; + } + + // ------------------------------------------------------------- Comparison + + /** + * Compare this URI to another object. + * + * @param obj the object to be compared. + * @return -1 if the authority components differ (checked first, regardless of + * operand order); otherwise the comparison of the two toString() values + * @throws ClassCastException if the argument is not a URI + */ + public int compareTo(Object obj) throws ClassCastException { + + URI another = (URI) obj; + if (!equals(_authority, another.getRawAuthority())) { + return -1; + } + return toString().compareTo(another.toString()); + } + + // ------------------------------------------------------------------ Clone + + /** + * Create and return a copy of this object, the URI-reference containing + * the userinfo component. Notice that the whole URI-reference including + * the userinfo component could not be gotten as a String. + *

    + * To copy the identical URI object including the userinfo + * component, it should be used. + * + * @return a clone of this instance + */ + public synchronized Object clone() throws CloneNotSupportedException { + + URI instance = (URI) super.clone(); + + instance._uri = _uri; + instance._scheme = _scheme; + instance._opaque = _opaque; + instance._authority = _authority; + instance._userinfo = _userinfo; + instance._host = _host; + instance._port = _port; + instance._path = _path; + instance._query = _query; + instance._fragment = _fragment; + // the charset to do escape encoding for this instance + instance.protocolCharset = protocolCharset; + // flags + instance._is_hier_part = _is_hier_part; + instance._is_opaque_part = _is_opaque_part; + instance._is_net_path = _is_net_path; + instance._is_abs_path = _is_abs_path; + instance._is_rel_path = _is_rel_path; + instance._is_reg_name = _is_reg_name; + instance._is_server = _is_server; + instance._is_hostname = _is_hostname; + instance._is_IPv4address = _is_IPv4address; + instance._is_IPv6reference = _is_IPv6reference; + + return instance; + } + + // ------------------------------------------------------------ Get the URI + + /** + * It can be gotten the URI character sequence. It's raw-escaped. + * For the purpose of the protocol to be transported, it will be useful. + *

    + * It is clearly unwise to use a URL that contains a password which is + * intended to be secret. In particular, the use of a password within + * the 'userinfo' component of a URL is strongly disrecommended except + * in those rare cases where the 'password' parameter is intended to be + * public. + *

    + * When you want to get each part of the userinfo, you need to use the + * specific methods in the specific URL. It depends on the specific URL. + * + * @return the URI character sequence + */ + public char[] getRawURI() { + return _uri; + } + + + /** + * It can be gotten the URI character sequence. It's escaped. + * For the purpose of the protocol to be transported, it will be useful. + * + * @return the escaped URI string + */ + public String getEscapedURI() { + return (_uri == null) ? null : new String(_uri); + } + + + /** + * It can be gotten the URI character sequence. + * + * @return the original URI string + * @throws URIException incomplete trailing escape pattern or unsupported + * character encoding + * @see #decode + */ + public String getURI() throws URIException { + return (_uri == null) ? null : decode(_uri, getProtocolCharset()); + } + + + /** + * Get the URI reference character sequence. + * + * @return the URI reference character sequence + */ + public char[] getRawURIReference() { + if (_fragment == null) { + return _uri; + } + if (_uri == null) { + return _fragment; + } + // if _uri != null && _fragment != null + String uriReference = new String(_uri) + "#" + new String(_fragment); + return uriReference.toCharArray(); + } + + + /** + * Get the escaped URI reference string. + * + * @return the escaped URI reference string + */ + public String getEscapedURIReference() { + char[] uriReference = getRawURIReference(); + return (uriReference == null) ? null : new String(uriReference); + } + + + /** + * Get the original URI reference string. + * + * @return the original URI reference string + * @throws URIException If {@link #decode} fails. + */ + public String getURIReference() throws URIException { + char[] uriReference = getRawURIReference(); + return (uriReference == null) ? null : decode(uriReference, + getProtocolCharset()); + } + + + /** + * Get the escaped URI string. + *

    + * On the document, the URI-reference form is only used without the userinfo + * component like http://jakarta.apache.org/ by the security reason. + * But the URI-reference form with the userinfo component could be parsed. + *

    + * In other words, this URI and any its subclasses must not expose the + * URI-reference expression with the userinfo component like + * http://user:password@hostport/restricted_zone.
    + * It means that the API client programmer should extract each user and + * password to access manually. Probably it will be supported in the each + * subclass, however, not a whole URI-reference expression. + * + * @return the escaped URI string + * @see #clone() + */ + public String toString() { + return getEscapedURI(); + } + + + // ------------------------------------------------------------ Inner class + + /** + * The charset-changed normal operation to represent to be required to + * alert to user the fact the default charset is changed. + */ + public static class DefaultCharsetChanged extends RuntimeException { + + // ------------------------------------------------------- constructors + + /** + * The constructor with a reason string and its code arguments. + * + * @param reasonCode the reason code + * @param reason the reason + */ + public DefaultCharsetChanged(int reasonCode, String reason) { + super(reason); + this.reason = reason; + this.reasonCode = reasonCode; + } + + // ---------------------------------------------------------- constants + + /** No specified reason code. */ + public static final int UNKNOWN = 0; + + /** Protocol charset changed. */ + public static final int PROTOCOL_CHARSET = 1; + + /** Document charset changed. */ + public static final int DOCUMENT_CHARSET = 2; + + // ------------------------------------------------- instance variables + + /** The reason code. */ + private int reasonCode; + + /** The reason message. */ + private String reason; + + // ------------------------------------------------------------ methods + + /** + * Get the reason code. + * + * @return the reason code + */ + public int getReasonCode() { + return reasonCode; + } + + /** + * Get the reason message. + * + * @return the reason message + */ + public String getReason() { + return reason; + } + + } + + + /** + * A mapping to determine the (somewhat arbitrarily) preferred charset for a + * given locale. Supports all locales recognized in JDK 1.1. + *

    + * The distribution of this class is Servlets.com. It was originally + * written by Jason Hunter [jhunter at acm.org] and is used with permission. + */ + public static class LocaleToCharsetMap { + + /** A mapping of language code to charset */ + private static final Hashtable LOCALE_TO_CHARSET_MAP; + static { + LOCALE_TO_CHARSET_MAP = new Hashtable(); + LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6"); + LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7"); + LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8"); + LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS"); + LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR"); + LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2"); + LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2"); + 
LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1"); + LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9"); + LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5"); + LOCALE_TO_CHARSET_MAP.put("zh", "GB2312"); + LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5"); + } + + /** + * Get the preferred charset for the given locale. + * + * @param locale the locale + * @return the preferred charset or null if the locale is not + * recognized. + */ + public static String getCharset(Locale locale) { + // try for an full name match (may include country) + String charset = + (String) LOCALE_TO_CHARSET_MAP.get(locale.toString()); + if (charset != null) { + return charset; + } + + // if a full name didn't match, try just the language + charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage()); + return charset; // may be null + } + + } + +} + diff --git a/src/main/java/org/archive/url/URIException.java b/src/main/java/org/archive/url/URIException.java new file mode 100644 index 00000000..49fa2cb5 --- /dev/null +++ b/src/main/java/org/archive/url/URIException.java @@ -0,0 +1,180 @@ +/* + * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/URIException.java,v 1.12 2004/09/30 18:53:20 olegk Exp $ + * $Revision: 480424 $ + * $Date: 2006-11-29 06:56:49 +0100 (Wed, 29 Nov 2006) $ + * + * ==================================================================== + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.archive.url; + +import java.io.IOException; + +/** + * The URI parsing and escape encoding exception. + * + * @author Sung-Gu + * @author Oleg Kalnichevski + * @version $Revision: 480424 $ $Date: 2002/03/14 15:14:01 + */ +public class URIException extends IOException { + + // ----------------------------------------------------------- constructors + + /** + * Default constructor. + */ + public URIException() { + } + + + /** + * The constructor with a reason code argument. + * + * @param reasonCode the reason code + */ + public URIException(int reasonCode) { + this.reasonCode = reasonCode; + } + + + /** + * The constructor with a reason string and its code arguments. + * + * @param reasonCode the reason code + * @param reason the reason + */ + public URIException(int reasonCode, String reason) { + super(reason); // for backward compatibility of Throwable + this.reason = reason; + this.reasonCode = reasonCode; + } + + + /** + * The constructor with a reason string argument. 
+ * + * @param reason the reason + */ + public URIException(String reason) { + super(reason); // for backward compatibility of Throwable + this.reason = reason; + this.reasonCode = UNKNOWN; + } + + // -------------------------------------------------------------- constants + + /** + * No specified reason code. + */ + public static final int UNKNOWN = 0; + + + /** + * The URI parsing error. + */ + public static final int PARSING = 1; + + + /** + * The unsupported character encoding. + */ + public static final int UNSUPPORTED_ENCODING = 2; + + + /** + * The URI escape encoding and decoding error. + */ + public static final int ESCAPING = 3; + + + /** + * The DNS punycode encoding or decoding error. + */ + public static final int PUNYCODE = 4; + + // ------------------------------------------------------------- properties + + /** + * The reason code. + */ + protected int reasonCode; + + + /** + * The reason message. + */ + protected String reason; + + // ---------------------------------------------------------------- methods + + /** + * Get the reason code. + * + * @return the reason code + */ + public int getReasonCode() { + return reasonCode; + } + + /** + * Set the reason code. + * + * @param reasonCode the reason code + * + * @deprecated Callers should set the reason code as a parameter to the + * constructor. + */ + public void setReasonCode(int reasonCode) { + this.reasonCode = reasonCode; + } + + + /** + * Get the reason message. + * + * @return the reason message + * + * @deprecated You should instead call {@link #getMessage()}. + */ + public String getReason() { + return reason; + } + + + /** + * Set the reason message. + * + * @param reason the reason message + * + * @deprecated Callers should instead set this via a parameter to the constructor. 
+ */ + public void setReason(String reason) { + this.reason = reason; + } + + +} + diff --git a/src/main/java/org/archive/url/URLParser.java b/src/main/java/org/archive/url/URLParser.java index 98e4c1aa..bcd0b7fb 100644 --- a/src/main/java/org/archive/url/URLParser.java +++ b/src/main/java/org/archive/url/URLParser.java @@ -30,7 +30,7 @@ public class URLParser { * The numbers in the second line above are only to assist readability; * they indicate the reference points for each subexpression (i.e., each * paired parenthesis). We refer to the value matched for subexpression - * as $. For example, matching the above expression to + * <n> as $<n>. For example, matching the above expression to * * http://www.ics.uci.edu/pub/ietf/uri/#Related * @@ -41,12 +41,12 @@ public class URLParser { * $3 = //www.ics.uci.edu * $4 = www.ics.uci.edu * $5 = /pub/ietf/uri/ - * $6 = - * $7 = + * $6 = <undefined> + * $7 = <undefined> * $8 = #Related * $9 = Related * - * where indicates that the component is not present, as is + * where <undefined> indicates that the component is not present, as is * the case for the query component in the above example. 
Therefore, we * can determine the value of the four components and fragment as * @@ -226,7 +226,16 @@ public static HandyURL parse(String urlString) throws URISyntaxException { String colonPort = null; int atIndex = uriAuthority.indexOf(COMMERCIAL_AT); - int portColonIndex = uriAuthority.indexOf(COLON,(atIndex<0)?0:atIndex); + int portColonIndex = -1; + int startColonIndex = 0; + if (atIndex > -1) { + startColonIndex = atIndex + 1; + } + if (uriAuthority.startsWith("[", startColonIndex)) { + // IPv6 address: the host starts after any userinfo '@'; skip the colons inside "[...]" (an unclosed '[' leaves the start unchanged) + startColonIndex = Math.max(startColonIndex, uriAuthority.indexOf(']', (startColonIndex + 1))); + } + portColonIndex = uriAuthority.indexOf(COLON, startColonIndex); if(atIndex<0 && portColonIndex<0) { // most common case: neither userinfo nor port @@ -246,16 +255,20 @@ public static HandyURL parse(String urlString) throws URISyntaxException { colonPort = uriAuthority.substring(portColonIndex); } if(colonPort != null) { - if(colonPort.startsWith(":")) { - try { - port = Integer.parseInt(colonPort.substring(1)); - } catch(NumberFormatException e) { - throw new URISyntaxException(urlString, "bad port " - + colonPort.substring(1)); - } - } else { - // XXX: what's happened?! - } + if(colonPort.startsWith(":")) { + if (colonPort.length() == 1) { + // a bare colon (http://example.com:/), use default port + } else { + try { + port = Integer.parseInt(colonPort.substring(1)); + } catch(NumberFormatException e) { + throw new URISyntaxException(urlString, "bad port " + + colonPort.substring(1)); + } + } + } else { + // XXX: what's happened?! 
+ } } if(userInfo != null) { int passColonIndex = userInfo.indexOf(COLON); diff --git a/src/main/java/org/archive/url/URLRegexTransformer.java b/src/main/java/org/archive/url/URLRegexTransformer.java index 930f5b34..182eb218 100644 --- a/src/main/java/org/archive/url/URLRegexTransformer.java +++ b/src/main/java/org/archive/url/URLRegexTransformer.java @@ -1,5 +1,6 @@ package org.archive.url; +import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -16,18 +17,18 @@ public class URLRegexTransformer { private static final OptimizedPattern QUERY_OPTS[] = { - new OptimizedPattern("(?i)^(.+)(?:jsessionid=[0-9a-zA-Z]{32})(?:&(.*))?$", "jsessionid=", 1, 2), - new OptimizedPattern("(?i)^(.+)(?:phpsessid=[0-9a-zA-Z]{32})(?:&(.*))?$", "phpsessid=", 1, 2), - new OptimizedPattern("(?i)^(.+)(?:sid=[0-9a-zA-Z]{32})(?:&(.*))?$", "sid=", 1, 2), - new OptimizedPattern("(?i)^(.+)(?:ASPSESSIONID[a-zA-Z]{8}=[a-zA-Z]{24})(?:&(.*))?$", "aspsessionid", 1, 2), - new OptimizedPattern("(?i)^(.+)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", "cftoken=", 1, 2), + new OptimizedPattern("(?i)^(.*)(?:jsessionid=[0-9a-zA-Z]{32})(?:&(.*))?$", "jsessionid=", 1, 2), + new OptimizedPattern("(?i)^(.*)(?:phpsessid=[0-9a-zA-Z]{32})(?:&(.*))?$", "phpsessid=", 1, 2), + new OptimizedPattern("(?i)^(.*)(?:sid=[0-9a-zA-Z]{32})(?:&(.*))?$", "sid=", 1, 2), + new OptimizedPattern("(?i)^(.*)(?:ASPSESSIONID[a-zA-Z]{8}=[a-zA-Z]{24})(?:&(.*))?$", "aspsessionid", 1, 2), + new OptimizedPattern("(?i)^(.*)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", "cftoken=", 1, 2), }; public static String stripOpts(String orig, OptimizedPattern op[]) { - String origLC = orig.toLowerCase(); + String origLC = orig.toLowerCase(Locale.ROOT); StringBuilder sb = null; int i = 0; int max = op.length; @@ -101,7 +102,7 @@ public static String hostToPublicSuffix(String host) { InternetDomainName idn; try { - idn = InternetDomainName.fromLenient(host); + idn = InternetDomainName.from(host); } 
catch(IllegalArgumentException e) { return host; } @@ -109,7 +110,7 @@ public static String hostToPublicSuffix(String host) { if(tmp == null) { return host; } - String pubSuff = tmp.name(); + String pubSuff = tmp.toString(); int idx = host.lastIndexOf(".", host.length() - (pubSuff.length()+2)); if(idx == -1) { return host; @@ -121,6 +122,10 @@ public static String hostToSURT(String host) { // TODO: ensure we DONT reverse IP addresses! String parts[] = host.split("\\.",-1); if(parts.length == 1) { + // strip enclosing "[" and "]" from IPv6 hosts; the length guard keeps an empty host (which also splits into one part) from hitting charAt(0) + if (host.length() > 1 && host.charAt(0) == '[' && host.charAt(host.length() - 1) == ']') { + return host.substring(1, host.length() - 1); + } return host; } StringBuilder sb = new StringBuilder(host.length()); diff --git a/src/main/java/org/archive/url/UsableURI.java b/src/main/java/org/archive/url/UsableURI.java index b9c4ff9d..b7d0cf71 100644 --- a/src/main/java/org/archive/url/UsableURI.java +++ b/src/main/java/org/archive/url/UsableURI.java @@ -18,6 +18,7 @@ */ package org.archive.url; +import gnu.inet.encoding.IDNA; import java.io.File; import java.io.IOException; import java.io.ObjectOutputStream; @@ -25,14 +26,13 @@ import java.net.URI; import java.net.URISyntaxException; -import org.apache.commons.httpclient.URIException; import org.archive.util.SURT; import org.archive.util.TextUtils; /** * Usable URI. * - * This class wraps {@link org.apache.commons.httpclient.URI} adding caching + * This class wraps {@link org.archive.url.URI} adding caching * and methods. It cannot be instantiated directly. Go via UURIFactory. * *

    We used to use {@link java.net.URI} for parsing URIs but ran across @@ -49,7 +49,7 @@ * @author gojomo * @author stack * - * @see org.apache.commons.httpclient.URI + * @see org.archive.url.URI */ public class UsableURI extends LaxURI implements CharSequence, Serializable { @@ -120,7 +120,6 @@ protected UsableURI() { * @param uri String representation of an absolute URI. * @param escaped If escaped. * @param charset Charset to use. - * @throws org.apache.commons.httpclient.URIException */ protected UsableURI(String uri, boolean escaped, String charset) throws URIException { @@ -131,7 +130,6 @@ protected UsableURI(String uri, boolean escaped, String charset) /** * @param relative String representation of URI. * @param base Parent UURI to use derelativizing. - * @throws org.apache.commons.httpclient.URIException */ protected UsableURI(UsableURI base, UsableURI relative) throws URIException { super(base, relative); @@ -271,6 +269,55 @@ public String toString() { return toCustomString(); } + /** + * In the case of a puny encoded IDN, this method returns the decoded Unicode version. + *

    + * Most of this implementation is copied from {@link org.archive.url.URI#setURI()}. + * + * @return decoded IDN version of URI + */ + public String toUnicodeHostString() { + if (!_is_hostname) { + return toString(); + } + + try { + StringBuilder buf = new StringBuilder(); + + if (_scheme != null) { + buf.append(_scheme); + buf.append(':'); + } + if (_is_net_path) { + buf.append("//"); + if (_authority != null) { // has_authority + if (_userinfo != null) { + buf.append(_userinfo).append('@'); + } + buf.append(IDNA.toUnicode(getHost())); + if (_port >= 0) { + buf.append(':').append(_port); + } + } + } + if (_opaque != null && _is_opaque_part) { + buf.append(_opaque); + } else if (_path != null) { + // _is_hier_part or _is_relativeURI + if (_path.length != 0) { + buf.append(_path); + } + } + if (_query != null) { // has_query + buf.append('?'); + buf.append(_query); + } + return buf.toString(); + } catch (URIException ex) { + throw new RuntimeException(ex); + } + } + public synchronized String getEscapedURI() { if (this.cachedEscapedURI == null) { this.cachedEscapedURI = super.getEscapedURI(); diff --git a/src/main/java/org/archive/url/UsableURIFactory.java b/src/main/java/org/archive/url/UsableURIFactory.java index 9118b850..3038ada5 100644 --- a/src/main/java/org/archive/url/UsableURIFactory.java +++ b/src/main/java/org/archive/url/UsableURIFactory.java @@ -20,17 +20,15 @@ import gnu.inet.encoding.IDNA; import gnu.inet.encoding.IDNAException; -import it.unimi.dsi.lang.MutableString; import java.io.UnsupportedEncodingException; import java.util.BitSet; +import java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.httpclient.URI; -import org.apache.commons.httpclient.URIException; import org.archive.util.TextUtils; /** @@ -49,8 +47,8 @@ * @author stack */ public class UsableURIFactory extends URI { - - private static final long serialVersionUID 
= -6146295130382209042L; + + private static final long serialVersionUID = 2L; /** * Logging instance. @@ -88,7 +86,7 @@ public class UsableURIFactory extends URI { * The numbers in the second line above are only to assist readability; * they indicate the reference points for each subexpression (i.e., each * paired parenthesis). We refer to the value matched for subexpression - * as $. For example, matching the above expression to + * <n> as $<n>. For example, matching the above expression to * * http://www.ics.uci.edu/pub/ietf/uri/#Related * @@ -99,12 +97,12 @@ public class UsableURIFactory extends URI { * $3 = //www.ics.uci.edu * $4 = www.ics.uci.edu * $5 = /pub/ietf/uri/ - * $6 = - * $7 = + * $6 = <undefined> + * $7 = <undefined> * $8 = #Related * $9 = Related * - * where indicates that the component is not present, as is + * where <undefined> indicates that the component is not present, as is * the case for the query component in the above example. Therefore, we * can determine the value of the four components and fragment as * @@ -340,9 +338,7 @@ protected UsableURI validityCheck(UsableURI uuri) throws URIException { * * @param uri URI as string. * @param base May be null. - * @param e True if the uri is already escaped. * @return A fixed up URI string. - * @throws URIException */ private String fixup(String uri, final URI base, final String charset) throws URIException { @@ -395,9 +391,6 @@ private String fixup(String uri, final URI base, final String charset) } TextUtils.recycleMatcher(matcher); - // now, minimally escape any whitespace - uri = escapeWhitespace(uri); - // For further processing, get uri elements. See the RFC2396REGEX // comment above for explanation of group indices used in the below. // matcher = RFC2396REGEX.matcher(uri); @@ -490,7 +483,7 @@ private String fixup(String uri, final URI base, final String charset) // Preallocate. The '1's and '2's in below are space for ':', // '//', etc. URI characters. 
- MutableString s = new MutableString( + StringBuilder s = new StringBuilder( ((uriScheme != null)? uriScheme.length(): 0) + 1 // ';' + ((uriAuthority != null)? uriAuthority.length(): 0) @@ -507,8 +500,8 @@ private String fixup(String uri, final URI base, final String charset) /** * If http(s) scheme, check scheme specific part begins '//'. - * @throws URIException - * @see http://www.faqs.org/rfcs/rfc1738.html Section 3.1. Common Internet + * @throws URIException + * @see Section 3.1. Common Internet * Scheme Syntax */ protected void checkHttpSchemeSpecificPartSlashPrefix(final URI base, @@ -617,7 +610,7 @@ private String fixupDomainlabel(String label) throw ue; } } - label = label.toLowerCase(); + label = label.toLowerCase(Locale.ROOT); return label; } @@ -663,51 +656,6 @@ private String ensureMinimalEscaping(String u, final String charset, return u; } - /** - * Escape any whitespace found. - * - * The parent class takes care of the bulk of escaping. But if any - * instance of escaping is found in the URI, then we ask for parent - * to do NO escaping. Here we escape any whitespace found irrespective - * of whether the uri has already been escaped. We do this for - * case where uri has been judged already-escaped only, its been - * incompletly done and whitespace remains. Spaces, etc., in the URI are - * a real pain. Their presence will break log file and ARC parsing. - * @param uri URI string to check. - * @return uri with spaces escaped if any found. - */ - protected String escapeWhitespace(String uri) { - // Just write a new string anyways. The perl '\s' is not - // as inclusive as the Character.isWhitespace so there are - // whitespace characters we could miss. So, rather than - // write some awkward regex, just go through the string - // a character at a time. Only create buffer first time - // we find a space. 
- MutableString buffer = null; - for (int i = 0; i < uri.length(); i++) { - char c = uri.charAt(i); - if (Character.isWhitespace(c)) { - if (buffer == null) { - buffer = new MutableString(uri.length() + - 2 /*If space, two extra characters (at least)*/); - buffer.append(uri.substring(0, i)); - } - buffer.append("%"); - String hexStr = Integer.toHexString(c); - if ((hexStr.length() % 2) > 0) { - buffer.append("0"); - } - buffer.append(hexStr); - - } else { - if (buffer != null) { - buffer.append(c); - } - } - } - return (buffer != null)? buffer.toString(): uri; - } - /** * Check port on passed http authority. Make sure the size is not larger * than allowed: See the 'port' definition on this @@ -757,7 +705,7 @@ private String checkPort(String uriAuthority) * @param substr Suffix or prefix to use if str is not null. * @param suffix True if substr is a suffix. */ - private void appendNonNull(MutableString b, String str, String substr, + private void appendNonNull(StringBuilder b, String str, String substr, boolean suffix) { if (str != null && str.length() > 0) { if (!suffix) { @@ -808,6 +756,6 @@ private String checkUriElement(String element) { */ private String checkUriElementAndLowerCase(String element) { String tmp = checkUriElement(element); - return (tmp != null)? tmp.toLowerCase(): tmp; + return (tmp != null)? 
tmp.toLowerCase(Locale.ROOT): tmp; } } diff --git a/src/main/java/org/archive/url/WaybackURLKeyMaker.java b/src/main/java/org/archive/url/WaybackURLKeyMaker.java index 99fb92e9..56f51b49 100644 --- a/src/main/java/org/archive/url/WaybackURLKeyMaker.java +++ b/src/main/java/org/archive/url/WaybackURLKeyMaker.java @@ -5,7 +5,7 @@ public class WaybackURLKeyMaker implements URLKeyMaker { // URLCanonicalizer canonicalizer = new NonMassagingIAURLCanonicalizer(); - URLCanonicalizer canonicalizer = new DefaultIAURLCanonicalizer(); + URLCanonicalizer canonicalizer = new AggressiveIAURLCanonicalizer(); public URLCanonicalizer getCanonicalizer() { return canonicalizer; diff --git a/src/main/java/org/archive/util/ArchiveUtils.java b/src/main/java/org/archive/util/ArchiveUtils.java index c41c0bc0..cce411df 100644 --- a/src/main/java/org/archive/util/ArchiveUtils.java +++ b/src/main/java/org/archive/util/ArchiveUtils.java @@ -49,10 +49,12 @@ import org.archive.format.gzip.GZIPDecoder; import org.archive.format.gzip.GZIPFormatException; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Miscellaneous useful methods. 
* - * @author gojomo & others + * @author gojomo & others */ public class ArchiveUtils { private static final Logger LOGGER = Logger.getLogger(ArchiveUtils.class.getName()); @@ -104,7 +106,7 @@ public class ArchiveUtils { private static ThreadLocal threadLocalDateFormat(final String pattern) { ThreadLocal tl = new ThreadLocal() { protected SimpleDateFormat initialValue() { - SimpleDateFormat df = new SimpleDateFormat(pattern); + SimpleDateFormat df = new SimpleDateFormat(pattern, Locale.ENGLISH); df.setTimeZone(TimeZone.getTimeZone("GMT")); return df; } @@ -317,11 +319,6 @@ public static String get12DigitDate(Date d) { /** * A version of getDate which returns the default instead of throwing an exception if parsing fails - * - * @param d - * @param defaultDate - * @return - * @throws ParseException */ public static Date getDate(String d, Date defaultDate) { @@ -337,14 +334,13 @@ public static Date getDate(String d, Date defaultDate) } /** - * Parses an ARC-style date. If passed String is < 12 characters in length, - * we pad. At a minimum, String should contain a year (>=4 characters). + * Parses an ARC-style date. If passed String is < 12 characters in length, + * we pad. At a minimum, String should contain a year (>=4 characters). * Parse will also fail if day or month are incompletely specified. Depends * on the above getXXDigitDate methods. - * @param A 4-17 digit date in ARC style (yyyy to + * @param d A 4-17 digit date in ARC style (yyyy to * yyyyMMddHHmmssSSS) formatting. * @return A Date object representing the passed String. 
- * @throws ParseException */ public static Date getDate(String d) throws ParseException { Date date = null; @@ -393,9 +389,9 @@ public static Date getDate(String d) throws ParseException { } final static SimpleDateFormat dateToTimestampFormats[] = - {new SimpleDateFormat("MM/dd/yyyy"), - new SimpleDateFormat("MM/yyyy"), - new SimpleDateFormat("yyyy")}; + {new SimpleDateFormat("MM/dd/yyyy", Locale.ENGLISH), + new SimpleDateFormat("MM/yyyy", Locale.ENGLISH), + new SimpleDateFormat("yyyy", Locale.ENGLISH)}; /** * Convert a user-entered date into a timestamp @@ -605,7 +601,7 @@ public static boolean byteArrayEquals(final byte[] lhs, final byte[] rhs) { /** * Converts a double to a string. * @param val The double to convert - * @param precision How many characters to include after '.' + * @param maxFractionDigits How many characters to include after '.' * @return the double as a string. */ public static String doubleToString(double val, int maxFractionDigits){ @@ -628,7 +624,7 @@ public static String doubleToString(double val, int maxFractionDigits, int minFr * Takes a byte size and formats it for display with 'friendly' units. *

    * This involves converting it to the largest unit - * (of B, KiB, MiB, GiB, TiB) for which the amount will be > 1. + * (of B, KiB, MiB, GiB, TiB) for which the amount will be > 1. *

    * Additionally, at least 2 significant digits are always displayed. *

    @@ -807,7 +803,6 @@ public static String prettyString(Object obj) { /** * Provide a improved String of a Map's entries * - * @param Map * @return prettified (in curly brackets) string of Map contents */ public static String prettyString(Map map) { @@ -830,7 +825,6 @@ public static String prettyString(Map map) { /** * Provide a slightly-improved String of Object[] * - * @param Object[] * @return prettified (in square brackets) of Object[] */ public static String prettyString(Object[] array) { @@ -859,7 +853,7 @@ private static String loadVersion() { BufferedReader br = null; String version; try { - br = new BufferedReader(new InputStreamReader(input)); + br = new BufferedReader(new InputStreamReader(input, UTF_8)); version = br.readLine(); br.readLine(); } catch (IOException e) { @@ -881,7 +875,7 @@ private static String loadVersion() { br = null; String timestamp; try { - br = new BufferedReader(new InputStreamReader(input)); + br = new BufferedReader(new InputStreamReader(input, UTF_8)); timestamp = br.readLine(); } catch (IOException e) { return version; @@ -902,13 +896,13 @@ private static String loadVersion() { TLDS = new HashSet(); InputStream is = ArchiveUtils.class.getResourceAsStream("tlds-alpha-by-domain.txt"); try { - BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + BufferedReader reader = new BufferedReader(new InputStreamReader(is, UTF_8)); String line; while((line = reader.readLine())!=null) { if (line.startsWith("#")) { continue; } - TLDS.add(line.trim().toLowerCase()); + TLDS.add(line.trim().toLowerCase(Locale.ROOT)); } } catch (Exception e) { LOGGER.log(Level.SEVERE,"TLD list unavailable",e); @@ -925,7 +919,7 @@ private static String loadVersion() { * @return boolean true if recognized as TLD */ public static boolean isTld(String dom) { - return TLDS.contains(dom.toLowerCase()); + return TLDS.contains(dom.toLowerCase(Locale.ROOT)); } public static void closeQuietly(Object input) { @@ -989,12 +983,12 @@ public static int 
readFully(InputStream input, byte[] buf) */ public static BufferedReader getBufferedReader(File source) throws IOException { InputStream is = new BufferedInputStream(new FileInputStream(source)); - boolean isGzipped = source.getName().toLowerCase(). + boolean isGzipped = source.getName().toLowerCase(Locale.ROOT). endsWith(GZIP_SUFFIX); if(isGzipped) { is = new GZIPInputStream(is); } - return new BufferedReader(new InputStreamReader(is)); + return new BufferedReader(new InputStreamReader(is, UTF_8)); } /** @@ -1010,8 +1004,8 @@ public static BufferedReader getBufferedReader(URL source) throws IOException { || conn.getContentEncoding() != null && conn.getContentEncoding().equalsIgnoreCase("gzip"); InputStream uis = conn.getInputStream(); return new BufferedReader(isGzipped? - new InputStreamReader(new GZIPInputStream(uis)): - new InputStreamReader(uis)); + new InputStreamReader(new GZIPInputStream(uis), UTF_8): + new InputStreamReader(uis, UTF_8)); } /** diff --git a/src/main/java/org/archive/util/ByteOp.java b/src/main/java/org/archive/util/ByteOp.java index c5a245c9..74a52f15 100755 --- a/src/main/java/org/archive/util/ByteOp.java +++ b/src/main/java/org/archive/util/ByteOp.java @@ -178,7 +178,7 @@ public static byte[] readNBytes(InputStream is, int n) * @return array of bytes read, INCLUDING TRAILING NULL * @throws IOException if the underlying stream throws on, OR if the default * maximum buffer size is reached before a null byte is found - * @throws ShortByteReadException if EOF is encountered before a null byte + * @throws EOFException if EOF is encountered before a null byte */ public static byte[] readToNull(InputStream is) throws IOException { return readToNull(is,MAX_READ_SIZE); @@ -191,7 +191,7 @@ public static byte[] readToNull(InputStream is) throws IOException { * @return array of bytes read, INCLUDING TRAILING NULL * @throws IOException if the underlying stream throws on, OR if the * specified maximum buffer size is reached before a null byte is found 
- * @throws ShortByteReadException if EOF is encountered before a null byte + * @throws EOFException if EOF is encountered before a null byte */ public static byte[] readToNull(InputStream is, int maxSize) throws IOException { @@ -237,7 +237,7 @@ public static String drawHex(byte[] b, int bytesPerRow) { return drawHex(b,0,b.length,bytesPerRow); } public static String drawHex(byte[] b, int offset, int length, int bytesPerRow) { - int rows = (int) Math.ceil(length / bytesPerRow); + int rows = (int) Math.ceil((double) length / bytesPerRow); if(rows == 0) { rows = 1; } diff --git a/src/main/java/org/archive/util/ChunkedInputStream.java b/src/main/java/org/archive/util/ChunkedInputStream.java new file mode 100644 index 00000000..b6a604c8 --- /dev/null +++ b/src/main/java/org/archive/util/ChunkedInputStream.java @@ -0,0 +1,323 @@ +/* + * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/ChunkedInputStream.java,v 1.24 2004/10/10 15:18:55 olegk Exp $ + * $Revision: 480424 $ + * $Date: 2006-11-29 06:56:49 +0100 (Wed, 29 Nov 2006) $ + * + * ==================================================================== + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.archive.util; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; + + +/** + *

    Transparently coalesces chunks of an HTTP stream that uses + * Transfer-Encoding chunked.

    + * + *

    Note that this class NEVER closes the underlying stream, even when close + * gets called. Instead, it will read until the "end" of its chunking on close, + * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while + * not requiring the client to remember to read the entire contents of the + * response.

    + * + * @author Ortwin Glueck + * @author Sean C. Sullivan + * @author Martin Elwin + * @author Eric Johnson + * @author Mike Bowler + * @author Michael Becke + * @author Oleg Kalnichevski + * + * @since 2.0 + * + */ +class ChunkedInputStream extends InputStream { + /** The inputstream that we're wrapping */ + private InputStream in; + + /** The chunk size */ + private int chunkSize; + + /** The current position within the current chunk */ + private int pos; + + /** True if we'are at the beginning of stream */ + private boolean bof = true; + + /** True if we've reached the end of stream */ + private boolean eof = false; + + /** True if this stream is closed */ + private boolean closed = false; + + /** + * ChunkedInputStream constructor + * + * @param in the raw input stream + * + */ + public ChunkedInputStream(final InputStream in) { + + if (in == null) { + throw new IllegalArgumentException("InputStream parameter may not be null"); + } + this.in = in; + this.pos = 0; + } + + /** + *

    Returns all the data in a chunked stream in coalesced form. A chunk + * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0 + * is detected.

    + * + *

    Trailer headers are read automatically at the end of the stream and + * then discarded; this class offers no accessor for them.

    + * + * @return -1 of the end of the stream has been reached or the next data + * byte + * @throws IOException If an IO problem occurs + */ + public int read() throws IOException { + + if (closed) { + throw new IOException("Attempted read from closed stream."); + } + if (eof) { + return -1; + } + if (pos >= chunkSize) { + nextChunk(); + if (eof) { + return -1; + } + } + pos++; + return in.read(); + } + + /** + * Read some bytes from the stream. + * @param b The byte array that will hold the contents from the stream. + * @param off The offset into the byte array at which bytes will start to be + * placed. + * @param len the maximum number of bytes that can be returned. + * @return The number of bytes returned or -1 if the end of stream has been + * reached. + * @see InputStream#read(byte[], int, int) + * @throws IOException if an IO problem occurs. + */ + public int read (byte[] b, int off, int len) throws IOException { + + if (closed) { + throw new IOException("Attempted read from closed stream."); + } + + if (eof) { + return -1; + } + if (pos >= chunkSize) { + nextChunk(); + if (eof) { + return -1; + } + } + len = Math.min(len, chunkSize - pos); + int count = in.read(b, off, len); + pos += count; + return count; + } + + /** + * Read some bytes from the stream. + * @param b The byte array that will hold the contents from the stream. + * @return The number of bytes returned or -1 if the end of stream has been + * reached. + * @see InputStream#read(byte[]) + * @throws IOException if an IO problem occurs. + */ + public int read (byte[] b) throws IOException { + return read(b, 0, b.length); + } + + /** + * Read the CRLF terminator. + * @throws IOException If an IO error occurs. + */ + private void readCRLF() throws IOException { + int cr = in.read(); + int lf = in.read(); + if ((cr != '\r') || (lf != '\n')) { + throw new IOException( + "CRLF expected at end of chunk: " + cr + "/" + lf); + } + } + + + /** + * Read the next chunk. 
+ * @throws IOException If an IO error occurs. + */ + private void nextChunk() throws IOException { + if (!bof) { + readCRLF(); + } + chunkSize = getChunkSizeFromInputStream(in); + bof = false; + pos = 0; + if (chunkSize == 0) { + eof = true; + parseTrailerHeaders(); + } + } + + /** + * Expects the stream to start with a chunksize in hex with optional + * comments after a semicolon. The line must end with a CRLF: "a3; some + * comment\r\n" Positions the stream at the start of the next line. + * + * @param in The new input stream. + * + * @return the chunk size as integer + * + * @throws IOException when the chunk size could not be parsed + */ + private static int getChunkSizeFromInputStream(final InputStream in) + throws IOException { + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + // States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end + int state = 0; + while (state != -1) { + int b = in.read(); + if (b == -1) { + throw new IOException("chunked stream ended unexpectedly"); + } + switch (state) { + case 0: + switch (b) { + case '\r': + state = 1; + break; + case '\"': + state = 2; + /* fall through */ + default: + baos.write(b); + } + break; + + case 1: + if (b == '\n') { + state = -1; + } else { + // this was not CRLF + throw new IOException("Protocol violation: Unexpected" + + " single newline character in chunk size"); + } + break; + + case 2: + switch (b) { + case '\\': + b = in.read(); + baos.write(b); + break; + case '\"': + state = 0; + /* fall through */ + default: + baos.write(b); + } + break; + default: throw new RuntimeException("assertion failed"); + } + } + + //parse data + String dataString = baos.toString(StandardCharsets.US_ASCII.name()); + int separator = dataString.indexOf(';'); + dataString = (separator > 0) + ? 
dataString.substring(0, separator).trim() + : dataString.trim(); + + int result; + try { + result = Integer.parseInt(dataString.trim(), 16); + } catch (NumberFormatException e) { + throw new IOException ("Bad chunk size: " + dataString); + } + return result; + } + + /** + * Reads and stores the Trailer headers. + * @throws IOException If an IO problem occurs + */ + private void parseTrailerHeaders() throws IOException { + LaxHttpParser.parseHeaders(in, StandardCharsets.US_ASCII.name()); + } + + /** + * Upon close, this reads the remainder of the chunked message, + * leaving the underlying socket at a position to start reading the + * next response without scanning. + * @throws IOException If an IO problem occurs. + */ + public void close() throws IOException { + if (!closed) { + try { + if (!eof) { + exhaustInputStream(this); + } + } finally { + eof = true; + closed = true; + } + } + } + + /** + * Exhaust an input stream, reading until EOF has been encountered. + * + *

    Note that this function is intended as a non-public utility. + * This is a little weird, but it seemed silly to make a utility + * class for this one function, so instead it is just static and + * shared that way.

    + * + * @param inStream The {@link InputStream} to exhaust. + * @throws IOException If an IO problem occurs + */ + static void exhaustInputStream(InputStream inStream) throws IOException { + // read and discard the remainder of the message + byte buffer[] = new byte[1024]; + while (inStream.read(buffer) >= 0) { + ; + } + } +} diff --git a/src/main/java/org/archive/util/DateUtils.java b/src/main/java/org/archive/util/DateUtils.java index e7fe78b7..7d6a7c98 100755 --- a/src/main/java/org/archive/util/DateUtils.java +++ b/src/main/java/org/archive/util/DateUtils.java @@ -65,7 +65,7 @@ public class DateUtils { private static ThreadLocal threadLocalDateFormat(final String pattern) { ThreadLocal tl = new ThreadLocal() { protected SimpleDateFormat initialValue() { - SimpleDateFormat df = new SimpleDateFormat(pattern); + SimpleDateFormat df = new SimpleDateFormat(pattern, Locale.ENGLISH); df.setTimeZone(TimeZone.getTimeZone("GMT")); return df; } @@ -165,7 +165,7 @@ public static String get14DigitDate(long date){ * in the format yyyyMMddHHmmss. * Date stamps are in the UTC time zone * - * @param date Date for timestamp + * @param d Date for timestamp * @return the date stamp */ public static String get14DigitDate(Date d) { @@ -203,7 +203,7 @@ public static String get12DigitDate(long date){ * in the format yyyyMMddHHmm. * Date stamps are in the UTC time zone * - * @param date Date object to format + * @param d Date object to format * @return the date stamp */ public static String get12DigitDate(Date d) { @@ -301,11 +301,11 @@ public static String getLog14Date(Date date){ */ /** - * Parses an ARC-style date. If passed String is < 12 characters in length, - * we pad. At a minimum, String should contain a year (>=4 characters). + * Parses an ARC-style date. If passed String is < 12 characters in length, + * we pad. At a minimum, String should contain a year (>=4 characters). * Parse will also fail if day or month are incompletely specified. 
Depends * on the above getXXDigitDate methods. - * @param A 4-17 digit date in ARC style (yyyy to + * @param d A 4-17 digit date in ARC style (yyyy to * yyyyMMddHHmmssSSS) formatting. * @return A Date object representing the passed String. * @throws ParseException @@ -539,7 +539,7 @@ public static boolean byteArrayEquals(final byte[] lhs, final byte[] rhs) { /** * Converts a double to a string. * @param val The double to convert - * @param precision How many characters to include after '.' + * @param maxFractionDigits How many characters to include after '.' * @return the double as a string. */ public static String doubleToString(double val, int maxFractionDigits){ @@ -557,7 +557,7 @@ private static String doubleToString(double val, int maxFractionDigits, int minF * Takes a byte size and formats it for display with 'friendly' units. *

    * This involves converting it to the largest unit - * (of B, KiB, MiB, GiB, TiB) for which the amount will be > 1. + * (of B, KiB, MiB, GiB, TiB) for which the amount will be > 1. *

    * Additionally, at least 2 significant digits are always displayed. *

    diff --git a/src/main/java/org/archive/util/DevUtils.java b/src/main/java/org/archive/util/DevUtils.java index d630a0b1..7ee4b13a 100644 --- a/src/main/java/org/archive/util/DevUtils.java +++ b/src/main/java/org/archive/util/DevUtils.java @@ -25,6 +25,7 @@ import java.io.StringWriter; import java.util.logging.Logger; +import static java.nio.charset.StandardCharsets.UTF_8; /** * Write a message and stack trace to the 'org.archive.util.DevUtils' logger. @@ -78,15 +79,6 @@ public static String extraInfo() { return sw.toString(); } - /** - * Nothing to see here, move along. - * @deprecated This method was never used. - */ - @Deprecated - public static void betterPrintStack(RuntimeException re) { - re.printStackTrace(System.err); - } - /** * Send this JVM process a SIGQUIT; giving a thread dump and possibly * a heap histogram (if using -XX:+PrintClassHistogram). @@ -101,7 +93,7 @@ public static void sigquitSelf() { Process p = Runtime.getRuntime().exec( new String[] {"perl", "-e", "print getppid(). 
\"\n\";"}); BufferedReader br = - new BufferedReader(new InputStreamReader(p.getInputStream())); + new BufferedReader(new InputStreamReader(p.getInputStream(), UTF_8)); String ppid = br.readLine(); Runtime.getRuntime().exec( new String[] {"sh", "-c", "kill -3 "+ppid}).waitFor(); diff --git a/src/main/java/org/archive/util/FileNameSpec.java b/src/main/java/org/archive/util/FileNameSpec.java index a3312cfc..7ace8b59 100644 --- a/src/main/java/org/archive/util/FileNameSpec.java +++ b/src/main/java/org/archive/util/FileNameSpec.java @@ -1,5 +1,6 @@ package org.archive.util; +import java.util.Locale; import java.util.concurrent.atomic.AtomicInteger; public class FileNameSpec { @@ -15,7 +16,7 @@ public FileNameSpec(String prefix, String suffix) { public String getNextName() { StringBuilder sb = new StringBuilder(); sb.append(prefix); - sb.append(String.format("%06d",aInt.incrementAndGet())); + sb.append(String.format(Locale.ROOT, "%06d",aInt.incrementAndGet())); sb.append(suffix); return sb.toString(); } diff --git a/src/main/java/org/archive/util/FileUtils.java b/src/main/java/org/archive/util/FileUtils.java index 3de276a9..271d0212 100644 --- a/src/main/java/org/archive/util/FileUtils.java +++ b/src/main/java/org/archive/util/FileUtils.java @@ -32,6 +32,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Locale; import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; @@ -39,13 +40,13 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.io.filefilter.IOFileFilter; -import org.apache.commons.lang.math.LongRange; +import org.apache.commons.lang3.LongRange; /** Utility methods for manipulating files and directories. 
* - * @contributor John Erik Halse - * @contributor gojomo + * @author John Erik Halse + * @author gojomo */ public class FileUtils { private static final Logger LOGGER = @@ -219,8 +220,8 @@ protected static void workaroundCopyFile(final File src, FileFilter prefixFilter = new FileFilter() { public boolean accept(File pathname) { - return pathname.getName().toLowerCase(). - startsWith(prefix.toLowerCase()); + return pathname.getName().toLowerCase(Locale.ROOT). + startsWith(prefix.toLowerCase(Locale.ROOT)); } }; return dir.listFiles(prefixFilter); @@ -283,7 +284,7 @@ public static boolean isReadableWithExtensionAndMagic(final File f, throws IOException { boolean result = false; FileUtils.assertReadable(f); - if(f.getName().toLowerCase().endsWith(uncompressedExtension)) { + if(f.getName().toLowerCase(Locale.ROOT).endsWith(uncompressedExtension)) { FileInputStream fis = new FileInputStream(f); try { byte [] b = new byte[magic.length()]; @@ -384,7 +385,7 @@ public static boolean moveAsideIfExists(File file) throws IOException { * want this number of lines ending with a line containing * the position; if positive, want this number of lines, * all starting at or after position. 
- * @param lines List to insert found lines + * @param lines list to insert found lines * @param lineEstimate int estimate of line size, 0 means use default * of 128 * @return LongRange indicating the file offsets corresponding to @@ -392,7 +393,6 @@ public static boolean moveAsideIfExists(File file) throws IOException { * after the end of the last line returned * @throws IOException */ - @SuppressWarnings("unchecked") public static LongRange pagedLines(File file, long position, int signedDesiredLineCount, List lines, int lineEstimate) throws IOException { @@ -424,12 +424,12 @@ public static LongRange pagedLines(File file, long position, } // read that reasonable chunk - FileInputStream fis = new FileInputStream(file); - fis.getChannel().position(startPosition); byte[] buf = new byte[bufferSize]; - ArchiveUtils.readFully(fis, buf); - IOUtils.closeQuietly(fis); - + try (FileInputStream fis = new FileInputStream(file)) { + fis.getChannel().position(startPosition); + ArchiveUtils.readFully(fis, buf); + } + // find all line starts fully in buffer // (positions after a line-end, per line-end definition in // BufferedReader.readLine) @@ -473,7 +473,7 @@ public static LongRange pagedLines(File file, long position, if(signedDesiredLineCount>0) { if(startPosition+bufferSize == fileEnd) { // nothing more to read: return nothing - return new LongRange(fileEnd,fileEnd); + return LongRange.of(fileEnd,fileEnd); } else { // retry with larger lineEstimate return pagedLines(file, position, signedDesiredLineCount, lines, Math.max(bufferSize,lineEstimate)); @@ -501,7 +501,7 @@ public static LongRange pagedLines(File file, long position, } int firstLine = lineStarts.getFirst(); int partialLine = lineStarts.getLast(); - LongRange range = new LongRange(startPosition + firstLine, startPosition + partialLine); + LongRange range = LongRange.of(startPosition + firstLine, startPosition + partialLine); List foundLines = IOUtils.readLines(new 
ByteArrayInputStream(buf,firstLine,partialLine-firstLine)); @@ -510,7 +510,7 @@ public static LongRange pagedLines(File file, long position, range = expandRange( range, pagedLines(file, - range.getMinimumLong()-1, + range.getMinimum()-1, signedDesiredLineCount+foundFullLines, lines, bufferSize/foundFullLines)); @@ -519,7 +519,7 @@ public static LongRange pagedLines(File file, long position, lines.addAll(foundLines); - if(signedDesiredLineCount < 0 && range.getMaximumLong() < position) { + if(signedDesiredLineCount < 0 && range.getMaximum() < position) { // did not get line containining start position range = expandRange( range, @@ -530,12 +530,12 @@ public static LongRange pagedLines(File file, long position, bufferSize/foundFullLines)); } - if(signedDesiredLineCount > 0 && foundFullLines < desiredLineCount && range.getMaximumLong() < fileEnd) { + if(signedDesiredLineCount > 0 && foundFullLines < desiredLineCount && range.getMaximum() < fileEnd) { // need more forward lines range = expandRange( range, pagedLines(file, - range.getMaximumLong(), + range.getMaximum(), desiredLineCount - foundFullLines, lines, bufferSize/foundFullLines)); @@ -545,8 +545,8 @@ public static LongRange pagedLines(File file, long position, } public static LongRange expandRange(LongRange range1, LongRange range2) { - return new LongRange(Math.min(range1.getMinimumLong(), range2.getMinimumLong()), - Math.max(range1.getMaximumLong(), range2.getMaximumLong())); + return LongRange.of(Math.min(range1.getMinimum(), range2.getMinimum()), + Math.max(range1.getMaximum(), range2.getMaximum())); } @@ -700,13 +700,12 @@ public static File tryToCanonicalize(File file) { public static void appendTo(File fileToAppendTo, File fileToAppendFrom) throws IOException { // optimal io block size according to http://lingrok.org/xref/coreutils/src/ioblksize.h byte[] buf = new byte[65536]; - FileOutputStream out = new FileOutputStream(fileToAppendTo, true); - FileInputStream in = new 
FileInputStream(fileToAppendFrom); - for (int n = in.read(buf); n > 0; n = in.read(buf)) { - out.write(buf, 0, n); - } - in.close(); - out.flush(); - out.close(); + try (FileInputStream in = new FileInputStream(fileToAppendFrom); + FileOutputStream out = new FileOutputStream(fileToAppendTo, true)) { + for (int n = in.read(buf); n > 0; n = in.read(buf)) { + out.write(buf, 0, n); + } + out.flush(); + } } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/util/Grep.java b/src/main/java/org/archive/util/Grep.java index e446e47e..892429bd 100644 --- a/src/main/java/org/archive/util/Grep.java +++ b/src/main/java/org/archive/util/Grep.java @@ -1,10 +1,13 @@ package org.archive.util; +import static java.nio.charset.StandardCharsets.UTF_8; + import java.io.BufferedReader; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintStream; +import java.nio.charset.Charset; import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; @@ -119,14 +122,14 @@ protected void doTheGrepThing() throws Exception { if (files != null) { if (files.size() == 1) { - grep(new BufferedReader(new FileReader(files.get(0))), ""); + grep(new BufferedReader(new InputStreamReader(new FileInputStream(files.get(0)), UTF_8)), ""); } else { for (String path : files) { - grep(new BufferedReader(new FileReader(path)), path + ": "); + grep(new BufferedReader(new InputStreamReader(new FileInputStream(path), UTF_8)), path + ": "); } } } else { - grep(new BufferedReader(new InputStreamReader(System.in)), ""); + grep(new BufferedReader(new InputStreamReader(System.in, Charset.defaultCharset())), ""); } } diff --git a/src/main/java/org/archive/util/HMACSigner.java b/src/main/java/org/archive/util/HMACSigner.java index d7a5208e..b502b4fb 100644 --- a/src/main/java/org/archive/util/HMACSigner.java +++ b/src/main/java/org/archive/util/HMACSigner.java @@ -1,5 +1,7 @@ package 
org.archive.util; +import java.nio.charset.StandardCharsets; + /** * Generate an HMAC key given a secret sig, key name and optional id and an expiration time * @@ -63,11 +65,11 @@ public static String hmacDigest(String msg, String keyString, String algo) { String digest = null; try { SecretKeySpec key = new SecretKeySpec( - (keyString).getBytes("UTF-8"), algo); + (keyString).getBytes(StandardCharsets.UTF_8), algo); Mac mac = Mac.getInstance(algo); mac.init(key); - byte[] bytes = mac.doFinal(msg.getBytes("ASCII")); + byte[] bytes = mac.doFinal(msg.getBytes(StandardCharsets.US_ASCII)); StringBuilder hash = new StringBuilder(); diff --git a/src/main/java/org/archive/util/IAUtils.java b/src/main/java/org/archive/util/IAUtils.java index ed563d02..334a31b4 100644 --- a/src/main/java/org/archive/util/IAUtils.java +++ b/src/main/java/org/archive/util/IAUtils.java @@ -24,17 +24,27 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.Reader; +import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; +import java.util.Properties; + +import static java.nio.charset.StandardCharsets.UTF_8; /** * Miscellaneous useful methods. 
* - * @author gojomo & others + * @author gojomo & others */ public class IAUtils { - public final static Charset UTF8 = Charset.forName("utf-8"); + public final static Charset UTF8 = UTF_8; final public static String COMMONS_VERSION = loadCommonsVersion(); + final public static String PUBLISHER = loadCommons("publisher"); + final public static String OPERATOR = loadCommons("operator"); + final public static String WAT_WARCINFO_DESCRIPTION = loadCommons("wat.warcinfo.description"); + final public static String WARC_FORMAT = loadCommons("warc.format"); + final public static String WARC_FORMAT_CONFORMS_TO = loadCommons("warc.format.conforms.to"); public static String loadCommonsVersion() { InputStream input = IAUtils.class.getResourceAsStream( @@ -45,7 +55,7 @@ public static String loadCommonsVersion() { BufferedReader br = null; String version; try { - br = new BufferedReader(new InputStreamReader(input)); + br = new BufferedReader(new InputStreamReader(input, UTF_8)); version = br.readLine(); br.readLine(); } catch (IOException e) { @@ -57,6 +67,27 @@ public static String loadCommonsVersion() { return version.trim(); } + public static String loadCommons(String id) { + InputStream input = IAUtils.class.getResourceAsStream("/org/archive/commons.properties"); + Reader reader = null; + if (input == null) { + return "UNKNOWN"; + } + reader = new InputStreamReader(input, UTF_8); + Properties prop = new Properties(); + try { + prop.load(reader); + } catch (IOException e1) { + return "UNKNOWN"; + } + if (prop.getProperty(id) != null) { + return prop.getProperty(id); + } else { + return "UNKNOWN"; + } + + } + public static void closeQuietly(Object input) { if(input == null || ! 
(input instanceof Closeable)) { return; diff --git a/src/main/java/org/archive/util/IterableLineIterator.java b/src/main/java/org/archive/util/IterableLineIterator.java index 6e0d9dc8..c9010031 100644 --- a/src/main/java/org/archive/util/IterableLineIterator.java +++ b/src/main/java/org/archive/util/IterableLineIterator.java @@ -9,7 +9,7 @@ * A LineIterator that also implements Iterable, so that it can be used with * the java enhanced for-each loop syntax. * - * @contributor nlevitt + * @author nlevitt */ public class IterableLineIterator extends LineIterator implements Iterable { @@ -19,7 +19,6 @@ public IterableLineIterator(final Reader reader) super(reader); } - @SuppressWarnings("unchecked") public Iterator iterator() { return this; } diff --git a/src/main/java/org/archive/util/LaxHttpParser.java b/src/main/java/org/archive/util/LaxHttpParser.java index c1f768f0..434522c8 100644 --- a/src/main/java/org/archive/util/LaxHttpParser.java +++ b/src/main/java/org/archive/util/LaxHttpParser.java @@ -35,13 +35,12 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.logging.Logger; -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.util.EncodingUtil; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.archive.format.http.HttpHeader; /** * A Modified version of HttpParser which doesn't throw exceptions on bad header lines @@ -57,7 +56,7 @@ public class LaxHttpParser { /** Log object for this class. */ - private static final Log LOG = LogFactory.getLog(LaxHttpParser.class); + private static final Logger LOG = Logger.getLogger(LaxHttpParser.class.getName()); /** * Constructor for LaxHttpParser. 
@@ -66,7 +65,7 @@ protected LaxHttpParser() { } /** * Return byte array from an (unchunked) input stream. - * Stop reading when "\n" terminator encountered + * Stop reading when "\n" terminator encountered * If the stream ends before the line terminator is found, * the last part of the string will still be returned. * If no input data available, null is returned. @@ -77,7 +76,7 @@ protected LaxHttpParser() { } * @return a byte array from the stream */ public static byte[] readRawLine(InputStream inputStream) throws IOException { - LOG.trace("enter LaxHttpParser.readRawLine()"); + LOG.finest("enter LaxHttpParser.readRawLine()"); ByteArrayOutputStream buf = new ByteArrayOutputStream(); int ch; @@ -94,7 +93,7 @@ public static byte[] readRawLine(InputStream inputStream) throws IOException { } /** - * Read up to "\n" from an (unchunked) input stream. + * Read up to "\n" from an (unchunked) input stream. * If the stream ends before the line terminator is found, * the last part of the string will still be returned. * If no input data available, null is returned. @@ -108,7 +107,7 @@ public static byte[] readRawLine(InputStream inputStream) throws IOException { * @since 3.0 */ public static String readLine(InputStream inputStream, String charset) throws IOException { - LOG.trace("enter LaxHttpParser.readLine(InputStream, String)"); + LOG.finest("enter LaxHttpParser.readLine(InputStream, String)"); byte[] rawdata = readRawLine(inputStream); if (rawdata == null) { return null; @@ -126,11 +125,15 @@ public static String readLine(InputStream inputStream, String charset) throws IO } } } - return EncodingUtil.getString(rawdata, 0, len - offset, charset); + try { + return new String(rawdata, 0, len - offset, charset); + } catch (UnsupportedEncodingException e) { + return new String(rawdata, 0, len - offset, StandardCharsets.ISO_8859_1); + } } /** - * Read up to "\n" from an (unchunked) input stream. + * Read up to "\n" from an (unchunked) input stream. 
* If the stream ends before the line terminator is found, * the last part of the string will still be returned. * If no input data available, null is returned @@ -144,8 +147,8 @@ public static String readLine(InputStream inputStream, String charset) throws IO */ public static String readLine(InputStream inputStream) throws IOException { - LOG.trace("enter LaxHttpParser.readLine(InputStream)"); - return readLine(inputStream, "US-ASCII"); + LOG.finest("enter LaxHttpParser.readLine(InputStream)"); + return readLine(inputStream, StandardCharsets.US_ASCII.name()); } /** @@ -158,14 +161,13 @@ public static String readLine(InputStream inputStream) throws IOException { * @return an array of headers in the order in which they were parsed * * @throws IOException if an IO error occurs while reading from the stream - * @throws HttpException if there is an error parsing a header value - * + * * @since 3.0 */ - public static Header[] parseHeaders(InputStream is, String charset) throws IOException, HttpException { - LOG.trace("enter HeaderParser.parseHeaders(InputStream, String)"); + public static HttpHeader[] parseHeaders(InputStream is, String charset) throws IOException { + LOG.finest("enter HeaderParser.parseHeaders(InputStream, String)"); - ArrayList

    headers = new ArrayList
    (); + ArrayList headers = new ArrayList<>(); String name = null; StringBuffer value = null; for (; ;) { @@ -188,7 +190,7 @@ public static Header[] parseHeaders(InputStream is, String charset) throws IOExc } else { // make sure we save the previous name,value pair if present if (name != null) { - headers.add(new Header(name, value.toString())); + headers.add(new HttpHeader(name, value.toString())); } // Otherwise we should have normal HTTP header line @@ -216,10 +218,10 @@ public static Header[] parseHeaders(InputStream is, String charset) throws IOExc // make sure we save the last name,value pair if present if (name != null) { - headers.add(new Header(name, value.toString())); + headers.add(new HttpHeader(name, value.toString())); } - return (Header[]) headers.toArray(new Header[headers.size()]); + return headers.toArray(new HttpHeader[0]); } /** @@ -231,12 +233,11 @@ public static Header[] parseHeaders(InputStream is, String charset) throws IOExc * @return an array of headers in the order in which they were parsed * * @throws IOException if an IO error occurs while reading from the stream - * @throws HttpException if there is an error parsing a header value - * + * * @deprecated use #parseHeaders(InputStream, String) */ - public static Header[] parseHeaders(InputStream is) throws IOException, HttpException { - LOG.trace("enter HeaderParser.parseHeaders(InputStream, String)"); - return parseHeaders(is, "US-ASCII"); + public static HttpHeader[] parseHeaders(InputStream is) throws IOException { + LOG.finest("enter HeaderParser.parseHeaders(InputStream, String)"); + return parseHeaders(is, StandardCharsets.US_ASCII.name()); } } diff --git a/src/main/java/org/archive/util/ProcessUtils.java b/src/main/java/org/archive/util/ProcessUtils.java index af792981..0a3eeb67 100644 --- a/src/main/java/org/archive/util/ProcessUtils.java +++ b/src/main/java/org/archive/util/ProcessUtils.java @@ -26,6 +26,8 @@ import java.util.logging.Level; import java.util.logging.Logger; 
+import static java.nio.charset.StandardCharsets.UTF_8; + /** * Class to run an external process. * @author stack @@ -55,7 +57,7 @@ protected StreamGobbler(InputStream is, String name) { public void run() { try { BufferedReader br = - new BufferedReader(new InputStreamReader(this.is)); + new BufferedReader(new InputStreamReader(this.is, UTF_8)); for (String line = null; (line = br.readLine()) != null;) { this.sink.append(line); } diff --git a/src/main/java/org/archive/util/PropertyUtils.java b/src/main/java/org/archive/util/PropertyUtils.java index 083615f6..659b2820 100644 --- a/src/main/java/org/archive/util/PropertyUtils.java +++ b/src/main/java/org/archive/util/PropertyUtils.java @@ -21,13 +21,13 @@ import java.util.Properties; import java.util.regex.Matcher; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; /** * Utilities for dealing with Java Properties (incl. System Properties) * - * @contributor stack - * @contributor gojomo + * @author stack + * @author gojomo * @version $Date$ $Revision$ */ public class PropertyUtils { @@ -67,7 +67,6 @@ public static int getIntProperty(final String key, final int fallback) { * the expression is replaced with the empty-string. 
* * @param original String - * @param properties Properties to try in order; first value found (if any) is used * @return modified String */ public static String interpolateWithProperties(String original) { diff --git a/src/main/java/org/archive/util/Recorder.java b/src/main/java/org/archive/util/Recorder.java index 425344bb..9f10ec92 100644 --- a/src/main/java/org/archive/util/Recorder.java +++ b/src/main/java/org/archive/util/Recorder.java @@ -25,25 +25,24 @@ import java.io.InputStreamReader; import java.io.OutputStream; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.HashSet; +import java.util.Locale; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import java.util.zip.DeflaterInputStream; import java.util.zip.GZIPInputStream; -import org.apache.commons.httpclient.ChunkedInputStream; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.archive.io.GenericReplayCharSequence; import org.archive.io.RecordingInputStream; import org.archive.io.RecordingOutputStream; import org.archive.io.ReplayCharSequence; import org.archive.io.ReplayInputStream; -import com.google.common.base.Charsets; - /** * Pairs together a RecordingInputStream and RecordingOutputStream @@ -96,7 +95,7 @@ public class Recorder { * (current behavior is for consistency with our prior but perhaps not * optimal behavior) */ - protected Charset charset = Charsets.UTF_8; + protected Charset charset = StandardCharsets.UTF_8; /** whether recording-input (ris) message-body is chunked */ protected boolean inputIsChunked = false; @@ -310,8 +309,7 @@ public static Recorder getHttpRecorder() { } /** - * @param characterEncoding Character encoding of input recording. - * @return actual charset in use after attempt to set + * @param cs Character encoding of input recording. 
*/ public void setCharset(Charset cs) { this.charset = cs; @@ -324,9 +322,6 @@ public Charset getCharset() { return this.charset; } - /** - * @param characterEncoding Character encoding of input recording. - */ public void setInputIsChunked(boolean chunked) { this.inputIsChunked = chunked; } @@ -343,8 +338,8 @@ public void setInputIsChunked(boolean chunked) { * @param contentEncoding declared content-encoding of input recording. */ public void setContentEncoding(String contentEncoding) { - String lowerCoding = contentEncoding.toLowerCase(); - if(!SUPPORTED_ENCODINGS.contains(contentEncoding.toLowerCase())) { + String lowerCoding = contentEncoding.toLowerCase(Locale.ROOT); + if(!SUPPORTED_ENCODINGS.contains(contentEncoding.toLowerCase(Locale.ROOT))) { throw new IllegalArgumentException("contentEncoding unsupported: "+contentEncoding); } this.contentEncoding = lowerCoding; @@ -356,16 +351,6 @@ public void setContentEncoding(String contentEncoding) { public String getContentEncoding() { return this.contentEncoding; } - - - /** - * @return - * @throws IOException - * @deprecated use getContentReplayCharSequence - */ - public ReplayCharSequence getReplayCharSequence() throws IOException { - return getContentReplayCharSequence(); - } /** * @return A ReplayCharSequence. Caller may call @@ -374,7 +359,7 @@ public ReplayCharSequence getReplayCharSequence() throws IOException { * processing has finished; in that context it's preferable not * to close, so that processors can reuse the same instance. * @throws IOException - * @see {@link #endReplays()} + * @see #endReplays() */ public ReplayCharSequence getContentReplayCharSequence() throws IOException { if (replayCharSequence == null || !replayCharSequence.isOpen() @@ -390,7 +375,7 @@ public ReplayCharSequence getContentReplayCharSequence() throws IOException { /** - * @param characterEncoding Encoding of recorded stream. + * @param requestedCharset Encoding of recorded stream. 
* @return A ReplayCharSequence Will return null if an IOException. Call * close on returned RCS when done. * @throws IOException diff --git a/src/main/java/org/archive/util/Reporter.java b/src/main/java/org/archive/util/Reporter.java index 2fcb8cd8..8da38afa 100644 --- a/src/main/java/org/archive/util/Reporter.java +++ b/src/main/java/org/archive/util/Reporter.java @@ -31,15 +31,13 @@ public interface Reporter { * @param writer to receive report */ public void reportTo(PrintWriter writer) throws IOException; - + /** - * Write a short single-line summary report - * - * @param writer to receive report + * Write a short single-line summary report + * + * @param pw writer to receive report */ - @Deprecated public void shortReportLineTo(PrintWriter pw) throws IOException; - /** * @return Same data that's in the single line report, as key-value pairs diff --git a/src/main/java/org/archive/util/SURT.java b/src/main/java/org/archive/util/SURT.java index 69daf247..99347e9f 100644 --- a/src/main/java/org/archive/util/SURT.java +++ b/src/main/java/org/archive/util/SURT.java @@ -27,11 +27,14 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.PrintStream; +import java.nio.charset.Charset; import java.util.regex.Matcher; -import org.apache.commons.httpclient.URIException; +import org.archive.url.URIException; import org.archive.url.UsableURIFactory; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Sort-friendly URI Reordering Transform. * @@ -238,10 +241,10 @@ public static void main(String[] args) throws IOException { InputStream in = args.length > 0 ? new BufferedInputStream( new FileInputStream(args[0])) : System.in; PrintStream out = args.length > 1 ? 
new PrintStream( - new BufferedOutputStream(new FileOutputStream(args[1]))) + new BufferedOutputStream(new FileOutputStream(args[1])), false, UTF_8.name()) : System.out; BufferedReader br = - new BufferedReader(new InputStreamReader(in)); + new BufferedReader(new InputStreamReader(in, Charset.defaultCharset())); String line; while((line = br.readLine())!=null) { if(line.indexOf("#")>0) line=line.substring(0,line.indexOf("#")); diff --git a/src/main/java/org/archive/util/StreamCopy.java b/src/main/java/org/archive/util/StreamCopy.java index 52523edc..53a1b812 100755 --- a/src/main/java/org/archive/util/StreamCopy.java +++ b/src/main/java/org/archive/util/StreamCopy.java @@ -27,7 +27,7 @@ public static long copy(InputStream i, OutputStream o, int bytes) throws IOExcep } public static long copyLength(InputStream i, OutputStream o, long bytes) throws IOException { - return copyLength(i,o,DEFAULT_READ_SIZE); + return copyLength(i,o,bytes,DEFAULT_READ_SIZE); } public static long copyLength(InputStream i, OutputStream o, long bytes, int readSize) throws IOException { diff --git a/src/main/java/org/archive/util/SurtPrefixSet.java b/src/main/java/org/archive/util/SurtPrefixSet.java index b0da4321..b2f0ea4f 100644 --- a/src/main/java/org/archive/util/SurtPrefixSet.java +++ b/src/main/java/org/archive/util/SurtPrefixSet.java @@ -31,11 +31,14 @@ import java.io.PrintStream; import java.io.Reader; import java.util.Iterator; +import java.util.Locale; import org.archive.url.UsableURI; import org.archive.util.iterator.LineReadingIterator; import org.archive.util.iterator.RegexLineIterator; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Specialized TreeSet for keeping a set of String prefixes. * @@ -56,7 +59,6 @@ public class SurtPrefixSet extends PrefixSet { * with redundant entries removed. 
* * @param r reader over file of SURT_format strings - * @throws IOException */ public void importFrom(Reader r) { BufferedReader reader = new BufferedReader(r); @@ -71,7 +73,7 @@ public void importFrom(Reader r) { while (iter.hasNext()) { s = (String) iter.next(); - add(s.toLowerCase()); + add(s.toLowerCase(Locale.ROOT)); } } @@ -146,7 +148,7 @@ public boolean considerAsAddDirective(String suri) { } if(u.indexOf("(")>0) { // formal SURT prefix; toLowerCase just in case - add(u.toLowerCase()); + add(u.toLowerCase(Locale.ROOT)); } else { // hostname/normal form URI from which // to deduce SURT prefix @@ -236,7 +238,7 @@ public static String asPrefix(String s) { * Calculate the SURT form URI to use as a candidate against prefixes * from the given Object (CandidateURI or UURI) * - * @param object CandidateURI or UURI + * @param u CandidateURI or UURI * @return SURT form of URI for evaluation, or null if unavailable */ public static String getCandidateSurt(UsableURI u) { @@ -343,10 +345,10 @@ public static void main(String[] args) throws IOException { InputStream in = args.length > 0 ? new BufferedInputStream( new FileInputStream(args[0])) : System.in; PrintStream out = args.length > 1 ? 
new PrintStream( - new BufferedOutputStream(new FileOutputStream(args[1]))) + new BufferedOutputStream(new FileOutputStream(args[1])), false, UTF_8.name()) : System.out; BufferedReader br = - new BufferedReader(new InputStreamReader(in)); + new BufferedReader(new InputStreamReader(in, UTF_8.name())); String line; while((line = br.readLine())!=null) { if(line.indexOf("#")>0) line=line.substring(0,line.indexOf("#")); diff --git a/src/main/java/org/archive/util/TextUtils.java b/src/main/java/org/archive/util/TextUtils.java index 707f93c7..627d411a 100644 --- a/src/main/java/org/archive/util/TextUtils.java +++ b/src/main/java/org/archive/util/TextUtils.java @@ -30,14 +30,16 @@ import java.net.URLEncoder; import java.util.HashMap; import java.util.Map; -import java.util.concurrent.ConcurrentMap; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang3.StringEscapeUtils; -import com.google.common.base.Function; -import com.google.common.collect.MapMaker; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; + +import static java.nio.charset.StandardCharsets.UTF_8; public class TextUtils { private static final String FIRSTWORD = "^([^\\s]*).*$"; @@ -51,11 +53,11 @@ protected Map initialValue() { }; /** global soft-cache of Patterns, by string key */ - private static final ConcurrentMap PATTERNS = new MapMaker() + private static final LoadingCache PATTERNS = CacheBuilder.newBuilder() .concurrencyLevel(16) .softValues() - .makeComputingMap(new Function() { - public Pattern apply(String regex) { + .build(new CacheLoader() { + public Pattern load(String regex) { return Pattern.compile(regex); } }); @@ -84,7 +86,7 @@ public static Matcher getMatcher(String pattern, CharSequence input) { final Map matchers = TL_MATCHER_MAP.get(); Matcher m = (Matcher)matchers.get(pattern); if(m == null) { - m = 
PATTERNS.get(pattern).matcher(input); + m = PATTERNS.getUnchecked(pattern).matcher(input); } else { matchers.put(pattern,null); m.reset(input); @@ -197,7 +199,7 @@ public static String getFirstWord(String s) { * @return The same string escaped. */ public static String escapeForHTMLJavascript(String s) { - return escapeForHTML(StringEscapeUtils.escapeJavaScript(s)); + return escapeForHTML(StringEscapeUtils.escapeEcmaScript(s)); } /** @@ -229,7 +231,7 @@ public static String escapeForHTML(String s) { * escaping it for HTML display, without constructing another large String * of the whole content. * @param s String to write - * @param out destination JspWriter + * @param w destination Writer * @throws IOException */ public static void writeEscapedForHTML(String s, Writer w) @@ -238,7 +240,7 @@ public static void writeEscapedForHTML(String s, Writer w) BufferedReader reader = new BufferedReader(new StringReader(s)); String line; while((line=reader.readLine()) != null){ - out.println(StringEscapeUtils.escapeHtml(line)); + out.println(StringEscapeUtils.escapeHtml3(line)); } } @@ -252,7 +254,7 @@ public static CharSequence unescapeHtml(final CharSequence cs) { return cs; } - return StringEscapeUtils.unescapeHtml(cs.toString()); + return StringEscapeUtils.unescapeHtml4(cs.toString()); } /** @@ -278,14 +280,11 @@ public static String exceptionToString(String message, Throwable e) { * @param s String to escape * @return URL-escaped string */ - @SuppressWarnings("deprecation") public static String urlEscape(String s) { try { - return URLEncoder.encode(s,"UTF8"); + return URLEncoder.encode(s, UTF_8.name()); } catch (UnsupportedEncodingException e) { - // should be impossible; all JVMs must support UTF8 - // but have a fallback just in case - return URLEncoder.encode(s); + return s; } } @@ -295,14 +294,11 @@ public static String urlEscape(String s) { * @param s String do unescape * @return URL-unescaped String */ - @SuppressWarnings("deprecation") public static String 
urlUnescape(String s) { try { - return URLDecoder.decode(s, "UTF8"); + return URLDecoder.decode(s, UTF_8.name()); } catch (UnsupportedEncodingException e) { - // should be impossible; all JVMs must support UTF8 - // but have a fallback just in case - return URLDecoder.decode(s); + return s; } } } \ No newline at end of file diff --git a/src/main/java/org/archive/util/anvl/ANVLRecord.java b/src/main/java/org/archive/util/anvl/ANVLRecord.java index de2d3101..e548f432 100644 --- a/src/main/java/org/archive/util/anvl/ANVLRecord.java +++ b/src/main/java/org/archive/util/anvl/ANVLRecord.java @@ -72,11 +72,6 @@ public ANVLRecord(Collection c) { super(c); } - /** @deprecated */ - public ANVLRecord(int initialCapacity) { - super(); - } - public boolean addLabel(final String l) { return super.add(new Element(new Label(l))); } @@ -190,11 +185,11 @@ public static ANVLRecord load(final InputStream is) return load(new String(baos.toByteArray(), UTF8)); } - /** + /** * Parse passed String for an ANVL Record. * Looked at writing javacc grammer but preprocessing is required to * handle folding: See - * https://javacc.dev.java.net/servlets/BrowseList?list=users&by=thread&from=56173. + * here. * Looked at Terence Parr's ANTLR. More capable. Can set lookahead count. * A value of 3 would help with folding. But its a pain defining UNICODE * grammers -- needed by ANVL -- and support seems incomplete @@ -202,7 +197,7 @@ public static ANVLRecord load(final InputStream is) * For now, go with the below hand-rolled parser. * @param s String with an ANVLRecord. * @return ANVLRecord parsed from passed String. - * @throws IOException + * @throws IOException */ public static ANVLRecord load(final String s) throws IOException { diff --git a/src/main/java/org/archive/util/anvl/package.html b/src/main/java/org/archive/util/anvl/package.html index 4a2a8963..f5164631 100644 --- a/src/main/java/org/archive/util/anvl/package.html +++ b/src/main/java/org/archive/util/anvl/package.html @@ -37,6 +37,5 @@

    Implementation Details

    of combinations of atom, quoted-string, and specials tokens, or else consisting of texts> -

    diff --git a/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java index de57278e..17d411fa 100644 --- a/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java +++ b/src/main/java/org/archive/util/binsearch/AbstractSeekableLineReader.java @@ -7,13 +7,14 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import org.archive.util.zip.GZIPMembersInputStream; import com.google.common.io.ByteStreams; public abstract class AbstractSeekableLineReader implements SeekableLineReader { - public final static Charset UTF8 = Charset.forName("UTF-8"); + public final static Charset UTF8 = StandardCharsets.UTF_8; protected int blockSize = 128 * 1024; diff --git a/src/main/java/org/archive/util/binsearch/ByteBufferInputStream.java b/src/main/java/org/archive/util/binsearch/ByteBufferInputStream.java index f5f75fa4..35cb0ee8 100644 --- a/src/main/java/org/archive/util/binsearch/ByteBufferInputStream.java +++ b/src/main/java/org/archive/util/binsearch/ByteBufferInputStream.java @@ -37,7 +37,7 @@ *

    Java's {@linkplain FileChannel#map(MapMode, long, long) memory-mapping facilities} have * the severe limitation of mapping at most {@link Integer#MAX_VALUE} bytes, as they * expose the content of a file using a {@link MappedByteBuffer}. This class can {@linkplain #map(FileChannel, FileChannel.MapMode) expose - * a file of arbitrary length} as a {@linkplain RepositionableStream repositionable} {@link MeasurableInputStream} + * a file of arbitrary length} as a repositionable input stream * that is actually based on an array of {@link MappedByteBuffer}s, each mapping * a chunk of {@link #CHUNK_SIZE} bytes. * diff --git a/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java b/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java index 76b7b2b9..45c2ee04 100644 --- a/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java +++ b/src/main/java/org/archive/util/binsearch/SeekCDXBenchmarker.java @@ -3,6 +3,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.Charset; import org.archive.url.WaybackURLKeyMaker; import org.archive.util.binsearch.impl.MappedSeekableLineReaderFactory; @@ -52,7 +53,7 @@ public static void main(String[] args) throws IOException { SortedTextFile sorted = new SortedTextFile(factory); sorted.setBinsearchBlockSize(blocksize); - BufferedReader reader = new BufferedReader(new InputStreamReader(System.in)); + BufferedReader reader = new BufferedReader(new InputStreamReader(System.in, Charset.defaultCharset())); WaybackURLKeyMaker keymaker = new WaybackURLKeyMaker(true); diff --git a/src/main/java/org/archive/util/binsearch/SortedTextFile.java b/src/main/java/org/archive/util/binsearch/SortedTextFile.java index ab8118b7..bb4a1f66 100644 --- a/src/main/java/org/archive/util/binsearch/SortedTextFile.java +++ b/src/main/java/org/archive/util/binsearch/SortedTextFile.java @@ -2,12 +2,15 @@ import java.io.IOException; import java.util.Comparator; +import 
java.util.Locale; import java.util.logging.Level; import java.util.logging.Logger; import org.archive.util.GeneralURIStreamFactory; import org.archive.util.iterator.CloseableIterator; +import static java.nio.charset.StandardCharsets.UTF_8; + public class SortedTextFile { public static class NumericComparator implements Comparator @@ -142,14 +145,14 @@ public long binaryFindOffset(SeekableLineReader slr, final String key, Comparato if (comparator.compare(key, line) > 0) { if(LOGGER.isLoggable(Level.FINE)) { - LOGGER.fine(String.format("Search(%d) (%s)/(%s) : After", + LOGGER.fine(String.format(Locale.ROOT, "Search(%d) (%s)/(%s) : After", mid * blockSize, key,line)); } min = mid; } else { if(LOGGER.isLoggable(Level.FINE)) { - LOGGER.fine(String.format("Search(%d) (%s)/(%s) : Before", + LOGGER.fine(String.format(Locale.ROOT, "Search(%d) (%s)/(%s) : Before", mid * blockSize, key,line)); } max = mid; @@ -370,7 +373,7 @@ private long searchOffset(SeekableLineReader slr, String prev = null; while(true) { if (line != null) { - offset += line.getBytes().length + 1; + offset += line.getBytes(UTF_8).length + 1; } line = slr.readLine(); if(line == null) break; @@ -379,7 +382,7 @@ private long searchOffset(SeekableLineReader slr, } if (lessThan && prev != null) { - offset -= prev.getBytes().length + 1; + offset -= prev.getBytes(UTF_8).length + 1; } return offset; @@ -391,7 +394,7 @@ private CloseableIterator search(SeekableLineReader slr, long min = binaryFindOffset(slr, key, comparator); if (LOGGER.isLoggable(Level.FINE)) { - LOGGER.fine(String.format("Aligning(%d)",min)); + LOGGER.fine(String.format(Locale.ROOT, "Aligning(%d)",min)); } slr.seek(min); diff --git a/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java index 621c6bce..93757a45 100644 --- a/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java +++ 
b/src/main/java/org/archive/util/binsearch/impl/HDFSSeekableLineReader.java @@ -6,7 +6,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.archive.util.binsearch.AbstractSeekableLineReader; -import com.google.common.io.LimitInputStream; +import com.google.common.io.ByteStreams; public class HDFSSeekableLineReader extends AbstractSeekableLineReader { private FSDataInputStream fsdis; @@ -23,7 +23,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException { fsdis.seek(offset); if (maxLength >= 0) { - return new LimitInputStream(fsdis, maxLength); + return ByteStreams.limit(fsdis, maxLength); } else { return fsdis; } diff --git a/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java b/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java index b4a23db0..69189862 100644 --- a/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java +++ b/src/main/java/org/archive/util/binsearch/impl/HTTPSeekableLineReaderFactory.java @@ -3,7 +3,6 @@ import java.io.IOException; import org.archive.util.binsearch.SeekableLineReaderFactory; -import org.archive.util.binsearch.impl.http.ApacheHttp31SLRFactory; import org.archive.util.binsearch.impl.http.ApacheHttp43SLRFactory; import org.archive.util.binsearch.impl.http.HTTPURLConnSLRFactory; @@ -20,14 +19,13 @@ protected HTTPSeekableLineReaderFactory() public enum HttpLibs { - APACHE_31, APACHE_43, URLCONN, } public static HTTPSeekableLineReaderFactory getHttpFactory() { - return getHttpFactory(HttpLibs.APACHE_31); + return getHttpFactory(HttpLibs.APACHE_43); } public static HTTPSeekableLineReaderFactory getHttpFactory(HttpLibs type) @@ -37,7 +35,7 @@ public static HTTPSeekableLineReaderFactory getHttpFactory(HttpLibs type) public static HTTPSeekableLineReaderFactory getHttpFactory(String defaultURL) { - return getHttpFactory(HttpLibs.APACHE_31, defaultURL); + return getHttpFactory(HttpLibs.APACHE_43, defaultURL); } public static 
HTTPSeekableLineReaderFactory getHttpFactory(HttpLibs type, String defaultURL) @@ -45,10 +43,6 @@ public static HTTPSeekableLineReaderFactory getHttpFactory(HttpLibs type, String HTTPSeekableLineReaderFactory factory = null; switch (type) { - case APACHE_31: - factory = new ApacheHttp31SLRFactory(); - break; - case URLCONN: factory = new HTTPURLConnSLRFactory(); break; @@ -59,7 +53,7 @@ public static HTTPSeekableLineReaderFactory getHttpFactory(HttpLibs type, String } if (factory == null) { - factory = new ApacheHttp31SLRFactory(); + factory = new ApacheHttp43SLRFactory(); } factory.defaultURL = defaultURL; diff --git a/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java b/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java index b211db16..5131dd06 100644 --- a/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java +++ b/src/main/java/org/archive/util/binsearch/impl/RandomAccessFileSeekableLineReader.java @@ -7,7 +7,7 @@ import org.archive.util.binsearch.AbstractSeekableLineReader; -import com.google.common.io.LimitInputStream; +import com.google.common.io.ByteStreams; public class RandomAccessFileSeekableLineReader extends AbstractSeekableLineReader { @@ -24,7 +24,7 @@ public InputStream doSeekLoad(long offset, int maxLength) throws IOException { FileInputStream fis = new FileInputStream(raf.getFD()); if (maxLength > 0) { - return new LimitInputStream(fis, maxLength); + return ByteStreams.limit(fis, maxLength); } else { return fis; } diff --git a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java deleted file mode 100644 index c4fdbba8..00000000 --- a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLR.java +++ /dev/null @@ -1,231 +0,0 @@ -package org.archive.util.binsearch.impl.http; - -import java.io.IOException; -import java.io.InputStream; -import 
java.net.URISyntaxException; - -import org.apache.commons.httpclient.Header; -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpMethod; -import org.apache.commons.httpclient.cookie.CookiePolicy; -import org.apache.commons.httpclient.methods.GetMethod; -import org.apache.commons.httpclient.methods.HeadMethod; -import org.apache.commons.io.input.CountingInputStream; -import org.archive.util.binsearch.impl.HTTPSeekableLineReader; - -public class ApacheHttp31SLR extends HTTPSeekableLineReader { - - private HttpClient http; - private String url; - private long length = -1; - - protected CountingInputStream cin; - - private GetMethod activeMethod; - - public ApacheHttp31SLR(HttpClient http, String url) { - this.http = http; - this.url = url; - } - - private void acquireLength() throws URISyntaxException, HttpException, IOException { - HttpMethod head = new HeadMethod(url); - int code = http.executeMethod(head); - if(code != 200) { - throw new IOException("Unable to retrieve from " + url); - } - Header lengthHeader = head.getResponseHeader(CONTENT_LENGTH); - if(lengthHeader == null) { - throw new IOException("No Content-Length header for " + url); - } - String val = lengthHeader.getValue(); - try { - length = Long.parseLong(val); - } catch(NumberFormatException e) { - throw new IOException("Bad Content-Length value " +url+ ": " + val); - } - } - - protected String getHeader(String header) throws URISyntaxException, HttpException, IOException { - HttpMethod head = new HeadMethod(url); - int code = http.executeMethod(head); - if(code != 200) { - throw new IOException("Unable to retrieve from " + url); - } - Header theHeader = head.getResponseHeader(header); - if(theHeader == null) { - throw new IOException("No " + header + " header for " + url); - } - String val = theHeader.getValue(); - return val; - } - - /* (non-Javadoc) - * @see 
org.archive.util.binsearch.impl.HTTPSeekableLineReader#getUrl() - */ - @Override - public String getUrl() - { - return url; - } - -// public void seek(long offset, boolean gzip) throws IOException { -// is = doSeekLoad(offset, -1); -// -// if (gzip) { -// is = new GZIPMembersInputStream(is, blockSize); -// } -// } - -// public void seekWithMaxRead(long offset, boolean gzip, int maxLength) throws IOException { -// is = doSeekLoad(offset, maxLength); -// -// if (bufferFully && (maxLength > 0) && (maxLength < 1e10)) { -// try { -// byte[] buffer = new byte[maxLength]; -// ByteStreams.readFully(is, buffer); -// is.close(); -// -// // Create new stream -// is = new ByteArrayInputStream(buffer); -// } finally { -// activeMethod.releaseConnection(); -// activeMethod = null; -// } -// } -// -// if (gzip) { -// is = new GZIPMembersInputStream(is, blockSize); -// } -// } - - protected InputStream doSeekLoad(long offset, int maxLength) throws IOException { - if (activeMethod != null) { - doClose(); - } - - br = null; - - try { - - activeMethod = new GetMethod(url); - - String rangeHeader = makeRangeHeader(offset, maxLength); - - if (rangeHeader != null) { - activeMethod.setRequestHeader("Range", rangeHeader); - } - - if (this.isNoKeepAlive()) { - activeMethod.setRequestHeader("Connection", "close"); - } - - if (this.getCookie() != null) { - activeMethod.getParams().setCookiePolicy(CookiePolicy.IGNORE_COOKIES); - activeMethod.setRequestHeader("Cookie", this.getCookie()); - } - - int code = http.executeMethod(activeMethod); - - connectedUrl = activeMethod.getURI().toString(); - - if ((code != 206) && (code != 200)) { - throw new BadHttpStatusException(code, connectedUrl + " " + rangeHeader); - } - - InputStream is = activeMethod.getResponseBodyAsStream(); - cin = new CountingInputStream(is); - return cin; - - } catch (IOException io) { - if (saveErrHeader != null) { - errHeader = getHeaderValue(saveErrHeader); - } - - connectedUrl = activeMethod.getURI().toString(); - 
doClose(); - throw io; - } - } - - public GetMethod getHttpMethod() - { - return activeMethod; - } - - public void doClose() throws IOException { - - if (activeMethod == null) { - return; - } - - try { - long contentLength = activeMethod.getResponseContentLength(); - - long bytesRead = (cin != null ? cin.getByteCount() : 0); - - // If fully read, close gracefully, otherwise abort - if ((contentLength > 0) && (contentLength == bytesRead)) { -// try { -// cin.close(); -// } catch (IOException e) { -// activeMethod.abort(); -// } - } else { - activeMethod.abort(); - } - - activeMethod.releaseConnection(); - activeMethod = null; - - } finally { - if (activeMethod != null) { - activeMethod.abort(); - activeMethod.releaseConnection(); - activeMethod = null; - } - } - - cin = null; - is = null; - br = null; - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReader#getSize() - */ - @Override - public long getSize() throws IOException { - if (length < 0) { - try { - if (activeMethod != null) { - length = activeMethod.getResponseContentLength(); - } else { - acquireLength(); - } - } catch (URISyntaxException e) { - throw new IOException(e); - } - } - return length; - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReader#getHeaderValue(java.lang.String) - */ - @Override - public String getHeaderValue(String headerName) { - if (activeMethod == null) { - return null; - } - - Header header = activeMethod.getResponseHeader(headerName); - - if (header == null) { - return null; - } - - return header.getValue(); - } -} diff --git a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java b/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java deleted file mode 100644 index 9bd7542b..00000000 --- a/src/main/java/org/archive/util/binsearch/impl/http/ApacheHttp31SLRFactory.java +++ /dev/null @@ -1,186 +0,0 @@ -package org.archive.util.binsearch.impl.http; - -import 
java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.logging.Logger; - -import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler; -import org.apache.commons.httpclient.HostConfiguration; -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpConnectionManager; -import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; -import org.apache.commons.httpclient.params.HttpClientParams; -import org.archive.util.binsearch.impl.HTTPSeekableLineReader; -import org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory; - -public class ApacheHttp31SLRFactory extends HTTPSeekableLineReaderFactory { - private final static Logger LOGGER = Logger.getLogger(ApacheHttp31SLRFactory.class.getName()); - - private HttpConnectionManager connectionManager = null; - private HostConfiguration hostConfiguration = null; - private HttpClient http = null; - - public ApacheHttp31SLRFactory(String uriString) { - this(); - } - - public ApacheHttp31SLRFactory() { - connectionManager = new MultiThreadedHttpConnectionManager(); - //connectionManager = new ThreadLocalHttpConnectionManager(); - hostConfiguration = new HostConfiguration(); - HttpClientParams params = new HttpClientParams(); - http = new HttpClient(params,connectionManager); - http.setHostConfiguration(hostConfiguration); - } - - public void close() throws IOException - { - //connectionManager.deleteClosedConnections(); - connectionManager.closeIdleConnections(0); - } - - @Override - public ApacheHttp31SLR get(String url) throws IOException { - -// if (LOGGER.isLoggable(Level.FINEST)) { -// LOGGER.finest("Connections: " + connectionManager.getConnectionsInPool(hostConfiguration)); -// } - - return new ApacheHttp31SLR(http, url); - } - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setProxyHostPort(java.lang.String) - */ - @Override - public void setProxyHostPort(String hostPort) { - int 
colonIdx = hostPort.indexOf(':'); - if(colonIdx > 0) { - String host = hostPort.substring(0,colonIdx); - int port = Integer.valueOf(hostPort.substring(colonIdx+1)); - -// http.getHostConfiguration().setProxy(host, port); - hostConfiguration.setProxy(host, port); - } - } - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setMaxTotalConnections(int) - */ - @Override - public void setMaxTotalConnections(int maxTotalConnections) { - connectionManager.getParams(). - setMaxTotalConnections(maxTotalConnections); - } - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#getMaxTotalConnections() - */ - @Override - public int getMaxTotalConnections() { - return connectionManager.getParams().getMaxTotalConnections(); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setMaxHostConnections(int) - */ - @Override - public void setMaxHostConnections(int maxHostConnections) { - connectionManager.getParams().setDefaultMaxConnectionsPerHost(maxHostConnections); - connectionManager.getParams().setMaxConnectionsPerHost(hostConfiguration, maxHostConnections); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#getMaxHostConnections() - */ - @Override - public int getMaxHostConnections() { - return connectionManager.getParams(). 
- getMaxConnectionsPerHost(hostConfiguration); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#getConnectionTimeoutMS() - */ - @Override - public int getConnectionTimeoutMS() { - return connectionManager.getParams().getConnectionTimeout(); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setConnectionTimeoutMS(int) - */ - @Override - public void setConnectionTimeoutMS(int connectionTimeoutMS) { - connectionManager.getParams().setConnectionTimeout(connectionTimeoutMS); - http.getParams().setConnectionManagerTimeout(connectionTimeoutMS); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#getSocketTimeoutMS() - */ - @Override - public int getSocketTimeoutMS() { - return connectionManager.getParams().getSoTimeout(); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setSocketTimeoutMS(int) - */ - @Override - public void setSocketTimeoutMS(int socketTimeoutMS) { - connectionManager.getParams().setSoTimeout(socketTimeoutMS); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#setStaleChecking(boolean) - */ - @Override - public void setStaleChecking(boolean enabled) - { - connectionManager.getParams().setStaleCheckingEnabled(enabled); - } - - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#isStaleChecking() - */ - @Override - public boolean isStaleChecking() - { - return connectionManager.getParams().isStaleCheckingEnabled(); - } - - // Experimental - /* (non-Javadoc) - * @see org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory#getModTime() - */ - @Override - public long getModTime() - { - HTTPSeekableLineReader reader = null; - SimpleDateFormat lastModFormat = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz"); - - try { - reader = get(); - String result = 
reader.getHeaderValue(HTTPSeekableLineReader.LAST_MODIFIED); - Date date = lastModFormat.parse(result); - return date.getTime(); - - } catch (Exception e) { - e.printStackTrace(); - } finally { - if (reader != null) { - try { - reader.close(); - } catch (IOException e) { - - } - } - } - - return 0; - } - - @Override - public void setNumRetries(int numRetries) { - http.getParams().setParameter(HttpClientParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(numRetries, true)); - } -} diff --git a/src/main/java/org/archive/util/iterator/CloseableIteratorWrapper.java b/src/main/java/org/archive/util/iterator/CloseableIteratorWrapper.java index f35c85e5..d29d2ce2 100644 --- a/src/main/java/org/archive/util/iterator/CloseableIteratorWrapper.java +++ b/src/main/java/org/archive/util/iterator/CloseableIteratorWrapper.java @@ -4,7 +4,7 @@ import java.util.Iterator; /** - * Wrap a regular Iterator to create a CloseableIterator where the close() is a no-op + * Wrap a regular Iterator<S> to create a CloseableIterator<S> where the close() is a no-op * @author ilya * * @param diff --git a/src/main/java/org/archive/util/zip/GZIPMembersInputStream.java b/src/main/java/org/archive/util/zip/GZIPMembersInputStream.java index 1838f4b9..82476b6a 100644 --- a/src/main/java/org/archive/util/zip/GZIPMembersInputStream.java +++ b/src/main/java/org/archive/util/zip/GZIPMembersInputStream.java @@ -61,7 +61,7 @@ * position. Calling nextMember() after receiving an EOF will allow reading * to proceed into the next member (if any). * - * @contributor gojomo + * @author gojomo */ public class GZIPMembersInputStream extends OpenJDK7GZIPInputStream { protected long memberNumber = 0; @@ -273,7 +273,6 @@ public Inflater getInflater() { * stream from next() should finish (reaching EOF) before the iterator's * hasNext() or next() is called. 
* - * @return Iterator of * @deprecated for backward compatibility; better to use direct facilities in future */ public Iterator memberIterator() { diff --git a/src/main/java/org/archive/util/zip/OpenJDK7GZIPInputStream.java b/src/main/java/org/archive/util/zip/OpenJDK7GZIPInputStream.java index 044597f0..3c010c39 100644 --- a/src/main/java/org/archive/util/zip/OpenJDK7GZIPInputStream.java +++ b/src/main/java/org/archive/util/zip/OpenJDK7GZIPInputStream.java @@ -41,7 +41,7 @@ * This class implements a stream filter for reading compressed data in * the GZIP file format. * - * @see InflaterInputStream + * @see OpenJDK7InflaterInputStream * @author David Connelly * */ @@ -76,7 +76,7 @@ private void ensureOpen() throws IOException { * @exception ZipException if a GZIP format error has occurred or the * compression method used is unsupported * @exception IOException if an I/O error has occurred - * @exception IllegalArgumentException if size is <= 0 + * @exception IllegalArgumentException if size is <= 0 */ public OpenJDK7GZIPInputStream(InputStream in, int size) throws IOException { super(in, new Inflater(true), size); diff --git a/src/main/java/org/archive/util/zip/OpenJDK7InflaterInputStream.java b/src/main/java/org/archive/util/zip/OpenJDK7InflaterInputStream.java index 5f4c5122..11d458e1 100644 --- a/src/main/java/org/archive/util/zip/OpenJDK7InflaterInputStream.java +++ b/src/main/java/org/archive/util/zip/OpenJDK7InflaterInputStream.java @@ -79,7 +79,7 @@ private void ensureOpen() throws IOException { * @param in the input stream * @param inf the decompressor ("inflater") * @param size the input buffer size - * @exception IllegalArgumentException if size is <= 0 + * @exception IllegalArgumentException if size is <= 0 */ public OpenJDK7InflaterInputStream(InputStream in, Inflater inf, int size) { super(in); @@ -195,7 +195,7 @@ public int available() throws IOException { * @param n the number of bytes to skip * @return the actual number of bytes skipped. 
* @exception IOException if an I/O error has occurred - * @exception IllegalArgumentException if n < 0 + * @exception IllegalArgumentException if n < 0 */ public long skip(long n) throws IOException { if (n < 0) { diff --git a/src/main/resources/effective_tld_names.dat b/src/main/resources/effective_tld_names.dat deleted file mode 100644 index 7c4a0860..00000000 --- a/src/main/resources/effective_tld_names.dat +++ /dev/null @@ -1,7045 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// ===BEGIN ICANN DOMAINS=== - -// ac : http://en.wikipedia.org/wiki/.ac -ac -com.ac -edu.ac -gov.ac -net.ac -mil.ac -org.ac - -// ad : http://en.wikipedia.org/wiki/.ad -ad -nom.ad - -// ae : http://en.wikipedia.org/wiki/.ae -// see also: "Domain Name Eligibility Policy" at http://www.aeda.ae/eng/aepolicy.php -ae -co.ae -net.ae -org.ae -sch.ae -ac.ae -gov.ae -mil.ae - -// aero : see http://www.information.aero/index.php?id=66 -aero -accident-investigation.aero -accident-prevention.aero -aerobatic.aero -aeroclub.aero -aerodrome.aero -agents.aero -aircraft.aero -airline.aero -airport.aero -air-surveillance.aero -airtraffic.aero -air-traffic-control.aero -ambulance.aero -amusement.aero -association.aero -author.aero -ballooning.aero -broker.aero -caa.aero -cargo.aero -catering.aero -certification.aero -championship.aero -charter.aero -civilaviation.aero -club.aero -conference.aero -consultant.aero -consulting.aero -control.aero -council.aero -crew.aero -design.aero -dgca.aero -educator.aero -emergency.aero -engine.aero -engineer.aero -entertainment.aero -equipment.aero -exchange.aero -express.aero -federation.aero -flight.aero -freight.aero -fuel.aero -gliding.aero -government.aero -groundhandling.aero -group.aero -hanggliding.aero -homebuilt.aero -insurance.aero -journal.aero -journalist.aero -leasing.aero -logistics.aero 
-magazine.aero -maintenance.aero -marketplace.aero -media.aero -microlight.aero -modelling.aero -navigation.aero -parachuting.aero -paragliding.aero -passenger-association.aero -pilot.aero -press.aero -production.aero -recreation.aero -repbody.aero -res.aero -research.aero -rotorcraft.aero -safety.aero -scientist.aero -services.aero -show.aero -skydiving.aero -software.aero -student.aero -taxi.aero -trader.aero -trading.aero -trainer.aero -union.aero -workinggroup.aero -works.aero - -// af : http://www.nic.af/help.jsp -af -gov.af -com.af -org.af -net.af -edu.af - -// ag : http://www.nic.ag/prices.htm -ag -com.ag -org.ag -net.ag -co.ag -nom.ag - -// ai : http://nic.com.ai/ -ai -off.ai -com.ai -net.ai -org.ai - -// al : http://www.ert.gov.al/ert_alb/faq_det.html?Id=31 -al -com.al -edu.al -gov.al -mil.al -net.al -org.al - -// am : http://en.wikipedia.org/wiki/.am -am - -// an : http://www.una.an/an_domreg/default.asp -an -com.an -net.an -org.an -edu.an - -// ao : http://en.wikipedia.org/wiki/.ao -// http://www.dns.ao/REGISTR.DOC -ao -ed.ao -gv.ao -og.ao -co.ao -pb.ao -it.ao - -// aq : http://en.wikipedia.org/wiki/.aq -aq - -// ar : http://en.wikipedia.org/wiki/.ar -*.ar -!congresodelalengua3.ar -!educ.ar -!gobiernoelectronico.ar -!mecon.ar -!nacion.ar -!nic.ar -!promocion.ar -!retina.ar -!uba.ar - -// arpa : http://en.wikipedia.org/wiki/.arpa -// Confirmed by registry 2008-06-18 -e164.arpa -in-addr.arpa -ip6.arpa -iris.arpa -uri.arpa -urn.arpa - -// as : http://en.wikipedia.org/wiki/.as -as -gov.as - -// asia : http://en.wikipedia.org/wiki/.asia -asia - -// at : http://en.wikipedia.org/wiki/.at -// Confirmed by registry 2008-06-17 -at -ac.at -co.at -gv.at -or.at - -// au : http://en.wikipedia.org/wiki/.au -// http://www.auda.org.au/ -// 2LDs -com.au -net.au -org.au -edu.au -gov.au -asn.au -id.au -csiro.au -// Historic 2LDs (closed to new registration, but sites still exist) -info.au -conf.au -oz.au -// CGDNs - http://www.cgdn.org.au/ -act.au -nsw.au -nt.au -qld.au 
-sa.au -tas.au -vic.au -wa.au -// 3LDs -act.edu.au -nsw.edu.au -nt.edu.au -qld.edu.au -sa.edu.au -tas.edu.au -vic.edu.au -wa.edu.au -act.gov.au -// Removed at request of Shae.Donelan@services.nsw.gov.au, 2010-03-04 -// nsw.gov.au -nt.gov.au -qld.gov.au -sa.gov.au -tas.gov.au -vic.gov.au -wa.gov.au - -// aw : http://en.wikipedia.org/wiki/.aw -aw -com.aw - -// ax : http://en.wikipedia.org/wiki/.ax -ax - -// az : http://en.wikipedia.org/wiki/.az -az -com.az -net.az -int.az -gov.az -org.az -edu.az -info.az -pp.az -mil.az -name.az -pro.az -biz.az - -// ba : http://en.wikipedia.org/wiki/.ba -ba -org.ba -net.ba -edu.ba -gov.ba -mil.ba -unsa.ba -unbi.ba -co.ba -com.ba -rs.ba - -// bb : http://en.wikipedia.org/wiki/.bb -bb -biz.bb -com.bb -edu.bb -gov.bb -info.bb -net.bb -org.bb -store.bb - -// bd : http://en.wikipedia.org/wiki/.bd -*.bd - -// be : http://en.wikipedia.org/wiki/.be -// Confirmed by registry 2008-06-08 -be -ac.be - -// bf : http://en.wikipedia.org/wiki/.bf -bf -gov.bf - -// bg : http://en.wikipedia.org/wiki/.bg -// https://www.register.bg/user/static/rules/en/index.html -bg -a.bg -b.bg -c.bg -d.bg -e.bg -f.bg -g.bg -h.bg -i.bg -j.bg -k.bg -l.bg -m.bg -n.bg -o.bg -p.bg -q.bg -r.bg -s.bg -t.bg -u.bg -v.bg -w.bg -x.bg -y.bg -z.bg -0.bg -1.bg -2.bg -3.bg -4.bg -5.bg -6.bg -7.bg -8.bg -9.bg - -// bh : http://en.wikipedia.org/wiki/.bh -bh -com.bh -edu.bh -net.bh -org.bh -gov.bh - -// bi : http://en.wikipedia.org/wiki/.bi -// http://whois.nic.bi/ -bi -co.bi -com.bi -edu.bi -or.bi -org.bi - -// biz : http://en.wikipedia.org/wiki/.biz -biz - -// bj : http://en.wikipedia.org/wiki/.bj -bj -asso.bj -barreau.bj -gouv.bj - -// bm : http://www.bermudanic.bm/dnr-text.txt -bm -com.bm -edu.bm -gov.bm -net.bm -org.bm - -// bn : http://en.wikipedia.org/wiki/.bn -*.bn - -// bo : http://www.nic.bo/ -bo -com.bo -edu.bo -gov.bo -gob.bo -int.bo -org.bo -net.bo -mil.bo -tv.bo - -// br : http://registro.br/dominio/dpn.html -// Updated by registry 2011-03-01 -br -adm.br -adv.br -agr.br 
-am.br -arq.br -art.br -ato.br -b.br -bio.br -blog.br -bmd.br -cim.br -cng.br -cnt.br -com.br -coop.br -ecn.br -eco.br -edu.br -emp.br -eng.br -esp.br -etc.br -eti.br -far.br -flog.br -fm.br -fnd.br -fot.br -fst.br -g12.br -ggf.br -gov.br -imb.br -ind.br -inf.br -jor.br -jus.br -leg.br -lel.br -mat.br -med.br -mil.br -mus.br -net.br -nom.br -not.br -ntr.br -odo.br -org.br -ppg.br -pro.br -psc.br -psi.br -qsl.br -radio.br -rec.br -slg.br -srv.br -taxi.br -teo.br -tmp.br -trd.br -tur.br -tv.br -vet.br -vlog.br -wiki.br -zlg.br - -// bs : http://www.nic.bs/rules.html -bs -com.bs -net.bs -org.bs -edu.bs -gov.bs - -// bt : http://en.wikipedia.org/wiki/.bt -bt -com.bt -edu.bt -gov.bt -net.bt -org.bt - -// bv : No registrations at this time. -// Submitted by registry 2006-06-16 - -// bw : http://en.wikipedia.org/wiki/.bw -// http://www.gobin.info/domainname/bw.doc -// list of other 2nd level tlds ? -bw -co.bw -org.bw - -// by : http://en.wikipedia.org/wiki/.by -// http://tld.by/rules_2006_en.html -// list of other 2nd level tlds ? -by -gov.by -mil.by -// Official information does not indicate that com.by is a reserved -// second-level domain, but it's being used as one (see www.google.com.by and -// www.yahoo.com.by, for example), so we list it here for safety's sake. 
-com.by - -// http://hoster.by/ -of.by - -// bz : http://en.wikipedia.org/wiki/.bz -// http://www.belizenic.bz/ -bz -com.bz -net.bz -org.bz -edu.bz -gov.bz - -// ca : http://en.wikipedia.org/wiki/.ca -ca -// ca geographical names -ab.ca -bc.ca -mb.ca -nb.ca -nf.ca -nl.ca -ns.ca -nt.ca -nu.ca -on.ca -pe.ca -qc.ca -sk.ca -yk.ca -// gc.ca: http://en.wikipedia.org/wiki/.gc.ca -// see also: http://registry.gc.ca/en/SubdomainFAQ -gc.ca - -// cat : http://en.wikipedia.org/wiki/.cat -cat - -// cc : http://en.wikipedia.org/wiki/.cc -cc - -// cd : http://en.wikipedia.org/wiki/.cd -// see also: https://www.nic.cd/domain/insertDomain_2.jsp?act=1 -cd -gov.cd - -// cf : http://en.wikipedia.org/wiki/.cf -cf - -// cg : http://en.wikipedia.org/wiki/.cg -cg - -// ch : http://en.wikipedia.org/wiki/.ch -ch - -// ci : http://en.wikipedia.org/wiki/.ci -// http://www.nic.ci/index.php?page=charte -ci -org.ci -or.ci -com.ci -co.ci -edu.ci -ed.ci -ac.ci -net.ci -go.ci -asso.ci -aéroport.ci -int.ci -presse.ci -md.ci -gouv.ci - -// ck : http://en.wikipedia.org/wiki/.ck -*.ck -!www.ck - -// cl : http://en.wikipedia.org/wiki/.cl -cl -gov.cl -gob.cl -co.cl -mil.cl - -// cm : http://en.wikipedia.org/wiki/.cm -cm -gov.cm - -// cn : http://en.wikipedia.org/wiki/.cn -// Submitted by registry 2008-06-11 -cn -ac.cn -com.cn -edu.cn -gov.cn -net.cn -org.cn -mil.cn -公司.cn -网络.cn -網絡.cn -// cn geographic names -ah.cn -bj.cn -cq.cn -fj.cn -gd.cn -gs.cn -gz.cn -gx.cn -ha.cn -hb.cn -he.cn -hi.cn -hl.cn -hn.cn -jl.cn -js.cn -jx.cn -ln.cn -nm.cn -nx.cn -qh.cn -sc.cn -sd.cn -sh.cn -sn.cn -sx.cn -tj.cn -xj.cn -xz.cn -yn.cn -zj.cn -hk.cn -mo.cn -tw.cn - -// co : http://en.wikipedia.org/wiki/.co -// Submitted by registry 2008-06-11 -co -arts.co -com.co -edu.co -firm.co -gov.co -info.co -int.co -mil.co -net.co -nom.co -org.co -rec.co -web.co - -// com : http://en.wikipedia.org/wiki/.com -com - -// coop : http://en.wikipedia.org/wiki/.coop -coop - -// cr : 
http://www.nic.cr/niccr_publico/showRegistroDominiosScreen.do -cr -ac.cr -co.cr -ed.cr -fi.cr -go.cr -or.cr -sa.cr - -// cu : http://en.wikipedia.org/wiki/.cu -cu -com.cu -edu.cu -org.cu -net.cu -gov.cu -inf.cu - -// cv : http://en.wikipedia.org/wiki/.cv -cv - -// cw : http://www.una.cw/cw_registry/ -// Confirmed by registry 2013-03-26 -cw -com.cw -edu.cw -net.cw -org.cw - -// cx : http://en.wikipedia.org/wiki/.cx -// list of other 2nd level tlds ? -cx -gov.cx - -// cy : http://en.wikipedia.org/wiki/.cy -*.cy - -// cz : http://en.wikipedia.org/wiki/.cz -cz - -// de : http://en.wikipedia.org/wiki/.de -// Confirmed by registry (with technical -// reservations) 2008-07-01 -de - -// dj : http://en.wikipedia.org/wiki/.dj -dj - -// dk : http://en.wikipedia.org/wiki/.dk -// Confirmed by registry 2008-06-17 -dk - -// dm : http://en.wikipedia.org/wiki/.dm -dm -com.dm -net.dm -org.dm -edu.dm -gov.dm - -// do : http://en.wikipedia.org/wiki/.do -do -art.do -com.do -edu.do -gob.do -gov.do -mil.do -net.do -org.do -sld.do -web.do - -// dz : http://en.wikipedia.org/wiki/.dz -dz -com.dz -org.dz -net.dz -gov.dz -edu.dz -asso.dz -pol.dz -art.dz - -// ec : http://www.nic.ec/reg/paso1.asp -// Submitted by registry 2008-07-04 -ec -com.ec -info.ec -net.ec -fin.ec -k12.ec -med.ec -pro.ec -org.ec -edu.ec -gov.ec -gob.ec -mil.ec - -// edu : http://en.wikipedia.org/wiki/.edu -edu - -// ee : http://www.eenet.ee/EENet/dom_reeglid.html#lisa_B -ee -edu.ee -gov.ee -riik.ee -lib.ee -med.ee -com.ee -pri.ee -aip.ee -org.ee -fie.ee - -// eg : http://en.wikipedia.org/wiki/.eg -eg -com.eg -edu.eg -eun.eg -gov.eg -mil.eg -name.eg -net.eg -org.eg -sci.eg - -// er : http://en.wikipedia.org/wiki/.er -*.er - -// es : https://www.nic.es/site_ingles/ingles/dominios/index.html -es -com.es -nom.es -org.es -gob.es -edu.es - -// et : http://en.wikipedia.org/wiki/.et -*.et - -// eu : http://en.wikipedia.org/wiki/.eu -eu - -// fi : http://en.wikipedia.org/wiki/.fi -fi -// aland.fi : http://en.wikipedia.org/wiki/.ax 
-// This domain is being phased out in favor of .ax. As there are still many -// domains under aland.fi, we still keep it on the list until aland.fi is -// completely removed. -// TODO: Check for updates (expected to be phased out around Q1/2009) -aland.fi - -// fj : http://en.wikipedia.org/wiki/.fj -*.fj - -// fk : http://en.wikipedia.org/wiki/.fk -*.fk - -// fm : http://en.wikipedia.org/wiki/.fm -fm - -// fo : http://en.wikipedia.org/wiki/.fo -fo - -// fr : http://www.afnic.fr/ -// domaines descriptifs : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-descriptifs -fr -com.fr -asso.fr -nom.fr -prd.fr -presse.fr -tm.fr -// domaines sectoriels : http://www.afnic.fr/obtenir/chartes/nommage-fr/annexe-sectoriels -aeroport.fr -assedic.fr -avocat.fr -avoues.fr -cci.fr -chambagri.fr -chirurgiens-dentistes.fr -experts-comptables.fr -geometre-expert.fr -gouv.fr -greta.fr -huissier-justice.fr -medecin.fr -notaires.fr -pharmacien.fr -port.fr -veterinaire.fr - -// ga : http://en.wikipedia.org/wiki/.ga -ga - -// gb : This registry is effectively dormant -// Submitted by registry 2008-06-12 - -// gd : http://en.wikipedia.org/wiki/.gd -gd - -// ge : http://www.nic.net.ge/policy_en.pdf -ge -com.ge -edu.ge -gov.ge -org.ge -mil.ge -net.ge -pvt.ge - -// gf : http://en.wikipedia.org/wiki/.gf -gf - -// gg : http://www.channelisles.net/applic/avextn.shtml -gg -co.gg -org.gg -net.gg -sch.gg -gov.gg - -// gh : http://en.wikipedia.org/wiki/.gh -// see also: http://www.nic.gh/reg_now.php -// Although domains directly at second level are not possible at the moment, -// they have been possible for some time and may come back. 
-gh -com.gh -edu.gh -gov.gh -org.gh -mil.gh - -// gi : http://www.nic.gi/rules.html -gi -com.gi -ltd.gi -gov.gi -mod.gi -edu.gi -org.gi - -// gl : http://en.wikipedia.org/wiki/.gl -// http://nic.gl -gl - -// gm : http://www.nic.gm/htmlpages%5Cgm-policy.htm -gm - -// gn : http://psg.com/dns/gn/gn.txt -// Submitted by registry 2008-06-17 -ac.gn -com.gn -edu.gn -gov.gn -org.gn -net.gn - -// gov : http://en.wikipedia.org/wiki/.gov -gov - -// gp : http://www.nic.gp/index.php?lang=en -gp -com.gp -net.gp -mobi.gp -edu.gp -org.gp -asso.gp - -// gq : http://en.wikipedia.org/wiki/.gq -gq - -// gr : https://grweb.ics.forth.gr/english/1617-B-2005.html -// Submitted by registry 2008-06-09 -gr -com.gr -edu.gr -net.gr -org.gr -gov.gr - -// gs : http://en.wikipedia.org/wiki/.gs -gs - -// gt : http://www.gt/politicas_de_registro.html -gt -com.gt -edu.gt -gob.gt -ind.gt -mil.gt -net.gt -org.gt - -// gu : http://gadao.gov.gu/registration.txt -*.gu - -// gw : http://en.wikipedia.org/wiki/.gw -gw - -// gy : http://en.wikipedia.org/wiki/.gy -// http://registry.gy/ -gy -co.gy -com.gy -net.gy - -// hk : https://www.hkdnr.hk -// Submitted by registry 2008-06-11 -hk -com.hk -edu.hk -gov.hk -idv.hk -net.hk -org.hk -公司.hk -教育.hk -敎育.hk -政府.hk -個人.hk -个人.hk -箇人.hk -網络.hk -网络.hk -组織.hk -網絡.hk -网絡.hk -组织.hk -組織.hk -組织.hk - -// hm : http://en.wikipedia.org/wiki/.hm -hm - -// hn : http://www.nic.hn/politicas/ps02,,05.html -hn -com.hn -edu.hn -org.hn -net.hn -mil.hn -gob.hn - -// hr : http://www.dns.hr/documents/pdf/HRTLD-regulations.pdf -hr -iz.hr -from.hr -name.hr -com.hr - -// ht : http://www.nic.ht/info/charte.cfm -ht -com.ht -shop.ht -firm.ht -info.ht -adult.ht -net.ht -pro.ht -org.ht -med.ht -art.ht -coop.ht -pol.ht -asso.ht -edu.ht -rel.ht -gouv.ht -perso.ht - -// hu : http://www.domain.hu/domain/English/sld.html -// Confirmed by registry 2008-06-12 -hu -co.hu -info.hu -org.hu -priv.hu -sport.hu -tm.hu -2000.hu -agrar.hu -bolt.hu -casino.hu -city.hu -erotica.hu -erotika.hu -film.hu -forum.hu 
-games.hu -hotel.hu -ingatlan.hu -jogasz.hu -konyvelo.hu -lakas.hu -media.hu -news.hu -reklam.hu -sex.hu -shop.hu -suli.hu -szex.hu -tozsde.hu -utazas.hu -video.hu - -// id : https://register.pandi.or.id/ -id -ac.id -biz.id -co.id -go.id -mil.id -my.id -net.id -or.id -sch.id -web.id - -// ie : http://en.wikipedia.org/wiki/.ie -ie -gov.ie - -// il : http://en.wikipedia.org/wiki/.il -*.il - -// im : https://www.nic.im/pdfs/imfaqs.pdf -im -co.im -ltd.co.im -plc.co.im -net.im -gov.im -org.im -nic.im -ac.im - -// in : http://en.wikipedia.org/wiki/.in -// see also: http://www.inregistry.in/policies/ -// Please note, that nic.in is not an offical eTLD, but used by most -// government institutions. -in -co.in -firm.in -net.in -org.in -gen.in -ind.in -nic.in -ac.in -edu.in -res.in -gov.in -mil.in - -// info : http://en.wikipedia.org/wiki/.info -info - -// int : http://en.wikipedia.org/wiki/.int -// Confirmed by registry 2008-06-18 -int -eu.int - -// io : http://www.nic.io/rules.html -// list of other 2nd level tlds ? 
-io -com.io - -// iq : http://www.cmc.iq/english/iq/iqregister1.htm -iq -gov.iq -edu.iq -mil.iq -com.iq -org.iq -net.iq - -// ir : http://www.nic.ir/Terms_and_Conditions_ir,_Appendix_1_Domain_Rules -// Also see http://www.nic.ir/Internationalized_Domain_Names -// Two .ir entries added at request of , 2010-04-16 -ir -ac.ir -co.ir -gov.ir -id.ir -net.ir -org.ir -sch.ir -// xn--mgba3a4f16a.ir (.ir, Persian YEH) -ایران.ir -// xn--mgba3a4fra.ir (.ir, Arabic YEH) -ايران.ir - -// is : http://www.isnic.is/domain/rules.php -// Confirmed by registry 2008-12-06 -is -net.is -com.is -edu.is -gov.is -org.is -int.is - -// it : http://en.wikipedia.org/wiki/.it -it -gov.it -edu.it -// list of reserved geo-names : -// http://www.nic.it/documenti/regolamenti-e-linee-guida/regolamento-assegnazione-versione-6.0.pdf -// (There is also a list of reserved geo-names corresponding to Italian -// municipalities : http://www.nic.it/documenti/appendice-c.pdf , but it is -// not included here.) -agrigento.it -ag.it -alessandria.it -al.it -ancona.it -an.it -aosta.it -aoste.it -ao.it -arezzo.it -ar.it -ascoli-piceno.it -ascolipiceno.it -ap.it -asti.it -at.it -avellino.it -av.it -bari.it -ba.it -andria-barletta-trani.it -andriabarlettatrani.it -trani-barletta-andria.it -tranibarlettaandria.it -barletta-trani-andria.it -barlettatraniandria.it -andria-trani-barletta.it -andriatranibarletta.it -trani-andria-barletta.it -traniandriabarletta.it -bt.it -belluno.it -bl.it -benevento.it -bn.it -bergamo.it -bg.it -biella.it -bi.it -bologna.it -bo.it -bolzano.it -bozen.it -balsan.it -alto-adige.it -altoadige.it -suedtirol.it -bz.it -brescia.it -bs.it -brindisi.it -br.it -cagliari.it -ca.it -caltanissetta.it -cl.it -campobasso.it -cb.it -carboniaiglesias.it -carbonia-iglesias.it -iglesias-carbonia.it -iglesiascarbonia.it -ci.it -caserta.it -ce.it -catania.it -ct.it -catanzaro.it -cz.it -chieti.it -ch.it -como.it -co.it -cosenza.it -cs.it -cremona.it -cr.it -crotone.it -kr.it -cuneo.it -cn.it 
-dell-ogliastra.it -dellogliastra.it -ogliastra.it -og.it -enna.it -en.it -ferrara.it -fe.it -fermo.it -fm.it -firenze.it -florence.it -fi.it -foggia.it -fg.it -forli-cesena.it -forlicesena.it -cesena-forli.it -cesenaforli.it -fc.it -frosinone.it -fr.it -genova.it -genoa.it -ge.it -gorizia.it -go.it -grosseto.it -gr.it -imperia.it -im.it -isernia.it -is.it -laquila.it -aquila.it -aq.it -la-spezia.it -laspezia.it -sp.it -latina.it -lt.it -lecce.it -le.it -lecco.it -lc.it -livorno.it -li.it -lodi.it -lo.it -lucca.it -lu.it -macerata.it -mc.it -mantova.it -mn.it -massa-carrara.it -massacarrara.it -carrara-massa.it -carraramassa.it -ms.it -matera.it -mt.it -medio-campidano.it -mediocampidano.it -campidano-medio.it -campidanomedio.it -vs.it -messina.it -me.it -milano.it -milan.it -mi.it -modena.it -mo.it -monza.it -monza-brianza.it -monzabrianza.it -monzaebrianza.it -monzaedellabrianza.it -monza-e-della-brianza.it -mb.it -napoli.it -naples.it -na.it -novara.it -no.it -nuoro.it -nu.it -oristano.it -or.it -padova.it -padua.it -pd.it -palermo.it -pa.it -parma.it -pr.it -pavia.it -pv.it -perugia.it -pg.it -pescara.it -pe.it -pesaro-urbino.it -pesarourbino.it -urbino-pesaro.it -urbinopesaro.it -pu.it -piacenza.it -pc.it -pisa.it -pi.it -pistoia.it -pt.it -pordenone.it -pn.it -potenza.it -pz.it -prato.it -po.it -ragusa.it -rg.it -ravenna.it -ra.it -reggio-calabria.it -reggiocalabria.it -rc.it -reggio-emilia.it -reggioemilia.it -re.it -rieti.it -ri.it -rimini.it -rn.it -roma.it -rome.it -rm.it -rovigo.it -ro.it -salerno.it -sa.it -sassari.it -ss.it -savona.it -sv.it -siena.it -si.it -siracusa.it -sr.it -sondrio.it -so.it -taranto.it -ta.it -tempio-olbia.it -tempioolbia.it -olbia-tempio.it -olbiatempio.it -ot.it -teramo.it -te.it -terni.it -tr.it -torino.it -turin.it -to.it -trapani.it -tp.it -trento.it -trentino.it -tn.it -treviso.it -tv.it -trieste.it -ts.it -udine.it -ud.it -varese.it -va.it -venezia.it -venice.it -ve.it -verbania.it -vb.it -vercelli.it -vc.it -verona.it 
-vr.it -vibo-valentia.it -vibovalentia.it -vv.it -vicenza.it -vi.it -viterbo.it -vt.it - -// je : http://www.channelisles.net/applic/avextn.shtml -je -co.je -org.je -net.je -sch.je -gov.je - -// jm : http://www.com.jm/register.html -*.jm - -// jo : http://www.dns.jo/Registration_policy.aspx -jo -com.jo -org.jo -net.jo -edu.jo -sch.jo -gov.jo -mil.jo -name.jo - -// jobs : http://en.wikipedia.org/wiki/.jobs -jobs - -// jp : http://en.wikipedia.org/wiki/.jp -// http://jprs.co.jp/en/jpdomain.html -// Updated by registry 2012-05-28 -jp -// jp organizational type names -ac.jp -ad.jp -co.jp -ed.jp -go.jp -gr.jp -lg.jp -ne.jp -or.jp -// jp preficture type names -aichi.jp -akita.jp -aomori.jp -chiba.jp -ehime.jp -fukui.jp -fukuoka.jp -fukushima.jp -gifu.jp -gunma.jp -hiroshima.jp -hokkaido.jp -hyogo.jp -ibaraki.jp -ishikawa.jp -iwate.jp -kagawa.jp -kagoshima.jp -kanagawa.jp -kochi.jp -kumamoto.jp -kyoto.jp -mie.jp -miyagi.jp -miyazaki.jp -nagano.jp -nagasaki.jp -nara.jp -niigata.jp -oita.jp -okayama.jp -okinawa.jp -osaka.jp -saga.jp -saitama.jp -shiga.jp -shimane.jp -shizuoka.jp -tochigi.jp -tokushima.jp -tokyo.jp -tottori.jp -toyama.jp -wakayama.jp -yamagata.jp -yamaguchi.jp -yamanashi.jp -// jp geographic type names -// http://jprs.jp/doc/rule/saisoku-1.html -*.kawasaki.jp -*.kitakyushu.jp -*.kobe.jp -*.nagoya.jp -*.sapporo.jp -*.sendai.jp -*.yokohama.jp -!city.kawasaki.jp -!city.kitakyushu.jp -!city.kobe.jp -!city.nagoya.jp -!city.sapporo.jp -!city.sendai.jp -!city.yokohama.jp -// 4th level registration -aisai.aichi.jp -ama.aichi.jp -anjo.aichi.jp -asuke.aichi.jp -chiryu.aichi.jp -chita.aichi.jp -fuso.aichi.jp -gamagori.aichi.jp -handa.aichi.jp -hazu.aichi.jp -hekinan.aichi.jp -higashiura.aichi.jp -ichinomiya.aichi.jp -inazawa.aichi.jp -inuyama.aichi.jp -isshiki.aichi.jp -iwakura.aichi.jp -kanie.aichi.jp -kariya.aichi.jp -kasugai.aichi.jp -kira.aichi.jp -kiyosu.aichi.jp -komaki.aichi.jp -konan.aichi.jp -kota.aichi.jp -mihama.aichi.jp -miyoshi.aichi.jp -nagakute.aichi.jp 
-nishio.aichi.jp -nisshin.aichi.jp -obu.aichi.jp -oguchi.aichi.jp -oharu.aichi.jp -okazaki.aichi.jp -owariasahi.aichi.jp -seto.aichi.jp -shikatsu.aichi.jp -shinshiro.aichi.jp -shitara.aichi.jp -tahara.aichi.jp -takahama.aichi.jp -tobishima.aichi.jp -toei.aichi.jp -togo.aichi.jp -tokai.aichi.jp -tokoname.aichi.jp -toyoake.aichi.jp -toyohashi.aichi.jp -toyokawa.aichi.jp -toyone.aichi.jp -toyota.aichi.jp -tsushima.aichi.jp -yatomi.aichi.jp -akita.akita.jp -daisen.akita.jp -fujisato.akita.jp -gojome.akita.jp -hachirogata.akita.jp -happou.akita.jp -higashinaruse.akita.jp -honjo.akita.jp -honjyo.akita.jp -ikawa.akita.jp -kamikoani.akita.jp -kamioka.akita.jp -katagami.akita.jp -kazuno.akita.jp -kitaakita.akita.jp -kosaka.akita.jp -kyowa.akita.jp -misato.akita.jp -mitane.akita.jp -moriyoshi.akita.jp -nikaho.akita.jp -noshiro.akita.jp -odate.akita.jp -oga.akita.jp -ogata.akita.jp -semboku.akita.jp -yokote.akita.jp -yurihonjo.akita.jp -aomori.aomori.jp -gonohe.aomori.jp -hachinohe.aomori.jp -hashikami.aomori.jp -hiranai.aomori.jp -hirosaki.aomori.jp -itayanagi.aomori.jp -kuroishi.aomori.jp -misawa.aomori.jp -mutsu.aomori.jp -nakadomari.aomori.jp -noheji.aomori.jp -oirase.aomori.jp -owani.aomori.jp -rokunohe.aomori.jp -sannohe.aomori.jp -shichinohe.aomori.jp -shingo.aomori.jp -takko.aomori.jp -towada.aomori.jp -tsugaru.aomori.jp -tsuruta.aomori.jp -abiko.chiba.jp -asahi.chiba.jp -chonan.chiba.jp -chosei.chiba.jp -choshi.chiba.jp -chuo.chiba.jp -funabashi.chiba.jp -futtsu.chiba.jp -hanamigawa.chiba.jp -ichihara.chiba.jp -ichikawa.chiba.jp -ichinomiya.chiba.jp -inzai.chiba.jp -isumi.chiba.jp -kamagaya.chiba.jp -kamogawa.chiba.jp -kashiwa.chiba.jp -katori.chiba.jp -katsuura.chiba.jp -kimitsu.chiba.jp -kisarazu.chiba.jp -kozaki.chiba.jp -kujukuri.chiba.jp -kyonan.chiba.jp -matsudo.chiba.jp -midori.chiba.jp -mihama.chiba.jp -minamiboso.chiba.jp -mobara.chiba.jp -mutsuzawa.chiba.jp -nagara.chiba.jp -nagareyama.chiba.jp -narashino.chiba.jp -narita.chiba.jp -noda.chiba.jp 
-oamishirasato.chiba.jp -omigawa.chiba.jp -onjuku.chiba.jp -otaki.chiba.jp -sakae.chiba.jp -sakura.chiba.jp -shimofusa.chiba.jp -shirako.chiba.jp -shiroi.chiba.jp -shisui.chiba.jp -sodegaura.chiba.jp -sosa.chiba.jp -tako.chiba.jp -tateyama.chiba.jp -togane.chiba.jp -tohnosho.chiba.jp -tomisato.chiba.jp -urayasu.chiba.jp -yachimata.chiba.jp -yachiyo.chiba.jp -yokaichiba.chiba.jp -yokoshibahikari.chiba.jp -yotsukaido.chiba.jp -ainan.ehime.jp -honai.ehime.jp -ikata.ehime.jp -imabari.ehime.jp -iyo.ehime.jp -kamijima.ehime.jp -kihoku.ehime.jp -kumakogen.ehime.jp -masaki.ehime.jp -matsuno.ehime.jp -matsuyama.ehime.jp -namikata.ehime.jp -niihama.ehime.jp -ozu.ehime.jp -saijo.ehime.jp -seiyo.ehime.jp -shikokuchuo.ehime.jp -tobe.ehime.jp -toon.ehime.jp -uchiko.ehime.jp -uwajima.ehime.jp -yawatahama.ehime.jp -echizen.fukui.jp -eiheiji.fukui.jp -fukui.fukui.jp -ikeda.fukui.jp -katsuyama.fukui.jp -mihama.fukui.jp -minamiechizen.fukui.jp -obama.fukui.jp -ohi.fukui.jp -ono.fukui.jp -sabae.fukui.jp -sakai.fukui.jp -takahama.fukui.jp -tsuruga.fukui.jp -wakasa.fukui.jp -ashiya.fukuoka.jp -buzen.fukuoka.jp -chikugo.fukuoka.jp -chikuho.fukuoka.jp -chikujo.fukuoka.jp -chikushino.fukuoka.jp -chikuzen.fukuoka.jp -chuo.fukuoka.jp -dazaifu.fukuoka.jp -fukuchi.fukuoka.jp -hakata.fukuoka.jp -higashi.fukuoka.jp -hirokawa.fukuoka.jp -hisayama.fukuoka.jp -iizuka.fukuoka.jp -inatsuki.fukuoka.jp -kaho.fukuoka.jp -kasuga.fukuoka.jp -kasuya.fukuoka.jp -kawara.fukuoka.jp -keisen.fukuoka.jp -koga.fukuoka.jp -kurate.fukuoka.jp -kurogi.fukuoka.jp -kurume.fukuoka.jp -minami.fukuoka.jp -miyako.fukuoka.jp -miyama.fukuoka.jp -miyawaka.fukuoka.jp -mizumaki.fukuoka.jp -munakata.fukuoka.jp -nakagawa.fukuoka.jp -nakama.fukuoka.jp -nishi.fukuoka.jp -nogata.fukuoka.jp -ogori.fukuoka.jp -okagaki.fukuoka.jp -okawa.fukuoka.jp -oki.fukuoka.jp -omuta.fukuoka.jp -onga.fukuoka.jp -onojo.fukuoka.jp -oto.fukuoka.jp -saigawa.fukuoka.jp -sasaguri.fukuoka.jp -shingu.fukuoka.jp -shinyoshitomi.fukuoka.jp -shonai.fukuoka.jp 
-soeda.fukuoka.jp -sue.fukuoka.jp -tachiarai.fukuoka.jp -tagawa.fukuoka.jp -takata.fukuoka.jp -toho.fukuoka.jp -toyotsu.fukuoka.jp -tsuiki.fukuoka.jp -ukiha.fukuoka.jp -umi.fukuoka.jp -usui.fukuoka.jp -yamada.fukuoka.jp -yame.fukuoka.jp -yanagawa.fukuoka.jp -yukuhashi.fukuoka.jp -aizubange.fukushima.jp -aizumisato.fukushima.jp -aizuwakamatsu.fukushima.jp -asakawa.fukushima.jp -bandai.fukushima.jp -date.fukushima.jp -fukushima.fukushima.jp -furudono.fukushima.jp -futaba.fukushima.jp -hanawa.fukushima.jp -higashi.fukushima.jp -hirata.fukushima.jp -hirono.fukushima.jp -iitate.fukushima.jp -inawashiro.fukushima.jp -ishikawa.fukushima.jp -iwaki.fukushima.jp -izumizaki.fukushima.jp -kagamiishi.fukushima.jp -kaneyama.fukushima.jp -kawamata.fukushima.jp -kitakata.fukushima.jp -kitashiobara.fukushima.jp -koori.fukushima.jp -koriyama.fukushima.jp -kunimi.fukushima.jp -miharu.fukushima.jp -mishima.fukushima.jp -namie.fukushima.jp -nango.fukushima.jp -nishiaizu.fukushima.jp -nishigo.fukushima.jp -okuma.fukushima.jp -omotego.fukushima.jp -ono.fukushima.jp -otama.fukushima.jp -samegawa.fukushima.jp -shimogo.fukushima.jp -shirakawa.fukushima.jp -showa.fukushima.jp -soma.fukushima.jp -sukagawa.fukushima.jp -taishin.fukushima.jp -tamakawa.fukushima.jp -tanagura.fukushima.jp -tenei.fukushima.jp -yabuki.fukushima.jp -yamato.fukushima.jp -yamatsuri.fukushima.jp -yanaizu.fukushima.jp -yugawa.fukushima.jp -anpachi.gifu.jp -ena.gifu.jp -gifu.gifu.jp -ginan.gifu.jp -godo.gifu.jp -gujo.gifu.jp -hashima.gifu.jp -hichiso.gifu.jp -hida.gifu.jp -higashishirakawa.gifu.jp -ibigawa.gifu.jp -ikeda.gifu.jp -kakamigahara.gifu.jp -kani.gifu.jp -kasahara.gifu.jp -kasamatsu.gifu.jp -kawaue.gifu.jp -kitagata.gifu.jp -mino.gifu.jp -minokamo.gifu.jp -mitake.gifu.jp -mizunami.gifu.jp -motosu.gifu.jp -nakatsugawa.gifu.jp -ogaki.gifu.jp -sakahogi.gifu.jp -seki.gifu.jp -sekigahara.gifu.jp -shirakawa.gifu.jp -tajimi.gifu.jp -takayama.gifu.jp -tarui.gifu.jp -toki.gifu.jp -tomika.gifu.jp -wanouchi.gifu.jp 
-yamagata.gifu.jp -yaotsu.gifu.jp -yoro.gifu.jp -annaka.gunma.jp -chiyoda.gunma.jp -fujioka.gunma.jp -higashiagatsuma.gunma.jp -isesaki.gunma.jp -itakura.gunma.jp -kanna.gunma.jp -kanra.gunma.jp -katashina.gunma.jp -kawaba.gunma.jp -kiryu.gunma.jp -kusatsu.gunma.jp -maebashi.gunma.jp -meiwa.gunma.jp -midori.gunma.jp -minakami.gunma.jp -naganohara.gunma.jp -nakanojo.gunma.jp -nanmoku.gunma.jp -numata.gunma.jp -oizumi.gunma.jp -ora.gunma.jp -ota.gunma.jp -shibukawa.gunma.jp -shimonita.gunma.jp -shinto.gunma.jp -showa.gunma.jp -takasaki.gunma.jp -takayama.gunma.jp -tamamura.gunma.jp -tatebayashi.gunma.jp -tomioka.gunma.jp -tsukiyono.gunma.jp -tsumagoi.gunma.jp -ueno.gunma.jp -yoshioka.gunma.jp -asaminami.hiroshima.jp -daiwa.hiroshima.jp -etajima.hiroshima.jp -fuchu.hiroshima.jp -fukuyama.hiroshima.jp -hatsukaichi.hiroshima.jp -higashihiroshima.hiroshima.jp -hongo.hiroshima.jp -jinsekikogen.hiroshima.jp -kaita.hiroshima.jp -kui.hiroshima.jp -kumano.hiroshima.jp -kure.hiroshima.jp -mihara.hiroshima.jp -miyoshi.hiroshima.jp -naka.hiroshima.jp -onomichi.hiroshima.jp -osakikamijima.hiroshima.jp -otake.hiroshima.jp -saka.hiroshima.jp -sera.hiroshima.jp -seranishi.hiroshima.jp -shinichi.hiroshima.jp -shobara.hiroshima.jp -takehara.hiroshima.jp -abashiri.hokkaido.jp -abira.hokkaido.jp -aibetsu.hokkaido.jp -akabira.hokkaido.jp -akkeshi.hokkaido.jp -asahikawa.hokkaido.jp -ashibetsu.hokkaido.jp -ashoro.hokkaido.jp -assabu.hokkaido.jp -atsuma.hokkaido.jp -bibai.hokkaido.jp -biei.hokkaido.jp -bifuka.hokkaido.jp -bihoro.hokkaido.jp -biratori.hokkaido.jp -chippubetsu.hokkaido.jp -chitose.hokkaido.jp -date.hokkaido.jp -ebetsu.hokkaido.jp -embetsu.hokkaido.jp -eniwa.hokkaido.jp -erimo.hokkaido.jp -esan.hokkaido.jp -esashi.hokkaido.jp -fukagawa.hokkaido.jp -fukushima.hokkaido.jp -furano.hokkaido.jp -furubira.hokkaido.jp -haboro.hokkaido.jp -hakodate.hokkaido.jp -hamatonbetsu.hokkaido.jp -hidaka.hokkaido.jp -higashikagura.hokkaido.jp -higashikawa.hokkaido.jp -hiroo.hokkaido.jp 
-hokuryu.hokkaido.jp -hokuto.hokkaido.jp -honbetsu.hokkaido.jp -horokanai.hokkaido.jp -horonobe.hokkaido.jp -ikeda.hokkaido.jp -imakane.hokkaido.jp -ishikari.hokkaido.jp -iwamizawa.hokkaido.jp -iwanai.hokkaido.jp -kamifurano.hokkaido.jp -kamikawa.hokkaido.jp -kamishihoro.hokkaido.jp -kamisunagawa.hokkaido.jp -kamoenai.hokkaido.jp -kayabe.hokkaido.jp -kembuchi.hokkaido.jp -kikonai.hokkaido.jp -kimobetsu.hokkaido.jp -kitahiroshima.hokkaido.jp -kitami.hokkaido.jp -kiyosato.hokkaido.jp -koshimizu.hokkaido.jp -kunneppu.hokkaido.jp -kuriyama.hokkaido.jp -kuromatsunai.hokkaido.jp -kushiro.hokkaido.jp -kutchan.hokkaido.jp -kyowa.hokkaido.jp -mashike.hokkaido.jp -matsumae.hokkaido.jp -mikasa.hokkaido.jp -minamifurano.hokkaido.jp -mombetsu.hokkaido.jp -moseushi.hokkaido.jp -mukawa.hokkaido.jp -muroran.hokkaido.jp -naie.hokkaido.jp -nakagawa.hokkaido.jp -nakasatsunai.hokkaido.jp -nakatombetsu.hokkaido.jp -nanae.hokkaido.jp -nanporo.hokkaido.jp -nayoro.hokkaido.jp -nemuro.hokkaido.jp -niikappu.hokkaido.jp -niki.hokkaido.jp -nishiokoppe.hokkaido.jp -noboribetsu.hokkaido.jp -numata.hokkaido.jp -obihiro.hokkaido.jp -obira.hokkaido.jp -oketo.hokkaido.jp -okoppe.hokkaido.jp -otaru.hokkaido.jp -otobe.hokkaido.jp -otofuke.hokkaido.jp -otoineppu.hokkaido.jp -oumu.hokkaido.jp -ozora.hokkaido.jp -pippu.hokkaido.jp -rankoshi.hokkaido.jp -rebun.hokkaido.jp -rikubetsu.hokkaido.jp -rishiri.hokkaido.jp -rishirifuji.hokkaido.jp -saroma.hokkaido.jp -sarufutsu.hokkaido.jp -shakotan.hokkaido.jp -shari.hokkaido.jp -shibecha.hokkaido.jp -shibetsu.hokkaido.jp -shikabe.hokkaido.jp -shikaoi.hokkaido.jp -shimamaki.hokkaido.jp -shimizu.hokkaido.jp -shimokawa.hokkaido.jp -shinshinotsu.hokkaido.jp -shintoku.hokkaido.jp -shiranuka.hokkaido.jp -shiraoi.hokkaido.jp -shiriuchi.hokkaido.jp -sobetsu.hokkaido.jp -sunagawa.hokkaido.jp -taiki.hokkaido.jp -takasu.hokkaido.jp -takikawa.hokkaido.jp -takinoue.hokkaido.jp -teshikaga.hokkaido.jp -tobetsu.hokkaido.jp -tohma.hokkaido.jp -tomakomai.hokkaido.jp 
-tomari.hokkaido.jp -toya.hokkaido.jp -toyako.hokkaido.jp -toyotomi.hokkaido.jp -toyoura.hokkaido.jp -tsubetsu.hokkaido.jp -tsukigata.hokkaido.jp -urakawa.hokkaido.jp -urausu.hokkaido.jp -uryu.hokkaido.jp -utashinai.hokkaido.jp -wakkanai.hokkaido.jp -wassamu.hokkaido.jp -yakumo.hokkaido.jp -yoichi.hokkaido.jp -aioi.hyogo.jp -akashi.hyogo.jp -ako.hyogo.jp -amagasaki.hyogo.jp -aogaki.hyogo.jp -asago.hyogo.jp -ashiya.hyogo.jp -awaji.hyogo.jp -fukusaki.hyogo.jp -goshiki.hyogo.jp -harima.hyogo.jp -himeji.hyogo.jp -ichikawa.hyogo.jp -inagawa.hyogo.jp -itami.hyogo.jp -kakogawa.hyogo.jp -kamigori.hyogo.jp -kamikawa.hyogo.jp -kasai.hyogo.jp -kasuga.hyogo.jp -kawanishi.hyogo.jp -miki.hyogo.jp -minamiawaji.hyogo.jp -nishinomiya.hyogo.jp -nishiwaki.hyogo.jp -ono.hyogo.jp -sanda.hyogo.jp -sannan.hyogo.jp -sasayama.hyogo.jp -sayo.hyogo.jp -shingu.hyogo.jp -shinonsen.hyogo.jp -shiso.hyogo.jp -sumoto.hyogo.jp -taishi.hyogo.jp -taka.hyogo.jp -takarazuka.hyogo.jp -takasago.hyogo.jp -takino.hyogo.jp -tamba.hyogo.jp -tatsuno.hyogo.jp -toyooka.hyogo.jp -yabu.hyogo.jp -yashiro.hyogo.jp -yoka.hyogo.jp -yokawa.hyogo.jp -ami.ibaraki.jp -asahi.ibaraki.jp -bando.ibaraki.jp -chikusei.ibaraki.jp -daigo.ibaraki.jp -fujishiro.ibaraki.jp -hitachi.ibaraki.jp -hitachinaka.ibaraki.jp -hitachiomiya.ibaraki.jp -hitachiota.ibaraki.jp -ibaraki.ibaraki.jp -ina.ibaraki.jp -inashiki.ibaraki.jp -itako.ibaraki.jp -iwama.ibaraki.jp -joso.ibaraki.jp -kamisu.ibaraki.jp -kasama.ibaraki.jp -kashima.ibaraki.jp -kasumigaura.ibaraki.jp -koga.ibaraki.jp -miho.ibaraki.jp -mito.ibaraki.jp -moriya.ibaraki.jp -naka.ibaraki.jp -namegata.ibaraki.jp -oarai.ibaraki.jp -ogawa.ibaraki.jp -omitama.ibaraki.jp -ryugasaki.ibaraki.jp -sakai.ibaraki.jp -sakuragawa.ibaraki.jp -shimodate.ibaraki.jp -shimotsuma.ibaraki.jp -shirosato.ibaraki.jp -sowa.ibaraki.jp -suifu.ibaraki.jp -takahagi.ibaraki.jp -tamatsukuri.ibaraki.jp -tokai.ibaraki.jp -tomobe.ibaraki.jp -tone.ibaraki.jp -toride.ibaraki.jp -tsuchiura.ibaraki.jp -tsukuba.ibaraki.jp 
-uchihara.ibaraki.jp -ushiku.ibaraki.jp -yachiyo.ibaraki.jp -yamagata.ibaraki.jp -yawara.ibaraki.jp -yuki.ibaraki.jp -anamizu.ishikawa.jp -hakui.ishikawa.jp -hakusan.ishikawa.jp -kaga.ishikawa.jp -kahoku.ishikawa.jp -kanazawa.ishikawa.jp -kawakita.ishikawa.jp -komatsu.ishikawa.jp -nakanoto.ishikawa.jp -nanao.ishikawa.jp -nomi.ishikawa.jp -nonoichi.ishikawa.jp -noto.ishikawa.jp -shika.ishikawa.jp -suzu.ishikawa.jp -tsubata.ishikawa.jp -tsurugi.ishikawa.jp -uchinada.ishikawa.jp -wajima.ishikawa.jp -fudai.iwate.jp -fujisawa.iwate.jp -hanamaki.iwate.jp -hiraizumi.iwate.jp -hirono.iwate.jp -ichinohe.iwate.jp -ichinoseki.iwate.jp -iwaizumi.iwate.jp -iwate.iwate.jp -joboji.iwate.jp -kamaishi.iwate.jp -kanegasaki.iwate.jp -karumai.iwate.jp -kawai.iwate.jp -kitakami.iwate.jp -kuji.iwate.jp -kunohe.iwate.jp -kuzumaki.iwate.jp -miyako.iwate.jp -mizusawa.iwate.jp -morioka.iwate.jp -ninohe.iwate.jp -noda.iwate.jp -ofunato.iwate.jp -oshu.iwate.jp -otsuchi.iwate.jp -rikuzentakata.iwate.jp -shiwa.iwate.jp -shizukuishi.iwate.jp -sumita.iwate.jp -takizawa.iwate.jp -tanohata.iwate.jp -tono.iwate.jp -yahaba.iwate.jp -yamada.iwate.jp -ayagawa.kagawa.jp -higashikagawa.kagawa.jp -kanonji.kagawa.jp -kotohira.kagawa.jp -manno.kagawa.jp -marugame.kagawa.jp -mitoyo.kagawa.jp -naoshima.kagawa.jp -sanuki.kagawa.jp -tadotsu.kagawa.jp -takamatsu.kagawa.jp -tonosho.kagawa.jp -uchinomi.kagawa.jp -utazu.kagawa.jp -zentsuji.kagawa.jp -akune.kagoshima.jp -amami.kagoshima.jp -hioki.kagoshima.jp -isa.kagoshima.jp -isen.kagoshima.jp -izumi.kagoshima.jp -kagoshima.kagoshima.jp -kanoya.kagoshima.jp -kawanabe.kagoshima.jp -kinko.kagoshima.jp -kouyama.kagoshima.jp -makurazaki.kagoshima.jp -matsumoto.kagoshima.jp -minamitane.kagoshima.jp -nakatane.kagoshima.jp -nishinoomote.kagoshima.jp -satsumasendai.kagoshima.jp -soo.kagoshima.jp -tarumizu.kagoshima.jp -yusui.kagoshima.jp -aikawa.kanagawa.jp -atsugi.kanagawa.jp -ayase.kanagawa.jp -chigasaki.kanagawa.jp -ebina.kanagawa.jp -fujisawa.kanagawa.jp 
-hadano.kanagawa.jp -hakone.kanagawa.jp -hiratsuka.kanagawa.jp -isehara.kanagawa.jp -kaisei.kanagawa.jp -kamakura.kanagawa.jp -kiyokawa.kanagawa.jp -matsuda.kanagawa.jp -minamiashigara.kanagawa.jp -miura.kanagawa.jp -nakai.kanagawa.jp -ninomiya.kanagawa.jp -odawara.kanagawa.jp -oi.kanagawa.jp -oiso.kanagawa.jp -sagamihara.kanagawa.jp -samukawa.kanagawa.jp -tsukui.kanagawa.jp -yamakita.kanagawa.jp -yamato.kanagawa.jp -yokosuka.kanagawa.jp -yugawara.kanagawa.jp -zama.kanagawa.jp -zushi.kanagawa.jp -aki.kochi.jp -geisei.kochi.jp -hidaka.kochi.jp -higashitsuno.kochi.jp -ino.kochi.jp -kagami.kochi.jp -kami.kochi.jp -kitagawa.kochi.jp -kochi.kochi.jp -mihara.kochi.jp -motoyama.kochi.jp -muroto.kochi.jp -nahari.kochi.jp -nakamura.kochi.jp -nankoku.kochi.jp -nishitosa.kochi.jp -niyodogawa.kochi.jp -ochi.kochi.jp -okawa.kochi.jp -otoyo.kochi.jp -otsuki.kochi.jp -sakawa.kochi.jp -sukumo.kochi.jp -susaki.kochi.jp -tosa.kochi.jp -tosashimizu.kochi.jp -toyo.kochi.jp -tsuno.kochi.jp -umaji.kochi.jp -yasuda.kochi.jp -yusuhara.kochi.jp -amakusa.kumamoto.jp -arao.kumamoto.jp -aso.kumamoto.jp -choyo.kumamoto.jp -gyokuto.kumamoto.jp -hitoyoshi.kumamoto.jp -kamiamakusa.kumamoto.jp -kashima.kumamoto.jp -kikuchi.kumamoto.jp -kosa.kumamoto.jp -kumamoto.kumamoto.jp -mashiki.kumamoto.jp -mifune.kumamoto.jp -minamata.kumamoto.jp -minamioguni.kumamoto.jp -nagasu.kumamoto.jp -nishihara.kumamoto.jp -oguni.kumamoto.jp -ozu.kumamoto.jp -sumoto.kumamoto.jp -takamori.kumamoto.jp -uki.kumamoto.jp -uto.kumamoto.jp -yamaga.kumamoto.jp -yamato.kumamoto.jp -yatsushiro.kumamoto.jp -ayabe.kyoto.jp -fukuchiyama.kyoto.jp -higashiyama.kyoto.jp -ide.kyoto.jp -ine.kyoto.jp -joyo.kyoto.jp -kameoka.kyoto.jp -kamo.kyoto.jp -kita.kyoto.jp -kizu.kyoto.jp -kumiyama.kyoto.jp -kyotamba.kyoto.jp -kyotanabe.kyoto.jp -kyotango.kyoto.jp -maizuru.kyoto.jp -minami.kyoto.jp -minamiyamashiro.kyoto.jp -miyazu.kyoto.jp -muko.kyoto.jp -nagaokakyo.kyoto.jp -nakagyo.kyoto.jp -nantan.kyoto.jp -oyamazaki.kyoto.jp -sakyo.kyoto.jp 
-seika.kyoto.jp -tanabe.kyoto.jp -uji.kyoto.jp -ujitawara.kyoto.jp -wazuka.kyoto.jp -yamashina.kyoto.jp -yawata.kyoto.jp -asahi.mie.jp -inabe.mie.jp -ise.mie.jp -kameyama.mie.jp -kawagoe.mie.jp -kiho.mie.jp -kisosaki.mie.jp -kiwa.mie.jp -komono.mie.jp -kumano.mie.jp -kuwana.mie.jp -matsusaka.mie.jp -meiwa.mie.jp -mihama.mie.jp -minamiise.mie.jp -misugi.mie.jp -miyama.mie.jp -nabari.mie.jp -shima.mie.jp -suzuka.mie.jp -tado.mie.jp -taiki.mie.jp -taki.mie.jp -tamaki.mie.jp -toba.mie.jp -tsu.mie.jp -udono.mie.jp -ureshino.mie.jp -watarai.mie.jp -yokkaichi.mie.jp -furukawa.miyagi.jp -higashimatsushima.miyagi.jp -ishinomaki.miyagi.jp -iwanuma.miyagi.jp -kakuda.miyagi.jp -kami.miyagi.jp -kawasaki.miyagi.jp -kesennuma.miyagi.jp -marumori.miyagi.jp -matsushima.miyagi.jp -minamisanriku.miyagi.jp -misato.miyagi.jp -murata.miyagi.jp -natori.miyagi.jp -ogawara.miyagi.jp -ohira.miyagi.jp -onagawa.miyagi.jp -osaki.miyagi.jp -rifu.miyagi.jp -semine.miyagi.jp -shibata.miyagi.jp -shichikashuku.miyagi.jp -shikama.miyagi.jp -shiogama.miyagi.jp -shiroishi.miyagi.jp -tagajo.miyagi.jp -taiwa.miyagi.jp -tome.miyagi.jp -tomiya.miyagi.jp -wakuya.miyagi.jp -watari.miyagi.jp -yamamoto.miyagi.jp -zao.miyagi.jp -aya.miyazaki.jp -ebino.miyazaki.jp -gokase.miyazaki.jp -hyuga.miyazaki.jp -kadogawa.miyazaki.jp -kawaminami.miyazaki.jp -kijo.miyazaki.jp -kitagawa.miyazaki.jp -kitakata.miyazaki.jp -kitaura.miyazaki.jp -kobayashi.miyazaki.jp -kunitomi.miyazaki.jp -kushima.miyazaki.jp -mimata.miyazaki.jp -miyakonojo.miyazaki.jp -miyazaki.miyazaki.jp -morotsuka.miyazaki.jp -nichinan.miyazaki.jp -nishimera.miyazaki.jp -nobeoka.miyazaki.jp -saito.miyazaki.jp -shiiba.miyazaki.jp -shintomi.miyazaki.jp -takaharu.miyazaki.jp -takanabe.miyazaki.jp -takazaki.miyazaki.jp -tsuno.miyazaki.jp -achi.nagano.jp -agematsu.nagano.jp -anan.nagano.jp -aoki.nagano.jp -asahi.nagano.jp -azumino.nagano.jp -chikuhoku.nagano.jp -chikuma.nagano.jp -chino.nagano.jp -fujimi.nagano.jp -hakuba.nagano.jp -hara.nagano.jp 
-hiraya.nagano.jp -iida.nagano.jp -iijima.nagano.jp -iiyama.nagano.jp -iizuna.nagano.jp -ikeda.nagano.jp -ikusaka.nagano.jp -ina.nagano.jp -karuizawa.nagano.jp -kawakami.nagano.jp -kiso.nagano.jp -kisofukushima.nagano.jp -kitaaiki.nagano.jp -komagane.nagano.jp -komoro.nagano.jp -matsukawa.nagano.jp -matsumoto.nagano.jp -miasa.nagano.jp -minamiaiki.nagano.jp -minamimaki.nagano.jp -minamiminowa.nagano.jp -minowa.nagano.jp -miyada.nagano.jp -miyota.nagano.jp -mochizuki.nagano.jp -nagano.nagano.jp -nagawa.nagano.jp -nagiso.nagano.jp -nakagawa.nagano.jp -nakano.nagano.jp -nozawaonsen.nagano.jp -obuse.nagano.jp -ogawa.nagano.jp -okaya.nagano.jp -omachi.nagano.jp -omi.nagano.jp -ookuwa.nagano.jp -ooshika.nagano.jp -otaki.nagano.jp -otari.nagano.jp -sakae.nagano.jp -sakaki.nagano.jp -saku.nagano.jp -sakuho.nagano.jp -shimosuwa.nagano.jp -shinanomachi.nagano.jp -shiojiri.nagano.jp -suwa.nagano.jp -suzaka.nagano.jp -takagi.nagano.jp -takamori.nagano.jp -takayama.nagano.jp -tateshina.nagano.jp -tatsuno.nagano.jp -togakushi.nagano.jp -togura.nagano.jp -tomi.nagano.jp -ueda.nagano.jp -wada.nagano.jp -yamagata.nagano.jp -yamanouchi.nagano.jp -yasaka.nagano.jp -yasuoka.nagano.jp -chijiwa.nagasaki.jp -futsu.nagasaki.jp -goto.nagasaki.jp -hasami.nagasaki.jp -hirado.nagasaki.jp -iki.nagasaki.jp -isahaya.nagasaki.jp -kawatana.nagasaki.jp -kuchinotsu.nagasaki.jp -matsuura.nagasaki.jp -nagasaki.nagasaki.jp -obama.nagasaki.jp -omura.nagasaki.jp -oseto.nagasaki.jp -saikai.nagasaki.jp -sasebo.nagasaki.jp -seihi.nagasaki.jp -shimabara.nagasaki.jp -shinkamigoto.nagasaki.jp -togitsu.nagasaki.jp -tsushima.nagasaki.jp -unzen.nagasaki.jp -ando.nara.jp -gose.nara.jp -heguri.nara.jp -higashiyoshino.nara.jp -ikaruga.nara.jp -ikoma.nara.jp -kamikitayama.nara.jp -kanmaki.nara.jp -kashiba.nara.jp -kashihara.nara.jp -katsuragi.nara.jp -kawai.nara.jp -kawakami.nara.jp -kawanishi.nara.jp -koryo.nara.jp -kurotaki.nara.jp -mitsue.nara.jp -miyake.nara.jp -nara.nara.jp -nosegawa.nara.jp -oji.nara.jp 
-ouda.nara.jp -oyodo.nara.jp -sakurai.nara.jp -sango.nara.jp -shimoichi.nara.jp -shimokitayama.nara.jp -shinjo.nara.jp -soni.nara.jp -takatori.nara.jp -tawaramoto.nara.jp -tenkawa.nara.jp -tenri.nara.jp -uda.nara.jp -yamatokoriyama.nara.jp -yamatotakada.nara.jp -yamazoe.nara.jp -yoshino.nara.jp -aga.niigata.jp -agano.niigata.jp -gosen.niigata.jp -itoigawa.niigata.jp -izumozaki.niigata.jp -joetsu.niigata.jp -kamo.niigata.jp -kariwa.niigata.jp -kashiwazaki.niigata.jp -minamiuonuma.niigata.jp -mitsuke.niigata.jp -muika.niigata.jp -murakami.niigata.jp -myoko.niigata.jp -nagaoka.niigata.jp -niigata.niigata.jp -ojiya.niigata.jp -omi.niigata.jp -sado.niigata.jp -sanjo.niigata.jp -seiro.niigata.jp -seirou.niigata.jp -sekikawa.niigata.jp -shibata.niigata.jp -tagami.niigata.jp -tainai.niigata.jp -tochio.niigata.jp -tokamachi.niigata.jp -tsubame.niigata.jp -tsunan.niigata.jp -uonuma.niigata.jp -yahiko.niigata.jp -yoita.niigata.jp -yuzawa.niigata.jp -beppu.oita.jp -bungoono.oita.jp -bungotakada.oita.jp -hasama.oita.jp -hiji.oita.jp -himeshima.oita.jp -hita.oita.jp -kamitsue.oita.jp -kokonoe.oita.jp -kuju.oita.jp -kunisaki.oita.jp -kusu.oita.jp -oita.oita.jp -saiki.oita.jp -taketa.oita.jp -tsukumi.oita.jp -usa.oita.jp -usuki.oita.jp -yufu.oita.jp -akaiwa.okayama.jp -asakuchi.okayama.jp -bizen.okayama.jp -hayashima.okayama.jp -ibara.okayama.jp -kagamino.okayama.jp -kasaoka.okayama.jp -kibichuo.okayama.jp -kumenan.okayama.jp -kurashiki.okayama.jp -maniwa.okayama.jp -misaki.okayama.jp -nagi.okayama.jp -niimi.okayama.jp -nishiawakura.okayama.jp -okayama.okayama.jp -satosho.okayama.jp -setouchi.okayama.jp -shinjo.okayama.jp -shoo.okayama.jp -soja.okayama.jp -takahashi.okayama.jp -tamano.okayama.jp -tsuyama.okayama.jp -wake.okayama.jp -yakage.okayama.jp -aguni.okinawa.jp -ginowan.okinawa.jp -ginoza.okinawa.jp -gushikami.okinawa.jp -haebaru.okinawa.jp -higashi.okinawa.jp -hirara.okinawa.jp -iheya.okinawa.jp -ishigaki.okinawa.jp -ishikawa.okinawa.jp -itoman.okinawa.jp -izena.okinawa.jp 
-kadena.okinawa.jp -kin.okinawa.jp -kitadaito.okinawa.jp -kitanakagusuku.okinawa.jp -kumejima.okinawa.jp -kunigami.okinawa.jp -minamidaito.okinawa.jp -motobu.okinawa.jp -nago.okinawa.jp -naha.okinawa.jp -nakagusuku.okinawa.jp -nakijin.okinawa.jp -nanjo.okinawa.jp -nishihara.okinawa.jp -ogimi.okinawa.jp -okinawa.okinawa.jp -onna.okinawa.jp -shimoji.okinawa.jp -taketomi.okinawa.jp -tarama.okinawa.jp -tokashiki.okinawa.jp -tomigusuku.okinawa.jp -tonaki.okinawa.jp -urasoe.okinawa.jp -uruma.okinawa.jp -yaese.okinawa.jp -yomitan.okinawa.jp -yonabaru.okinawa.jp -yonaguni.okinawa.jp -zamami.okinawa.jp -abeno.osaka.jp -chihayaakasaka.osaka.jp -chuo.osaka.jp -daito.osaka.jp -fujiidera.osaka.jp -habikino.osaka.jp -hannan.osaka.jp -higashiosaka.osaka.jp -higashisumiyoshi.osaka.jp -higashiyodogawa.osaka.jp -hirakata.osaka.jp -ibaraki.osaka.jp -ikeda.osaka.jp -izumi.osaka.jp -izumiotsu.osaka.jp -izumisano.osaka.jp -kadoma.osaka.jp -kaizuka.osaka.jp -kanan.osaka.jp -kashiwara.osaka.jp -katano.osaka.jp -kawachinagano.osaka.jp -kishiwada.osaka.jp -kita.osaka.jp -kumatori.osaka.jp -matsubara.osaka.jp -minato.osaka.jp -minoh.osaka.jp -misaki.osaka.jp -moriguchi.osaka.jp -neyagawa.osaka.jp -nishi.osaka.jp -nose.osaka.jp -osakasayama.osaka.jp -sakai.osaka.jp -sayama.osaka.jp -sennan.osaka.jp -settsu.osaka.jp -shijonawate.osaka.jp -shimamoto.osaka.jp -suita.osaka.jp -tadaoka.osaka.jp -taishi.osaka.jp -tajiri.osaka.jp -takaishi.osaka.jp -takatsuki.osaka.jp -tondabayashi.osaka.jp -toyonaka.osaka.jp -toyono.osaka.jp -yao.osaka.jp -ariake.saga.jp -arita.saga.jp -fukudomi.saga.jp -genkai.saga.jp -hamatama.saga.jp -hizen.saga.jp -imari.saga.jp -kamimine.saga.jp -kanzaki.saga.jp -karatsu.saga.jp -kashima.saga.jp -kitagata.saga.jp -kitahata.saga.jp -kiyama.saga.jp -kouhoku.saga.jp -kyuragi.saga.jp -nishiarita.saga.jp -ogi.saga.jp -omachi.saga.jp -ouchi.saga.jp -saga.saga.jp -shiroishi.saga.jp -taku.saga.jp -tara.saga.jp -tosu.saga.jp -yoshinogari.saga.jp -arakawa.saitama.jp -asaka.saitama.jp 
-chichibu.saitama.jp -fujimi.saitama.jp -fujimino.saitama.jp -fukaya.saitama.jp -hanno.saitama.jp -hanyu.saitama.jp -hasuda.saitama.jp -hatogaya.saitama.jp -hatoyama.saitama.jp -hidaka.saitama.jp -higashichichibu.saitama.jp -higashimatsuyama.saitama.jp -honjo.saitama.jp -ina.saitama.jp -iruma.saitama.jp -iwatsuki.saitama.jp -kamiizumi.saitama.jp -kamikawa.saitama.jp -kamisato.saitama.jp -kasukabe.saitama.jp -kawagoe.saitama.jp -kawaguchi.saitama.jp -kawajima.saitama.jp -kazo.saitama.jp -kitamoto.saitama.jp -koshigaya.saitama.jp -kounosu.saitama.jp -kuki.saitama.jp -kumagaya.saitama.jp -matsubushi.saitama.jp -minano.saitama.jp -misato.saitama.jp -miyashiro.saitama.jp -miyoshi.saitama.jp -moroyama.saitama.jp -nagatoro.saitama.jp -namegawa.saitama.jp -niiza.saitama.jp -ogano.saitama.jp -ogawa.saitama.jp -ogose.saitama.jp -okegawa.saitama.jp -omiya.saitama.jp -otaki.saitama.jp -ranzan.saitama.jp -ryokami.saitama.jp -saitama.saitama.jp -sakado.saitama.jp -satte.saitama.jp -sayama.saitama.jp -shiki.saitama.jp -shiraoka.saitama.jp -soka.saitama.jp -sugito.saitama.jp -toda.saitama.jp -tokigawa.saitama.jp -tokorozawa.saitama.jp -tsurugashima.saitama.jp -urawa.saitama.jp -warabi.saitama.jp -yashio.saitama.jp -yokoze.saitama.jp -yono.saitama.jp -yorii.saitama.jp -yoshida.saitama.jp -yoshikawa.saitama.jp -yoshimi.saitama.jp -aisho.shiga.jp -gamo.shiga.jp -higashiomi.shiga.jp -hikone.shiga.jp -koka.shiga.jp -konan.shiga.jp -kosei.shiga.jp -koto.shiga.jp -kusatsu.shiga.jp -maibara.shiga.jp -moriyama.shiga.jp -nagahama.shiga.jp -nishiazai.shiga.jp -notogawa.shiga.jp -omihachiman.shiga.jp -otsu.shiga.jp -ritto.shiga.jp -ryuoh.shiga.jp -takashima.shiga.jp -takatsuki.shiga.jp -torahime.shiga.jp -toyosato.shiga.jp -yasu.shiga.jp -akagi.shimane.jp -ama.shimane.jp -gotsu.shimane.jp -hamada.shimane.jp -higashiizumo.shimane.jp -hikawa.shimane.jp -hikimi.shimane.jp -izumo.shimane.jp -kakinoki.shimane.jp -masuda.shimane.jp -matsue.shimane.jp -misato.shimane.jp -nishinoshima.shimane.jp 
-ohda.shimane.jp -okinoshima.shimane.jp -okuizumo.shimane.jp -shimane.shimane.jp -tamayu.shimane.jp -tsuwano.shimane.jp -unnan.shimane.jp -yakumo.shimane.jp -yasugi.shimane.jp -yatsuka.shimane.jp -arai.shizuoka.jp -atami.shizuoka.jp -fuji.shizuoka.jp -fujieda.shizuoka.jp -fujikawa.shizuoka.jp -fujinomiya.shizuoka.jp -fukuroi.shizuoka.jp -gotemba.shizuoka.jp -haibara.shizuoka.jp -hamamatsu.shizuoka.jp -higashiizu.shizuoka.jp -ito.shizuoka.jp -iwata.shizuoka.jp -izu.shizuoka.jp -izunokuni.shizuoka.jp -kakegawa.shizuoka.jp -kannami.shizuoka.jp -kawanehon.shizuoka.jp -kawazu.shizuoka.jp -kikugawa.shizuoka.jp -kosai.shizuoka.jp -makinohara.shizuoka.jp -matsuzaki.shizuoka.jp -minamiizu.shizuoka.jp -mishima.shizuoka.jp -morimachi.shizuoka.jp -nishiizu.shizuoka.jp -numazu.shizuoka.jp -omaezaki.shizuoka.jp -shimada.shizuoka.jp -shimizu.shizuoka.jp -shimoda.shizuoka.jp -shizuoka.shizuoka.jp -susono.shizuoka.jp -yaizu.shizuoka.jp -yoshida.shizuoka.jp -ashikaga.tochigi.jp -bato.tochigi.jp -haga.tochigi.jp -ichikai.tochigi.jp -iwafune.tochigi.jp -kaminokawa.tochigi.jp -kanuma.tochigi.jp -karasuyama.tochigi.jp -kuroiso.tochigi.jp -mashiko.tochigi.jp -mibu.tochigi.jp -moka.tochigi.jp -motegi.tochigi.jp -nasu.tochigi.jp -nasushiobara.tochigi.jp -nikko.tochigi.jp -nishikata.tochigi.jp -nogi.tochigi.jp -ohira.tochigi.jp -ohtawara.tochigi.jp -oyama.tochigi.jp -sakura.tochigi.jp -sano.tochigi.jp -shimotsuke.tochigi.jp -shioya.tochigi.jp -takanezawa.tochigi.jp -tochigi.tochigi.jp -tsuga.tochigi.jp -ujiie.tochigi.jp -utsunomiya.tochigi.jp -yaita.tochigi.jp -aizumi.tokushima.jp -anan.tokushima.jp -ichiba.tokushima.jp -itano.tokushima.jp -kainan.tokushima.jp -komatsushima.tokushima.jp -matsushige.tokushima.jp -mima.tokushima.jp -minami.tokushima.jp -miyoshi.tokushima.jp -mugi.tokushima.jp -nakagawa.tokushima.jp -naruto.tokushima.jp -sanagochi.tokushima.jp -shishikui.tokushima.jp -tokushima.tokushima.jp -wajiki.tokushima.jp -adachi.tokyo.jp -akiruno.tokyo.jp -akishima.tokyo.jp 
-aogashima.tokyo.jp -arakawa.tokyo.jp -bunkyo.tokyo.jp -chiyoda.tokyo.jp -chofu.tokyo.jp -chuo.tokyo.jp -edogawa.tokyo.jp -fuchu.tokyo.jp -fussa.tokyo.jp -hachijo.tokyo.jp -hachioji.tokyo.jp -hamura.tokyo.jp -higashikurume.tokyo.jp -higashimurayama.tokyo.jp -higashiyamato.tokyo.jp -hino.tokyo.jp -hinode.tokyo.jp -hinohara.tokyo.jp -inagi.tokyo.jp -itabashi.tokyo.jp -katsushika.tokyo.jp -kita.tokyo.jp -kiyose.tokyo.jp -kodaira.tokyo.jp -koganei.tokyo.jp -kokubunji.tokyo.jp -komae.tokyo.jp -koto.tokyo.jp -kouzushima.tokyo.jp -kunitachi.tokyo.jp -machida.tokyo.jp -meguro.tokyo.jp -minato.tokyo.jp -mitaka.tokyo.jp -mizuho.tokyo.jp -musashimurayama.tokyo.jp -musashino.tokyo.jp -nakano.tokyo.jp -nerima.tokyo.jp -ogasawara.tokyo.jp -okutama.tokyo.jp -ome.tokyo.jp -oshima.tokyo.jp -ota.tokyo.jp -setagaya.tokyo.jp -shibuya.tokyo.jp -shinagawa.tokyo.jp -shinjuku.tokyo.jp -suginami.tokyo.jp -sumida.tokyo.jp -tachikawa.tokyo.jp -taito.tokyo.jp -tama.tokyo.jp -toshima.tokyo.jp -chizu.tottori.jp -hino.tottori.jp -kawahara.tottori.jp -koge.tottori.jp -kotoura.tottori.jp -misasa.tottori.jp -nanbu.tottori.jp -nichinan.tottori.jp -sakaiminato.tottori.jp -tottori.tottori.jp -wakasa.tottori.jp -yazu.tottori.jp -yonago.tottori.jp -asahi.toyama.jp -fuchu.toyama.jp -fukumitsu.toyama.jp -funahashi.toyama.jp -himi.toyama.jp -imizu.toyama.jp -inami.toyama.jp -johana.toyama.jp -kamiichi.toyama.jp -kurobe.toyama.jp -nakaniikawa.toyama.jp -namerikawa.toyama.jp -nanto.toyama.jp -nyuzen.toyama.jp -oyabe.toyama.jp -taira.toyama.jp -takaoka.toyama.jp -tateyama.toyama.jp -toga.toyama.jp -tonami.toyama.jp -toyama.toyama.jp -unazuki.toyama.jp -uozu.toyama.jp -yamada.toyama.jp -arida.wakayama.jp -aridagawa.wakayama.jp -gobo.wakayama.jp -hashimoto.wakayama.jp -hidaka.wakayama.jp -hirogawa.wakayama.jp -inami.wakayama.jp -iwade.wakayama.jp -kainan.wakayama.jp -kamitonda.wakayama.jp -katsuragi.wakayama.jp -kimino.wakayama.jp -kinokawa.wakayama.jp -kitayama.wakayama.jp -koya.wakayama.jp -koza.wakayama.jp 
-kozagawa.wakayama.jp -kudoyama.wakayama.jp -kushimoto.wakayama.jp -mihama.wakayama.jp -misato.wakayama.jp -nachikatsuura.wakayama.jp -shingu.wakayama.jp -shirahama.wakayama.jp -taiji.wakayama.jp -tanabe.wakayama.jp -wakayama.wakayama.jp -yuasa.wakayama.jp -yura.wakayama.jp -asahi.yamagata.jp -funagata.yamagata.jp -higashine.yamagata.jp -iide.yamagata.jp -kahoku.yamagata.jp -kaminoyama.yamagata.jp -kaneyama.yamagata.jp -kawanishi.yamagata.jp -mamurogawa.yamagata.jp -mikawa.yamagata.jp -murayama.yamagata.jp -nagai.yamagata.jp -nakayama.yamagata.jp -nanyo.yamagata.jp -nishikawa.yamagata.jp -obanazawa.yamagata.jp -oe.yamagata.jp -oguni.yamagata.jp -ohkura.yamagata.jp -oishida.yamagata.jp -sagae.yamagata.jp -sakata.yamagata.jp -sakegawa.yamagata.jp -shinjo.yamagata.jp -shirataka.yamagata.jp -shonai.yamagata.jp -takahata.yamagata.jp -tendo.yamagata.jp -tozawa.yamagata.jp -tsuruoka.yamagata.jp -yamagata.yamagata.jp -yamanobe.yamagata.jp -yonezawa.yamagata.jp -yuza.yamagata.jp -abu.yamaguchi.jp -hagi.yamaguchi.jp -hikari.yamaguchi.jp -hofu.yamaguchi.jp -iwakuni.yamaguchi.jp -kudamatsu.yamaguchi.jp -mitou.yamaguchi.jp -nagato.yamaguchi.jp -oshima.yamaguchi.jp -shimonoseki.yamaguchi.jp -shunan.yamaguchi.jp -tabuse.yamaguchi.jp -tokuyama.yamaguchi.jp -toyota.yamaguchi.jp -ube.yamaguchi.jp -yuu.yamaguchi.jp -chuo.yamanashi.jp -doshi.yamanashi.jp -fuefuki.yamanashi.jp -fujikawa.yamanashi.jp -fujikawaguchiko.yamanashi.jp -fujiyoshida.yamanashi.jp -hayakawa.yamanashi.jp -hokuto.yamanashi.jp -ichikawamisato.yamanashi.jp -kai.yamanashi.jp -kofu.yamanashi.jp -koshu.yamanashi.jp -kosuge.yamanashi.jp -minami-alps.yamanashi.jp -minobu.yamanashi.jp -nakamichi.yamanashi.jp -nanbu.yamanashi.jp -narusawa.yamanashi.jp -nirasaki.yamanashi.jp -nishikatsura.yamanashi.jp -oshino.yamanashi.jp -otsuki.yamanashi.jp -showa.yamanashi.jp -tabayama.yamanashi.jp -tsuru.yamanashi.jp -uenohara.yamanashi.jp -yamanakako.yamanashi.jp -yamanashi.yamanashi.jp - -// ke : 
http://www.kenic.or.ke/index.php?option=com_content&task=view&id=117&Itemid=145 -*.ke - -// kg : http://www.domain.kg/dmn_n.html -kg -org.kg -net.kg -com.kg -edu.kg -gov.kg -mil.kg - -// kh : http://www.mptc.gov.kh/dns_registration.htm -*.kh - -// ki : http://www.ki/dns/index.html -ki -edu.ki -biz.ki -net.ki -org.ki -gov.ki -info.ki -com.ki - -// km : http://en.wikipedia.org/wiki/.km -// http://www.domaine.km/documents/charte.doc -km -org.km -nom.km -gov.km -prd.km -tm.km -edu.km -mil.km -ass.km -com.km -// These are only mentioned as proposed suggestions at domaine.km, but -// http://en.wikipedia.org/wiki/.km says they're available for registration: -coop.km -asso.km -presse.km -medecin.km -notaires.km -pharmaciens.km -veterinaire.km -gouv.km - -// kn : http://en.wikipedia.org/wiki/.kn -// http://www.dot.kn/domainRules.html -kn -net.kn -org.kn -edu.kn -gov.kn - -// kp : http://www.kcce.kp/en_index.php -com.kp -edu.kp -gov.kp -org.kp -rep.kp -tra.kp - -// kr : http://en.wikipedia.org/wiki/.kr -// see also: http://domain.nida.or.kr/eng/registration.jsp -kr -ac.kr -co.kr -es.kr -go.kr -hs.kr -kg.kr -mil.kr -ms.kr -ne.kr -or.kr -pe.kr -re.kr -sc.kr -// kr geographical names -busan.kr -chungbuk.kr -chungnam.kr -daegu.kr -daejeon.kr -gangwon.kr -gwangju.kr -gyeongbuk.kr -gyeonggi.kr -gyeongnam.kr -incheon.kr -jeju.kr -jeonbuk.kr -jeonnam.kr -seoul.kr -ulsan.kr - -// kw : http://en.wikipedia.org/wiki/.kw -*.kw - -// ky : http://www.icta.ky/da_ky_reg_dom.php -// Confirmed by registry 2008-06-17 -ky -edu.ky -gov.ky -com.ky -org.ky -net.ky - -// kz : http://en.wikipedia.org/wiki/.kz -// see also: http://www.nic.kz/rules/index.jsp -kz -org.kz -edu.kz -net.kz -gov.kz -mil.kz -com.kz - -// la : http://en.wikipedia.org/wiki/.la -// Submitted by registry 2008-06-10 -la -int.la -net.la -info.la -edu.la -gov.la -per.la -com.la -org.la - -// lb : http://en.wikipedia.org/wiki/.lb -// Submitted by registry 2008-06-17 -com.lb -edu.lb -gov.lb -net.lb -org.lb - -// lc : 
http://en.wikipedia.org/wiki/.lc -// see also: http://www.nic.lc/rules.htm -lc -com.lc -net.lc -co.lc -org.lc -edu.lc -gov.lc - -// li : http://en.wikipedia.org/wiki/.li -li - -// lk : http://www.nic.lk/seclevpr.html -lk -gov.lk -sch.lk -net.lk -int.lk -com.lk -org.lk -edu.lk -ngo.lk -soc.lk -web.lk -ltd.lk -assn.lk -grp.lk -hotel.lk - -// lr : http://psg.com/dns/lr/lr.txt -// Submitted by registry 2008-06-17 -com.lr -edu.lr -gov.lr -org.lr -net.lr - -// ls : http://en.wikipedia.org/wiki/.ls -ls -co.ls -org.ls - -// lt : http://en.wikipedia.org/wiki/.lt -lt -// gov.lt : http://www.gov.lt/index_en.php -gov.lt - -// lu : http://www.dns.lu/en/ -lu - -// lv : http://www.nic.lv/DNS/En/generic.php -lv -com.lv -edu.lv -gov.lv -org.lv -mil.lv -id.lv -net.lv -asn.lv -conf.lv - -// ly : http://www.nic.ly/regulations.php -ly -com.ly -net.ly -gov.ly -plc.ly -edu.ly -sch.ly -med.ly -org.ly -id.ly - -// ma : http://en.wikipedia.org/wiki/.ma -// http://www.anrt.ma/fr/admin/download/upload/file_fr782.pdf -ma -co.ma -net.ma -gov.ma -org.ma -ac.ma -press.ma - -// mc : http://www.nic.mc/ -mc -tm.mc -asso.mc - -// md : http://en.wikipedia.org/wiki/.md -md - -// me : http://en.wikipedia.org/wiki/.me -me -co.me -net.me -org.me -edu.me -ac.me -gov.me -its.me -priv.me - -// mg : http://www.nic.mg/tarif.htm -mg -org.mg -nom.mg -gov.mg -prd.mg -tm.mg -edu.mg -mil.mg -com.mg - -// mh : http://en.wikipedia.org/wiki/.mh -mh - -// mil : http://en.wikipedia.org/wiki/.mil -mil - -// mk : http://en.wikipedia.org/wiki/.mk -// see also: http://dns.marnet.net.mk/postapka.php -mk -com.mk -org.mk -net.mk -edu.mk -gov.mk -inf.mk -name.mk - -// ml : http://www.gobin.info/domainname/ml-template.doc -// see also: http://en.wikipedia.org/wiki/.ml -ml -com.ml -edu.ml -gouv.ml -gov.ml -net.ml -org.ml -presse.ml - -// mm : http://en.wikipedia.org/wiki/.mm -*.mm - -// mn : http://en.wikipedia.org/wiki/.mn -mn -gov.mn -edu.mn -org.mn - -// mo : http://www.monic.net.mo/ -mo -com.mo -net.mo -org.mo -edu.mo -gov.mo 
- -// mobi : http://en.wikipedia.org/wiki/.mobi -mobi - -// mp : http://www.dot.mp/ -// Confirmed by registry 2008-06-17 -mp - -// mq : http://en.wikipedia.org/wiki/.mq -mq - -// mr : http://en.wikipedia.org/wiki/.mr -mr -gov.mr - -// ms : http://en.wikipedia.org/wiki/.ms -ms - -// mt : https://www.nic.org.mt/dotmt/ -*.mt - -// mu : http://en.wikipedia.org/wiki/.mu -mu -com.mu -net.mu -org.mu -gov.mu -ac.mu -co.mu -or.mu - -// museum : http://about.museum/naming/ -// http://index.museum/ -museum -academy.museum -agriculture.museum -air.museum -airguard.museum -alabama.museum -alaska.museum -amber.museum -ambulance.museum -american.museum -americana.museum -americanantiques.museum -americanart.museum -amsterdam.museum -and.museum -annefrank.museum -anthro.museum -anthropology.museum -antiques.museum -aquarium.museum -arboretum.museum -archaeological.museum -archaeology.museum -architecture.museum -art.museum -artanddesign.museum -artcenter.museum -artdeco.museum -arteducation.museum -artgallery.museum -arts.museum -artsandcrafts.museum -asmatart.museum -assassination.museum -assisi.museum -association.museum -astronomy.museum -atlanta.museum -austin.museum -australia.museum -automotive.museum -aviation.museum -axis.museum -badajoz.museum -baghdad.museum -bahn.museum -bale.museum -baltimore.museum -barcelona.museum -baseball.museum -basel.museum -baths.museum -bauern.museum -beauxarts.museum -beeldengeluid.museum -bellevue.museum -bergbau.museum -berkeley.museum -berlin.museum -bern.museum -bible.museum -bilbao.museum -bill.museum -birdart.museum -birthplace.museum -bonn.museum -boston.museum -botanical.museum -botanicalgarden.museum -botanicgarden.museum -botany.museum -brandywinevalley.museum -brasil.museum -bristol.museum -british.museum -britishcolumbia.museum -broadcast.museum -brunel.museum -brussel.museum -brussels.museum -bruxelles.museum -building.museum -burghof.museum -bus.museum -bushey.museum -cadaques.museum -california.museum -cambridge.museum 
-can.museum -canada.museum -capebreton.museum -carrier.museum -cartoonart.museum -casadelamoneda.museum -castle.museum -castres.museum -celtic.museum -center.museum -chattanooga.museum -cheltenham.museum -chesapeakebay.museum -chicago.museum -children.museum -childrens.museum -childrensgarden.museum -chiropractic.museum -chocolate.museum -christiansburg.museum -cincinnati.museum -cinema.museum -circus.museum -civilisation.museum -civilization.museum -civilwar.museum -clinton.museum -clock.museum -coal.museum -coastaldefence.museum -cody.museum -coldwar.museum -collection.museum -colonialwilliamsburg.museum -coloradoplateau.museum -columbia.museum -columbus.museum -communication.museum -communications.museum -community.museum -computer.museum -computerhistory.museum -comunicações.museum -contemporary.museum -contemporaryart.museum -convent.museum -copenhagen.museum -corporation.museum -correios-e-telecomunicações.museum -corvette.museum -costume.museum -countryestate.museum -county.museum -crafts.museum -cranbrook.museum -creation.museum -cultural.museum -culturalcenter.museum -culture.museum -cyber.museum -cymru.museum -dali.museum -dallas.museum -database.museum -ddr.museum -decorativearts.museum -delaware.museum -delmenhorst.museum -denmark.museum -depot.museum -design.museum -detroit.museum -dinosaur.museum -discovery.museum -dolls.museum -donostia.museum -durham.museum -eastafrica.museum -eastcoast.museum -education.museum -educational.museum -egyptian.museum -eisenbahn.museum -elburg.museum -elvendrell.museum -embroidery.museum -encyclopedic.museum -england.museum -entomology.museum -environment.museum -environmentalconservation.museum -epilepsy.museum -essex.museum -estate.museum -ethnology.museum -exeter.museum -exhibition.museum -family.museum -farm.museum -farmequipment.museum -farmers.museum -farmstead.museum -field.museum -figueres.museum -filatelia.museum -film.museum -fineart.museum -finearts.museum -finland.museum -flanders.museum -florida.museum 
-force.museum -fortmissoula.museum -fortworth.museum -foundation.museum -francaise.museum -frankfurt.museum -franziskaner.museum -freemasonry.museum -freiburg.museum -fribourg.museum -frog.museum -fundacio.museum -furniture.museum -gallery.museum -garden.museum -gateway.museum -geelvinck.museum -gemological.museum -geology.museum -georgia.museum -giessen.museum -glas.museum -glass.museum -gorge.museum -grandrapids.museum -graz.museum -guernsey.museum -halloffame.museum -hamburg.museum -handson.museum -harvestcelebration.museum -hawaii.museum -health.museum -heimatunduhren.museum -hellas.museum -helsinki.museum -hembygdsforbund.museum -heritage.museum -histoire.museum -historical.museum -historicalsociety.museum -historichouses.museum -historisch.museum -historisches.museum -history.museum -historyofscience.museum -horology.museum -house.museum -humanities.museum -illustration.museum -imageandsound.museum -indian.museum -indiana.museum -indianapolis.museum -indianmarket.museum -intelligence.museum -interactive.museum -iraq.museum -iron.museum -isleofman.museum -jamison.museum -jefferson.museum -jerusalem.museum -jewelry.museum -jewish.museum -jewishart.museum -jfk.museum -journalism.museum -judaica.museum -judygarland.museum -juedisches.museum -juif.museum -karate.museum -karikatur.museum -kids.museum -koebenhavn.museum -koeln.museum -kunst.museum -kunstsammlung.museum -kunstunddesign.museum -labor.museum -labour.museum -lajolla.museum -lancashire.museum -landes.museum -lans.museum -läns.museum -larsson.museum -lewismiller.museum -lincoln.museum -linz.museum -living.museum -livinghistory.museum -localhistory.museum -london.museum -losangeles.museum -louvre.museum -loyalist.museum -lucerne.museum -luxembourg.museum -luzern.museum -mad.museum -madrid.museum -mallorca.museum -manchester.museum -mansion.museum -mansions.museum -manx.museum -marburg.museum -maritime.museum -maritimo.museum -maryland.museum -marylhurst.museum -media.museum -medical.museum 
-medizinhistorisches.museum -meeres.museum -memorial.museum -mesaverde.museum -michigan.museum -midatlantic.museum -military.museum -mill.museum -miners.museum -mining.museum -minnesota.museum -missile.museum -missoula.museum -modern.museum -moma.museum -money.museum -monmouth.museum -monticello.museum -montreal.museum -moscow.museum -motorcycle.museum -muenchen.museum -muenster.museum -mulhouse.museum -muncie.museum -museet.museum -museumcenter.museum -museumvereniging.museum -music.museum -national.museum -nationalfirearms.museum -nationalheritage.museum -nativeamerican.museum -naturalhistory.museum -naturalhistorymuseum.museum -naturalsciences.museum -nature.museum -naturhistorisches.museum -natuurwetenschappen.museum -naumburg.museum -naval.museum -nebraska.museum -neues.museum -newhampshire.museum -newjersey.museum -newmexico.museum -newport.museum -newspaper.museum -newyork.museum -niepce.museum -norfolk.museum -north.museum -nrw.museum -nuernberg.museum -nuremberg.museum -nyc.museum -nyny.museum -oceanographic.museum -oceanographique.museum -omaha.museum -online.museum -ontario.museum -openair.museum -oregon.museum -oregontrail.museum -otago.museum -oxford.museum -pacific.museum -paderborn.museum -palace.museum -paleo.museum -palmsprings.museum -panama.museum -paris.museum -pasadena.museum -pharmacy.museum -philadelphia.museum -philadelphiaarea.museum -philately.museum -phoenix.museum -photography.museum -pilots.museum -pittsburgh.museum -planetarium.museum -plantation.museum -plants.museum -plaza.museum -portal.museum -portland.museum -portlligat.museum -posts-and-telecommunications.museum -preservation.museum -presidio.museum -press.museum -project.museum -public.museum -pubol.museum -quebec.museum -railroad.museum -railway.museum -research.museum -resistance.museum -riodejaneiro.museum -rochester.museum -rockart.museum -roma.museum -russia.museum -saintlouis.museum -salem.museum -salvadordali.museum -salzburg.museum -sandiego.museum -sanfrancisco.museum 
-santabarbara.museum -santacruz.museum -santafe.museum -saskatchewan.museum -satx.museum -savannahga.museum -schlesisches.museum -schoenbrunn.museum -schokoladen.museum -school.museum -schweiz.museum -science.museum -scienceandhistory.museum -scienceandindustry.museum -sciencecenter.museum -sciencecenters.museum -science-fiction.museum -sciencehistory.museum -sciences.museum -sciencesnaturelles.museum -scotland.museum -seaport.museum -settlement.museum -settlers.museum -shell.museum -sherbrooke.museum -sibenik.museum -silk.museum -ski.museum -skole.museum -society.museum -sologne.museum -soundandvision.museum -southcarolina.museum -southwest.museum -space.museum -spy.museum -square.museum -stadt.museum -stalbans.museum -starnberg.museum -state.museum -stateofdelaware.museum -station.museum -steam.museum -steiermark.museum -stjohn.museum -stockholm.museum -stpetersburg.museum -stuttgart.museum -suisse.museum -surgeonshall.museum -surrey.museum -svizzera.museum -sweden.museum -sydney.museum -tank.museum -tcm.museum -technology.museum -telekommunikation.museum -television.museum -texas.museum -textile.museum -theater.museum -time.museum -timekeeping.museum -topology.museum -torino.museum -touch.museum -town.museum -transport.museum -tree.museum -trolley.museum -trust.museum -trustee.museum -uhren.museum -ulm.museum -undersea.museum -university.museum -usa.museum -usantiques.museum -usarts.museum -uscountryestate.museum -usculture.museum -usdecorativearts.museum -usgarden.museum -ushistory.museum -ushuaia.museum -uslivinghistory.museum -utah.museum -uvic.museum -valley.museum -vantaa.museum -versailles.museum -viking.museum -village.museum -virginia.museum -virtual.museum -virtuel.museum -vlaanderen.museum -volkenkunde.museum -wales.museum -wallonie.museum -war.museum -washingtondc.museum -watchandclock.museum -watch-and-clock.museum -western.museum -westfalen.museum -whaling.museum -wildlife.museum -williamsburg.museum -windmill.museum -workshop.museum -york.museum 
-yorkshire.museum -yosemite.museum -youth.museum -zoological.museum -zoology.museum -ירושלים.museum -иком.museum - -// mv : http://en.wikipedia.org/wiki/.mv -// "mv" included because, contra Wikipedia, google.mv exists. -mv -aero.mv -biz.mv -com.mv -coop.mv -edu.mv -gov.mv -info.mv -int.mv -mil.mv -museum.mv -name.mv -net.mv -org.mv -pro.mv - -// mw : http://www.registrar.mw/ -mw -ac.mw -biz.mw -co.mw -com.mw -coop.mw -edu.mw -gov.mw -int.mw -museum.mw -net.mw -org.mw - -// mx : http://www.nic.mx/ -// Submitted by registry 2008-06-19 -mx -com.mx -org.mx -gob.mx -edu.mx -net.mx - -// my : http://www.mynic.net.my/ -my -com.my -net.my -org.my -gov.my -edu.my -mil.my -name.my - -// mz : http://www.gobin.info/domainname/mz-template.doc -*.mz -!teledata.mz - -// na : http://www.na-nic.com.na/ -// http://www.info.na/domain/ -na -info.na -pro.na -name.na -school.na -or.na -dr.na -us.na -mx.na -ca.na -in.na -cc.na -tv.na -ws.na -mobi.na -co.na -com.na -org.na - -// name : has 2nd-level tlds, but there's no list of them -name - -// nc : http://www.cctld.nc/ -nc -asso.nc - -// ne : http://en.wikipedia.org/wiki/.ne -ne - -// net : http://en.wikipedia.org/wiki/.net -net - -// nf : http://en.wikipedia.org/wiki/.nf -nf -com.nf -net.nf -per.nf -rec.nf -web.nf -arts.nf -firm.nf -info.nf -other.nf -store.nf - -// ng : http://psg.com/dns/ng/ -// Submitted by registry 2008-06-17 -ac.ng -com.ng -edu.ng -gov.ng -net.ng -org.ng - -// ni : http://www.nic.ni/dominios.htm -*.ni - -// nl : http://www.domain-registry.nl/ace.php/c,728,122,,,,Home.html -// Confirmed by registry (with technical -// reservations) 2008-06-08 -nl - -// BV.nl will be a registry for dutch BV's (besloten vennootschap) -bv.nl - -// no : http://www.norid.no/regelverk/index.en.html -// The Norwegian registry has declined to notify us of updates. The web pages -// referenced below are the official source of the data. 
There is also an -// announce mailing list: -// https://postlister.uninett.no/sympa/info/norid-diskusjon -no -// Norid generic domains : http://www.norid.no/regelverk/vedlegg-c.en.html -fhs.no -vgs.no -fylkesbibl.no -folkebibl.no -museum.no -idrett.no -priv.no -// Non-Norid generic domains : http://www.norid.no/regelverk/vedlegg-d.en.html -mil.no -stat.no -dep.no -kommune.no -herad.no -// no geographical names : http://www.norid.no/regelverk/vedlegg-b.en.html -// counties -aa.no -ah.no -bu.no -fm.no -hl.no -hm.no -jan-mayen.no -mr.no -nl.no -nt.no -of.no -ol.no -oslo.no -rl.no -sf.no -st.no -svalbard.no -tm.no -tr.no -va.no -vf.no -// primary and lower secondary schools per county -gs.aa.no -gs.ah.no -gs.bu.no -gs.fm.no -gs.hl.no -gs.hm.no -gs.jan-mayen.no -gs.mr.no -gs.nl.no -gs.nt.no -gs.of.no -gs.ol.no -gs.oslo.no -gs.rl.no -gs.sf.no -gs.st.no -gs.svalbard.no -gs.tm.no -gs.tr.no -gs.va.no -gs.vf.no -// cities -akrehamn.no -åkrehamn.no -algard.no -ålgård.no -arna.no -brumunddal.no -bryne.no -bronnoysund.no -brønnøysund.no -drobak.no -drøbak.no -egersund.no -fetsund.no -floro.no -florø.no -fredrikstad.no -hokksund.no -honefoss.no -hønefoss.no -jessheim.no -jorpeland.no -jørpeland.no -kirkenes.no -kopervik.no -krokstadelva.no -langevag.no -langevåg.no -leirvik.no -mjondalen.no -mjøndalen.no -mo-i-rana.no -mosjoen.no -mosjøen.no -nesoddtangen.no -orkanger.no -osoyro.no -osøyro.no -raholt.no -råholt.no -sandnessjoen.no -sandnessjøen.no -skedsmokorset.no -slattum.no -spjelkavik.no -stathelle.no -stavern.no -stjordalshalsen.no -stjørdalshalsen.no -tananger.no -tranby.no -vossevangen.no -// communities -afjord.no -åfjord.no -agdenes.no -al.no -ål.no -alesund.no -ålesund.no -alstahaug.no -alta.no -áltá.no -alaheadju.no -álaheadju.no -alvdal.no -amli.no -åmli.no -amot.no -åmot.no -andebu.no -andoy.no -andøy.no -andasuolo.no -ardal.no -årdal.no -aremark.no -arendal.no -ås.no -aseral.no -åseral.no -asker.no -askim.no -askvoll.no -askoy.no -askøy.no -asnes.no -åsnes.no 
-audnedaln.no -aukra.no -aure.no -aurland.no -aurskog-holand.no -aurskog-høland.no -austevoll.no -austrheim.no -averoy.no -averøy.no -balestrand.no -ballangen.no -balat.no -bálát.no -balsfjord.no -bahccavuotna.no -báhccavuotna.no -bamble.no -bardu.no -beardu.no -beiarn.no -bajddar.no -bájddar.no -baidar.no -báidár.no -berg.no -bergen.no -berlevag.no -berlevåg.no -bearalvahki.no -bearalváhki.no -bindal.no -birkenes.no -bjarkoy.no -bjarkøy.no -bjerkreim.no -bjugn.no -bodo.no -bodø.no -badaddja.no -bådåddjå.no -budejju.no -bokn.no -bremanger.no -bronnoy.no -brønnøy.no -bygland.no -bykle.no -barum.no -bærum.no -bo.telemark.no -bø.telemark.no -bo.nordland.no -bø.nordland.no -bievat.no -bievát.no -bomlo.no -bømlo.no -batsfjord.no -båtsfjord.no -bahcavuotna.no -báhcavuotna.no -dovre.no -drammen.no -drangedal.no -dyroy.no -dyrøy.no -donna.no -dønna.no -eid.no -eidfjord.no -eidsberg.no -eidskog.no -eidsvoll.no -eigersund.no -elverum.no -enebakk.no -engerdal.no -etne.no -etnedal.no -evenes.no -evenassi.no -evenášši.no -evje-og-hornnes.no -farsund.no -fauske.no -fuossko.no -fuoisku.no -fedje.no -fet.no -finnoy.no -finnøy.no -fitjar.no -fjaler.no -fjell.no -flakstad.no -flatanger.no -flekkefjord.no -flesberg.no -flora.no -fla.no -flå.no -folldal.no -forsand.no -fosnes.no -frei.no -frogn.no -froland.no -frosta.no -frana.no -fræna.no -froya.no -frøya.no -fusa.no -fyresdal.no -forde.no -førde.no -gamvik.no -gangaviika.no -gáŋgaviika.no -gaular.no -gausdal.no -gildeskal.no -gildeskål.no -giske.no -gjemnes.no -gjerdrum.no -gjerstad.no -gjesdal.no -gjovik.no -gjøvik.no -gloppen.no -gol.no -gran.no -grane.no -granvin.no -gratangen.no -grimstad.no -grong.no -kraanghke.no -kråanghke.no -grue.no -gulen.no -hadsel.no -halden.no -halsa.no -hamar.no -hamaroy.no -habmer.no -hábmer.no -hapmir.no -hápmir.no -hammerfest.no -hammarfeasta.no -hámmárfeasta.no -haram.no -hareid.no -harstad.no -hasvik.no -aknoluokta.no -ákŋoluokta.no -hattfjelldal.no -aarborte.no -haugesund.no -hemne.no -hemnes.no 
-hemsedal.no -heroy.more-og-romsdal.no -herøy.møre-og-romsdal.no -heroy.nordland.no -herøy.nordland.no -hitra.no -hjartdal.no -hjelmeland.no -hobol.no -hobøl.no -hof.no -hol.no -hole.no -holmestrand.no -holtalen.no -holtålen.no -hornindal.no -horten.no -hurdal.no -hurum.no -hvaler.no -hyllestad.no -hagebostad.no -hægebostad.no -hoyanger.no -høyanger.no -hoylandet.no -høylandet.no -ha.no -hå.no -ibestad.no -inderoy.no -inderøy.no -iveland.no -jevnaker.no -jondal.no -jolster.no -jølster.no -karasjok.no -karasjohka.no -kárášjohka.no -karlsoy.no -galsa.no -gálsá.no -karmoy.no -karmøy.no -kautokeino.no -guovdageaidnu.no -klepp.no -klabu.no -klæbu.no -kongsberg.no -kongsvinger.no -kragero.no -kragerø.no -kristiansand.no -kristiansund.no -krodsherad.no -krødsherad.no -kvalsund.no -rahkkeravju.no -ráhkkerávju.no -kvam.no -kvinesdal.no -kvinnherad.no -kviteseid.no -kvitsoy.no -kvitsøy.no -kvafjord.no -kvæfjord.no -giehtavuoatna.no -kvanangen.no -kvænangen.no -navuotna.no -návuotna.no -kafjord.no -kåfjord.no -gaivuotna.no -gáivuotna.no -larvik.no -lavangen.no -lavagis.no -loabat.no -loabát.no -lebesby.no -davvesiida.no -leikanger.no -leirfjord.no -leka.no -leksvik.no -lenvik.no -leangaviika.no -leaŋgaviika.no -lesja.no -levanger.no -lier.no -lierne.no -lillehammer.no -lillesand.no -lindesnes.no -lindas.no -lindås.no -lom.no -loppa.no -lahppi.no -láhppi.no -lund.no -lunner.no -luroy.no -lurøy.no -luster.no -lyngdal.no -lyngen.no -ivgu.no -lardal.no -lerdal.no -lærdal.no -lodingen.no -lødingen.no -lorenskog.no -lørenskog.no -loten.no -løten.no -malvik.no -masoy.no -måsøy.no -muosat.no -muosát.no -mandal.no -marker.no -marnardal.no -masfjorden.no -meland.no -meldal.no -melhus.no -meloy.no -meløy.no -meraker.no -meråker.no -moareke.no -moåreke.no -midsund.no -midtre-gauldal.no -modalen.no -modum.no -molde.no -moskenes.no -moss.no -mosvik.no -malselv.no -målselv.no -malatvuopmi.no -málatvuopmi.no -namdalseid.no -aejrie.no -namsos.no -namsskogan.no -naamesjevuemie.no 
-nååmesjevuemie.no -laakesvuemie.no -nannestad.no -narvik.no -narviika.no -naustdal.no -nedre-eiker.no -nes.akershus.no -nes.buskerud.no -nesna.no -nesodden.no -nesseby.no -unjarga.no -unjárga.no -nesset.no -nissedal.no -nittedal.no -nord-aurdal.no -nord-fron.no -nord-odal.no -norddal.no -nordkapp.no -davvenjarga.no -davvenjárga.no -nordre-land.no -nordreisa.no -raisa.no -ráisa.no -nore-og-uvdal.no -notodden.no -naroy.no -nærøy.no -notteroy.no -nøtterøy.no -odda.no -oksnes.no -øksnes.no -oppdal.no -oppegard.no -oppegård.no -orkdal.no -orland.no -ørland.no -orskog.no -ørskog.no -orsta.no -ørsta.no -os.hedmark.no -os.hordaland.no -osen.no -osteroy.no -osterøy.no -ostre-toten.no -østre-toten.no -overhalla.no -ovre-eiker.no -øvre-eiker.no -oyer.no -øyer.no -oygarden.no -øygarden.no -oystre-slidre.no -øystre-slidre.no -porsanger.no -porsangu.no -porsáŋgu.no -porsgrunn.no -radoy.no -radøy.no -rakkestad.no -rana.no -ruovat.no -randaberg.no -rauma.no -rendalen.no -rennebu.no -rennesoy.no -rennesøy.no -rindal.no -ringebu.no -ringerike.no -ringsaker.no -rissa.no -risor.no -risør.no -roan.no -rollag.no -rygge.no -ralingen.no -rælingen.no -rodoy.no -rødøy.no -romskog.no -rømskog.no -roros.no -røros.no -rost.no -røst.no -royken.no -røyken.no -royrvik.no -røyrvik.no -rade.no -råde.no -salangen.no -siellak.no -saltdal.no -salat.no -sálát.no -sálat.no -samnanger.no -sande.more-og-romsdal.no -sande.møre-og-romsdal.no -sande.vestfold.no -sandefjord.no -sandnes.no -sandoy.no -sandøy.no -sarpsborg.no -sauda.no -sauherad.no -sel.no -selbu.no -selje.no -seljord.no -sigdal.no -siljan.no -sirdal.no -skaun.no -skedsmo.no -ski.no -skien.no -skiptvet.no -skjervoy.no -skjervøy.no -skierva.no -skiervá.no -skjak.no -skjåk.no -skodje.no -skanland.no -skånland.no -skanit.no -skánit.no -smola.no -smøla.no -snillfjord.no -snasa.no -snåsa.no -snoasa.no -snaase.no -snåase.no -sogndal.no -sokndal.no -sola.no -solund.no -songdalen.no -sortland.no -spydeberg.no -stange.no -stavanger.no -steigen.no 
-steinkjer.no -stjordal.no -stjørdal.no -stokke.no -stor-elvdal.no -stord.no -stordal.no -storfjord.no -omasvuotna.no -strand.no -stranda.no -stryn.no -sula.no -suldal.no -sund.no -sunndal.no -surnadal.no -sveio.no -svelvik.no -sykkylven.no -sogne.no -søgne.no -somna.no -sømna.no -sondre-land.no -søndre-land.no -sor-aurdal.no -sør-aurdal.no -sor-fron.no -sør-fron.no -sor-odal.no -sør-odal.no -sor-varanger.no -sør-varanger.no -matta-varjjat.no -mátta-várjjat.no -sorfold.no -sørfold.no -sorreisa.no -sørreisa.no -sorum.no -sørum.no -tana.no -deatnu.no -time.no -tingvoll.no -tinn.no -tjeldsund.no -dielddanuorri.no -tjome.no -tjøme.no -tokke.no -tolga.no -torsken.no -tranoy.no -tranøy.no -tromso.no -tromsø.no -tromsa.no -romsa.no -trondheim.no -troandin.no -trysil.no -trana.no -træna.no -trogstad.no -trøgstad.no -tvedestrand.no -tydal.no -tynset.no -tysfjord.no -divtasvuodna.no -divttasvuotna.no -tysnes.no -tysvar.no -tysvær.no -tonsberg.no -tønsberg.no -ullensaker.no -ullensvang.no -ulvik.no -utsira.no -vadso.no -vadsø.no -cahcesuolo.no -čáhcesuolo.no -vaksdal.no -valle.no -vang.no -vanylven.no -vardo.no -vardø.no -varggat.no -várggát.no -vefsn.no -vaapste.no -vega.no -vegarshei.no -vegårshei.no -vennesla.no -verdal.no -verran.no -vestby.no -vestnes.no -vestre-slidre.no -vestre-toten.no -vestvagoy.no -vestvågøy.no -vevelstad.no -vik.no -vikna.no -vindafjord.no -volda.no -voss.no -varoy.no -værøy.no -vagan.no -vågan.no -voagat.no -vagsoy.no -vågsøy.no -vaga.no -vågå.no -valer.ostfold.no -våler.østfold.no -valer.hedmark.no -våler.hedmark.no - -// np : http://www.mos.com.np/register.html -*.np - -// nr : http://cenpac.net.nr/dns/index.html -// Confirmed by registry 2008-06-17 -nr -biz.nr -info.nr -gov.nr -edu.nr -org.nr -net.nr -com.nr - -// nu : http://en.wikipedia.org/wiki/.nu -nu - -// nz : http://en.wikipedia.org/wiki/.nz -*.nz - -// om : http://en.wikipedia.org/wiki/.om -*.om -!mediaphone.om -!nawrastelecom.om -!nawras.om -!omanmobile.om -!omanpost.om -!omantel.om 
-!rakpetroleum.om -!siemens.om -!songfest.om -!statecouncil.om - -// org : http://en.wikipedia.org/wiki/.org -org - -// pa : http://www.nic.pa/ -// Some additional second level "domains" resolve directly as hostnames, such as -// pannet.pa, so we add a rule for "pa". -pa -ac.pa -gob.pa -com.pa -org.pa -sld.pa -edu.pa -net.pa -ing.pa -abo.pa -med.pa -nom.pa - -// pe : https://www.nic.pe/InformeFinalComision.pdf -pe -edu.pe -gob.pe -nom.pe -mil.pe -org.pe -com.pe -net.pe - -// pf : http://www.gobin.info/domainname/formulaire-pf.pdf -pf -com.pf -org.pf -edu.pf - -// pg : http://en.wikipedia.org/wiki/.pg -*.pg - -// ph : http://www.domains.ph/FAQ2.asp -// Submitted by registry 2008-06-13 -ph -com.ph -net.ph -org.ph -gov.ph -edu.ph -ngo.ph -mil.ph -i.ph - -// pk : http://pk5.pknic.net.pk/pk5/msgNamepk.PK -pk -com.pk -net.pk -edu.pk -org.pk -fam.pk -biz.pk -web.pk -gov.pk -gob.pk -gok.pk -gon.pk -gop.pk -gos.pk -info.pk - -// pl : http://www.dns.pl/english/ -pl -// NASK functional domains (nask.pl / dns.pl) : http://www.dns.pl/english/dns-funk.html -aid.pl -agro.pl -atm.pl -auto.pl -biz.pl -com.pl -edu.pl -gmina.pl -gsm.pl -info.pl -mail.pl -miasta.pl -media.pl -mil.pl -net.pl -nieruchomosci.pl -nom.pl -org.pl -pc.pl -powiat.pl -priv.pl -realestate.pl -rel.pl -sex.pl -shop.pl -sklep.pl -sos.pl -szkola.pl -targi.pl -tm.pl -tourism.pl -travel.pl -turystyka.pl -// ICM functional domains (icm.edu.pl) -6bone.pl -art.pl -mbone.pl -// Government domains (administred by ippt.gov.pl) -gov.pl -uw.gov.pl -um.gov.pl -ug.gov.pl -upow.gov.pl -starostwo.gov.pl -so.gov.pl -sr.gov.pl -po.gov.pl -pa.gov.pl -// other functional domains -ngo.pl -irc.pl -usenet.pl -// NASK geographical domains : http://www.dns.pl/english/dns-regiony.html -augustow.pl -babia-gora.pl -bedzin.pl -beskidy.pl -bialowieza.pl -bialystok.pl -bielawa.pl -bieszczady.pl -boleslawiec.pl -bydgoszcz.pl -bytom.pl -cieszyn.pl -czeladz.pl -czest.pl -dlugoleka.pl -elblag.pl -elk.pl -glogow.pl -gniezno.pl -gorlice.pl 
-grajewo.pl -ilawa.pl -jaworzno.pl -jelenia-gora.pl -jgora.pl -kalisz.pl -kazimierz-dolny.pl -karpacz.pl -kartuzy.pl -kaszuby.pl -katowice.pl -kepno.pl -ketrzyn.pl -klodzko.pl -kobierzyce.pl -kolobrzeg.pl -konin.pl -konskowola.pl -kutno.pl -lapy.pl -lebork.pl -legnica.pl -lezajsk.pl -limanowa.pl -lomza.pl -lowicz.pl -lubin.pl -lukow.pl -malbork.pl -malopolska.pl -mazowsze.pl -mazury.pl -mielec.pl -mielno.pl -mragowo.pl -naklo.pl -nowaruda.pl -nysa.pl -olawa.pl -olecko.pl -olkusz.pl -olsztyn.pl -opoczno.pl -opole.pl -ostroda.pl -ostroleka.pl -ostrowiec.pl -ostrowwlkp.pl -pila.pl -pisz.pl -podhale.pl -podlasie.pl -polkowice.pl -pomorze.pl -pomorskie.pl -prochowice.pl -pruszkow.pl -przeworsk.pl -pulawy.pl -radom.pl -rawa-maz.pl -rybnik.pl -rzeszow.pl -sanok.pl -sejny.pl -siedlce.pl -slask.pl -slupsk.pl -sosnowiec.pl -stalowa-wola.pl -skoczow.pl -starachowice.pl -stargard.pl -suwalki.pl -swidnica.pl -swiebodzin.pl -swinoujscie.pl -szczecin.pl -szczytno.pl -tarnobrzeg.pl -tgory.pl -turek.pl -tychy.pl -ustka.pl -walbrzych.pl -warmia.pl -warszawa.pl -waw.pl -wegrow.pl -wielun.pl -wlocl.pl -wloclawek.pl -wodzislaw.pl -wolomin.pl -wroclaw.pl -zachpomor.pl -zagan.pl -zarow.pl -zgora.pl -zgorzelec.pl -// TASK geographical domains (www.task.gda.pl/uslugi/dns) -gda.pl -gdansk.pl -gdynia.pl -med.pl -sopot.pl -// other geographical domains -gliwice.pl -krakow.pl -poznan.pl -wroc.pl -zakopane.pl - -// pm : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf -pm - -// pn : http://www.government.pn/PnRegistry/policies.htm -pn -gov.pn -co.pn -org.pn -edu.pn -net.pn - -// post : http://en.wikipedia.org/wiki/.post -post - -// pr : http://www.nic.pr/index.asp?f=1 -pr -com.pr -net.pr -org.pr -gov.pr -edu.pr -isla.pr -pro.pr -biz.pr -info.pr -name.pr -// these aren't mentioned on nic.pr, but on http://en.wikipedia.org/wiki/.pr -est.pr -prof.pr -ac.pr - -// pro : http://www.nic.pro/support_faq.htm -pro -aca.pro -bar.pro -cpa.pro -jur.pro -law.pro -med.pro -eng.pro - -// ps : 
http://en.wikipedia.org/wiki/.ps -// http://www.nic.ps/registration/policy.html#reg -ps -edu.ps -gov.ps -sec.ps -plo.ps -com.ps -org.ps -net.ps - -// pt : http://online.dns.pt/dns/start_dns -pt -net.pt -gov.pt -org.pt -edu.pt -int.pt -publ.pt -com.pt -nome.pt - -// pw : http://en.wikipedia.org/wiki/.pw -pw -co.pw -ne.pw -or.pw -ed.pw -go.pw -belau.pw - -// py : http://www.nic.py/pautas.html#seccion_9 -// Confirmed by registry 2012-10-03 -py -com.py -coop.py -edu.py -gov.py -mil.py -net.py -org.py - -// qa : http://domains.qa/en/ -qa -com.qa -edu.qa -gov.qa -mil.qa -name.qa -net.qa -org.qa -sch.qa - -// re : http://www.afnic.re/obtenir/chartes/nommage-re/annexe-descriptifs -re -com.re -asso.re -nom.re - -// ro : http://www.rotld.ro/ -ro -com.ro -org.ro -tm.ro -nt.ro -nom.ro -info.ro -rec.ro -arts.ro -firm.ro -store.ro -www.ro - -// rs : http://en.wikipedia.org/wiki/.rs -rs -co.rs -org.rs -edu.rs -ac.rs -gov.rs -in.rs - -// ru : http://www.cctld.ru/ru/docs/aktiv_8.php -// Industry domains -ru -ac.ru -com.ru -edu.ru -int.ru -net.ru -org.ru -pp.ru -// Geographical domains -adygeya.ru -altai.ru -amur.ru -arkhangelsk.ru -astrakhan.ru -bashkiria.ru -belgorod.ru -bir.ru -bryansk.ru -buryatia.ru -cbg.ru -chel.ru -chelyabinsk.ru -chita.ru -chukotka.ru -chuvashia.ru -dagestan.ru -dudinka.ru -e-burg.ru -grozny.ru -irkutsk.ru -ivanovo.ru -izhevsk.ru -jar.ru -joshkar-ola.ru -kalmykia.ru -kaluga.ru -kamchatka.ru -karelia.ru -kazan.ru -kchr.ru -kemerovo.ru -khabarovsk.ru -khakassia.ru -khv.ru -kirov.ru -koenig.ru -komi.ru -kostroma.ru -krasnoyarsk.ru -kuban.ru -kurgan.ru -kursk.ru -lipetsk.ru -magadan.ru -mari.ru -mari-el.ru -marine.ru -mordovia.ru -mosreg.ru -msk.ru -murmansk.ru -nalchik.ru -nnov.ru -nov.ru -novosibirsk.ru -nsk.ru -omsk.ru -orenburg.ru -oryol.ru -palana.ru -penza.ru -perm.ru -pskov.ru -ptz.ru -rnd.ru -ryazan.ru -sakhalin.ru -samara.ru -saratov.ru -simbirsk.ru -smolensk.ru -spb.ru -stavropol.ru -stv.ru -surgut.ru -tambov.ru -tatarstan.ru -tom.ru -tomsk.ru 
-tsaritsyn.ru -tsk.ru -tula.ru -tuva.ru -tver.ru -tyumen.ru -udm.ru -udmurtia.ru -ulan-ude.ru -vladikavkaz.ru -vladimir.ru -vladivostok.ru -volgograd.ru -vologda.ru -voronezh.ru -vrn.ru -vyatka.ru -yakutia.ru -yamal.ru -yaroslavl.ru -yekaterinburg.ru -yuzhno-sakhalinsk.ru -// More geographical domains -amursk.ru -baikal.ru -cmw.ru -fareast.ru -jamal.ru -kms.ru -k-uralsk.ru -kustanai.ru -kuzbass.ru -magnitka.ru -mytis.ru -nakhodka.ru -nkz.ru -norilsk.ru -oskol.ru -pyatigorsk.ru -rubtsovsk.ru -snz.ru -syzran.ru -vdonsk.ru -zgrad.ru -// State domains -gov.ru -mil.ru -// Technical domains -test.ru - -// rw : http://www.nic.rw/cgi-bin/policy.pl -rw -gov.rw -net.rw -edu.rw -ac.rw -com.rw -co.rw -int.rw -mil.rw -gouv.rw - -// sa : http://www.nic.net.sa/ -sa -com.sa -net.sa -org.sa -gov.sa -med.sa -pub.sa -edu.sa -sch.sa - -// sb : http://www.sbnic.net.sb/ -// Submitted by registry 2008-06-08 -sb -com.sb -edu.sb -gov.sb -net.sb -org.sb - -// sc : http://www.nic.sc/ -sc -com.sc -gov.sc -net.sc -org.sc -edu.sc - -// sd : http://www.isoc.sd/sudanic.isoc.sd/billing_pricing.htm -// Submitted by registry 2008-06-17 -sd -com.sd -net.sd -org.sd -edu.sd -med.sd -tv.sd -gov.sd -info.sd - -// se : http://en.wikipedia.org/wiki/.se -// Submitted by registry 2008-06-24 -se -a.se -ac.se -b.se -bd.se -brand.se -c.se -d.se -e.se -f.se -fh.se -fhsk.se -fhv.se -g.se -h.se -i.se -k.se -komforb.se -kommunalforbund.se -komvux.se -l.se -lanbib.se -m.se -n.se -naturbruksgymn.se -o.se -org.se -p.se -parti.se -pp.se -press.se -r.se -s.se -sshn.se -t.se -tm.se -u.se -w.se -x.se -y.se -z.se - -// sg : http://www.nic.net.sg/page/registration-policies-procedures-and-guidelines -sg -com.sg -net.sg -org.sg -gov.sg -edu.sg -per.sg - -// sh : http://www.nic.sh/registrar.html -sh -com.sh -net.sh -gov.sh -org.sh -mil.sh - -// si : http://en.wikipedia.org/wiki/.si -si - -// sj : No registrations at this time. 
-// Submitted by registry 2008-06-16 - -// sk : http://en.wikipedia.org/wiki/.sk -// list of 2nd level domains ? -sk - -// sl : http://www.nic.sl -// Submitted by registry 2008-06-12 -sl -com.sl -net.sl -edu.sl -gov.sl -org.sl - -// sm : http://en.wikipedia.org/wiki/.sm -sm - -// sn : http://en.wikipedia.org/wiki/.sn -sn -art.sn -com.sn -edu.sn -gouv.sn -org.sn -perso.sn -univ.sn - -// so : http://www.soregistry.com/ -so -com.so -net.so -org.so - -// sr : http://en.wikipedia.org/wiki/.sr -sr - -// st : http://www.nic.st/html/policyrules/ -st -co.st -com.st -consulado.st -edu.st -embaixada.st -gov.st -mil.st -net.st -org.st -principe.st -saotome.st -store.st - -// su : http://en.wikipedia.org/wiki/.su -su - -// sv : http://www.svnet.org.sv/svpolicy.html -*.sv - -// sx : http://en.wikipedia.org/wiki/.sx -// Confirmed by registry 2012-05-31 -sx -gov.sx - -// sy : http://en.wikipedia.org/wiki/.sy -// see also: http://www.gobin.info/domainname/sy.doc -sy -edu.sy -gov.sy -net.sy -mil.sy -com.sy -org.sy - -// sz : http://en.wikipedia.org/wiki/.sz -// http://www.sispa.org.sz/ -sz -co.sz -ac.sz -org.sz - -// tc : http://en.wikipedia.org/wiki/.tc -tc - -// td : http://en.wikipedia.org/wiki/.td -td - -// tel: http://en.wikipedia.org/wiki/.tel -// http://www.telnic.org/ -tel - -// tf : http://en.wikipedia.org/wiki/.tf -tf - -// tg : http://en.wikipedia.org/wiki/.tg -// http://www.nic.tg/ -tg - -// th : http://en.wikipedia.org/wiki/.th -// Submitted by registry 2008-06-17 -th -ac.th -co.th -go.th -in.th -mi.th -net.th -or.th - -// tj : http://www.nic.tj/policy.html -tj -ac.tj -biz.tj -co.tj -com.tj -edu.tj -go.tj -gov.tj -int.tj -mil.tj -name.tj -net.tj -nic.tj -org.tj -test.tj -web.tj - -// tk : http://en.wikipedia.org/wiki/.tk -tk - -// tl : http://en.wikipedia.org/wiki/.tl -tl -gov.tl - -// tm : http://www.nic.tm/local.html -tm -com.tm -co.tm -org.tm -net.tm -nom.tm -gov.tm -mil.tm -edu.tm - -// tn : http://en.wikipedia.org/wiki/.tn -// http://whois.ati.tn/ -tn -com.tn 
-ens.tn -fin.tn -gov.tn -ind.tn -intl.tn -nat.tn -net.tn -org.tn -info.tn -perso.tn -tourism.tn -edunet.tn -rnrt.tn -rns.tn -rnu.tn -mincom.tn -agrinet.tn -defense.tn -turen.tn - -// to : http://en.wikipedia.org/wiki/.to -// Submitted by registry 2008-06-17 -to -com.to -gov.to -net.to -org.to -edu.to -mil.to - -// tr : http://en.wikipedia.org/wiki/.tr -*.tr -!nic.tr -// Used by government in the TRNC -// http://en.wikipedia.org/wiki/.nc.tr -gov.nc.tr - -// travel : http://en.wikipedia.org/wiki/.travel -travel - -// tt : http://www.nic.tt/ -tt -co.tt -com.tt -org.tt -net.tt -biz.tt -info.tt -pro.tt -int.tt -coop.tt -jobs.tt -mobi.tt -travel.tt -museum.tt -aero.tt -name.tt -gov.tt -edu.tt - -// tv : http://en.wikipedia.org/wiki/.tv -// Not listing any 2LDs as reserved since none seem to exist in practice, -// Wikipedia notwithstanding. -tv - -// tw : http://en.wikipedia.org/wiki/.tw -tw -edu.tw -gov.tw -mil.tw -com.tw -net.tw -org.tw -idv.tw -game.tw -ebiz.tw -club.tw -網路.tw -組織.tw -商業.tw - -// tz : http://www.tznic.or.tz/index.php/domains -// Confirmed by registry 2013-01-22 -ac.tz -co.tz -go.tz -hotel.tz -info.tz -me.tz -mil.tz -mobi.tz -ne.tz -or.tz -sc.tz -tv.tz - -// ua : https://hostmaster.ua/policy/?ua -// Submitted by registry 2012-04-27 -ua -// ua 2LD -com.ua -edu.ua -gov.ua -in.ua -net.ua -org.ua -// ua geographic names -// https://hostmaster.ua/2ld/ -cherkassy.ua -cherkasy.ua -chernigov.ua -chernihiv.ua -chernivtsi.ua -chernovtsy.ua -ck.ua -cn.ua -cr.ua -crimea.ua -cv.ua -dn.ua -dnepropetrovsk.ua -dnipropetrovsk.ua -dominic.ua -donetsk.ua -dp.ua -if.ua -ivano-frankivsk.ua -kh.ua -kharkiv.ua -kharkov.ua -kherson.ua -khmelnitskiy.ua -khmelnytskyi.ua -kiev.ua -kirovograd.ua -km.ua -kr.ua -krym.ua -ks.ua -kv.ua -kyiv.ua -lg.ua -lt.ua -lugansk.ua -lutsk.ua -lv.ua -lviv.ua -mk.ua -mykolaiv.ua -nikolaev.ua -od.ua -odesa.ua -odessa.ua -pl.ua -poltava.ua -rivne.ua -rovno.ua -rv.ua -sb.ua -sebastopol.ua -sevastopol.ua -sm.ua -sumy.ua -te.ua -ternopil.ua -uz.ua 
-uzhgorod.ua -vinnica.ua -vinnytsia.ua -vn.ua -volyn.ua -yalta.ua -zaporizhzhe.ua -zaporizhzhia.ua -zhitomir.ua -zhytomyr.ua -zp.ua -zt.ua - -// Private registries in .ua -co.ua -pp.ua - -// ug : https://www.registry.co.ug/ -ug -co.ug -or.ug -ac.ug -sc.ug -go.ug -ne.ug -com.ug -org.ug - -// uk : http://en.wikipedia.org/wiki/.uk -// Submitted by registry 2012-10-02 -// and tweaked by us pending further consultation. -*.uk -*.sch.uk -!bl.uk -!british-library.uk -!jet.uk -!mod.uk -!national-library-scotland.uk -!nel.uk -!nic.uk -!nls.uk -!parliament.uk - -// us : http://en.wikipedia.org/wiki/.us -us -dni.us -fed.us -isa.us -kids.us -nsn.us -// us geographic names -ak.us -al.us -ar.us -as.us -az.us -ca.us -co.us -ct.us -dc.us -de.us -fl.us -ga.us -gu.us -hi.us -ia.us -id.us -il.us -in.us -ks.us -ky.us -la.us -ma.us -md.us -me.us -mi.us -mn.us -mo.us -ms.us -mt.us -nc.us -nd.us -ne.us -nh.us -nj.us -nm.us -nv.us -ny.us -oh.us -ok.us -or.us -pa.us -pr.us -ri.us -sc.us -sd.us -tn.us -tx.us -ut.us -vi.us -vt.us -va.us -wa.us -wi.us -wv.us -wy.us -// The registrar notes several more specific domains available in each state, -// such as state.*.us, dst.*.us, etc., but resolution of these is somewhat -// haphazard; in some states these domains resolve as addresses, while in others -// only subdomains are available, or even nothing at all. We include the -// most common ones where it's clear that different sites are different -// entities. 
-k12.ak.us -k12.al.us -k12.ar.us -k12.as.us -k12.az.us -k12.ca.us -k12.co.us -k12.ct.us -k12.dc.us -k12.de.us -k12.fl.us -k12.ga.us -k12.gu.us -// k12.hi.us Hawaii has a state-wide DOE login: bug 614565 -k12.ia.us -k12.id.us -k12.il.us -k12.in.us -k12.ks.us -k12.ky.us -k12.la.us -k12.ma.us -k12.md.us -k12.me.us -k12.mi.us -k12.mn.us -k12.mo.us -k12.ms.us -k12.mt.us -k12.nc.us -k12.nd.us -k12.ne.us -k12.nh.us -k12.nj.us -k12.nm.us -k12.nv.us -k12.ny.us -k12.oh.us -k12.ok.us -k12.or.us -k12.pa.us -k12.pr.us -k12.ri.us -k12.sc.us -k12.sd.us -k12.tn.us -k12.tx.us -k12.ut.us -k12.vi.us -k12.vt.us -k12.va.us -k12.wa.us -k12.wi.us -k12.wv.us -k12.wy.us - -cc.ak.us -cc.al.us -cc.ar.us -cc.as.us -cc.az.us -cc.ca.us -cc.co.us -cc.ct.us -cc.dc.us -cc.de.us -cc.fl.us -cc.ga.us -cc.gu.us -cc.hi.us -cc.ia.us -cc.id.us -cc.il.us -cc.in.us -cc.ks.us -cc.ky.us -cc.la.us -cc.ma.us -cc.md.us -cc.me.us -cc.mi.us -cc.mn.us -cc.mo.us -cc.ms.us -cc.mt.us -cc.nc.us -cc.nd.us -cc.ne.us -cc.nh.us -cc.nj.us -cc.nm.us -cc.nv.us -cc.ny.us -cc.oh.us -cc.ok.us -cc.or.us -cc.pa.us -cc.pr.us -cc.ri.us -cc.sc.us -cc.sd.us -cc.tn.us -cc.tx.us -cc.ut.us -cc.vi.us -cc.vt.us -cc.va.us -cc.wa.us -cc.wi.us -cc.wv.us -cc.wy.us - -lib.ak.us -lib.al.us -lib.ar.us -lib.as.us -lib.az.us -lib.ca.us -lib.co.us -lib.ct.us -lib.dc.us -lib.de.us -lib.fl.us -lib.ga.us -lib.gu.us -lib.hi.us -lib.ia.us -lib.id.us -lib.il.us -lib.in.us -lib.ks.us -lib.ky.us -lib.la.us -lib.ma.us -lib.md.us -lib.me.us -lib.mi.us -lib.mn.us -lib.mo.us -lib.ms.us -lib.mt.us -lib.nc.us -lib.nd.us -lib.ne.us -lib.nh.us -lib.nj.us -lib.nm.us -lib.nv.us -lib.ny.us -lib.oh.us -lib.ok.us -lib.or.us -lib.pa.us -lib.pr.us -lib.ri.us -lib.sc.us -lib.sd.us -lib.tn.us -lib.tx.us -lib.ut.us -lib.vi.us -lib.vt.us -lib.va.us -lib.wa.us -lib.wi.us -lib.wv.us -lib.wy.us - -// k12.ma.us contains school districts in Massachusetts. The 4LDs are -// managed indepedently except for private (PVT), charter (CHTR) and -// parochial (PAROCH) schools. 
Those are delegated dorectly to the -// 5LD operators. -pvt.k12.ma.us -chtr.k12.ma.us -paroch.k12.ma.us - -// uy : http://www.nic.org.uy/ -uy -com.uy -edu.uy -gub.uy -mil.uy -net.uy -org.uy - -// uz : http://www.reg.uz/ -uz -co.uz -com.uz -net.uz -org.uz - -// va : http://en.wikipedia.org/wiki/.va -va - -// vc : http://en.wikipedia.org/wiki/.vc -// Submitted by registry 2008-06-13 -vc -com.vc -net.vc -org.vc -gov.vc -mil.vc -edu.vc - -// ve : https://registro.nic.ve/ -// Confirmed by registry 2012-10-04 -ve -co.ve -com.ve -e12.ve -edu.ve -gov.ve -info.ve -mil.ve -net.ve -org.ve -web.ve - -// vg : http://en.wikipedia.org/wiki/.vg -vg - -// vi : http://www.nic.vi/newdomainform.htm -// http://www.nic.vi/Domain_Rules/body_domain_rules.html indicates some other -// TLDs are "reserved", such as edu.vi and gov.vi, but doesn't actually say they -// are available for registration (which they do not seem to be). -vi -co.vi -com.vi -k12.vi -net.vi -org.vi - -// vn : https://www.dot.vn/vnnic/vnnic/domainregistration.jsp -vn -com.vn -net.vn -org.vn -edu.vn -gov.vn -int.vn -ac.vn -biz.vn -info.vn -name.vn -pro.vn -health.vn - -// vu : http://en.wikipedia.org/wiki/.vu -// list of 2nd level tlds ? 
-vu - -// wf : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf -wf - -// ws : http://en.wikipedia.org/wiki/.ws -// http://samoanic.ws/index.dhtml -ws -com.ws -net.ws -org.ws -gov.ws -edu.ws - -// yt : http://www.afnic.fr/medias/documents/AFNIC-naming-policy2012.pdf -yt - -// IDN ccTLDs -// Please sort by ISO 3166 ccTLD, then punicode string -// when submitting patches and follow this format: -// ("" ) : -// [optional sponsoring org] -// - -// xn--mgbaam7a8h ("Emerat" Arabic) : AE -// http://nic.ae/english/arabicdomain/rules.jsp -امارات - -// xn--54b7fta0cc ("Bangla" Bangla) : BD -বাংলা - -// xn--fiqs8s ("China" Chinese-Han-Simplified <.Zhonggou>) : CN -// CNNIC -// http://cnnic.cn/html/Dir/2005/10/11/3218.htm -中国 - -// xn--fiqz9s ("China" Chinese-Han-Traditional <.Zhonggou>) : CN -// CNNIC -// http://cnnic.cn/html/Dir/2005/10/11/3218.htm -中國 - -// xn--lgbbat1ad8j ("Algeria / Al Jazair" Arabic) : DZ -الجزائر - -// xn--wgbh1c ("Egypt" Arabic .masr) : EG -// http://www.dotmasr.eg/ -مصر - -// xn--node ("ge" Georgian (Mkhedruli)) : GE -გე - -// xn--j6w193g ("Hong Kong" Chinese-Han) : HK -// https://www2.hkirc.hk/register/rules.jsp -香港 - -// xn--h2brj9c ("Bharat" Devanagari) : IN -// India -भारत - -// xn--mgbbh1a71e ("Bharat" Arabic) : IN -// India -بھارت - -// xn--fpcrj9c3d ("Bharat" Telugu) : IN -// India -భారత్ - -// xn--gecrj9c ("Bharat" Gujarati) : IN -// India -ભારત - -// xn--s9brj9c ("Bharat" Gurmukhi) : IN -// India -ਭਾਰਤ - -// xn--45brj9c ("Bharat" Bengali) : IN -// India -ভারত - -// xn--xkc2dl3a5ee0h ("India" Tamil) : IN -// India -இந்தியா - -// xn--mgba3a4f16a ("Iran" Persian) : IR -ایران - -// xn--mgba3a4fra ("Iran" Arabic) : IR -ايران - -// xn--mgbayh7gpa ("al-Ordon" Arabic) : JO -// National Information Technology Center (NITC) -// Royal Scientific Society, Al-Jubeiha -الاردن - -// xn--3e0b707e ("Republic of Korea" Hangul) : KR -한국 - -// xn--fzc2c9e2c ("Lanka" Sinhalese-Sinhala) : LK -// http://nic.lk -ලංකා - -// xn--xkc2al3hye2a ("Ilangai" 
Tamil) : LK -// http://nic.lk -இலங்கை - -// xn--mgbc0a9azcg ("Morocco / al-Maghrib" Arabic) : MA -المغرب - -// xn--mgb9awbf ("Oman" Arabic) : OM -عمان - -// xn--ygbi2ammx ("Falasteen" Arabic) : PS -// The Palestinian National Internet Naming Authority (PNINA) -// http://www.pnina.ps -فلسطين - -// xn--90a3ac ("srb" Cyrillic) : RS -срб - -// xn--p1ai ("rf" Russian-Cyrillic) : RU -// http://www.cctld.ru/en/docs/rulesrf.php -рф - -// xn--wgbl6a ("Qatar" Arabic) : QA -// http://www.ict.gov.qa/ -قطر - -// xn--mgberp4a5d4ar ("AlSaudiah" Arabic) : SA -// http://www.nic.net.sa/ -السعودية - -// xn--mgberp4a5d4a87g ("AlSaudiah" Arabic) variant : SA -السعودیة - -// xn--mgbqly7c0a67fbc ("AlSaudiah" Arabic) variant : SA -السعودیۃ - -// xn--mgbqly7cvafr ("AlSaudiah" Arabic) variant : SA -السعوديه - -// xn--ogbpf8fl ("Syria" Arabic) : SY -سورية - -// xn--mgbtf8fl ("Syria" Arabic) variant : SY -سوريا - -// xn--yfro4i67o Singapore ("Singapore" Chinese-Han) : SG -新加坡 - -// xn--clchc0ea0b2g2a9gcd ("Singapore" Tamil) : SG -சிங்கப்பூர் - -// xn--o3cw4h ("Thai" Thai) : TH -// http://www.thnic.co.th -ไทย - -// xn--pgbs0dh ("Tunis") : TN -// http://nic.tn -تونس - -// xn--kpry57d ("Taiwan" Chinese-Han-Traditional) : TW -// http://www.twnic.net/english/dn/dn_07a.htm -台灣 - -// xn--kprw13d ("Taiwan" Chinese-Han-Simplified) : TW -// http://www.twnic.net/english/dn/dn_07a.htm -台湾 - -// xn--nnx388a ("Taiwan") variant : TW -臺灣 - -// xn--j1amh ("ukr" Cyrillic) : UA -укр - -// xn--mgb2ddes ("AlYemen" Arabic) : YE -اليمن - -// xxx : http://icmregistry.com -xxx - -// ye : http://www.y.net.ye/services/domain_name.htm -*.ye - -// za : http://www.zadna.org.za/slds.html -*.za - -// zm : http://en.wikipedia.org/wiki/.zm -*.zm - -// zw : http://en.wikipedia.org/wiki/.zw -*.zw - -// ===END ICANN DOMAINS=== -// ===BEGIN PRIVATE DOMAINS=== - -// Amazon CloudFront : https://aws.amazon.com/cloudfront/ -// Requested by Donavan Miller 2013-03-22 -cloudfront.net - -// Amazon Elastic Compute Cloud: 
https://aws.amazon.com/ec2/ -// Requested by Osman Surkatty 2013-04-02 -compute.amazonaws.com -us-east-1.amazonaws.com -compute-1.amazonaws.com -z-1.compute-1.amazonaws.com -z-2.compute-1.amazonaws.com -ap-northeast-1.compute.amazonaws.com -ap-southeast-1.compute.amazonaws.com -ap-southeast-2.compute.amazonaws.com -eu-west-1.compute.amazonaws.com -sa-east-1.compute.amazonaws.com -us-gov-west-1.compute.amazonaws.com -us-west-1.compute.amazonaws.com -us-west-2.compute.amazonaws.com - -// Amazon Elastic Beanstalk : https://aws.amazon.com/elasticbeanstalk/ -// Requested by Adam Stein 2013-04-02 -elasticbeanstalk.com - -// Amazon Elastic Load Balancing : https://aws.amazon.com/elasticloadbalancing/ -// Requested by Scott Vidmar 2013-03-27 -elb.amazonaws.com - -// Amazon S3 : https://aws.amazon.com/s3/ -// Requested by Courtney Eckhardt 2013-03-22 -s3.amazonaws.com -s3-us-west-2.amazonaws.com -s3-us-west-1.amazonaws.com -s3-eu-west-1.amazonaws.com -s3-ap-southeast-1.amazonaws.com -s3-ap-southeast-2.amazonaws.com -s3-ap-northeast-1.amazonaws.com -s3-sa-east-1.amazonaws.com -s3-us-gov-west-1.amazonaws.com -s3-fips-us-gov-west-1.amazonaws.com -s3-website-us-east-1.amazonaws.com -s3-website-us-west-2.amazonaws.com -s3-website-us-west-1.amazonaws.com -s3-website-eu-west-1.amazonaws.com -s3-website-ap-southeast-1.amazonaws.com -s3-website-ap-southeast-2.amazonaws.com -s3-website-ap-northeast-1.amazonaws.com -s3-website-sa-east-1.amazonaws.com -s3-website-us-gov-west-1.amazonaws.com - -// BetaInABox -// Requested by adrian@betainabox.com 2012-09-13 -betainabox.com - -// CentralNic : http://www.centralnic.com/names/domains -// Requested by registry 2012-09-27 -ae.org -ar.com -br.com -cn.com -com.de -de.com -eu.com -gb.com -gb.net -gr.com -hu.com -hu.net -jp.net -jpn.com -kr.com -no.com -qc.com -ru.com -sa.com -se.com -se.net -uk.com -uk.net -us.com -us.org -uy.com -za.com - -// c.la : http://www.c.la/ -c.la - -// cloudControl : https://www.cloudcontrol.com/ -// Requested by 
Tobias Wilken 2013-07-23 -cloudcontrolled.com -cloudcontrolapp.com - -// co.ca : http://registry.co.ca/ -co.ca - -// CoDNS B.V. -co.nl -co.no - -// DreamHost : http://www.dreamhost.com/ -// Requested by Andrew Farmer 2012-10-02 -dreamhosters.com - -// DynDNS.com : http://www.dyndns.com/services/dns/dyndns/ -dyndns-at-home.com -dyndns-at-work.com -dyndns-blog.com -dyndns-free.com -dyndns-home.com -dyndns-ip.com -dyndns-mail.com -dyndns-office.com -dyndns-pics.com -dyndns-remote.com -dyndns-server.com -dyndns-web.com -dyndns-wiki.com -dyndns-work.com -dyndns.biz -dyndns.info -dyndns.org -dyndns.tv -at-band-camp.net -ath.cx -barrel-of-knowledge.info -barrell-of-knowledge.info -better-than.tv -blogdns.com -blogdns.net -blogdns.org -blogsite.org -boldlygoingnowhere.org -broke-it.net -buyshouses.net -cechire.com -dnsalias.com -dnsalias.net -dnsalias.org -dnsdojo.com -dnsdojo.net -dnsdojo.org -does-it.net -doesntexist.com -doesntexist.org -dontexist.com -dontexist.net -dontexist.org -doomdns.com -doomdns.org -dvrdns.org -dyn-o-saur.com -dynalias.com -dynalias.net -dynalias.org -dynathome.net -dyndns.ws -endofinternet.net -endofinternet.org -endoftheinternet.org -est-a-la-maison.com -est-a-la-masion.com -est-le-patron.com -est-mon-blogueur.com -for-better.biz -for-more.biz -for-our.info -for-some.biz -for-the.biz -forgot.her.name -forgot.his.name -from-ak.com -from-al.com -from-ar.com -from-az.net -from-ca.com -from-co.net -from-ct.com -from-dc.com -from-de.com -from-fl.com -from-ga.com -from-hi.com -from-ia.com -from-id.com -from-il.com -from-in.com -from-ks.com -from-ky.com -from-la.net -from-ma.com -from-md.com -from-me.org -from-mi.com -from-mn.com -from-mo.com -from-ms.com -from-mt.com -from-nc.com -from-nd.com -from-ne.com -from-nh.com -from-nj.com -from-nm.com -from-nv.com -from-ny.net -from-oh.com -from-ok.com -from-or.com -from-pa.com -from-pr.com -from-ri.com -from-sc.com -from-sd.com -from-tn.com -from-tx.com -from-ut.com -from-va.com -from-vt.com -from-wa.com 
-from-wi.com -from-wv.com -from-wy.com -ftpaccess.cc -fuettertdasnetz.de -game-host.org -game-server.cc -getmyip.com -gets-it.net -go.dyndns.org -gotdns.com -gotdns.org -groks-the.info -groks-this.info -ham-radio-op.net -here-for-more.info -hobby-site.com -hobby-site.org -home.dyndns.org -homedns.org -homeftp.net -homeftp.org -homeip.net -homelinux.com -homelinux.net -homelinux.org -homeunix.com -homeunix.net -homeunix.org -iamallama.com -in-the-band.net -is-a-anarchist.com -is-a-blogger.com -is-a-bookkeeper.com -is-a-bruinsfan.org -is-a-bulls-fan.com -is-a-candidate.org -is-a-caterer.com -is-a-celticsfan.org -is-a-chef.com -is-a-chef.net -is-a-chef.org -is-a-conservative.com -is-a-cpa.com -is-a-cubicle-slave.com -is-a-democrat.com -is-a-designer.com -is-a-doctor.com -is-a-financialadvisor.com -is-a-geek.com -is-a-geek.net -is-a-geek.org -is-a-green.com -is-a-guru.com -is-a-hard-worker.com -is-a-hunter.com -is-a-knight.org -is-a-landscaper.com -is-a-lawyer.com -is-a-liberal.com -is-a-libertarian.com -is-a-linux-user.org -is-a-llama.com -is-a-musician.com -is-a-nascarfan.com -is-a-nurse.com -is-a-painter.com -is-a-patsfan.org -is-a-personaltrainer.com -is-a-photographer.com -is-a-player.com -is-a-republican.com -is-a-rockstar.com -is-a-socialist.com -is-a-soxfan.org -is-a-student.com -is-a-teacher.com -is-a-techie.com -is-a-therapist.com -is-an-accountant.com -is-an-actor.com -is-an-actress.com -is-an-anarchist.com -is-an-artist.com -is-an-engineer.com -is-an-entertainer.com -is-by.us -is-certified.com -is-found.org -is-gone.com -is-into-anime.com -is-into-cars.com -is-into-cartoons.com -is-into-games.com -is-leet.com -is-lost.org -is-not-certified.com -is-saved.org -is-slick.com -is-uberleet.com -is-very-bad.org -is-very-evil.org -is-very-good.org -is-very-nice.org -is-very-sweet.org -is-with-theband.com -isa-geek.com -isa-geek.net -isa-geek.org -isa-hockeynut.com -issmarterthanyou.com -isteingeek.de -istmein.de -kicks-ass.net -kicks-ass.org -knowsitall.info 
-land-4-sale.us -lebtimnetz.de -leitungsen.de -likes-pie.com -likescandy.com -merseine.nu -mine.nu -misconfused.org -mypets.ws -myphotos.cc -neat-url.com -office-on-the.net -on-the-web.tv -podzone.net -podzone.org -readmyblog.org -saves-the-whales.com -scrapper-site.net -scrapping.cc -selfip.biz -selfip.com -selfip.info -selfip.net -selfip.org -sells-for-less.com -sells-for-u.com -sells-it.net -sellsyourhome.org -servebbs.com -servebbs.net -servebbs.org -serveftp.net -serveftp.org -servegame.org -shacknet.nu -simple-url.com -space-to-rent.com -stuff-4-sale.org -stuff-4-sale.us -teaches-yoga.com -thruhere.net -traeumtgerade.de -webhop.biz -webhop.info -webhop.net -webhop.org -worse-than.tv -writesthisblog.com - -// Fastly Inc. http://www.fastly.com/ -// Requested by Vladimir Vuksan 2013-05-31 -a.ssl.fastly.net -b.ssl.fastly.net -global.ssl.fastly.net -a.prod.fastly.net -global.prod.fastly.net - -// GitHub, Inc. -// Requested by Ben Toews 2013-04-18 -github.io - -// GlobeHosting, Inc. -// Requested by Zoltan Egresi 2013-07-12 -ro.com - -// Google, Inc. 
-// Requested by Eduardo Vela 2012-10-24 -appspot.com -blogspot.be -blogspot.bj -blogspot.ca -blogspot.cf -blogspot.ch -blogspot.co.at -blogspot.co.il -blogspot.co.nz -blogspot.co.uk -blogspot.com -blogspot.com.ar -blogspot.com.au -blogspot.com.br -blogspot.com.es -blogspot.cv -blogspot.cz -blogspot.de -blogspot.dk -blogspot.fi -blogspot.fr -blogspot.gr -blogspot.hk -blogspot.hu -blogspot.ie -blogspot.in -blogspot.it -blogspot.jp -blogspot.kr -blogspot.mr -blogspot.mx -blogspot.nl -blogspot.no -blogspot.pt -blogspot.re -blogspot.ro -blogspot.se -blogspot.sg -blogspot.sk -blogspot.td -blogspot.tw -codespot.com -googleapis.com -googlecode.com - -// Heroku : https://www.heroku.com/ -// Requested by Tom Maher 2013-05-02 -herokuapp.com -herokussl.com - -// iki.fi -// Requested by Hannu Aronsson 2009-11-05 -iki.fi - -// info.at : http://www.info.at/ -biz.at -info.at - -// Michau Enterprises Limited : http://www.co.pl/ -co.pl - -// NYC.mn : http://www.information.nyc.mn -// Requested by Matthew Brown 2013-03-11 -nyc.mn - -// Opera Software, A.S.A. -// Requested by Yngve Pettersen 2009-11-26 -operaunite.com - -// Red Hat, Inc. 
OpenShift : https://openshift.redhat.com/ -// Requested by Tim Kramer 2012-10-24 -rhcloud.com - -// priv.at : http://www.nic.priv.at/ -// Requested by registry 2008-06-09 -priv.at - -// ZaNiC : http://www.za.net/ -// Requested by registry 2009-10-03 -za.net -za.org - -// ===END PRIVATE DOMAINS=== diff --git a/src/main/resources/org/archive/commons.properties b/src/main/resources/org/archive/commons.properties new file mode 100644 index 00000000..f115ff43 --- /dev/null +++ b/src/main/resources/org/archive/commons.properties @@ -0,0 +1,5 @@ +operator= +publisher= +wat.warcinfo.description= +warc.format=WARC File Format 1.0 +warc.format.conforms.to=http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf diff --git a/src/main/resources/org/archive/effective_tld_names.dat b/src/main/resources/org/archive/effective_tld_names.dat new file mode 100644 index 00000000..91bf51b0 --- /dev/null +++ b/src/main/resources/org/archive/effective_tld_names.dat @@ -0,0 +1,15884 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Please pull this list from, and only from https://publicsuffix.org/list/public_suffix_list.dat, +// rather than any other VCS sites. Pulling from any other URL is not guaranteed to be supported. + +// VERSION: 2025-07-09_15-23-09_UTC +// COMMIT: c38a2f8e8862ad65d91af25dee90002c61329953 + +// Instructions on pulling and using this list can be found at https://publicsuffix.org/list/. 
+ +// ===BEGIN ICANN DOMAINS=== + +// ac : http://nic.ac/rules.htm +ac +com.ac +edu.ac +gov.ac +mil.ac +net.ac +org.ac + +// ad : https://www.iana.org/domains/root/db/ad.html +// Confirmed by Amadeu Abril i Abril (CORE) 2024-11-17 +ad + +// ae : https://www.iana.org/domains/root/db/ae.html +ae +ac.ae +co.ae +gov.ae +mil.ae +net.ae +org.ae +sch.ae + +// aero : https://information.aero/registration/policies/dmp +aero +// 2LDs +airline.aero +airport.aero +// 2LDs (currently not accepting registration, seemingly never have) +// As of 2024-07, these are marked as reserved for potential 3LD +// registrations (clause 11 "allocated subdomains" in the 2006 TLD +// policy), but the relevant industry partners have not opened them up +// for registration. Current status can be determined from the TLD's +// policy document: 2LDs that are open for registration must list +// their policy in the TLD's policy. Any 2LD without such a policy is +// not open for registrations. +accident-investigation.aero +accident-prevention.aero +aerobatic.aero +aeroclub.aero +aerodrome.aero +agents.aero +air-surveillance.aero +air-traffic-control.aero +aircraft.aero +airtraffic.aero +ambulance.aero +association.aero +author.aero +ballooning.aero +broker.aero +caa.aero +cargo.aero +catering.aero +certification.aero +championship.aero +charter.aero +civilaviation.aero +club.aero +conference.aero +consultant.aero +consulting.aero +control.aero +council.aero +crew.aero +design.aero +dgca.aero +educator.aero +emergency.aero +engine.aero +engineer.aero +entertainment.aero +equipment.aero +exchange.aero +express.aero +federation.aero +flight.aero +freight.aero +fuel.aero +gliding.aero +government.aero +groundhandling.aero +group.aero +hanggliding.aero +homebuilt.aero +insurance.aero +journal.aero +journalist.aero +leasing.aero +logistics.aero +magazine.aero +maintenance.aero +marketplace.aero +media.aero +microlight.aero +modelling.aero +navigation.aero +parachuting.aero +paragliding.aero 
+passenger-association.aero +pilot.aero +press.aero +production.aero +recreation.aero +repbody.aero +res.aero +research.aero +rotorcraft.aero +safety.aero +scientist.aero +services.aero +show.aero +skydiving.aero +software.aero +student.aero +taxi.aero +trader.aero +trading.aero +trainer.aero +union.aero +workinggroup.aero +works.aero + +// af : https://www.nic.af/domain-price +af +com.af +edu.af +gov.af +net.af +org.af + +// ag : http://www.nic.ag/prices.htm +ag +co.ag +com.ag +net.ag +nom.ag +org.ag + +// ai : http://nic.com.ai/ +ai +com.ai +net.ai +off.ai +org.ai + +// al : http://www.ert.gov.al/ert_alb/faq_det.html?Id=31 +al +com.al +edu.al +gov.al +mil.al +net.al +org.al + +// am : https://www.amnic.net/policy/en/Policy_EN.pdf +// Confirmed by ISOC AM 2024-11-18 +am +co.am +com.am +commune.am +net.am +org.am + +// ao : https://www.iana.org/domains/root/db/ao.html +// https://www.dns.ao/ao/ +ao +co.ao +ed.ao +edu.ao +gov.ao +gv.ao +it.ao +og.ao +org.ao +pb.ao + +// aq : https://www.iana.org/domains/root/db/aq.html +aq + +// ar : https://nic.ar/es/nic-argentina/normativa +ar +bet.ar +com.ar +coop.ar +edu.ar +gob.ar +gov.ar +int.ar +mil.ar +musica.ar +mutual.ar +net.ar +org.ar +seg.ar +senasa.ar +tur.ar + +// arpa : https://www.iana.org/domains/root/db/arpa.html +// Confirmed by registry 2008-06-18 +arpa +e164.arpa +home.arpa +in-addr.arpa +ip6.arpa +iris.arpa +uri.arpa +urn.arpa + +// as : https://www.iana.org/domains/root/db/as.html +as +gov.as + +// asia : https://www.iana.org/domains/root/db/asia.html +asia + +// at : https://www.iana.org/domains/root/db/at.html +// Confirmed by registry 2008-06-17 +at +ac.at +sth.ac.at +co.at +gv.at +or.at + +// au : https://www.iana.org/domains/root/db/au.html +// https://www.auda.org.au/ +// Confirmed by registry 2024-11-17 +au +// 2LDs +asn.au +com.au +edu.au +gov.au +id.au +net.au +org.au +// Historic 2LDs (closed to new registration, but sites still exist) +conf.au +oz.au +// CGDNs : 
https://www.auda.org.au/au-domain-names/the-different-au-domain-names/state-and-territory-domain-names/ +act.au +nsw.au +nt.au +qld.au +sa.au +tas.au +vic.au +wa.au +// 3LDs +act.edu.au +catholic.edu.au +// eq.edu.au - Removed at the request of the Queensland Department of Education +nsw.edu.au +nt.edu.au +qld.edu.au +sa.edu.au +tas.edu.au +vic.edu.au +wa.edu.au +// act.gov.au - Bug 984824 - Removed at request of Greg Tankard +// nsw.gov.au - Bug 547985 - Removed at request of +// nt.gov.au - Bug 940478 - Removed at request of Greg Connors +qld.gov.au +sa.gov.au +tas.gov.au +vic.gov.au +wa.gov.au +// 4LDs +// education.tas.edu.au - Removed at the request of the Department of Education Tasmania +schools.nsw.edu.au + +// aw : https://www.iana.org/domains/root/db/aw.html +aw +com.aw + +// ax : https://www.iana.org/domains/root/db/ax.html +ax + +// az : https://www.iana.org/domains/root/db/az.html +// Confirmed via https://whois.az/?page_id=10 2024-12-11 +az +biz.az +co.az +com.az +edu.az +gov.az +info.az +int.az +mil.az +name.az +net.az +org.az +pp.az +// No longer available for registration, however domains exist as of 2024-12-11 +// see https://whois.az/?page_id=783 +pro.az + +// ba : https://www.iana.org/domains/root/db/ba.html +ba +com.ba +edu.ba +gov.ba +mil.ba +net.ba +org.ba + +// bb : https://www.iana.org/domains/root/db/bb.html +bb +biz.bb +co.bb +com.bb +edu.bb +gov.bb +info.bb +net.bb +org.bb +store.bb +tv.bb + +// bd : https://www.iana.org/domains/root/db/bd.html +*.bd + +// be : https://www.iana.org/domains/root/db/be.html +// Confirmed by registry 2008-06-08 +be +ac.be + +// bf : https://www.iana.org/domains/root/db/bf.html +bf +gov.bf + +// bg : https://www.iana.org/domains/root/db/bg.html +// https://www.register.bg/user/static/rules/en/index.html +bg +0.bg +1.bg +2.bg +3.bg +4.bg +5.bg +6.bg +7.bg +8.bg +9.bg +a.bg +b.bg +c.bg +d.bg +e.bg +f.bg +g.bg +h.bg +i.bg +j.bg +k.bg +l.bg +m.bg +n.bg +o.bg +p.bg +q.bg +r.bg +s.bg +t.bg +u.bg +v.bg +w.bg +x.bg 
+y.bg +z.bg + +// bh : https://www.iana.org/domains/root/db/bh.html +bh +com.bh +edu.bh +gov.bh +net.bh +org.bh + +// bi : https://www.iana.org/domains/root/db/bi.html +// http://whois.nic.bi/ +bi +co.bi +com.bi +edu.bi +or.bi +org.bi + +// biz : https://www.iana.org/domains/root/db/biz.html +biz + +// bj : https://nic.bj/bj-suffixes.txt +// Submitted by registry +bj +africa.bj +agro.bj +architectes.bj +assur.bj +avocats.bj +co.bj +com.bj +eco.bj +econo.bj +edu.bj +info.bj +loisirs.bj +money.bj +net.bj +org.bj +ote.bj +restaurant.bj +resto.bj +tourism.bj +univ.bj + +// bm : https://www.bermudanic.bm/domain-registration/index.php +bm +com.bm +edu.bm +gov.bm +net.bm +org.bm + +// bn : http://www.bnnic.bn/faqs +bn +com.bn +edu.bn +gov.bn +net.bn +org.bn + +// bo : https://nic.bo +// Confirmed by registry 2024-11-19 +bo +com.bo +edu.bo +gob.bo +int.bo +mil.bo +net.bo +org.bo +tv.bo +web.bo +// Social Domains +academia.bo +agro.bo +arte.bo +blog.bo +bolivia.bo +ciencia.bo +cooperativa.bo +democracia.bo +deporte.bo +ecologia.bo +economia.bo +empresa.bo +indigena.bo +industria.bo +info.bo +medicina.bo +movimiento.bo +musica.bo +natural.bo +nombre.bo +noticias.bo +patria.bo +plurinacional.bo +politica.bo +profesional.bo +pueblo.bo +revista.bo +salud.bo +tecnologia.bo +tksat.bo +transporte.bo +wiki.bo + +// br : http://registro.br/dominio/categoria.html +// Submitted by registry +br +9guacu.br +abc.br +adm.br +adv.br +agr.br +aju.br +am.br +anani.br +aparecida.br +app.br +arq.br +art.br +ato.br +b.br +barueri.br +belem.br +bet.br +bhz.br +bib.br +bio.br +blog.br +bmd.br +boavista.br +bsb.br +campinagrande.br +campinas.br +caxias.br +cim.br +cng.br +cnt.br +com.br +contagem.br +coop.br +coz.br +cri.br +cuiaba.br +curitiba.br +def.br +des.br +det.br +dev.br +ecn.br +eco.br +edu.br +emp.br +enf.br +eng.br +esp.br +etc.br +eti.br +far.br +feira.br +flog.br +floripa.br +fm.br +fnd.br +fortal.br +fot.br +foz.br +fst.br +g12.br +geo.br +ggf.br +goiania.br +gov.br +// gov.br 26 
states + df https://en.wikipedia.org/wiki/States_of_Brazil +ac.gov.br +al.gov.br +am.gov.br +ap.gov.br +ba.gov.br +ce.gov.br +df.gov.br +es.gov.br +go.gov.br +ma.gov.br +mg.gov.br +ms.gov.br +mt.gov.br +pa.gov.br +pb.gov.br +pe.gov.br +pi.gov.br +pr.gov.br +rj.gov.br +rn.gov.br +ro.gov.br +rr.gov.br +rs.gov.br +sc.gov.br +se.gov.br +sp.gov.br +to.gov.br +gru.br +imb.br +ind.br +inf.br +jab.br +jampa.br +jdf.br +joinville.br +jor.br +jus.br +leg.br +leilao.br +lel.br +log.br +londrina.br +macapa.br +maceio.br +manaus.br +maringa.br +mat.br +med.br +mil.br +morena.br +mp.br +mus.br +natal.br +net.br +niteroi.br +*.nom.br +not.br +ntr.br +odo.br +ong.br +org.br +osasco.br +palmas.br +poa.br +ppg.br +pro.br +psc.br +psi.br +pvh.br +qsl.br +radio.br +rec.br +recife.br +rep.br +ribeirao.br +rio.br +riobranco.br +riopreto.br +salvador.br +sampa.br +santamaria.br +santoandre.br +saobernardo.br +saogonca.br +seg.br +sjc.br +slg.br +slz.br +sorocaba.br +srv.br +taxi.br +tc.br +tec.br +teo.br +the.br +tmp.br +trd.br +tur.br +tv.br +udi.br +vet.br +vix.br +vlog.br +wiki.br +zlg.br + +// bs : http://www.nic.bs/rules.html +bs +com.bs +edu.bs +gov.bs +net.bs +org.bs + +// bt : https://www.iana.org/domains/root/db/bt.html +bt +com.bt +edu.bt +gov.bt +net.bt +org.bt + +// bv : No registrations at this time. +// Submitted by registry +bv + +// bw : https://www.iana.org/domains/root/db/bw.html +// https://nic.net.bw/bw-name-structure +bw +ac.bw +co.bw +gov.bw +net.bw +org.bw + +// by : https://www.iana.org/domains/root/db/by.html +// http://tld.by/rules_2006_en.html +// list of other 2nd level tlds ? +by +gov.by +mil.by +// Official information does not indicate that com.by is a reserved +// second-level domain, but it's being used as one (see www.google.com.by and +// www.yahoo.com.by, for example), so we list it here for safety's sake. 
+com.by +// http://hoster.by/ +of.by + +// bz : https://www.iana.org/domains/root/db/bz.html +// http://www.belizenic.bz/ +bz +co.bz +com.bz +edu.bz +gov.bz +net.bz +org.bz + +// ca : https://www.iana.org/domains/root/db/ca.html +ca +// ca geographical names +ab.ca +bc.ca +mb.ca +nb.ca +nf.ca +nl.ca +ns.ca +nt.ca +nu.ca +on.ca +pe.ca +qc.ca +sk.ca +yk.ca +// gc.ca: https://en.wikipedia.org/wiki/.gc.ca +// see also: http://registry.gc.ca/en/SubdomainFAQ +gc.ca + +// cat : https://www.iana.org/domains/root/db/cat.html +cat + +// cc : https://www.iana.org/domains/root/db/cc.html +cc + +// cd : https://www.iana.org/domains/root/db/cd.html +// https://www.nic.cd +cd +gov.cd + +// cf : https://www.iana.org/domains/root/db/cf.html +cf + +// cg : https://www.iana.org/domains/root/db/cg.html +cg + +// ch : https://www.iana.org/domains/root/db/ch.html +ch + +// ci : https://www.iana.org/domains/root/db/ci.html +ci +ac.ci +aéroport.ci +asso.ci +co.ci +com.ci +ed.ci +edu.ci +go.ci +gouv.ci +int.ci +net.ci +or.ci +org.ci + +// ck : https://www.iana.org/domains/root/db/ck.html +*.ck +!www.ck + +// cl : https://www.nic.cl +// Confirmed by .CL registry +cl +co.cl +gob.cl +gov.cl +mil.cl + +// cm : https://www.iana.org/domains/root/db/cm.html plus bug 981927 +cm +co.cm +com.cm +gov.cm +net.cm + +// cn : https://www.iana.org/domains/root/db/cn.html +// Submitted by registry +cn +ac.cn +com.cn +edu.cn +gov.cn +mil.cn +net.cn +org.cn +公司.cn +網絡.cn +网络.cn +// cn geographic names +ah.cn +bj.cn +cq.cn +fj.cn +gd.cn +gs.cn +gx.cn +gz.cn +ha.cn +hb.cn +he.cn +hi.cn +hk.cn +hl.cn +hn.cn +jl.cn +js.cn +jx.cn +ln.cn +mo.cn +nm.cn +nx.cn +qh.cn +sc.cn +sd.cn +sh.cn +sn.cn +sx.cn +tj.cn +tw.cn +xj.cn +xz.cn +yn.cn +zj.cn + +// co : https://www.iana.org/domains/root/db/co.html +// https://www.cointernet.com.co/como-funciona-un-dominio-restringido +// Confirmed by registry 2024-11-18 +co +com.co +edu.co +gov.co +mil.co +net.co +nom.co +org.co + +// com : 
https://www.iana.org/domains/root/db/com.html +com + +// coop : https://www.iana.org/domains/root/db/coop.html +coop + +// cr : https://nic.cr/capitulo-1-registro-de-un-nombre-de-dominio/ +cr +ac.cr +co.cr +ed.cr +fi.cr +go.cr +or.cr +sa.cr + +// cu : https://www.iana.org/domains/root/db/cu.html +cu +com.cu +edu.cu +gob.cu +inf.cu +nat.cu +net.cu +org.cu + +// cv : https://www.iana.org/domains/root/db/cv.html +// https://ola.cv/domain-extensions-under-cv/ +// Confirmed by registry 2024-11-26 +cv +com.cv +edu.cv +id.cv +int.cv +net.cv +nome.cv +org.cv +publ.cv + +// cw : https://www.uoc.cw/cw-registry +// Confirmed by registry 2024-11-19 +cw +com.cw +edu.cw +net.cw +org.cw + +// cx : https://www.iana.org/domains/root/db/cx.html +// list of other 2nd level tlds ? +cx +gov.cx + +// cy : http://www.nic.cy/ +// Submitted by Panayiotou Fotia +// https://nic.cy/wp-content/uploads/2024/01/Create-Request-for-domain-name-registration-1.pdf +cy +ac.cy +biz.cy +com.cy +ekloges.cy +gov.cy +ltd.cy +mil.cy +net.cy +org.cy +press.cy +pro.cy +tm.cy + +// cz : https://www.iana.org/domains/root/db/cz.html +cz + +// de : https://www.iana.org/domains/root/db/de.html +// Confirmed by registry (with technical +// reservations) 2008-07-01 +de + +// dj : https://www.iana.org/domains/root/db/dj.html +dj + +// dk : https://www.iana.org/domains/root/db/dk.html +// Confirmed by registry 2008-06-17 +dk + +// dm : https://www.iana.org/domains/root/db/dm.html +// https://nic.dm/policies/pdf/DMRulesandGuidelines2024v1.pdf +// Confirmed by registry 2024-11-19 +dm +co.dm +com.dm +edu.dm +gov.dm +net.dm +org.dm + +// do : https://www.iana.org/domains/root/db/do.html +do +art.do +com.do +edu.do +gob.do +gov.do +mil.do +net.do +org.do +sld.do +web.do + +// dz : http://www.nic.dz/images/pdf_nic/charte.pdf +dz +art.dz +asso.dz +com.dz +edu.dz +gov.dz +net.dz +org.dz +pol.dz +soc.dz +tm.dz + +// ec : https://www.nic.ec/ +// Submitted by registry +ec +abg.ec +adm.ec +agron.ec +arqt.ec +art.ec +bar.ec 
+chef.ec +com.ec +cont.ec +cpa.ec +cue.ec +dent.ec +dgn.ec +disco.ec +doc.ec +edu.ec +eng.ec +esm.ec +fin.ec +fot.ec +gal.ec +gob.ec +gov.ec +gye.ec +ibr.ec +info.ec +k12.ec +lat.ec +loj.ec +med.ec +mil.ec +mktg.ec +mon.ec +net.ec +ntr.ec +odont.ec +org.ec +pro.ec +prof.ec +psic.ec +psiq.ec +pub.ec +rio.ec +rrpp.ec +sal.ec +tech.ec +tul.ec +tur.ec +uio.ec +vet.ec +xxx.ec + +// edu : https://www.iana.org/domains/root/db/edu.html +edu + +// ee : https://www.internet.ee/domains/general-domains-and-procedure-for-registration-of-sub-domains-under-general-domains +ee +aip.ee +com.ee +edu.ee +fie.ee +gov.ee +lib.ee +med.ee +org.ee +pri.ee +riik.ee + +// eg : https://www.iana.org/domains/root/db/eg.html +// https://domain.eg/en/domain-rules/subdomain-names-types/ +eg +ac.eg +com.eg +edu.eg +eun.eg +gov.eg +info.eg +me.eg +mil.eg +name.eg +net.eg +org.eg +sci.eg +sport.eg +tv.eg + +// er : https://www.iana.org/domains/root/db/er.html +*.er + +// es : https://www.dominios.es/en +es +com.es +edu.es +gob.es +nom.es +org.es + +// et : https://www.iana.org/domains/root/db/et.html +et +biz.et +com.et +edu.et +gov.et +info.et +name.et +net.et +org.et + +// eu : https://www.iana.org/domains/root/db/eu.html +eu + +// fi : https://www.iana.org/domains/root/db/fi.html +fi +// aland.fi : https://www.iana.org/domains/root/db/ax.html +// This domain is being phased out in favor of .ax. As there are still many +// domains under aland.fi, we still keep it on the list until aland.fi is +// completely removed. 
+aland.fi + +// fj : http://domains.fj/ +// Submitted by registry 2020-02-11 +fj +ac.fj +biz.fj +com.fj +gov.fj +info.fj +mil.fj +name.fj +net.fj +org.fj +pro.fj + +// fk : https://www.iana.org/domains/root/db/fk.html +*.fk + +// fm : https://www.iana.org/domains/root/db/fm.html +fm +com.fm +edu.fm +net.fm +org.fm + +// fo : https://www.iana.org/domains/root/db/fo.html +fo + +// fr : https://www.afnic.fr/ https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf +fr +asso.fr +com.fr +gouv.fr +nom.fr +prd.fr +tm.fr +// Other SLDs now selfmanaged out of AFNIC range. Former "domaines sectoriels", still registration suffixes +avoues.fr +cci.fr +greta.fr +huissier-justice.fr + +// ga : https://www.iana.org/domains/root/db/ga.html +ga + +// gb : This registry is effectively dormant +// Submitted by registry +gb + +// gd : https://www.iana.org/domains/root/db/gd.html +gd +edu.gd +gov.gd + +// ge : https://nic.ge/en/administrator/the-ge-domain-regulations +// Confirmed by registry 2024-11-20 +ge +com.ge +edu.ge +gov.ge +net.ge +org.ge +pvt.ge +school.ge + +// gf : https://www.iana.org/domains/root/db/gf.html +gf + +// gg : https://www.channelisles.net/register-1/register-direct +// Confirmed by registry 2013-11-28 +gg +co.gg +net.gg +org.gg + +// gh : https://www.iana.org/domains/root/db/gh.html +// https://www.nic.gh/ +// Although domains directly at second level are not possible at the moment, +// they have been possible for some time and may come back. 
+gh +biz.gh +com.gh +edu.gh +gov.gh +mil.gh +net.gh +org.gh + +// gi : http://www.nic.gi/rules.html +gi +com.gi +edu.gi +gov.gi +ltd.gi +mod.gi +org.gi + +// gl : https://www.iana.org/domains/root/db/gl.html +// http://nic.gl +gl +co.gl +com.gl +edu.gl +net.gl +org.gl + +// gm : http://www.nic.gm/htmlpages%5Cgm-policy.htm +gm + +// gn : http://psg.com/dns/gn/gn.txt +// Submitted by registry +gn +ac.gn +com.gn +edu.gn +gov.gn +net.gn +org.gn + +// gov : https://www.iana.org/domains/root/db/gov.html +gov + +// gp : http://www.nic.gp/index.php?lang=en +gp +asso.gp +com.gp +edu.gp +mobi.gp +net.gp +org.gp + +// gq : https://www.iana.org/domains/root/db/gq.html +gq + +// gr : https://www.iana.org/domains/root/db/gr.html +// Submitted by registry +gr +com.gr +edu.gr +gov.gr +net.gr +org.gr + +// gs : https://www.iana.org/domains/root/db/gs.html +gs + +// gt : https://www.gt/sitio/registration_policy.php?lang=en +gt +com.gt +edu.gt +gob.gt +ind.gt +mil.gt +net.gt +org.gt + +// gu : http://gadao.gov.gu/register.html +// University of Guam : https://www.uog.edu +// Submitted by uognoc@triton.uog.edu +gu +com.gu +edu.gu +gov.gu +guam.gu +info.gu +net.gu +org.gu +web.gu + +// gw : https://www.iana.org/domains/root/db/gw.html +// gw : https://nic.gw/regras/ +gw + +// gy : https://www.iana.org/domains/root/db/gy.html +// http://registry.gy/ +gy +co.gy +com.gy +edu.gy +gov.gy +net.gy +org.gy + +// hk : https://www.hkirc.hk +// Submitted by registry +hk +com.hk +edu.hk +gov.hk +idv.hk +net.hk +org.hk +个人.hk +個人.hk +公司.hk +政府.hk +敎育.hk +教育.hk +箇人.hk +組織.hk +組织.hk +網絡.hk +網络.hk +组織.hk +组织.hk +网絡.hk +网络.hk + +// hm : https://www.iana.org/domains/root/db/hm.html +hm + +// hn : https://www.iana.org/domains/root/db/hn.html +hn +com.hn +edu.hn +gob.hn +mil.hn +net.hn +org.hn + +// hr : http://www.dns.hr/documents/pdf/HRTLD-regulations.pdf +hr +com.hr +from.hr +iz.hr +name.hr + +// ht : http://www.nic.ht/info/charte.cfm +ht +adult.ht +art.ht +asso.ht +com.ht +coop.ht +edu.ht +firm.ht 
+gouv.ht +info.ht +med.ht +net.ht +org.ht +perso.ht +pol.ht +pro.ht +rel.ht +shop.ht + +// hu : https://www.iana.org/domains/root/db/hu.html +// Confirmed by registry 2008-06-12 +hu +2000.hu +agrar.hu +bolt.hu +casino.hu +city.hu +co.hu +erotica.hu +erotika.hu +film.hu +forum.hu +games.hu +hotel.hu +info.hu +ingatlan.hu +jogasz.hu +konyvelo.hu +lakas.hu +media.hu +news.hu +org.hu +priv.hu +reklam.hu +sex.hu +shop.hu +sport.hu +suli.hu +szex.hu +tm.hu +tozsde.hu +utazas.hu +video.hu + +// id : https://www.iana.org/domains/root/db/id.html +id +ac.id +biz.id +co.id +desa.id +go.id +kop.id +mil.id +my.id +net.id +or.id +ponpes.id +sch.id +web.id + +// ie : https://www.iana.org/domains/root/db/ie.html +ie +gov.ie + +// il : http://www.isoc.org.il/domains/ +// see also: https://en.isoc.org.il/il-cctld/registration-rules +// ISOC-IL (operated by .il Registry) +il +ac.il +co.il +gov.il +idf.il +k12.il +muni.il +net.il +org.il +// xn--4dbrk0ce ("Israel", Hebrew) : IL +ישראל +// xn--4dbgdty6c.xn--4dbrk0ce. +אקדמיה.ישראל +// xn--5dbhl8d.xn--4dbrk0ce. +ישוב.ישראל +// xn--8dbq2a.xn--4dbrk0ce. +צהל.ישראל +// xn--hebda8b.xn--4dbrk0ce. +ממשל.ישראל + +// im : https://www.nic.im/ +// Submitted by registry +im +ac.im +co.im +ltd.co.im +plc.co.im +com.im +net.im +org.im +tt.im +tv.im + +// in : https://www.iana.org/domains/root/db/in.html +// see also: https://registry.in/policies +// Please note, that nic.in is not an official eTLD, but used by most +// government institutions. 
+in +5g.in +6g.in +ac.in +ai.in +am.in +bihar.in +biz.in +business.in +ca.in +cn.in +co.in +com.in +coop.in +cs.in +delhi.in +dr.in +edu.in +er.in +firm.in +gen.in +gov.in +gujarat.in +ind.in +info.in +int.in +internet.in +io.in +me.in +mil.in +net.in +nic.in +org.in +pg.in +post.in +pro.in +res.in +travel.in +tv.in +uk.in +up.in +us.in + +// info : https://www.iana.org/domains/root/db/info.html +info + +// int : https://www.iana.org/domains/root/db/int.html +// Confirmed by registry 2008-06-18 +int +eu.int + +// io : http://www.nic.io/rules.htm +io +co.io +com.io +edu.io +gov.io +mil.io +net.io +nom.io +org.io + +// iq : http://www.cmc.iq/english/iq/iqregister1.htm +iq +com.iq +edu.iq +gov.iq +mil.iq +net.iq +org.iq + +// ir : http://www.nic.ir/Terms_and_Conditions_ir,_Appendix_1_Domain_Rules +// Also see http://www.nic.ir/Internationalized_Domain_Names +// Two .ir entries added at request of , 2010-04-16 +ir +ac.ir +co.ir +gov.ir +id.ir +net.ir +org.ir +sch.ir +// xn--mgba3a4f16a.ir (.ir, Persian YEH) +ایران.ir +// xn--mgba3a4fra.ir (.ir, Arabic YEH) +ايران.ir + +// is : http://www.isnic.is/domain/rules.php +// Confirmed by registry 2024-11-17 +is + +// it : https://www.iana.org/domains/root/db/it.html +// https://www.nic.it/ +it +edu.it +gov.it +// Regions (3.3.1) +// https://www.nic.it/en/manage-your-it/forms-and-docs -> "Assignment and Management of domain names" +abr.it +abruzzo.it +aosta-valley.it +aostavalley.it +bas.it +basilicata.it +cal.it +calabria.it +cam.it +campania.it +emilia-romagna.it +emiliaromagna.it +emr.it +friuli-v-giulia.it +friuli-ve-giulia.it +friuli-vegiulia.it +friuli-venezia-giulia.it +friuli-veneziagiulia.it +friuli-vgiulia.it +friuliv-giulia.it +friulive-giulia.it +friulivegiulia.it +friulivenezia-giulia.it +friuliveneziagiulia.it +friulivgiulia.it +fvg.it +laz.it +lazio.it +lig.it +liguria.it +lom.it +lombardia.it +lombardy.it +lucania.it +mar.it +marche.it +mol.it +molise.it +piedmont.it +piemonte.it +pmn.it +pug.it +puglia.it 
+sar.it +sardegna.it +sardinia.it +sic.it +sicilia.it +sicily.it +taa.it +tos.it +toscana.it +trentin-sud-tirol.it +trentin-süd-tirol.it +trentin-sudtirol.it +trentin-südtirol.it +trentin-sued-tirol.it +trentin-suedtirol.it +trentino.it +trentino-a-adige.it +trentino-aadige.it +trentino-alto-adige.it +trentino-altoadige.it +trentino-s-tirol.it +trentino-stirol.it +trentino-sud-tirol.it +trentino-süd-tirol.it +trentino-sudtirol.it +trentino-südtirol.it +trentino-sued-tirol.it +trentino-suedtirol.it +trentinoa-adige.it +trentinoaadige.it +trentinoalto-adige.it +trentinoaltoadige.it +trentinos-tirol.it +trentinostirol.it +trentinosud-tirol.it +trentinosüd-tirol.it +trentinosudtirol.it +trentinosüdtirol.it +trentinosued-tirol.it +trentinosuedtirol.it +trentinsud-tirol.it +trentinsüd-tirol.it +trentinsudtirol.it +trentinsüdtirol.it +trentinsued-tirol.it +trentinsuedtirol.it +tuscany.it +umb.it +umbria.it +val-d-aosta.it +val-daosta.it +vald-aosta.it +valdaosta.it +valle-aosta.it +valle-d-aosta.it +valle-daosta.it +valleaosta.it +valled-aosta.it +valledaosta.it +vallee-aoste.it +vallée-aoste.it +vallee-d-aoste.it +vallée-d-aoste.it +valleeaoste.it +valléeaoste.it +valleedaoste.it +valléedaoste.it +vao.it +vda.it +ven.it +veneto.it +// Provinces (3.3.2) +ag.it +agrigento.it +al.it +alessandria.it +alto-adige.it +altoadige.it +an.it +ancona.it +andria-barletta-trani.it +andria-trani-barletta.it +andriabarlettatrani.it +andriatranibarletta.it +ao.it +aosta.it +aoste.it +ap.it +aq.it +aquila.it +ar.it +arezzo.it +ascoli-piceno.it +ascolipiceno.it +asti.it +at.it +av.it +avellino.it +ba.it +balsan.it +balsan-sudtirol.it +balsan-südtirol.it +balsan-suedtirol.it +bari.it +barletta-trani-andria.it +barlettatraniandria.it +belluno.it +benevento.it +bergamo.it +bg.it +bi.it +biella.it +bl.it +bn.it +bo.it +bologna.it +bolzano.it +bolzano-altoadige.it +bozen.it +bozen-sudtirol.it +bozen-südtirol.it +bozen-suedtirol.it +br.it +brescia.it +brindisi.it +bs.it +bt.it +bulsan.it 
+bulsan-sudtirol.it +bulsan-südtirol.it +bulsan-suedtirol.it +bz.it +ca.it +cagliari.it +caltanissetta.it +campidano-medio.it +campidanomedio.it +campobasso.it +carbonia-iglesias.it +carboniaiglesias.it +carrara-massa.it +carraramassa.it +caserta.it +catania.it +catanzaro.it +cb.it +ce.it +cesena-forli.it +cesena-forlì.it +cesenaforli.it +cesenaforlì.it +ch.it +chieti.it +ci.it +cl.it +cn.it +co.it +como.it +cosenza.it +cr.it +cremona.it +crotone.it +cs.it +ct.it +cuneo.it +cz.it +dell-ogliastra.it +dellogliastra.it +en.it +enna.it +fc.it +fe.it +fermo.it +ferrara.it +fg.it +fi.it +firenze.it +florence.it +fm.it +foggia.it +forli-cesena.it +forlì-cesena.it +forlicesena.it +forlìcesena.it +fr.it +frosinone.it +ge.it +genoa.it +genova.it +go.it +gorizia.it +gr.it +grosseto.it +iglesias-carbonia.it +iglesiascarbonia.it +im.it +imperia.it +is.it +isernia.it +kr.it +la-spezia.it +laquila.it +laspezia.it +latina.it +lc.it +le.it +lecce.it +lecco.it +li.it +livorno.it +lo.it +lodi.it +lt.it +lu.it +lucca.it +macerata.it +mantova.it +massa-carrara.it +massacarrara.it +matera.it +mb.it +mc.it +me.it +medio-campidano.it +mediocampidano.it +messina.it +mi.it +milan.it +milano.it +mn.it +mo.it +modena.it +monza.it +monza-brianza.it +monza-e-della-brianza.it +monzabrianza.it +monzaebrianza.it +monzaedellabrianza.it +ms.it +mt.it +na.it +naples.it +napoli.it +no.it +novara.it +nu.it +nuoro.it +og.it +ogliastra.it +olbia-tempio.it +olbiatempio.it +or.it +oristano.it +ot.it +pa.it +padova.it +padua.it +palermo.it +parma.it +pavia.it +pc.it +pd.it +pe.it +perugia.it +pesaro-urbino.it +pesarourbino.it +pescara.it +pg.it +pi.it +piacenza.it +pisa.it +pistoia.it +pn.it +po.it +pordenone.it +potenza.it +pr.it +prato.it +pt.it +pu.it +pv.it +pz.it +ra.it +ragusa.it +ravenna.it +rc.it +re.it +reggio-calabria.it +reggio-emilia.it +reggiocalabria.it +reggioemilia.it +rg.it +ri.it +rieti.it +rimini.it +rm.it +rn.it +ro.it +roma.it +rome.it +rovigo.it +sa.it +salerno.it +sassari.it 
+savona.it +si.it +siena.it +siracusa.it +so.it +sondrio.it +sp.it +sr.it +ss.it +südtirol.it +suedtirol.it +sv.it +ta.it +taranto.it +te.it +tempio-olbia.it +tempioolbia.it +teramo.it +terni.it +tn.it +to.it +torino.it +tp.it +tr.it +trani-andria-barletta.it +trani-barletta-andria.it +traniandriabarletta.it +tranibarlettaandria.it +trapani.it +trento.it +treviso.it +trieste.it +ts.it +turin.it +tv.it +ud.it +udine.it +urbino-pesaro.it +urbinopesaro.it +va.it +varese.it +vb.it +vc.it +ve.it +venezia.it +venice.it +verbania.it +vercelli.it +verona.it +vi.it +vibo-valentia.it +vibovalentia.it +vicenza.it +viterbo.it +vr.it +vs.it +vt.it +vv.it + +// je : https://www.iana.org/domains/root/db/je.html +// Confirmed by registry 2013-11-28 +je +co.je +net.je +org.je + +// jm : http://www.com.jm/register.html +*.jm + +// jo : https://www.dns.jo/JoFamily.aspx +// Confirmed by registry 2024-11-17 +jo +agri.jo +ai.jo +com.jo +edu.jo +eng.jo +fm.jo +gov.jo +mil.jo +net.jo +org.jo +per.jo +phd.jo +sch.jo +tv.jo + +// jobs : https://www.iana.org/domains/root/db/jobs.html +jobs + +// jp : https://www.iana.org/domains/root/db/jp.html +// http://jprs.co.jp/en/jpdomain.html +// Confirmed by registry 2024-11-22 +jp +// jp organizational type names +ac.jp +ad.jp +co.jp +ed.jp +go.jp +gr.jp +lg.jp +ne.jp +or.jp +// jp prefecture type names +aichi.jp +akita.jp +aomori.jp +chiba.jp +ehime.jp +fukui.jp +fukuoka.jp +fukushima.jp +gifu.jp +gunma.jp +hiroshima.jp +hokkaido.jp +hyogo.jp +ibaraki.jp +ishikawa.jp +iwate.jp +kagawa.jp +kagoshima.jp +kanagawa.jp +kochi.jp +kumamoto.jp +kyoto.jp +mie.jp +miyagi.jp +miyazaki.jp +nagano.jp +nagasaki.jp +nara.jp +niigata.jp +oita.jp +okayama.jp +okinawa.jp +osaka.jp +saga.jp +saitama.jp +shiga.jp +shimane.jp +shizuoka.jp +tochigi.jp +tokushima.jp +tokyo.jp +tottori.jp +toyama.jp +wakayama.jp +yamagata.jp +yamaguchi.jp +yamanashi.jp +三重.jp +京都.jp +佐賀.jp +兵庫.jp +北海道.jp +千葉.jp +和歌山.jp +埼玉.jp +大分.jp +大阪.jp +奈良.jp +宮城.jp +宮崎.jp +富山.jp +山口.jp +山形.jp +山梨.jp 
+岐阜.jp +岡山.jp +岩手.jp +島根.jp +広島.jp +徳島.jp +愛媛.jp +愛知.jp +新潟.jp +東京.jp +栃木.jp +沖縄.jp +滋賀.jp +熊本.jp +石川.jp +神奈川.jp +福井.jp +福岡.jp +福島.jp +秋田.jp +群馬.jp +茨城.jp +長崎.jp +長野.jp +青森.jp +静岡.jp +香川.jp +高知.jp +鳥取.jp +鹿児島.jp +// jp geographic type names +// http://jprs.jp/doc/rule/saisoku-1.html +// 2024-11-22: JPRS confirmed that jp geographic type names no longer accept new registrations. +// Once all existing registrations expire (marking full discontinuation), these suffixes +// will be removed from the PSL. +*.kawasaki.jp +!city.kawasaki.jp +*.kitakyushu.jp +!city.kitakyushu.jp +*.kobe.jp +!city.kobe.jp +*.nagoya.jp +!city.nagoya.jp +*.sapporo.jp +!city.sapporo.jp +*.sendai.jp +!city.sendai.jp +*.yokohama.jp +!city.yokohama.jp +// 4th level registration +aisai.aichi.jp +ama.aichi.jp +anjo.aichi.jp +asuke.aichi.jp +chiryu.aichi.jp +chita.aichi.jp +fuso.aichi.jp +gamagori.aichi.jp +handa.aichi.jp +hazu.aichi.jp +hekinan.aichi.jp +higashiura.aichi.jp +ichinomiya.aichi.jp +inazawa.aichi.jp +inuyama.aichi.jp +isshiki.aichi.jp +iwakura.aichi.jp +kanie.aichi.jp +kariya.aichi.jp +kasugai.aichi.jp +kira.aichi.jp +kiyosu.aichi.jp +komaki.aichi.jp +konan.aichi.jp +kota.aichi.jp +mihama.aichi.jp +miyoshi.aichi.jp +nishio.aichi.jp +nisshin.aichi.jp +obu.aichi.jp +oguchi.aichi.jp +oharu.aichi.jp +okazaki.aichi.jp +owariasahi.aichi.jp +seto.aichi.jp +shikatsu.aichi.jp +shinshiro.aichi.jp +shitara.aichi.jp +tahara.aichi.jp +takahama.aichi.jp +tobishima.aichi.jp +toei.aichi.jp +togo.aichi.jp +tokai.aichi.jp +tokoname.aichi.jp +toyoake.aichi.jp +toyohashi.aichi.jp +toyokawa.aichi.jp +toyone.aichi.jp +toyota.aichi.jp +tsushima.aichi.jp +yatomi.aichi.jp +akita.akita.jp +daisen.akita.jp +fujisato.akita.jp +gojome.akita.jp +hachirogata.akita.jp +happou.akita.jp +higashinaruse.akita.jp +honjo.akita.jp +honjyo.akita.jp +ikawa.akita.jp +kamikoani.akita.jp +kamioka.akita.jp +katagami.akita.jp +kazuno.akita.jp +kitaakita.akita.jp +kosaka.akita.jp +kyowa.akita.jp +misato.akita.jp +mitane.akita.jp 
+moriyoshi.akita.jp +nikaho.akita.jp +noshiro.akita.jp +odate.akita.jp +oga.akita.jp +ogata.akita.jp +semboku.akita.jp +yokote.akita.jp +yurihonjo.akita.jp +aomori.aomori.jp +gonohe.aomori.jp +hachinohe.aomori.jp +hashikami.aomori.jp +hiranai.aomori.jp +hirosaki.aomori.jp +itayanagi.aomori.jp +kuroishi.aomori.jp +misawa.aomori.jp +mutsu.aomori.jp +nakadomari.aomori.jp +noheji.aomori.jp +oirase.aomori.jp +owani.aomori.jp +rokunohe.aomori.jp +sannohe.aomori.jp +shichinohe.aomori.jp +shingo.aomori.jp +takko.aomori.jp +towada.aomori.jp +tsugaru.aomori.jp +tsuruta.aomori.jp +abiko.chiba.jp +asahi.chiba.jp +chonan.chiba.jp +chosei.chiba.jp +choshi.chiba.jp +chuo.chiba.jp +funabashi.chiba.jp +futtsu.chiba.jp +hanamigawa.chiba.jp +ichihara.chiba.jp +ichikawa.chiba.jp +ichinomiya.chiba.jp +inzai.chiba.jp +isumi.chiba.jp +kamagaya.chiba.jp +kamogawa.chiba.jp +kashiwa.chiba.jp +katori.chiba.jp +katsuura.chiba.jp +kimitsu.chiba.jp +kisarazu.chiba.jp +kozaki.chiba.jp +kujukuri.chiba.jp +kyonan.chiba.jp +matsudo.chiba.jp +midori.chiba.jp +mihama.chiba.jp +minamiboso.chiba.jp +mobara.chiba.jp +mutsuzawa.chiba.jp +nagara.chiba.jp +nagareyama.chiba.jp +narashino.chiba.jp +narita.chiba.jp +noda.chiba.jp +oamishirasato.chiba.jp +omigawa.chiba.jp +onjuku.chiba.jp +otaki.chiba.jp +sakae.chiba.jp +sakura.chiba.jp +shimofusa.chiba.jp +shirako.chiba.jp +shiroi.chiba.jp +shisui.chiba.jp +sodegaura.chiba.jp +sosa.chiba.jp +tako.chiba.jp +tateyama.chiba.jp +togane.chiba.jp +tohnosho.chiba.jp +tomisato.chiba.jp +urayasu.chiba.jp +yachimata.chiba.jp +yachiyo.chiba.jp +yokaichiba.chiba.jp +yokoshibahikari.chiba.jp +yotsukaido.chiba.jp +ainan.ehime.jp +honai.ehime.jp +ikata.ehime.jp +imabari.ehime.jp +iyo.ehime.jp +kamijima.ehime.jp +kihoku.ehime.jp +kumakogen.ehime.jp +masaki.ehime.jp +matsuno.ehime.jp +matsuyama.ehime.jp +namikata.ehime.jp +niihama.ehime.jp +ozu.ehime.jp +saijo.ehime.jp +seiyo.ehime.jp +shikokuchuo.ehime.jp +tobe.ehime.jp +toon.ehime.jp +uchiko.ehime.jp +uwajima.ehime.jp 
+yawatahama.ehime.jp +echizen.fukui.jp +eiheiji.fukui.jp +fukui.fukui.jp +ikeda.fukui.jp +katsuyama.fukui.jp +mihama.fukui.jp +minamiechizen.fukui.jp +obama.fukui.jp +ohi.fukui.jp +ono.fukui.jp +sabae.fukui.jp +sakai.fukui.jp +takahama.fukui.jp +tsuruga.fukui.jp +wakasa.fukui.jp +ashiya.fukuoka.jp +buzen.fukuoka.jp +chikugo.fukuoka.jp +chikuho.fukuoka.jp +chikujo.fukuoka.jp +chikushino.fukuoka.jp +chikuzen.fukuoka.jp +chuo.fukuoka.jp +dazaifu.fukuoka.jp +fukuchi.fukuoka.jp +hakata.fukuoka.jp +higashi.fukuoka.jp +hirokawa.fukuoka.jp +hisayama.fukuoka.jp +iizuka.fukuoka.jp +inatsuki.fukuoka.jp +kaho.fukuoka.jp +kasuga.fukuoka.jp +kasuya.fukuoka.jp +kawara.fukuoka.jp +keisen.fukuoka.jp +koga.fukuoka.jp +kurate.fukuoka.jp +kurogi.fukuoka.jp +kurume.fukuoka.jp +minami.fukuoka.jp +miyako.fukuoka.jp +miyama.fukuoka.jp +miyawaka.fukuoka.jp +mizumaki.fukuoka.jp +munakata.fukuoka.jp +nakagawa.fukuoka.jp +nakama.fukuoka.jp +nishi.fukuoka.jp +nogata.fukuoka.jp +ogori.fukuoka.jp +okagaki.fukuoka.jp +okawa.fukuoka.jp +oki.fukuoka.jp +omuta.fukuoka.jp +onga.fukuoka.jp +onojo.fukuoka.jp +oto.fukuoka.jp +saigawa.fukuoka.jp +sasaguri.fukuoka.jp +shingu.fukuoka.jp +shinyoshitomi.fukuoka.jp +shonai.fukuoka.jp +soeda.fukuoka.jp +sue.fukuoka.jp +tachiarai.fukuoka.jp +tagawa.fukuoka.jp +takata.fukuoka.jp +toho.fukuoka.jp +toyotsu.fukuoka.jp +tsuiki.fukuoka.jp +ukiha.fukuoka.jp +umi.fukuoka.jp +usui.fukuoka.jp +yamada.fukuoka.jp +yame.fukuoka.jp +yanagawa.fukuoka.jp +yukuhashi.fukuoka.jp +aizubange.fukushima.jp +aizumisato.fukushima.jp +aizuwakamatsu.fukushima.jp +asakawa.fukushima.jp +bandai.fukushima.jp +date.fukushima.jp +fukushima.fukushima.jp +furudono.fukushima.jp +futaba.fukushima.jp +hanawa.fukushima.jp +higashi.fukushima.jp +hirata.fukushima.jp +hirono.fukushima.jp +iitate.fukushima.jp +inawashiro.fukushima.jp +ishikawa.fukushima.jp +iwaki.fukushima.jp +izumizaki.fukushima.jp +kagamiishi.fukushima.jp +kaneyama.fukushima.jp +kawamata.fukushima.jp +kitakata.fukushima.jp 
+kitashiobara.fukushima.jp +koori.fukushima.jp +koriyama.fukushima.jp +kunimi.fukushima.jp +miharu.fukushima.jp +mishima.fukushima.jp +namie.fukushima.jp +nango.fukushima.jp +nishiaizu.fukushima.jp +nishigo.fukushima.jp +okuma.fukushima.jp +omotego.fukushima.jp +ono.fukushima.jp +otama.fukushima.jp +samegawa.fukushima.jp +shimogo.fukushima.jp +shirakawa.fukushima.jp +showa.fukushima.jp +soma.fukushima.jp +sukagawa.fukushima.jp +taishin.fukushima.jp +tamakawa.fukushima.jp +tanagura.fukushima.jp +tenei.fukushima.jp +yabuki.fukushima.jp +yamato.fukushima.jp +yamatsuri.fukushima.jp +yanaizu.fukushima.jp +yugawa.fukushima.jp +anpachi.gifu.jp +ena.gifu.jp +gifu.gifu.jp +ginan.gifu.jp +godo.gifu.jp +gujo.gifu.jp +hashima.gifu.jp +hichiso.gifu.jp +hida.gifu.jp +higashishirakawa.gifu.jp +ibigawa.gifu.jp +ikeda.gifu.jp +kakamigahara.gifu.jp +kani.gifu.jp +kasahara.gifu.jp +kasamatsu.gifu.jp +kawaue.gifu.jp +kitagata.gifu.jp +mino.gifu.jp +minokamo.gifu.jp +mitake.gifu.jp +mizunami.gifu.jp +motosu.gifu.jp +nakatsugawa.gifu.jp +ogaki.gifu.jp +sakahogi.gifu.jp +seki.gifu.jp +sekigahara.gifu.jp +shirakawa.gifu.jp +tajimi.gifu.jp +takayama.gifu.jp +tarui.gifu.jp +toki.gifu.jp +tomika.gifu.jp +wanouchi.gifu.jp +yamagata.gifu.jp +yaotsu.gifu.jp +yoro.gifu.jp +annaka.gunma.jp +chiyoda.gunma.jp +fujioka.gunma.jp +higashiagatsuma.gunma.jp +isesaki.gunma.jp +itakura.gunma.jp +kanna.gunma.jp +kanra.gunma.jp +katashina.gunma.jp +kawaba.gunma.jp +kiryu.gunma.jp +kusatsu.gunma.jp +maebashi.gunma.jp +meiwa.gunma.jp +midori.gunma.jp +minakami.gunma.jp +naganohara.gunma.jp +nakanojo.gunma.jp +nanmoku.gunma.jp +numata.gunma.jp +oizumi.gunma.jp +ora.gunma.jp +ota.gunma.jp +shibukawa.gunma.jp +shimonita.gunma.jp +shinto.gunma.jp +showa.gunma.jp +takasaki.gunma.jp +takayama.gunma.jp +tamamura.gunma.jp +tatebayashi.gunma.jp +tomioka.gunma.jp +tsukiyono.gunma.jp +tsumagoi.gunma.jp +ueno.gunma.jp +yoshioka.gunma.jp +asaminami.hiroshima.jp +daiwa.hiroshima.jp +etajima.hiroshima.jp +fuchu.hiroshima.jp 
+fukuyama.hiroshima.jp +hatsukaichi.hiroshima.jp +higashihiroshima.hiroshima.jp +hongo.hiroshima.jp +jinsekikogen.hiroshima.jp +kaita.hiroshima.jp +kui.hiroshima.jp +kumano.hiroshima.jp +kure.hiroshima.jp +mihara.hiroshima.jp +miyoshi.hiroshima.jp +naka.hiroshima.jp +onomichi.hiroshima.jp +osakikamijima.hiroshima.jp +otake.hiroshima.jp +saka.hiroshima.jp +sera.hiroshima.jp +seranishi.hiroshima.jp +shinichi.hiroshima.jp +shobara.hiroshima.jp +takehara.hiroshima.jp +abashiri.hokkaido.jp +abira.hokkaido.jp +aibetsu.hokkaido.jp +akabira.hokkaido.jp +akkeshi.hokkaido.jp +asahikawa.hokkaido.jp +ashibetsu.hokkaido.jp +ashoro.hokkaido.jp +assabu.hokkaido.jp +atsuma.hokkaido.jp +bibai.hokkaido.jp +biei.hokkaido.jp +bifuka.hokkaido.jp +bihoro.hokkaido.jp +biratori.hokkaido.jp +chippubetsu.hokkaido.jp +chitose.hokkaido.jp +date.hokkaido.jp +ebetsu.hokkaido.jp +embetsu.hokkaido.jp +eniwa.hokkaido.jp +erimo.hokkaido.jp +esan.hokkaido.jp +esashi.hokkaido.jp +fukagawa.hokkaido.jp +fukushima.hokkaido.jp +furano.hokkaido.jp +furubira.hokkaido.jp +haboro.hokkaido.jp +hakodate.hokkaido.jp +hamatonbetsu.hokkaido.jp +hidaka.hokkaido.jp +higashikagura.hokkaido.jp +higashikawa.hokkaido.jp +hiroo.hokkaido.jp +hokuryu.hokkaido.jp +hokuto.hokkaido.jp +honbetsu.hokkaido.jp +horokanai.hokkaido.jp +horonobe.hokkaido.jp +ikeda.hokkaido.jp +imakane.hokkaido.jp +ishikari.hokkaido.jp +iwamizawa.hokkaido.jp +iwanai.hokkaido.jp +kamifurano.hokkaido.jp +kamikawa.hokkaido.jp +kamishihoro.hokkaido.jp +kamisunagawa.hokkaido.jp +kamoenai.hokkaido.jp +kayabe.hokkaido.jp +kembuchi.hokkaido.jp +kikonai.hokkaido.jp +kimobetsu.hokkaido.jp +kitahiroshima.hokkaido.jp +kitami.hokkaido.jp +kiyosato.hokkaido.jp +koshimizu.hokkaido.jp +kunneppu.hokkaido.jp +kuriyama.hokkaido.jp +kuromatsunai.hokkaido.jp +kushiro.hokkaido.jp +kutchan.hokkaido.jp +kyowa.hokkaido.jp +mashike.hokkaido.jp +matsumae.hokkaido.jp +mikasa.hokkaido.jp +minamifurano.hokkaido.jp +mombetsu.hokkaido.jp +moseushi.hokkaido.jp +mukawa.hokkaido.jp 
+muroran.hokkaido.jp +naie.hokkaido.jp +nakagawa.hokkaido.jp +nakasatsunai.hokkaido.jp +nakatombetsu.hokkaido.jp +nanae.hokkaido.jp +nanporo.hokkaido.jp +nayoro.hokkaido.jp +nemuro.hokkaido.jp +niikappu.hokkaido.jp +niki.hokkaido.jp +nishiokoppe.hokkaido.jp +noboribetsu.hokkaido.jp +numata.hokkaido.jp +obihiro.hokkaido.jp +obira.hokkaido.jp +oketo.hokkaido.jp +okoppe.hokkaido.jp +otaru.hokkaido.jp +otobe.hokkaido.jp +otofuke.hokkaido.jp +otoineppu.hokkaido.jp +oumu.hokkaido.jp +ozora.hokkaido.jp +pippu.hokkaido.jp +rankoshi.hokkaido.jp +rebun.hokkaido.jp +rikubetsu.hokkaido.jp +rishiri.hokkaido.jp +rishirifuji.hokkaido.jp +saroma.hokkaido.jp +sarufutsu.hokkaido.jp +shakotan.hokkaido.jp +shari.hokkaido.jp +shibecha.hokkaido.jp +shibetsu.hokkaido.jp +shikabe.hokkaido.jp +shikaoi.hokkaido.jp +shimamaki.hokkaido.jp +shimizu.hokkaido.jp +shimokawa.hokkaido.jp +shinshinotsu.hokkaido.jp +shintoku.hokkaido.jp +shiranuka.hokkaido.jp +shiraoi.hokkaido.jp +shiriuchi.hokkaido.jp +sobetsu.hokkaido.jp +sunagawa.hokkaido.jp +taiki.hokkaido.jp +takasu.hokkaido.jp +takikawa.hokkaido.jp +takinoue.hokkaido.jp +teshikaga.hokkaido.jp +tobetsu.hokkaido.jp +tohma.hokkaido.jp +tomakomai.hokkaido.jp +tomari.hokkaido.jp +toya.hokkaido.jp +toyako.hokkaido.jp +toyotomi.hokkaido.jp +toyoura.hokkaido.jp +tsubetsu.hokkaido.jp +tsukigata.hokkaido.jp +urakawa.hokkaido.jp +urausu.hokkaido.jp +uryu.hokkaido.jp +utashinai.hokkaido.jp +wakkanai.hokkaido.jp +wassamu.hokkaido.jp +yakumo.hokkaido.jp +yoichi.hokkaido.jp +aioi.hyogo.jp +akashi.hyogo.jp +ako.hyogo.jp +amagasaki.hyogo.jp +aogaki.hyogo.jp +asago.hyogo.jp +ashiya.hyogo.jp +awaji.hyogo.jp +fukusaki.hyogo.jp +goshiki.hyogo.jp +harima.hyogo.jp +himeji.hyogo.jp +ichikawa.hyogo.jp +inagawa.hyogo.jp +itami.hyogo.jp +kakogawa.hyogo.jp +kamigori.hyogo.jp +kamikawa.hyogo.jp +kasai.hyogo.jp +kasuga.hyogo.jp +kawanishi.hyogo.jp +miki.hyogo.jp +minamiawaji.hyogo.jp +nishinomiya.hyogo.jp +nishiwaki.hyogo.jp +ono.hyogo.jp +sanda.hyogo.jp +sannan.hyogo.jp 
+sasayama.hyogo.jp +sayo.hyogo.jp +shingu.hyogo.jp +shinonsen.hyogo.jp +shiso.hyogo.jp +sumoto.hyogo.jp +taishi.hyogo.jp +taka.hyogo.jp +takarazuka.hyogo.jp +takasago.hyogo.jp +takino.hyogo.jp +tamba.hyogo.jp +tatsuno.hyogo.jp +toyooka.hyogo.jp +yabu.hyogo.jp +yashiro.hyogo.jp +yoka.hyogo.jp +yokawa.hyogo.jp +ami.ibaraki.jp +asahi.ibaraki.jp +bando.ibaraki.jp +chikusei.ibaraki.jp +daigo.ibaraki.jp +fujishiro.ibaraki.jp +hitachi.ibaraki.jp +hitachinaka.ibaraki.jp +hitachiomiya.ibaraki.jp +hitachiota.ibaraki.jp +ibaraki.ibaraki.jp +ina.ibaraki.jp +inashiki.ibaraki.jp +itako.ibaraki.jp +iwama.ibaraki.jp +joso.ibaraki.jp +kamisu.ibaraki.jp +kasama.ibaraki.jp +kashima.ibaraki.jp +kasumigaura.ibaraki.jp +koga.ibaraki.jp +miho.ibaraki.jp +mito.ibaraki.jp +moriya.ibaraki.jp +naka.ibaraki.jp +namegata.ibaraki.jp +oarai.ibaraki.jp +ogawa.ibaraki.jp +omitama.ibaraki.jp +ryugasaki.ibaraki.jp +sakai.ibaraki.jp +sakuragawa.ibaraki.jp +shimodate.ibaraki.jp +shimotsuma.ibaraki.jp +shirosato.ibaraki.jp +sowa.ibaraki.jp +suifu.ibaraki.jp +takahagi.ibaraki.jp +tamatsukuri.ibaraki.jp +tokai.ibaraki.jp +tomobe.ibaraki.jp +tone.ibaraki.jp +toride.ibaraki.jp +tsuchiura.ibaraki.jp +tsukuba.ibaraki.jp +uchihara.ibaraki.jp +ushiku.ibaraki.jp +yachiyo.ibaraki.jp +yamagata.ibaraki.jp +yawara.ibaraki.jp +yuki.ibaraki.jp +anamizu.ishikawa.jp +hakui.ishikawa.jp +hakusan.ishikawa.jp +kaga.ishikawa.jp +kahoku.ishikawa.jp +kanazawa.ishikawa.jp +kawakita.ishikawa.jp +komatsu.ishikawa.jp +nakanoto.ishikawa.jp +nanao.ishikawa.jp +nomi.ishikawa.jp +nonoichi.ishikawa.jp +noto.ishikawa.jp +shika.ishikawa.jp +suzu.ishikawa.jp +tsubata.ishikawa.jp +tsurugi.ishikawa.jp +uchinada.ishikawa.jp +wajima.ishikawa.jp +fudai.iwate.jp +fujisawa.iwate.jp +hanamaki.iwate.jp +hiraizumi.iwate.jp +hirono.iwate.jp +ichinohe.iwate.jp +ichinoseki.iwate.jp +iwaizumi.iwate.jp +iwate.iwate.jp +joboji.iwate.jp +kamaishi.iwate.jp +kanegasaki.iwate.jp +karumai.iwate.jp +kawai.iwate.jp +kitakami.iwate.jp +kuji.iwate.jp 
+kunohe.iwate.jp +kuzumaki.iwate.jp +miyako.iwate.jp +mizusawa.iwate.jp +morioka.iwate.jp +ninohe.iwate.jp +noda.iwate.jp +ofunato.iwate.jp +oshu.iwate.jp +otsuchi.iwate.jp +rikuzentakata.iwate.jp +shiwa.iwate.jp +shizukuishi.iwate.jp +sumita.iwate.jp +tanohata.iwate.jp +tono.iwate.jp +yahaba.iwate.jp +yamada.iwate.jp +ayagawa.kagawa.jp +higashikagawa.kagawa.jp +kanonji.kagawa.jp +kotohira.kagawa.jp +manno.kagawa.jp +marugame.kagawa.jp +mitoyo.kagawa.jp +naoshima.kagawa.jp +sanuki.kagawa.jp +tadotsu.kagawa.jp +takamatsu.kagawa.jp +tonosho.kagawa.jp +uchinomi.kagawa.jp +utazu.kagawa.jp +zentsuji.kagawa.jp +akune.kagoshima.jp +amami.kagoshima.jp +hioki.kagoshima.jp +isa.kagoshima.jp +isen.kagoshima.jp +izumi.kagoshima.jp +kagoshima.kagoshima.jp +kanoya.kagoshima.jp +kawanabe.kagoshima.jp +kinko.kagoshima.jp +kouyama.kagoshima.jp +makurazaki.kagoshima.jp +matsumoto.kagoshima.jp +minamitane.kagoshima.jp +nakatane.kagoshima.jp +nishinoomote.kagoshima.jp +satsumasendai.kagoshima.jp +soo.kagoshima.jp +tarumizu.kagoshima.jp +yusui.kagoshima.jp +aikawa.kanagawa.jp +atsugi.kanagawa.jp +ayase.kanagawa.jp +chigasaki.kanagawa.jp +ebina.kanagawa.jp +fujisawa.kanagawa.jp +hadano.kanagawa.jp +hakone.kanagawa.jp +hiratsuka.kanagawa.jp +isehara.kanagawa.jp +kaisei.kanagawa.jp +kamakura.kanagawa.jp +kiyokawa.kanagawa.jp +matsuda.kanagawa.jp +minamiashigara.kanagawa.jp +miura.kanagawa.jp +nakai.kanagawa.jp +ninomiya.kanagawa.jp +odawara.kanagawa.jp +oi.kanagawa.jp +oiso.kanagawa.jp +sagamihara.kanagawa.jp +samukawa.kanagawa.jp +tsukui.kanagawa.jp +yamakita.kanagawa.jp +yamato.kanagawa.jp +yokosuka.kanagawa.jp +yugawara.kanagawa.jp +zama.kanagawa.jp +zushi.kanagawa.jp +aki.kochi.jp +geisei.kochi.jp +hidaka.kochi.jp +higashitsuno.kochi.jp +ino.kochi.jp +kagami.kochi.jp +kami.kochi.jp +kitagawa.kochi.jp +kochi.kochi.jp +mihara.kochi.jp +motoyama.kochi.jp +muroto.kochi.jp +nahari.kochi.jp +nakamura.kochi.jp +nankoku.kochi.jp +nishitosa.kochi.jp +niyodogawa.kochi.jp +ochi.kochi.jp 
+okawa.kochi.jp +otoyo.kochi.jp +otsuki.kochi.jp +sakawa.kochi.jp +sukumo.kochi.jp +susaki.kochi.jp +tosa.kochi.jp +tosashimizu.kochi.jp +toyo.kochi.jp +tsuno.kochi.jp +umaji.kochi.jp +yasuda.kochi.jp +yusuhara.kochi.jp +amakusa.kumamoto.jp +arao.kumamoto.jp +aso.kumamoto.jp +choyo.kumamoto.jp +gyokuto.kumamoto.jp +kamiamakusa.kumamoto.jp +kikuchi.kumamoto.jp +kumamoto.kumamoto.jp +mashiki.kumamoto.jp +mifune.kumamoto.jp +minamata.kumamoto.jp +minamioguni.kumamoto.jp +nagasu.kumamoto.jp +nishihara.kumamoto.jp +oguni.kumamoto.jp +ozu.kumamoto.jp +sumoto.kumamoto.jp +takamori.kumamoto.jp +uki.kumamoto.jp +uto.kumamoto.jp +yamaga.kumamoto.jp +yamato.kumamoto.jp +yatsushiro.kumamoto.jp +ayabe.kyoto.jp +fukuchiyama.kyoto.jp +higashiyama.kyoto.jp +ide.kyoto.jp +ine.kyoto.jp +joyo.kyoto.jp +kameoka.kyoto.jp +kamo.kyoto.jp +kita.kyoto.jp +kizu.kyoto.jp +kumiyama.kyoto.jp +kyotamba.kyoto.jp +kyotanabe.kyoto.jp +kyotango.kyoto.jp +maizuru.kyoto.jp +minami.kyoto.jp +minamiyamashiro.kyoto.jp +miyazu.kyoto.jp +muko.kyoto.jp +nagaokakyo.kyoto.jp +nakagyo.kyoto.jp +nantan.kyoto.jp +oyamazaki.kyoto.jp +sakyo.kyoto.jp +seika.kyoto.jp +tanabe.kyoto.jp +uji.kyoto.jp +ujitawara.kyoto.jp +wazuka.kyoto.jp +yamashina.kyoto.jp +yawata.kyoto.jp +asahi.mie.jp +inabe.mie.jp +ise.mie.jp +kameyama.mie.jp +kawagoe.mie.jp +kiho.mie.jp +kisosaki.mie.jp +kiwa.mie.jp +komono.mie.jp +kumano.mie.jp +kuwana.mie.jp +matsusaka.mie.jp +meiwa.mie.jp +mihama.mie.jp +minamiise.mie.jp +misugi.mie.jp +miyama.mie.jp +nabari.mie.jp +shima.mie.jp +suzuka.mie.jp +tado.mie.jp +taiki.mie.jp +taki.mie.jp +tamaki.mie.jp +toba.mie.jp +tsu.mie.jp +udono.mie.jp +ureshino.mie.jp +watarai.mie.jp +yokkaichi.mie.jp +furukawa.miyagi.jp +higashimatsushima.miyagi.jp +ishinomaki.miyagi.jp +iwanuma.miyagi.jp +kakuda.miyagi.jp +kami.miyagi.jp +kawasaki.miyagi.jp +marumori.miyagi.jp +matsushima.miyagi.jp +minamisanriku.miyagi.jp +misato.miyagi.jp +murata.miyagi.jp +natori.miyagi.jp +ogawara.miyagi.jp +ohira.miyagi.jp 
+onagawa.miyagi.jp +osaki.miyagi.jp +rifu.miyagi.jp +semine.miyagi.jp +shibata.miyagi.jp +shichikashuku.miyagi.jp +shikama.miyagi.jp +shiogama.miyagi.jp +shiroishi.miyagi.jp +tagajo.miyagi.jp +taiwa.miyagi.jp +tome.miyagi.jp +tomiya.miyagi.jp +wakuya.miyagi.jp +watari.miyagi.jp +yamamoto.miyagi.jp +zao.miyagi.jp +aya.miyazaki.jp +ebino.miyazaki.jp +gokase.miyazaki.jp +hyuga.miyazaki.jp +kadogawa.miyazaki.jp +kawaminami.miyazaki.jp +kijo.miyazaki.jp +kitagawa.miyazaki.jp +kitakata.miyazaki.jp +kitaura.miyazaki.jp +kobayashi.miyazaki.jp +kunitomi.miyazaki.jp +kushima.miyazaki.jp +mimata.miyazaki.jp +miyakonojo.miyazaki.jp +miyazaki.miyazaki.jp +morotsuka.miyazaki.jp +nichinan.miyazaki.jp +nishimera.miyazaki.jp +nobeoka.miyazaki.jp +saito.miyazaki.jp +shiiba.miyazaki.jp +shintomi.miyazaki.jp +takaharu.miyazaki.jp +takanabe.miyazaki.jp +takazaki.miyazaki.jp +tsuno.miyazaki.jp +achi.nagano.jp +agematsu.nagano.jp +anan.nagano.jp +aoki.nagano.jp +asahi.nagano.jp +azumino.nagano.jp +chikuhoku.nagano.jp +chikuma.nagano.jp +chino.nagano.jp +fujimi.nagano.jp +hakuba.nagano.jp +hara.nagano.jp +hiraya.nagano.jp +iida.nagano.jp +iijima.nagano.jp +iiyama.nagano.jp +iizuna.nagano.jp +ikeda.nagano.jp +ikusaka.nagano.jp +ina.nagano.jp +karuizawa.nagano.jp +kawakami.nagano.jp +kiso.nagano.jp +kisofukushima.nagano.jp +kitaaiki.nagano.jp +komagane.nagano.jp +komoro.nagano.jp +matsukawa.nagano.jp +matsumoto.nagano.jp +miasa.nagano.jp +minamiaiki.nagano.jp +minamimaki.nagano.jp +minamiminowa.nagano.jp +minowa.nagano.jp +miyada.nagano.jp +miyota.nagano.jp +mochizuki.nagano.jp +nagano.nagano.jp +nagawa.nagano.jp +nagiso.nagano.jp +nakagawa.nagano.jp +nakano.nagano.jp +nozawaonsen.nagano.jp +obuse.nagano.jp +ogawa.nagano.jp +okaya.nagano.jp +omachi.nagano.jp +omi.nagano.jp +ookuwa.nagano.jp +ooshika.nagano.jp +otaki.nagano.jp +otari.nagano.jp +sakae.nagano.jp +sakaki.nagano.jp +saku.nagano.jp +sakuho.nagano.jp +shimosuwa.nagano.jp +shinanomachi.nagano.jp +shiojiri.nagano.jp +suwa.nagano.jp 
+suzaka.nagano.jp +takagi.nagano.jp +takamori.nagano.jp +takayama.nagano.jp +tateshina.nagano.jp +tatsuno.nagano.jp +togakushi.nagano.jp +togura.nagano.jp +tomi.nagano.jp +ueda.nagano.jp +wada.nagano.jp +yamagata.nagano.jp +yamanouchi.nagano.jp +yasaka.nagano.jp +yasuoka.nagano.jp +chijiwa.nagasaki.jp +futsu.nagasaki.jp +goto.nagasaki.jp +hasami.nagasaki.jp +hirado.nagasaki.jp +iki.nagasaki.jp +isahaya.nagasaki.jp +kawatana.nagasaki.jp +kuchinotsu.nagasaki.jp +matsuura.nagasaki.jp +nagasaki.nagasaki.jp +obama.nagasaki.jp +omura.nagasaki.jp +oseto.nagasaki.jp +saikai.nagasaki.jp +sasebo.nagasaki.jp +seihi.nagasaki.jp +shimabara.nagasaki.jp +shinkamigoto.nagasaki.jp +togitsu.nagasaki.jp +tsushima.nagasaki.jp +unzen.nagasaki.jp +ando.nara.jp +gose.nara.jp +heguri.nara.jp +higashiyoshino.nara.jp +ikaruga.nara.jp +ikoma.nara.jp +kamikitayama.nara.jp +kanmaki.nara.jp +kashiba.nara.jp +kashihara.nara.jp +katsuragi.nara.jp +kawai.nara.jp +kawakami.nara.jp +kawanishi.nara.jp +koryo.nara.jp +kurotaki.nara.jp +mitsue.nara.jp +miyake.nara.jp +nara.nara.jp +nosegawa.nara.jp +oji.nara.jp +ouda.nara.jp +oyodo.nara.jp +sakurai.nara.jp +sango.nara.jp +shimoichi.nara.jp +shimokitayama.nara.jp +shinjo.nara.jp +soni.nara.jp +takatori.nara.jp +tawaramoto.nara.jp +tenkawa.nara.jp +tenri.nara.jp +uda.nara.jp +yamatokoriyama.nara.jp +yamatotakada.nara.jp +yamazoe.nara.jp +yoshino.nara.jp +aga.niigata.jp +agano.niigata.jp +gosen.niigata.jp +itoigawa.niigata.jp +izumozaki.niigata.jp +joetsu.niigata.jp +kamo.niigata.jp +kariwa.niigata.jp +kashiwazaki.niigata.jp +minamiuonuma.niigata.jp +mitsuke.niigata.jp +muika.niigata.jp +murakami.niigata.jp +myoko.niigata.jp +nagaoka.niigata.jp +niigata.niigata.jp +ojiya.niigata.jp +omi.niigata.jp +sado.niigata.jp +sanjo.niigata.jp +seiro.niigata.jp +seirou.niigata.jp +sekikawa.niigata.jp +shibata.niigata.jp +tagami.niigata.jp +tainai.niigata.jp +tochio.niigata.jp +tokamachi.niigata.jp +tsubame.niigata.jp +tsunan.niigata.jp +uonuma.niigata.jp 
+yahiko.niigata.jp +yoita.niigata.jp +yuzawa.niigata.jp +beppu.oita.jp +bungoono.oita.jp +bungotakada.oita.jp +hasama.oita.jp +hiji.oita.jp +himeshima.oita.jp +hita.oita.jp +kamitsue.oita.jp +kokonoe.oita.jp +kuju.oita.jp +kunisaki.oita.jp +kusu.oita.jp +oita.oita.jp +saiki.oita.jp +taketa.oita.jp +tsukumi.oita.jp +usa.oita.jp +usuki.oita.jp +yufu.oita.jp +akaiwa.okayama.jp +asakuchi.okayama.jp +bizen.okayama.jp +hayashima.okayama.jp +ibara.okayama.jp +kagamino.okayama.jp +kasaoka.okayama.jp +kibichuo.okayama.jp +kumenan.okayama.jp +kurashiki.okayama.jp +maniwa.okayama.jp +misaki.okayama.jp +nagi.okayama.jp +niimi.okayama.jp +nishiawakura.okayama.jp +okayama.okayama.jp +satosho.okayama.jp +setouchi.okayama.jp +shinjo.okayama.jp +shoo.okayama.jp +soja.okayama.jp +takahashi.okayama.jp +tamano.okayama.jp +tsuyama.okayama.jp +wake.okayama.jp +yakage.okayama.jp +aguni.okinawa.jp +ginowan.okinawa.jp +ginoza.okinawa.jp +gushikami.okinawa.jp +haebaru.okinawa.jp +higashi.okinawa.jp +hirara.okinawa.jp +iheya.okinawa.jp +ishigaki.okinawa.jp +ishikawa.okinawa.jp +itoman.okinawa.jp +izena.okinawa.jp +kadena.okinawa.jp +kin.okinawa.jp +kitadaito.okinawa.jp +kitanakagusuku.okinawa.jp +kumejima.okinawa.jp +kunigami.okinawa.jp +minamidaito.okinawa.jp +motobu.okinawa.jp +nago.okinawa.jp +naha.okinawa.jp +nakagusuku.okinawa.jp +nakijin.okinawa.jp +nanjo.okinawa.jp +nishihara.okinawa.jp +ogimi.okinawa.jp +okinawa.okinawa.jp +onna.okinawa.jp +shimoji.okinawa.jp +taketomi.okinawa.jp +tarama.okinawa.jp +tokashiki.okinawa.jp +tomigusuku.okinawa.jp +tonaki.okinawa.jp +urasoe.okinawa.jp +uruma.okinawa.jp +yaese.okinawa.jp +yomitan.okinawa.jp +yonabaru.okinawa.jp +yonaguni.okinawa.jp +zamami.okinawa.jp +abeno.osaka.jp +chihayaakasaka.osaka.jp +chuo.osaka.jp +daito.osaka.jp +fujiidera.osaka.jp +habikino.osaka.jp +hannan.osaka.jp +higashiosaka.osaka.jp +higashisumiyoshi.osaka.jp +higashiyodogawa.osaka.jp +hirakata.osaka.jp +ibaraki.osaka.jp +ikeda.osaka.jp +izumi.osaka.jp +izumiotsu.osaka.jp 
+izumisano.osaka.jp +kadoma.osaka.jp +kaizuka.osaka.jp +kanan.osaka.jp +kashiwara.osaka.jp +katano.osaka.jp +kawachinagano.osaka.jp +kishiwada.osaka.jp +kita.osaka.jp +kumatori.osaka.jp +matsubara.osaka.jp +minato.osaka.jp +minoh.osaka.jp +misaki.osaka.jp +moriguchi.osaka.jp +neyagawa.osaka.jp +nishi.osaka.jp +nose.osaka.jp +osakasayama.osaka.jp +sakai.osaka.jp +sayama.osaka.jp +sennan.osaka.jp +settsu.osaka.jp +shijonawate.osaka.jp +shimamoto.osaka.jp +suita.osaka.jp +tadaoka.osaka.jp +taishi.osaka.jp +tajiri.osaka.jp +takaishi.osaka.jp +takatsuki.osaka.jp +tondabayashi.osaka.jp +toyonaka.osaka.jp +toyono.osaka.jp +yao.osaka.jp +ariake.saga.jp +arita.saga.jp +fukudomi.saga.jp +genkai.saga.jp +hamatama.saga.jp +hizen.saga.jp +imari.saga.jp +kamimine.saga.jp +kanzaki.saga.jp +karatsu.saga.jp +kashima.saga.jp +kitagata.saga.jp +kitahata.saga.jp +kiyama.saga.jp +kouhoku.saga.jp +kyuragi.saga.jp +nishiarita.saga.jp +ogi.saga.jp +omachi.saga.jp +ouchi.saga.jp +saga.saga.jp +shiroishi.saga.jp +taku.saga.jp +tara.saga.jp +tosu.saga.jp +yoshinogari.saga.jp +arakawa.saitama.jp +asaka.saitama.jp +chichibu.saitama.jp +fujimi.saitama.jp +fujimino.saitama.jp +fukaya.saitama.jp +hanno.saitama.jp +hanyu.saitama.jp +hasuda.saitama.jp +hatogaya.saitama.jp +hatoyama.saitama.jp +hidaka.saitama.jp +higashichichibu.saitama.jp +higashimatsuyama.saitama.jp +honjo.saitama.jp +ina.saitama.jp +iruma.saitama.jp +iwatsuki.saitama.jp +kamiizumi.saitama.jp +kamikawa.saitama.jp +kamisato.saitama.jp +kasukabe.saitama.jp +kawagoe.saitama.jp +kawaguchi.saitama.jp +kawajima.saitama.jp +kazo.saitama.jp +kitamoto.saitama.jp +koshigaya.saitama.jp +kounosu.saitama.jp +kuki.saitama.jp +kumagaya.saitama.jp +matsubushi.saitama.jp +minano.saitama.jp +misato.saitama.jp +miyashiro.saitama.jp +miyoshi.saitama.jp +moroyama.saitama.jp +nagatoro.saitama.jp +namegawa.saitama.jp +niiza.saitama.jp +ogano.saitama.jp +ogawa.saitama.jp +ogose.saitama.jp +okegawa.saitama.jp +omiya.saitama.jp +otaki.saitama.jp 
+ranzan.saitama.jp +ryokami.saitama.jp +saitama.saitama.jp +sakado.saitama.jp +satte.saitama.jp +sayama.saitama.jp +shiki.saitama.jp +shiraoka.saitama.jp +soka.saitama.jp +sugito.saitama.jp +toda.saitama.jp +tokigawa.saitama.jp +tokorozawa.saitama.jp +tsurugashima.saitama.jp +urawa.saitama.jp +warabi.saitama.jp +yashio.saitama.jp +yokoze.saitama.jp +yono.saitama.jp +yorii.saitama.jp +yoshida.saitama.jp +yoshikawa.saitama.jp +yoshimi.saitama.jp +aisho.shiga.jp +gamo.shiga.jp +higashiomi.shiga.jp +hikone.shiga.jp +koka.shiga.jp +konan.shiga.jp +kosei.shiga.jp +koto.shiga.jp +kusatsu.shiga.jp +maibara.shiga.jp +moriyama.shiga.jp +nagahama.shiga.jp +nishiazai.shiga.jp +notogawa.shiga.jp +omihachiman.shiga.jp +otsu.shiga.jp +ritto.shiga.jp +ryuoh.shiga.jp +takashima.shiga.jp +takatsuki.shiga.jp +torahime.shiga.jp +toyosato.shiga.jp +yasu.shiga.jp +akagi.shimane.jp +ama.shimane.jp +gotsu.shimane.jp +hamada.shimane.jp +higashiizumo.shimane.jp +hikawa.shimane.jp +hikimi.shimane.jp +izumo.shimane.jp +kakinoki.shimane.jp +masuda.shimane.jp +matsue.shimane.jp +misato.shimane.jp +nishinoshima.shimane.jp +ohda.shimane.jp +okinoshima.shimane.jp +okuizumo.shimane.jp +shimane.shimane.jp +tamayu.shimane.jp +tsuwano.shimane.jp +unnan.shimane.jp +yakumo.shimane.jp +yasugi.shimane.jp +yatsuka.shimane.jp +arai.shizuoka.jp +atami.shizuoka.jp +fuji.shizuoka.jp +fujieda.shizuoka.jp +fujikawa.shizuoka.jp +fujinomiya.shizuoka.jp +fukuroi.shizuoka.jp +gotemba.shizuoka.jp +haibara.shizuoka.jp +hamamatsu.shizuoka.jp +higashiizu.shizuoka.jp +ito.shizuoka.jp +iwata.shizuoka.jp +izu.shizuoka.jp +izunokuni.shizuoka.jp +kakegawa.shizuoka.jp +kannami.shizuoka.jp +kawanehon.shizuoka.jp +kawazu.shizuoka.jp +kikugawa.shizuoka.jp +kosai.shizuoka.jp +makinohara.shizuoka.jp +matsuzaki.shizuoka.jp +minamiizu.shizuoka.jp +mishima.shizuoka.jp +morimachi.shizuoka.jp +nishiizu.shizuoka.jp +numazu.shizuoka.jp +omaezaki.shizuoka.jp +shimada.shizuoka.jp +shimizu.shizuoka.jp +shimoda.shizuoka.jp 
+shizuoka.shizuoka.jp +susono.shizuoka.jp +yaizu.shizuoka.jp +yoshida.shizuoka.jp +ashikaga.tochigi.jp +bato.tochigi.jp +haga.tochigi.jp +ichikai.tochigi.jp +iwafune.tochigi.jp +kaminokawa.tochigi.jp +kanuma.tochigi.jp +karasuyama.tochigi.jp +kuroiso.tochigi.jp +mashiko.tochigi.jp +mibu.tochigi.jp +moka.tochigi.jp +motegi.tochigi.jp +nasu.tochigi.jp +nasushiobara.tochigi.jp +nikko.tochigi.jp +nishikata.tochigi.jp +nogi.tochigi.jp +ohira.tochigi.jp +ohtawara.tochigi.jp +oyama.tochigi.jp +sakura.tochigi.jp +sano.tochigi.jp +shimotsuke.tochigi.jp +shioya.tochigi.jp +takanezawa.tochigi.jp +tochigi.tochigi.jp +tsuga.tochigi.jp +ujiie.tochigi.jp +utsunomiya.tochigi.jp +yaita.tochigi.jp +aizumi.tokushima.jp +anan.tokushima.jp +ichiba.tokushima.jp +itano.tokushima.jp +kainan.tokushima.jp +komatsushima.tokushima.jp +matsushige.tokushima.jp +mima.tokushima.jp +minami.tokushima.jp +miyoshi.tokushima.jp +mugi.tokushima.jp +nakagawa.tokushima.jp +naruto.tokushima.jp +sanagochi.tokushima.jp +shishikui.tokushima.jp +tokushima.tokushima.jp +wajiki.tokushima.jp +adachi.tokyo.jp +akiruno.tokyo.jp +akishima.tokyo.jp +aogashima.tokyo.jp +arakawa.tokyo.jp +bunkyo.tokyo.jp +chiyoda.tokyo.jp +chofu.tokyo.jp +chuo.tokyo.jp +edogawa.tokyo.jp +fuchu.tokyo.jp +fussa.tokyo.jp +hachijo.tokyo.jp +hachioji.tokyo.jp +hamura.tokyo.jp +higashikurume.tokyo.jp +higashimurayama.tokyo.jp +higashiyamato.tokyo.jp +hino.tokyo.jp +hinode.tokyo.jp +hinohara.tokyo.jp +inagi.tokyo.jp +itabashi.tokyo.jp +katsushika.tokyo.jp +kita.tokyo.jp +kiyose.tokyo.jp +kodaira.tokyo.jp +koganei.tokyo.jp +kokubunji.tokyo.jp +komae.tokyo.jp +koto.tokyo.jp +kouzushima.tokyo.jp +kunitachi.tokyo.jp +machida.tokyo.jp +meguro.tokyo.jp +minato.tokyo.jp +mitaka.tokyo.jp +mizuho.tokyo.jp +musashimurayama.tokyo.jp +musashino.tokyo.jp +nakano.tokyo.jp +nerima.tokyo.jp +ogasawara.tokyo.jp +okutama.tokyo.jp +ome.tokyo.jp +oshima.tokyo.jp +ota.tokyo.jp +setagaya.tokyo.jp +shibuya.tokyo.jp +shinagawa.tokyo.jp +shinjuku.tokyo.jp 
+suginami.tokyo.jp +sumida.tokyo.jp +tachikawa.tokyo.jp +taito.tokyo.jp +tama.tokyo.jp +toshima.tokyo.jp +chizu.tottori.jp +hino.tottori.jp +kawahara.tottori.jp +koge.tottori.jp +kotoura.tottori.jp +misasa.tottori.jp +nanbu.tottori.jp +nichinan.tottori.jp +sakaiminato.tottori.jp +tottori.tottori.jp +wakasa.tottori.jp +yazu.tottori.jp +yonago.tottori.jp +asahi.toyama.jp +fuchu.toyama.jp +fukumitsu.toyama.jp +funahashi.toyama.jp +himi.toyama.jp +imizu.toyama.jp +inami.toyama.jp +johana.toyama.jp +kamiichi.toyama.jp +kurobe.toyama.jp +nakaniikawa.toyama.jp +namerikawa.toyama.jp +nanto.toyama.jp +nyuzen.toyama.jp +oyabe.toyama.jp +taira.toyama.jp +takaoka.toyama.jp +tateyama.toyama.jp +toga.toyama.jp +tonami.toyama.jp +toyama.toyama.jp +unazuki.toyama.jp +uozu.toyama.jp +yamada.toyama.jp +arida.wakayama.jp +aridagawa.wakayama.jp +gobo.wakayama.jp +hashimoto.wakayama.jp +hidaka.wakayama.jp +hirogawa.wakayama.jp +inami.wakayama.jp +iwade.wakayama.jp +kainan.wakayama.jp +kamitonda.wakayama.jp +katsuragi.wakayama.jp +kimino.wakayama.jp +kinokawa.wakayama.jp +kitayama.wakayama.jp +koya.wakayama.jp +koza.wakayama.jp +kozagawa.wakayama.jp +kudoyama.wakayama.jp +kushimoto.wakayama.jp +mihama.wakayama.jp +misato.wakayama.jp +nachikatsuura.wakayama.jp +shingu.wakayama.jp +shirahama.wakayama.jp +taiji.wakayama.jp +tanabe.wakayama.jp +wakayama.wakayama.jp +yuasa.wakayama.jp +yura.wakayama.jp +asahi.yamagata.jp +funagata.yamagata.jp +higashine.yamagata.jp +iide.yamagata.jp +kahoku.yamagata.jp +kaminoyama.yamagata.jp +kaneyama.yamagata.jp +kawanishi.yamagata.jp +mamurogawa.yamagata.jp +mikawa.yamagata.jp +murayama.yamagata.jp +nagai.yamagata.jp +nakayama.yamagata.jp +nanyo.yamagata.jp +nishikawa.yamagata.jp +obanazawa.yamagata.jp +oe.yamagata.jp +oguni.yamagata.jp +ohkura.yamagata.jp +oishida.yamagata.jp +sagae.yamagata.jp +sakata.yamagata.jp +sakegawa.yamagata.jp +shinjo.yamagata.jp +shirataka.yamagata.jp +shonai.yamagata.jp +takahata.yamagata.jp +tendo.yamagata.jp 
+tozawa.yamagata.jp +tsuruoka.yamagata.jp +yamagata.yamagata.jp +yamanobe.yamagata.jp +yonezawa.yamagata.jp +yuza.yamagata.jp +abu.yamaguchi.jp +hagi.yamaguchi.jp +hikari.yamaguchi.jp +hofu.yamaguchi.jp +iwakuni.yamaguchi.jp +kudamatsu.yamaguchi.jp +mitou.yamaguchi.jp +nagato.yamaguchi.jp +oshima.yamaguchi.jp +shimonoseki.yamaguchi.jp +shunan.yamaguchi.jp +tabuse.yamaguchi.jp +tokuyama.yamaguchi.jp +toyota.yamaguchi.jp +ube.yamaguchi.jp +yuu.yamaguchi.jp +chuo.yamanashi.jp +doshi.yamanashi.jp +fuefuki.yamanashi.jp +fujikawa.yamanashi.jp +fujikawaguchiko.yamanashi.jp +fujiyoshida.yamanashi.jp +hayakawa.yamanashi.jp +hokuto.yamanashi.jp +ichikawamisato.yamanashi.jp +kai.yamanashi.jp +kofu.yamanashi.jp +koshu.yamanashi.jp +kosuge.yamanashi.jp +minami-alps.yamanashi.jp +minobu.yamanashi.jp +nakamichi.yamanashi.jp +nanbu.yamanashi.jp +narusawa.yamanashi.jp +nirasaki.yamanashi.jp +nishikatsura.yamanashi.jp +oshino.yamanashi.jp +otsuki.yamanashi.jp +showa.yamanashi.jp +tabayama.yamanashi.jp +tsuru.yamanashi.jp +uenohara.yamanashi.jp +yamanakako.yamanashi.jp +yamanashi.yamanashi.jp + +// ke : http://www.kenic.or.ke/index.php/en/ke-domains/ke-domains +ke +ac.ke +co.ke +go.ke +info.ke +me.ke +mobi.ke +ne.ke +or.ke +sc.ke + +// kg : http://www.domain.kg/dmn_n.html +kg +com.kg +edu.kg +gov.kg +mil.kg +net.kg +org.kg + +// kh : http://www.mptc.gov.kh/dns_registration.htm +*.kh + +// ki : https://www.iana.org/domains/root/db/ki.html +ki +biz.ki +com.ki +edu.ki +gov.ki +info.ki +net.ki +org.ki + +// km : https://www.iana.org/domains/root/db/km.html +// http://www.domaine.km/documents/charte.doc +km +ass.km +com.km +edu.km +gov.km +mil.km +nom.km +org.km +prd.km +tm.km +// These are only mentioned as proposed suggestions at domaine.km, but +// https://www.iana.org/domains/root/db/km.html says they're available for registration: +asso.km +coop.km +gouv.km +medecin.km +notaires.km +pharmaciens.km +presse.km +veterinaire.km + +// kn : https://www.iana.org/domains/root/db/kn.html +// 
http://www.dot.kn/domainRules.html +kn +edu.kn +gov.kn +net.kn +org.kn + +// kp : http://www.kcce.kp/en_index.php +kp +com.kp +edu.kp +gov.kp +org.kp +rep.kp +tra.kp + +// kr : https://www.iana.org/domains/root/db/kr.html +// see also: https://krnic.kisa.or.kr/jsp/infoboard/law/domBylawsReg.jsp +kr +ac.kr +ai.kr +co.kr +es.kr +go.kr +hs.kr +io.kr +it.kr +kg.kr +me.kr +mil.kr +ms.kr +ne.kr +or.kr +pe.kr +re.kr +sc.kr +// kr geographical names +busan.kr +chungbuk.kr +chungnam.kr +daegu.kr +daejeon.kr +gangwon.kr +gwangju.kr +gyeongbuk.kr +gyeonggi.kr +gyeongnam.kr +incheon.kr +jeju.kr +jeonbuk.kr +jeonnam.kr +seoul.kr +ulsan.kr + +// kw : https://www.nic.kw/policies/ +// Confirmed by registry +kw +com.kw +edu.kw +emb.kw +gov.kw +ind.kw +net.kw +org.kw + +// ky : http://www.icta.ky/da_ky_reg_dom.php +// Confirmed by registry 2008-06-17 +ky +com.ky +edu.ky +net.ky +org.ky + +// kz : https://www.iana.org/domains/root/db/kz.html +// see also: http://www.nic.kz/rules/index.jsp +kz +com.kz +edu.kz +gov.kz +mil.kz +net.kz +org.kz + +// la : https://www.iana.org/domains/root/db/la.html +// Submitted by registry +la +com.la +edu.la +gov.la +info.la +int.la +net.la +org.la +per.la + +// lb : https://www.iana.org/domains/root/db/lb.html +// Submitted by registry +lb +com.lb +edu.lb +gov.lb +net.lb +org.lb + +// lc : https://www.iana.org/domains/root/db/lc.html +// see also: http://www.nic.lc/rules.htm +lc +co.lc +com.lc +edu.lc +gov.lc +net.lc +org.lc + +// li : https://www.iana.org/domains/root/db/li.html +li + +// lk : https://www.iana.org/domains/root/db/lk.html +lk +ac.lk +assn.lk +com.lk +edu.lk +gov.lk +grp.lk +hotel.lk +int.lk +ltd.lk +net.lk +ngo.lk +org.lk +sch.lk +soc.lk +web.lk + +// lr : http://psg.com/dns/lr/lr.txt +// Submitted by registry +lr +com.lr +edu.lr +gov.lr +net.lr +org.lr + +// ls : http://www.nic.ls/ +// Confirmed by registry +ls +ac.ls +biz.ls +co.ls +edu.ls +gov.ls +info.ls +net.ls +org.ls +sc.ls + +// lt : 
https://www.iana.org/domains/root/db/lt.html +lt +// gov.lt : http://www.gov.lt/index_en.php +gov.lt + +// lu : http://www.dns.lu/en/ +lu + +// lv : https://www.iana.org/domains/root/db/lv.html +lv +asn.lv +com.lv +conf.lv +edu.lv +gov.lv +id.lv +mil.lv +net.lv +org.lv + +// ly : http://www.nic.ly/regulations.php +ly +com.ly +edu.ly +gov.ly +id.ly +med.ly +net.ly +org.ly +plc.ly +sch.ly + +// ma : https://www.iana.org/domains/root/db/ma.html +// http://www.anrt.ma/fr/admin/download/upload/file_fr782.pdf +ma +ac.ma +co.ma +gov.ma +net.ma +org.ma +press.ma + +// mc : http://www.nic.mc/ +mc +asso.mc +tm.mc + +// md : https://www.iana.org/domains/root/db/md.html +md + +// me : https://www.iana.org/domains/root/db/me.html +me +ac.me +co.me +edu.me +gov.me +its.me +net.me +org.me +priv.me + +// mg : https://nic.mg +mg +co.mg +com.mg +edu.mg +gov.mg +mil.mg +nom.mg +org.mg +prd.mg + +// mh : https://www.iana.org/domains/root/db/mh.html +mh + +// mil : https://www.iana.org/domains/root/db/mil.html +mil + +// mk : https://www.iana.org/domains/root/db/mk.html +// see also: http://dns.marnet.net.mk/postapka.php +mk +com.mk +edu.mk +gov.mk +inf.mk +name.mk +net.mk +org.mk + +// ml : https://www.iana.org/domains/root/db/ml.html +// Confirmed by Boubacar NDIAYE 2024-12-31 +ml +ac.ml +art.ml +asso.ml +com.ml +edu.ml +gouv.ml +gov.ml +info.ml +inst.ml +net.ml +org.ml +pr.ml +presse.ml + +// mm : https://www.iana.org/domains/root/db/mm.html +*.mm + +// mn : https://www.iana.org/domains/root/db/mn.html +mn +edu.mn +gov.mn +org.mn + +// mo : http://www.monic.net.mo/ +mo +com.mo +edu.mo +gov.mo +net.mo +org.mo + +// mobi : https://www.iana.org/domains/root/db/mobi.html +mobi + +// mp : http://www.dot.mp/ +// Confirmed by registry 2008-06-17 +mp + +// mq : https://www.iana.org/domains/root/db/mq.html +mq + +// mr : https://www.iana.org/domains/root/db/mr.html +mr +gov.mr + +// ms : https://www.iana.org/domains/root/db/ms.html +ms +com.ms +edu.ms +gov.ms +net.ms +org.ms + +// mt : 
https://www.nic.org.mt/go/policy +// Submitted by registry +mt +com.mt +edu.mt +net.mt +org.mt + +// mu : https://www.iana.org/domains/root/db/mu.html +mu +ac.mu +co.mu +com.mu +gov.mu +net.mu +or.mu +org.mu + +// museum : https://welcome.museum/wp-content/uploads/2018/05/20180525-Registration-Policy-MUSEUM-EN_VF-2.pdf https://welcome.museum/buy-your-dot-museum-2/ +museum + +// mv : https://www.iana.org/domains/root/db/mv.html +// "mv" included because, contra Wikipedia, google.mv exists. +mv +aero.mv +biz.mv +com.mv +coop.mv +edu.mv +gov.mv +info.mv +int.mv +mil.mv +museum.mv +name.mv +net.mv +org.mv +pro.mv + +// mw : http://www.registrar.mw/ +mw +ac.mw +biz.mw +co.mw +com.mw +coop.mw +edu.mw +gov.mw +int.mw +net.mw +org.mw + +// mx : http://www.nic.mx/ +// Submitted by registry +mx +com.mx +edu.mx +gob.mx +net.mx +org.mx + +// my : http://www.mynic.my/ +// Available strings: https://mynic.my/resources/domains/buying-a-domain/ +my +biz.my +com.my +edu.my +gov.my +mil.my +name.my +net.my +org.my + +// mz : http://www.uem.mz/ +// Submitted by registry +mz +ac.mz +adv.mz +co.mz +edu.mz +gov.mz +mil.mz +net.mz +org.mz + +// na : http://www.na-nic.com.na/ +na +alt.na +co.na +com.na +gov.na +net.na +org.na + +// name : http://www.nic.name/ +// Regarding 2LDs: https://github.com/publicsuffix/list/issues/2306 +name + +// nc : http://www.cctld.nc/ +nc +asso.nc +nom.nc + +// ne : https://www.iana.org/domains/root/db/ne.html +ne + +// net : https://www.iana.org/domains/root/db/net.html +net + +// nf : https://www.iana.org/domains/root/db/nf.html +nf +arts.nf +com.nf +firm.nf +info.nf +net.nf +other.nf +per.nf +rec.nf +store.nf +web.nf + +// ng : http://www.nira.org.ng/index.php/join-us/register-ng-domain/189-nira-slds +ng +com.ng +edu.ng +gov.ng +i.ng +mil.ng +mobi.ng +name.ng +net.ng +org.ng +sch.ng + +// ni : http://www.nic.ni/ +ni +ac.ni +biz.ni +co.ni +com.ni +edu.ni +gob.ni +in.ni +info.ni +int.ni +mil.ni +net.ni +nom.ni +org.ni +web.ni + +// nl : 
https://www.iana.org/domains/root/db/nl.html +// https://www.sidn.nl/ +nl + +// no : https://www.norid.no/en/om-domenenavn/regelverk-for-no/ +// Norid geographical second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-b/ +// Norid category second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-c/ +// Norid category second-level domains managed by parties other than Norid : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-d/ +// RSS feed: https://teknisk.norid.no/en/feed/ +no +// Norid category second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-c/ +fhs.no +folkebibl.no +fylkesbibl.no +idrett.no +museum.no +priv.no +vgs.no +// Norid category second-level domains managed by parties other than Norid : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-d/ +dep.no +herad.no +kommune.no +mil.no +stat.no +// Norid geographical second level domains : https://www.norid.no/en/om-domenenavn/regelverk-for-no/vedlegg-b/ +// counties +aa.no +ah.no +bu.no +fm.no +hl.no +hm.no +jan-mayen.no +mr.no +nl.no +nt.no +of.no +ol.no +oslo.no +rl.no +sf.no +st.no +svalbard.no +tm.no +tr.no +va.no +vf.no +// primary and lower secondary schools per county +gs.aa.no +gs.ah.no +gs.bu.no +gs.fm.no +gs.hl.no +gs.hm.no +gs.jan-mayen.no +gs.mr.no +gs.nl.no +gs.nt.no +gs.of.no +gs.ol.no +gs.oslo.no +gs.rl.no +gs.sf.no +gs.st.no +gs.svalbard.no +gs.tm.no +gs.tr.no +gs.va.no +gs.vf.no +// cities +akrehamn.no +åkrehamn.no +algard.no +ålgård.no +arna.no +bronnoysund.no +brønnøysund.no +brumunddal.no +bryne.no +drobak.no +drøbak.no +egersund.no +fetsund.no +floro.no +florø.no +fredrikstad.no +hokksund.no +honefoss.no +hønefoss.no +jessheim.no +jorpeland.no +jørpeland.no +kirkenes.no +kopervik.no +krokstadelva.no +langevag.no +langevåg.no +leirvik.no +mjondalen.no +mjøndalen.no +mo-i-rana.no +mosjoen.no +mosjøen.no +nesoddtangen.no +orkanger.no +osoyro.no +osøyro.no +raholt.no 
+råholt.no +sandnessjoen.no +sandnessjøen.no +skedsmokorset.no +slattum.no +spjelkavik.no +stathelle.no +stavern.no +stjordalshalsen.no +stjørdalshalsen.no +tananger.no +tranby.no +vossevangen.no +// communities +aarborte.no +aejrie.no +afjord.no +åfjord.no +agdenes.no +nes.akershus.no +aknoluokta.no +ákŋoluokta.no +al.no +ål.no +alaheadju.no +álaheadju.no +alesund.no +ålesund.no +alstahaug.no +alta.no +áltá.no +alvdal.no +amli.no +åmli.no +amot.no +åmot.no +andasuolo.no +andebu.no +andoy.no +andøy.no +ardal.no +årdal.no +aremark.no +arendal.no +ås.no +aseral.no +åseral.no +asker.no +askim.no +askoy.no +askøy.no +askvoll.no +asnes.no +åsnes.no +audnedaln.no +aukra.no +aure.no +aurland.no +aurskog-holand.no +aurskog-høland.no +austevoll.no +austrheim.no +averoy.no +averøy.no +badaddja.no +bådåddjå.no +bærum.no +bahcavuotna.no +báhcavuotna.no +bahccavuotna.no +báhccavuotna.no +baidar.no +báidár.no +bajddar.no +bájddar.no +balat.no +bálát.no +balestrand.no +ballangen.no +balsfjord.no +bamble.no +bardu.no +barum.no +batsfjord.no +båtsfjord.no +bearalvahki.no +bearalváhki.no +beardu.no +beiarn.no +berg.no +bergen.no +berlevag.no +berlevåg.no +bievat.no +bievát.no +bindal.no +birkenes.no +bjarkoy.no +bjarkøy.no +bjerkreim.no +bjugn.no +bodo.no +bodø.no +bokn.no +bomlo.no +bømlo.no +bremanger.no +bronnoy.no +brønnøy.no +budejju.no +nes.buskerud.no +bygland.no +bykle.no +cahcesuolo.no +čáhcesuolo.no +davvenjarga.no +davvenjárga.no +davvesiida.no +deatnu.no +dielddanuorri.no +divtasvuodna.no +divttasvuotna.no +donna.no +dønna.no +dovre.no +drammen.no +drangedal.no +dyroy.no +dyrøy.no +eid.no +eidfjord.no +eidsberg.no +eidskog.no +eidsvoll.no +eigersund.no +elverum.no +enebakk.no +engerdal.no +etne.no +etnedal.no +evenassi.no +evenášši.no +evenes.no +evje-og-hornnes.no +farsund.no +fauske.no +fedje.no +fet.no +finnoy.no +finnøy.no +fitjar.no +fjaler.no +fjell.no +fla.no +flå.no +flakstad.no +flatanger.no +flekkefjord.no +flesberg.no +flora.no +folldal.no +forde.no +førde.no 
+forsand.no +fosnes.no +fræna.no +frana.no +frei.no +frogn.no +froland.no +frosta.no +froya.no +frøya.no +fuoisku.no +fuossko.no +fusa.no +fyresdal.no +gaivuotna.no +gáivuotna.no +galsa.no +gálsá.no +gamvik.no +gangaviika.no +gáŋgaviika.no +gaular.no +gausdal.no +giehtavuoatna.no +gildeskal.no +gildeskål.no +giske.no +gjemnes.no +gjerdrum.no +gjerstad.no +gjesdal.no +gjovik.no +gjøvik.no +gloppen.no +gol.no +gran.no +grane.no +granvin.no +gratangen.no +grimstad.no +grong.no +grue.no +gulen.no +guovdageaidnu.no +ha.no +hå.no +habmer.no +hábmer.no +hadsel.no +hægebostad.no +hagebostad.no +halden.no +halsa.no +hamar.no +hamaroy.no +hammarfeasta.no +hámmárfeasta.no +hammerfest.no +hapmir.no +hápmir.no +haram.no +hareid.no +harstad.no +hasvik.no +hattfjelldal.no +haugesund.no +os.hedmark.no +valer.hedmark.no +våler.hedmark.no +hemne.no +hemnes.no +hemsedal.no +hitra.no +hjartdal.no +hjelmeland.no +hobol.no +hobøl.no +hof.no +hol.no +hole.no +holmestrand.no +holtalen.no +holtålen.no +os.hordaland.no +hornindal.no +horten.no +hoyanger.no +høyanger.no +hoylandet.no +høylandet.no +hurdal.no +hurum.no +hvaler.no +hyllestad.no +ibestad.no +inderoy.no +inderøy.no +iveland.no +ivgu.no +jevnaker.no +jolster.no +jølster.no +jondal.no +kafjord.no +kåfjord.no +karasjohka.no +kárášjohka.no +karasjok.no +karlsoy.no +karmoy.no +karmøy.no +kautokeino.no +klabu.no +klæbu.no +klepp.no +kongsberg.no +kongsvinger.no +kraanghke.no +kråanghke.no +kragero.no +kragerø.no +kristiansand.no +kristiansund.no +krodsherad.no +krødsherad.no +kvæfjord.no +kvænangen.no +kvafjord.no +kvalsund.no +kvam.no +kvanangen.no +kvinesdal.no +kvinnherad.no +kviteseid.no +kvitsoy.no +kvitsøy.no +laakesvuemie.no +lærdal.no +lahppi.no +láhppi.no +lardal.no +larvik.no +lavagis.no +lavangen.no +leangaviika.no +leaŋgaviika.no +lebesby.no +leikanger.no +leirfjord.no +leka.no +leksvik.no +lenvik.no +lerdal.no +lesja.no +levanger.no +lier.no +lierne.no +lillehammer.no +lillesand.no +lindas.no +lindås.no +lindesnes.no 
+loabat.no +loabát.no +lodingen.no +lødingen.no +lom.no +loppa.no +lorenskog.no +lørenskog.no +loten.no +løten.no +lund.no +lunner.no +luroy.no +lurøy.no +luster.no +lyngdal.no +lyngen.no +malatvuopmi.no +málatvuopmi.no +malselv.no +målselv.no +malvik.no +mandal.no +marker.no +marnardal.no +masfjorden.no +masoy.no +måsøy.no +matta-varjjat.no +mátta-várjjat.no +meland.no +meldal.no +melhus.no +meloy.no +meløy.no +meraker.no +meråker.no +midsund.no +midtre-gauldal.no +moareke.no +moåreke.no +modalen.no +modum.no +molde.no +heroy.more-og-romsdal.no +sande.more-og-romsdal.no +herøy.møre-og-romsdal.no +sande.møre-og-romsdal.no +moskenes.no +moss.no +mosvik.no +muosat.no +muosát.no +naamesjevuemie.no +nååmesjevuemie.no +nærøy.no +namdalseid.no +namsos.no +namsskogan.no +nannestad.no +naroy.no +narviika.no +narvik.no +naustdal.no +navuotna.no +návuotna.no +nedre-eiker.no +nesna.no +nesodden.no +nesseby.no +nesset.no +nissedal.no +nittedal.no +nord-aurdal.no +nord-fron.no +nord-odal.no +norddal.no +nordkapp.no +bo.nordland.no +bø.nordland.no +heroy.nordland.no +herøy.nordland.no +nordre-land.no +nordreisa.no +nore-og-uvdal.no +notodden.no +notteroy.no +nøtterøy.no +odda.no +oksnes.no +øksnes.no +omasvuotna.no +oppdal.no +oppegard.no +oppegård.no +orkdal.no +orland.no +ørland.no +orskog.no +ørskog.no +orsta.no +ørsta.no +osen.no +osteroy.no +osterøy.no +valer.ostfold.no +våler.østfold.no +ostre-toten.no +østre-toten.no +overhalla.no +ovre-eiker.no +øvre-eiker.no +oyer.no +øyer.no +oygarden.no +øygarden.no +oystre-slidre.no +øystre-slidre.no +porsanger.no +porsangu.no +porsáŋgu.no +porsgrunn.no +rade.no +råde.no +radoy.no +radøy.no +rælingen.no +rahkkeravju.no +ráhkkerávju.no +raisa.no +ráisa.no +rakkestad.no +ralingen.no +rana.no +randaberg.no +rauma.no +rendalen.no +rennebu.no +rennesoy.no +rennesøy.no +rindal.no +ringebu.no +ringerike.no +ringsaker.no +risor.no +risør.no +rissa.no +roan.no +rodoy.no +rødøy.no +rollag.no +romsa.no +romskog.no +rømskog.no +roros.no 
+røros.no +rost.no +røst.no +royken.no +røyken.no +royrvik.no +røyrvik.no +ruovat.no +rygge.no +salangen.no +salat.no +sálat.no +sálát.no +saltdal.no +samnanger.no +sandefjord.no +sandnes.no +sandoy.no +sandøy.no +sarpsborg.no +sauda.no +sauherad.no +sel.no +selbu.no +selje.no +seljord.no +siellak.no +sigdal.no +siljan.no +sirdal.no +skanit.no +skánit.no +skanland.no +skånland.no +skaun.no +skedsmo.no +ski.no +skien.no +skierva.no +skiervá.no +skiptvet.no +skjak.no +skjåk.no +skjervoy.no +skjervøy.no +skodje.no +smola.no +smøla.no +snaase.no +snåase.no +snasa.no +snåsa.no +snillfjord.no +snoasa.no +sogndal.no +sogne.no +søgne.no +sokndal.no +sola.no +solund.no +somna.no +sømna.no +sondre-land.no +søndre-land.no +songdalen.no +sor-aurdal.no +sør-aurdal.no +sor-fron.no +sør-fron.no +sor-odal.no +sør-odal.no +sor-varanger.no +sør-varanger.no +sorfold.no +sørfold.no +sorreisa.no +sørreisa.no +sortland.no +sorum.no +sørum.no +spydeberg.no +stange.no +stavanger.no +steigen.no +steinkjer.no +stjordal.no +stjørdal.no +stokke.no +stor-elvdal.no +stord.no +stordal.no +storfjord.no +strand.no +stranda.no +stryn.no +sula.no +suldal.no +sund.no +sunndal.no +surnadal.no +sveio.no +svelvik.no +sykkylven.no +tana.no +bo.telemark.no +bø.telemark.no +time.no +tingvoll.no +tinn.no +tjeldsund.no +tjome.no +tjøme.no +tokke.no +tolga.no +tonsberg.no +tønsberg.no +torsken.no +træna.no +trana.no +tranoy.no +tranøy.no +troandin.no +trogstad.no +trøgstad.no +tromsa.no +tromso.no +tromsø.no +trondheim.no +trysil.no +tvedestrand.no +tydal.no +tynset.no +tysfjord.no +tysnes.no +tysvær.no +tysvar.no +ullensaker.no +ullensvang.no +ulvik.no +unjarga.no +unjárga.no +utsira.no +vaapste.no +vadso.no +vadsø.no +værøy.no +vaga.no +vågå.no +vagan.no +vågan.no +vagsoy.no +vågsøy.no +vaksdal.no +valle.no +vang.no +vanylven.no +vardo.no +vardø.no +varggat.no +várggát.no +varoy.no +vefsn.no +vega.no +vegarshei.no +vegårshei.no +vennesla.no +verdal.no +verran.no +vestby.no +sande.vestfold.no +vestnes.no 
+vestre-slidre.no +vestre-toten.no +vestvagoy.no +vestvågøy.no +vevelstad.no +vik.no +vikna.no +vindafjord.no +voagat.no +volda.no +voss.no + +// np : http://www.mos.com.np/register.html +*.np + +// nr : http://cenpac.net.nr/dns/index.html +// Submitted by registry +nr +biz.nr +com.nr +edu.nr +gov.nr +info.nr +net.nr +org.nr + +// nu : https://www.iana.org/domains/root/db/nu.html +nu + +// nz : https://www.iana.org/domains/root/db/nz.html +// Submitted by registry +nz +ac.nz +co.nz +cri.nz +geek.nz +gen.nz +govt.nz +health.nz +iwi.nz +kiwi.nz +maori.nz +māori.nz +mil.nz +net.nz +org.nz +parliament.nz +school.nz + +// om : https://www.iana.org/domains/root/db/om.html +om +co.om +com.om +edu.om +gov.om +med.om +museum.om +net.om +org.om +pro.om + +// onion : https://tools.ietf.org/html/rfc7686 +onion + +// org : https://www.iana.org/domains/root/db/org.html +org + +// pa : http://www.nic.pa/ +// Some additional second level "domains" resolve directly as hostnames, such as +// pannet.pa, so we add a rule for "pa". 
+pa +abo.pa +ac.pa +com.pa +edu.pa +gob.pa +ing.pa +med.pa +net.pa +nom.pa +org.pa +sld.pa + +// pe : https://www.nic.pe/InformeFinalComision.pdf +pe +com.pe +edu.pe +gob.pe +mil.pe +net.pe +nom.pe +org.pe + +// pf : http://www.gobin.info/domainname/formulaire-pf.pdf +pf +com.pf +edu.pf +org.pf + +// pg : https://www.iana.org/domains/root/db/pg.html +*.pg + +// ph : https://www.iana.org/domains/root/db/ph.html +// Submitted by registry +ph +com.ph +edu.ph +gov.ph +i.ph +mil.ph +net.ph +ngo.ph +org.ph + +// pk : https://pk5.pknic.net.pk/pk5/msgNamepk.PK +// Contact Email: staff@pknic.net.pk +pk +ac.pk +biz.pk +com.pk +edu.pk +fam.pk +gkp.pk +gob.pk +gog.pk +gok.pk +gop.pk +gos.pk +gov.pk +net.pk +org.pk +web.pk + +// pl : https://www.dns.pl/en/ +// Confirmed by registry 2024-11-18 +pl +com.pl +net.pl +org.pl +// pl functional domains : https://www.dns.pl/en/list_of_functional_domain_names +agro.pl +aid.pl +atm.pl +auto.pl +biz.pl +edu.pl +gmina.pl +gsm.pl +info.pl +mail.pl +media.pl +miasta.pl +mil.pl +nieruchomosci.pl +nom.pl +pc.pl +powiat.pl +priv.pl +realestate.pl +rel.pl +sex.pl +shop.pl +sklep.pl +sos.pl +szkola.pl +targi.pl +tm.pl +tourism.pl +travel.pl +turystyka.pl +// Government domains : https://www.dns.pl/informacje_o_rejestracji_domen_gov_pl +// In accordance with the .gov.pl Domain Name Regulations : https://www.dns.pl/regulamin_gov_pl +gov.pl +ap.gov.pl +griw.gov.pl +ic.gov.pl +is.gov.pl +kmpsp.gov.pl +konsulat.gov.pl +kppsp.gov.pl +kwp.gov.pl +kwpsp.gov.pl +mup.gov.pl +mw.gov.pl +oia.gov.pl +oirm.gov.pl +oke.gov.pl +oow.gov.pl +oschr.gov.pl +oum.gov.pl +pa.gov.pl +pinb.gov.pl +piw.gov.pl +po.gov.pl +pr.gov.pl +psp.gov.pl +psse.gov.pl +pup.gov.pl +rzgw.gov.pl +sa.gov.pl +sdn.gov.pl +sko.gov.pl +so.gov.pl +sr.gov.pl +starostwo.gov.pl +ug.gov.pl +ugim.gov.pl +um.gov.pl +umig.gov.pl +upow.gov.pl +uppo.gov.pl +us.gov.pl +uw.gov.pl +uzs.gov.pl +wif.gov.pl +wiih.gov.pl +winb.gov.pl +wios.gov.pl +witd.gov.pl +wiw.gov.pl +wkz.gov.pl +wsa.gov.pl +wskr.gov.pl 
+wsse.gov.pl +wuoz.gov.pl +wzmiuw.gov.pl +zp.gov.pl +zpisdn.gov.pl +// pl regional domains : https://www.dns.pl/en/list_of_regional_domain_names +augustow.pl +babia-gora.pl +bedzin.pl +beskidy.pl +bialowieza.pl +bialystok.pl +bielawa.pl +bieszczady.pl +boleslawiec.pl +bydgoszcz.pl +bytom.pl +cieszyn.pl +czeladz.pl +czest.pl +dlugoleka.pl +elblag.pl +elk.pl +glogow.pl +gniezno.pl +gorlice.pl +grajewo.pl +ilawa.pl +jaworzno.pl +jelenia-gora.pl +jgora.pl +kalisz.pl +karpacz.pl +kartuzy.pl +kaszuby.pl +katowice.pl +kazimierz-dolny.pl +kepno.pl +ketrzyn.pl +klodzko.pl +kobierzyce.pl +kolobrzeg.pl +konin.pl +konskowola.pl +kutno.pl +lapy.pl +lebork.pl +legnica.pl +lezajsk.pl +limanowa.pl +lomza.pl +lowicz.pl +lubin.pl +lukow.pl +malbork.pl +malopolska.pl +mazowsze.pl +mazury.pl +mielec.pl +mielno.pl +mragowo.pl +naklo.pl +nowaruda.pl +nysa.pl +olawa.pl +olecko.pl +olkusz.pl +olsztyn.pl +opoczno.pl +opole.pl +ostroda.pl +ostroleka.pl +ostrowiec.pl +ostrowwlkp.pl +pila.pl +pisz.pl +podhale.pl +podlasie.pl +polkowice.pl +pomorskie.pl +pomorze.pl +prochowice.pl +pruszkow.pl +przeworsk.pl +pulawy.pl +radom.pl +rawa-maz.pl +rybnik.pl +rzeszow.pl +sanok.pl +sejny.pl +skoczow.pl +slask.pl +slupsk.pl +sosnowiec.pl +stalowa-wola.pl +starachowice.pl +stargard.pl +suwalki.pl +swidnica.pl +swiebodzin.pl +swinoujscie.pl +szczecin.pl +szczytno.pl +tarnobrzeg.pl +tgory.pl +turek.pl +tychy.pl +ustka.pl +walbrzych.pl +warmia.pl +warszawa.pl +waw.pl +wegrow.pl +wielun.pl +wlocl.pl +wloclawek.pl +wodzislaw.pl +wolomin.pl +wroclaw.pl +zachpomor.pl +zagan.pl +zarow.pl +zgora.pl +zgorzelec.pl + +// pm : https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf +pm + +// pn : https://www.iana.org/domains/root/db/pn.html +pn +co.pn +edu.pn +gov.pn +net.pn +org.pn + +// post : https://www.iana.org/domains/root/db/post.html +post + +// pr : http://www.nic.pr/index.asp?f=1 +pr +biz.pr +com.pr +edu.pr +gov.pr +info.pr +isla.pr +name.pr +net.pr +org.pr +pro.pr +// these aren't 
mentioned on nic.pr, but on https://www.iana.org/domains/root/db/pr.html +ac.pr +est.pr +prof.pr + +// pro : http://registry.pro/get-pro +pro +aaa.pro +aca.pro +acct.pro +avocat.pro +bar.pro +cpa.pro +eng.pro +jur.pro +law.pro +med.pro +recht.pro + +// ps : https://www.iana.org/domains/root/db/ps.html +// http://www.nic.ps/registration/policy.html#reg +ps +com.ps +edu.ps +gov.ps +net.ps +org.ps +plo.ps +sec.ps + +// pt : https://www.dns.pt/en/domain/pt-terms-and-conditions-registration-rules/ +pt +com.pt +edu.pt +gov.pt +int.pt +net.pt +nome.pt +org.pt +publ.pt + +// pw : https://www.iana.org/domains/root/db/pw.html +// Confirmed by registry in private correspondence with @dnsguru 2024-12-09 +pw +gov.pw + +// py : https://www.iana.org/domains/root/db/py.html +// Submitted by registry +py +com.py +coop.py +edu.py +gov.py +mil.py +net.py +org.py + +// qa : http://domains.qa/en/ +qa +com.qa +edu.qa +gov.qa +mil.qa +name.qa +net.qa +org.qa +sch.qa + +// re : https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf +// Confirmed by registry 2024-11-18 +re +// Closed for registration on 2013-03-15 but domains are still maintained +asso.re +com.re + +// ro : http://www.rotld.ro/ +ro +arts.ro +com.ro +firm.ro +info.ro +nom.ro +nt.ro +org.ro +rec.ro +store.ro +tm.ro +www.ro + +// rs : https://www.rnids.rs/en/domains/national-domains +rs +ac.rs +co.rs +edu.rs +gov.rs +in.rs +org.rs + +// ru : https://cctld.ru/files/pdf/docs/en/rules_ru-rf.pdf +// Submitted by George Georgievsky +ru + +// rw : https://www.iana.org/domains/root/db/rw.html +rw +ac.rw +co.rw +coop.rw +gov.rw +mil.rw +net.rw +org.rw + +// sa : http://www.nic.net.sa/ +sa +com.sa +edu.sa +gov.sa +med.sa +net.sa +org.sa +pub.sa +sch.sa + +// sb : http://www.sbnic.net.sb/ +// Submitted by registry +sb +com.sb +edu.sb +gov.sb +net.sb +org.sb + +// sc : http://www.nic.sc/ +sc +com.sc +edu.sc +gov.sc +net.sc +org.sc + +// sd : https://www.iana.org/domains/root/db/sd.html +// Submitted by registry 
+sd +com.sd +edu.sd +gov.sd +info.sd +med.sd +net.sd +org.sd +tv.sd + +// se : https://www.iana.org/domains/root/db/se.html +// https://data.internetstiftelsen.se/barred_domains_list.txt -> Second level domains & Sub-domains +// Confirmed by Registry Services 2024-11-20 +se +a.se +ac.se +b.se +bd.se +brand.se +c.se +d.se +e.se +f.se +fh.se +fhsk.se +fhv.se +g.se +h.se +i.se +k.se +komforb.se +kommunalforbund.se +komvux.se +l.se +lanbib.se +m.se +n.se +naturbruksgymn.se +o.se +org.se +p.se +parti.se +pp.se +press.se +r.se +s.se +t.se +tm.se +u.se +w.se +x.se +y.se +z.se + +// sg : https://www.sgnic.sg/domain-registration/sg-categories-rules +// Confirmed by registry 2024-11-19 +sg +com.sg +edu.sg +gov.sg +net.sg +org.sg + +// sh : http://nic.sh/rules.htm +sh +com.sh +gov.sh +mil.sh +net.sh +org.sh + +// si : https://www.iana.org/domains/root/db/si.html +si + +// sj : No registrations at this time. +// Submitted by registry +sj + +// sk : https://www.iana.org/domains/root/db/sk.html +sk + +// sl : http://www.nic.sl +// Submitted by registry +sl +com.sl +edu.sl +gov.sl +net.sl +org.sl + +// sm : https://www.iana.org/domains/root/db/sm.html +sm + +// sn : https://www.iana.org/domains/root/db/sn.html +sn +art.sn +com.sn +edu.sn +gouv.sn +org.sn +perso.sn +univ.sn + +// so : http://sonic.so/policies/ +so +com.so +edu.so +gov.so +me.so +net.so +org.so + +// sr : https://www.iana.org/domains/root/db/sr.html +sr + +// ss : https://registry.nic.ss/ +// Submitted by registry +ss +biz.ss +co.ss +com.ss +edu.ss +gov.ss +me.ss +net.ss +org.ss +sch.ss + +// st : http://www.nic.st/html/policyrules/ +st +co.st +com.st +consulado.st +edu.st +embaixada.st +mil.st +net.st +org.st +principe.st +saotome.st +store.st + +// su : https://www.iana.org/domains/root/db/su.html +su + +// sv : https://www.iana.org/domains/root/db/sv.html +sv +com.sv +edu.sv +gob.sv +org.sv +red.sv + +// sx : https://www.iana.org/domains/root/db/sx.html +// Submitted by registry +sx +gov.sx + +// sy : 
https://www.iana.org/domains/root/db/sy.html +sy +com.sy +edu.sy +gov.sy +mil.sy +net.sy +org.sy + +// sz : https://www.iana.org/domains/root/db/sz.html +// http://www.sispa.org.sz/ +sz +ac.sz +co.sz +org.sz + +// tc : https://www.iana.org/domains/root/db/tc.html +tc + +// td : https://www.iana.org/domains/root/db/td.html +td + +// tel : https://www.iana.org/domains/root/db/tel.html +// http://www.telnic.org/ +tel + +// tf : https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf +tf + +// tg : https://www.iana.org/domains/root/db/tg.html +// http://www.nic.tg/ +tg + +// th : https://www.iana.org/domains/root/db/th.html +// Submitted by registry +th +ac.th +co.th +go.th +in.th +mi.th +net.th +or.th + +// tj : http://www.nic.tj/policy.html +tj +ac.tj +biz.tj +co.tj +com.tj +edu.tj +go.tj +gov.tj +int.tj +mil.tj +name.tj +net.tj +nic.tj +org.tj +test.tj +web.tj + +// tk : https://www.iana.org/domains/root/db/tk.html +tk + +// tl : https://www.iana.org/domains/root/db/tl.html +tl +gov.tl + +// tm : https://www.nic.tm/local.html +// Confirmed by registry 2024-11-19 +tm +co.tm +com.tm +edu.tm +gov.tm +mil.tm +net.tm +nom.tm +org.tm + +// tn : http://www.registre.tn/fr/ +// https://whois.ati.tn/ +tn +com.tn +ens.tn +fin.tn +gov.tn +ind.tn +info.tn +intl.tn +mincom.tn +nat.tn +net.tn +org.tn +perso.tn +tourism.tn + +// to : https://www.iana.org/domains/root/db/to.html +// Submitted by registry +to +com.to +edu.to +gov.to +mil.to +net.to +org.to + +// tr : https://nic.tr/ +// https://nic.tr/forms/eng/policies.pdf +// https://nic.tr/index.php?USRACTN=PRICELST +tr +av.tr +bbs.tr +bel.tr +biz.tr +com.tr +dr.tr +edu.tr +gen.tr +gov.tr +info.tr +k12.tr +kep.tr +mil.tr +name.tr +net.tr +org.tr +pol.tr +tel.tr +tsk.tr +tv.tr +web.tr +// Used by Northern Cyprus +nc.tr +// Used by government agencies of Northern Cyprus +gov.nc.tr + +// tt : https://www.nic.tt/ +// Confirmed by registry 2024-11-19 +tt +biz.tt +co.tt +com.tt +edu.tt +gov.tt +info.tt +mil.tt 
+name.tt +net.tt +org.tt +pro.tt + +// tv : https://www.iana.org/domains/root/db/tv.html +// Not listing any 2LDs as reserved since none seem to exist in practice, +// Wikipedia notwithstanding. +tv + +// tw : https://www.iana.org/domains/root/db/tw.html +// https://twnic.tw/dnservice_catag.php +// Confirmed by registry 2024-11-26 +tw +club.tw +com.tw +ebiz.tw +edu.tw +game.tw +gov.tw +idv.tw +mil.tw +net.tw +org.tw + +// tz : http://www.tznic.or.tz/index.php/domains +// Submitted by registry +tz +ac.tz +co.tz +go.tz +hotel.tz +info.tz +me.tz +mil.tz +mobi.tz +ne.tz +or.tz +sc.tz +tv.tz + +// ua : https://hostmaster.ua/policy/?ua +// Submitted by registry +ua +// ua 2LD +com.ua +edu.ua +gov.ua +in.ua +net.ua +org.ua +// ua geographic names +// https://hostmaster.ua/2ld/ +cherkassy.ua +cherkasy.ua +chernigov.ua +chernihiv.ua +chernivtsi.ua +chernovtsy.ua +ck.ua +cn.ua +cr.ua +crimea.ua +cv.ua +dn.ua +dnepropetrovsk.ua +dnipropetrovsk.ua +donetsk.ua +dp.ua +if.ua +ivano-frankivsk.ua +kh.ua +kharkiv.ua +kharkov.ua +kherson.ua +khmelnitskiy.ua +khmelnytskyi.ua +kiev.ua +kirovograd.ua +km.ua +kr.ua +kropyvnytskyi.ua +krym.ua +ks.ua +kv.ua +kyiv.ua +lg.ua +lt.ua +lugansk.ua +luhansk.ua +lutsk.ua +lv.ua +lviv.ua +mk.ua +mykolaiv.ua +nikolaev.ua +od.ua +odesa.ua +odessa.ua +pl.ua +poltava.ua +rivne.ua +rovno.ua +rv.ua +sb.ua +sebastopol.ua +sevastopol.ua +sm.ua +sumy.ua +te.ua +ternopil.ua +uz.ua +uzhgorod.ua +uzhhorod.ua +vinnica.ua +vinnytsia.ua +vn.ua +volyn.ua +yalta.ua +zakarpattia.ua +zaporizhzhe.ua +zaporizhzhia.ua +zhitomir.ua +zhytomyr.ua +zp.ua +zt.ua + +// ug : https://www.registry.co.ug/ +// https://www.registry.co.ug, https://whois.co.ug +// Confirmed by registry 2025-01-20 +ug +ac.ug +co.ug +com.ug +edu.ug +go.ug +gov.ug +mil.ug +ne.ug +or.ug +org.ug +sc.ug +us.ug + +// uk : https://www.iana.org/domains/root/db/uk.html +// Submitted by registry +uk +ac.uk +co.uk +gov.uk +ltd.uk +me.uk +net.uk +nhs.uk +org.uk +plc.uk +police.uk +*.sch.uk + +// us : 
https://www.iana.org/domains/root/db/us.html +// Confirmed via the .us zone file by William Harrison 2024-12-10 +us +dni.us +isa.us +nsn.us +// Geographic Names +ak.us +al.us +ar.us +as.us +az.us +ca.us +co.us +ct.us +dc.us +de.us +fl.us +ga.us +gu.us +hi.us +ia.us +id.us +il.us +in.us +ks.us +ky.us +la.us +ma.us +md.us +me.us +mi.us +mn.us +mo.us +ms.us +mt.us +nc.us +nd.us +ne.us +nh.us +nj.us +nm.us +nv.us +ny.us +oh.us +ok.us +or.us +pa.us +pr.us +ri.us +sc.us +sd.us +tn.us +tx.us +ut.us +va.us +vi.us +vt.us +wa.us +wi.us +wv.us +wy.us +// The registrar notes several more specific domains available in each state, +// such as state.*.us, dst.*.us, etc., but resolution of these is somewhat +// haphazard; in some states these domains resolve as addresses, while in others +// only subdomains are available, or even nothing at all. We include the +// most common ones where it's clear that different sites are different +// entities. +k12.ak.us +k12.al.us +k12.ar.us +k12.as.us +k12.az.us +k12.ca.us +k12.co.us +k12.ct.us +k12.dc.us +k12.fl.us +k12.ga.us +k12.gu.us +// k12.hi.us - Bug 614565 - Hawaii has a state-wide DOE login +k12.ia.us +k12.id.us +k12.il.us +k12.in.us +k12.ks.us +k12.ky.us +k12.la.us +k12.ma.us +k12.md.us +k12.me.us +k12.mi.us +k12.mn.us +k12.mo.us +k12.ms.us +k12.mt.us +k12.nc.us +// k12.nd.us - Bug 1028347 - Removed at request of Travis Rosso +k12.ne.us +k12.nh.us +k12.nj.us +k12.nm.us +k12.nv.us +k12.ny.us +k12.oh.us +k12.ok.us +k12.or.us +k12.pa.us +k12.pr.us +// k12.ri.us - Removed at request of Kim Cournoyer +k12.sc.us +// k12.sd.us - Bug 934131 - Removed at request of James Booze +k12.tn.us +k12.tx.us +k12.ut.us +k12.va.us +k12.vi.us +k12.vt.us +k12.wa.us +k12.wi.us +// k12.wv.us - Bug 947705 - Removed at request of Verne Britton +cc.ak.us +lib.ak.us +cc.al.us +lib.al.us +cc.ar.us +lib.ar.us +cc.as.us +lib.as.us +cc.az.us +lib.az.us +cc.ca.us +lib.ca.us +cc.co.us +lib.co.us +cc.ct.us +lib.ct.us +cc.dc.us +lib.dc.us +cc.de.us +cc.fl.us +cc.ga.us 
+cc.gu.us +cc.hi.us +cc.ia.us +cc.id.us +cc.il.us +cc.in.us +cc.ks.us +cc.ky.us +cc.la.us +cc.ma.us +cc.md.us +cc.me.us +cc.mi.us +cc.mn.us +cc.mo.us +cc.ms.us +cc.mt.us +cc.nc.us +cc.nd.us +cc.ne.us +cc.nh.us +cc.nj.us +cc.nm.us +cc.nv.us +cc.ny.us +cc.oh.us +cc.ok.us +cc.or.us +cc.pa.us +cc.pr.us +cc.ri.us +cc.sc.us +cc.sd.us +cc.tn.us +cc.tx.us +cc.ut.us +cc.va.us +cc.vi.us +cc.vt.us +cc.wa.us +cc.wi.us +cc.wv.us +cc.wy.us +k12.wy.us +// lib.de.us - Issue #243 - Moved to Private section at request of Ed Moore +lib.fl.us +lib.ga.us +lib.gu.us +lib.hi.us +lib.ia.us +lib.id.us +lib.il.us +lib.in.us +lib.ks.us +lib.ky.us +lib.la.us +lib.ma.us +lib.md.us +lib.me.us +lib.mi.us +lib.mn.us +lib.mo.us +lib.ms.us +lib.mt.us +lib.nc.us +lib.nd.us +lib.ne.us +lib.nh.us +lib.nj.us +lib.nm.us +lib.nv.us +lib.ny.us +lib.oh.us +lib.ok.us +lib.or.us +lib.pa.us +lib.pr.us +lib.ri.us +lib.sc.us +lib.sd.us +lib.tn.us +lib.tx.us +lib.ut.us +lib.va.us +lib.vi.us +lib.vt.us +lib.wa.us +lib.wi.us +// lib.wv.us - Bug 941670 - Removed at request of Larry W Arnold +lib.wy.us +// k12.ma.us contains school districts in Massachusetts. The 4LDs are +// managed independently except for private (PVT), charter (CHTR) and +// parochial (PAROCH) schools. Those are delegated directly to the +// 5LD operators. +chtr.k12.ma.us +paroch.k12.ma.us +pvt.k12.ma.us +// Merit Network, Inc. 
maintains the registry for =~ /(k12|cc|lib).mi.us/ and the following +// see also: https://domreg.merit.edu : domreg@merit.edu +// see also: whois -h whois.domreg.merit.edu help +ann-arbor.mi.us +cog.mi.us +dst.mi.us +eaton.mi.us +gen.mi.us +mus.mi.us +tec.mi.us +washtenaw.mi.us + +// uy : http://www.nic.org.uy/ +uy +com.uy +edu.uy +gub.uy +mil.uy +net.uy +org.uy + +// uz : http://www.reg.uz/ +uz +co.uz +com.uz +net.uz +org.uz + +// va : https://www.iana.org/domains/root/db/va.html +va + +// vc : https://www.iana.org/domains/root/db/vc.html +// Submitted by registry +vc +com.vc +edu.vc +gov.vc +mil.vc +net.vc +org.vc + +// ve : https://registro.nic.ve/ +// Submitted by registry nic@nic.ve and nicve@conatel.gob.ve +ve +arts.ve +bib.ve +co.ve +com.ve +e12.ve +edu.ve +emprende.ve +firm.ve +gob.ve +gov.ve +info.ve +int.ve +mil.ve +net.ve +nom.ve +org.ve +rar.ve +rec.ve +store.ve +tec.ve +web.ve + +// vg : https://www.iana.org/domains/root/db/vg.html +// Confirmed by registry 2025-01-10 +vg +edu.vg + +// vi : https://www.iana.org/domains/root/db/vi.html +vi +co.vi +com.vi +k12.vi +net.vi +org.vi + +// vn : https://www.vnnic.vn/en/domain/cctld-vn +// https://vnnic.vn/sites/default/files/tailieu/vn.cctld.domains.txt +vn +ac.vn +ai.vn +biz.vn +com.vn +edu.vn +gov.vn +health.vn +id.vn +info.vn +int.vn +io.vn +name.vn +net.vn +org.vn +pro.vn + +// vn geographical names +angiang.vn +bacgiang.vn +backan.vn +baclieu.vn +bacninh.vn +baria-vungtau.vn +bentre.vn +binhdinh.vn +binhduong.vn +binhphuoc.vn +binhthuan.vn +camau.vn +cantho.vn +caobang.vn +daklak.vn +daknong.vn +danang.vn +dienbien.vn +dongnai.vn +dongthap.vn +gialai.vn +hagiang.vn +haiduong.vn +haiphong.vn +hanam.vn +hanoi.vn +hatinh.vn +haugiang.vn +hoabinh.vn +hungyen.vn +khanhhoa.vn +kiengiang.vn +kontum.vn +laichau.vn +lamdong.vn +langson.vn +laocai.vn +longan.vn +namdinh.vn +nghean.vn +ninhbinh.vn +ninhthuan.vn +phutho.vn +phuyen.vn +quangbinh.vn +quangnam.vn +quangngai.vn +quangninh.vn +quangtri.vn +soctrang.vn 
+sonla.vn +tayninh.vn +thaibinh.vn +thainguyen.vn +thanhhoa.vn +thanhphohochiminh.vn +thuathienhue.vn +tiengiang.vn +travinh.vn +tuyenquang.vn +vinhlong.vn +vinhphuc.vn +yenbai.vn + +// vu : https://www.iana.org/domains/root/db/vu.html +// http://www.vunic.vu/ +vu +com.vu +edu.vu +net.vu +org.vu + +// wf : https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf +wf + +// ws : https://www.iana.org/domains/root/db/ws.html +// http://samoanic.ws/index.dhtml +ws +com.ws +edu.ws +gov.ws +net.ws +org.ws + +// yt : https://www.afnic.fr/wp-media/uploads/2022/12/afnic-naming-policy-2023-01-01.pdf +yt + +// IDN ccTLDs +// When submitting patches, please maintain a sort by ISO 3166 ccTLD, then +// U-label, and follow this format: +// // A-Label ("", [, variant info]) : +// // [sponsoring org] +// U-Label + +// xn--mgbaam7a8h ("Emerat", Arabic) : AE +// http://nic.ae/english/arabicdomain/rules.jsp +امارات + +// xn--y9a3aq ("hye", Armenian) : AM +// ISOC AM (operated by .am Registry) +հայ + +// xn--54b7fta0cc ("Bangla", Bangla) : BD +বাংলা + +// xn--90ae ("bg", Bulgarian) : BG +бг + +// xn--mgbcpq6gpa1a ("albahrain", Arabic) : BH +البحرين + +// xn--90ais ("bel", Belarusian/Russian Cyrillic) : BY +// Operated by .by registry +бел + +// xn--fiqs8s ("Zhongguo/China", Chinese, Simplified) : CN +// CNNIC +// https://www.cnnic.cn/11/192/index.html +中国 + +// xn--fiqz9s ("Zhongguo/China", Chinese, Traditional) : CN +// CNNIC +// https://www.cnnic.com.cn/AU/MediaC/Announcement/201609/t20160905_54470.htm +中國 + +// xn--lgbbat1ad8j ("Algeria/Al Jazair", Arabic) : DZ +الجزائر + +// xn--wgbh1c ("Egypt/Masr", Arabic) : EG +// http://www.dotmasr.eg/ +مصر + +// xn--e1a4c ("eu", Cyrillic) : EU +// https://eurid.eu +ею + +// xn--qxa6a ("eu", Greek) : EU +// https://eurid.eu +ευ + +// xn--mgbah1a3hjkrd ("Mauritania", Arabic) : MR +موريتانيا + +// xn--node ("ge", Georgian Mkhedruli) : GE +გე + +// xn--qxam ("el", Greek) : GR +// Hellenic Ministry of Infrastructure, 
Transport, and Networks +ελ + +// xn--j6w193g ("Hong Kong", Chinese) : HK +// https://www.hkirc.hk +// Submitted by registry +// https://www.hkirc.hk/content.jsp?id=30#!/34 +香港 +個人.香港 +公司.香港 +政府.香港 +教育.香港 +組織.香港 +網絡.香港 + +// xn--2scrj9c ("Bharat", Kannada) : IN +// India +ಭಾರತ + +// xn--3hcrj9c ("Bharat", Oriya) : IN +// India +ଭାରତ + +// xn--45br5cyl ("Bharatam", Assamese) : IN +// India +ভাৰত + +// xn--h2breg3eve ("Bharatam", Sanskrit) : IN +// India +भारतम् + +// xn--h2brj9c8c ("Bharot", Santali) : IN +// India +भारोत + +// xn--mgbgu82a ("Bharat", Sindhi) : IN +// India +ڀارت + +// xn--rvc1e0am3e ("Bharatam", Malayalam) : IN +// India +ഭാരതം + +// xn--h2brj9c ("Bharat", Devanagari) : IN +// India +भारत + +// xn--mgbbh1a ("Bharat", Kashmiri) : IN +// India +بارت + +// xn--mgbbh1a71e ("Bharat", Arabic) : IN +// India +بھارت + +// xn--fpcrj9c3d ("Bharat", Telugu) : IN +// India +భారత్ + +// xn--gecrj9c ("Bharat", Gujarati) : IN +// India +ભારત + +// xn--s9brj9c ("Bharat", Gurmukhi) : IN +// India +ਭਾਰਤ + +// xn--45brj9c ("Bharat", Bengali) : IN +// India +ভারত + +// xn--xkc2dl3a5ee0h ("India", Tamil) : IN +// India +இந்தியா + +// xn--mgba3a4f16a ("Iran", Persian) : IR +ایران + +// xn--mgba3a4fra ("Iran", Arabic) : IR +ايران + +// xn--mgbtx2b ("Iraq", Arabic) : IQ +// Communications and Media Commission +عراق + +// xn--mgbayh7gpa ("al-Ordon", Arabic) : JO +// National Information Technology Center (NITC) +// Royal Scientific Society, Al-Jubeiha +الاردن + +// xn--3e0b707e ("Republic of Korea", Hangul) : KR +한국 + +// xn--80ao21a ("Kaz", Kazakh) : KZ +қаз + +// xn--q7ce6a ("Lao", Lao) : LA +ລາວ + +// xn--fzc2c9e2c ("Lanka", Sinhalese-Sinhala) : LK +// https://nic.lk +ලංකා + +// xn--xkc2al3hye2a ("Ilangai", Tamil) : LK +// https://nic.lk +இலங்கை + +// xn--mgbc0a9azcg ("Morocco/al-Maghrib", Arabic) : MA +المغرب + +// xn--d1alf ("mkd", Macedonian) : MK +// MARnet +мкд + +// xn--l1acc ("mon", Mongolian) : MN +мон + +// xn--mix891f ("Macao", Chinese, Traditional) : MO +// 
MONIC / HNET Asia (Registry Operator for .mo) +澳門 + +// xn--mix082f ("Macao", Chinese, Simplified) : MO +澳门 + +// xn--mgbx4cd0ab ("Malaysia", Malay) : MY +مليسيا + +// xn--mgb9awbf ("Oman", Arabic) : OM +عمان + +// xn--mgbai9azgqp6j ("Pakistan", Urdu/Arabic) : PK +پاکستان + +// xn--mgbai9a5eva00b ("Pakistan", Urdu/Arabic, variant) : PK +پاكستان + +// xn--ygbi2ammx ("Falasteen", Arabic) : PS +// The Palestinian National Internet Naming Authority (PNINA) +// http://www.pnina.ps +فلسطين + +// xn--90a3ac ("srb", Cyrillic) : RS +// https://www.rnids.rs/en/domains/national-domains +срб +ак.срб +обр.срб +од.срб +орг.срб +пр.срб +упр.срб + +// xn--p1ai ("rf", Russian-Cyrillic) : RU +// https://cctld.ru/files/pdf/docs/en/rules_ru-rf.pdf +// Submitted by George Georgievsky +рф + +// xn--wgbl6a ("Qatar", Arabic) : QA +// http://www.ict.gov.qa/ +قطر + +// xn--mgberp4a5d4ar ("AlSaudiah", Arabic) : SA +// http://www.nic.net.sa/ +السعودية + +// xn--mgberp4a5d4a87g ("AlSaudiah", Arabic, variant): SA +السعودیة + +// xn--mgbqly7c0a67fbc ("AlSaudiah", Arabic, variant) : SA +السعودیۃ + +// xn--mgbqly7cvafr ("AlSaudiah", Arabic, variant) : SA +السعوديه + +// xn--mgbpl2fh ("sudan", Arabic) : SD +// Operated by .sd registry +سودان + +// xn--yfro4i67o Singapore ("Singapore", Chinese) : SG +新加坡 + +// xn--clchc0ea0b2g2a9gcd ("Singapore", Tamil) : SG +சிங்கப்பூர் + +// xn--ogbpf8fl ("Syria", Arabic) : SY +سورية + +// xn--mgbtf8fl ("Syria", Arabic, variant) : SY +سوريا + +// xn--o3cw4h ("Thai", Thai) : TH +// http://www.thnic.co.th +ไทย +ทหาร.ไทย +ธุรกิจ.ไทย +เน็ต.ไทย +รัฐบาล.ไทย +ศึกษา.ไทย +องค์กร.ไทย + +// xn--pgbs0dh ("Tunisia", Arabic) : TN +// http://nic.tn +تونس + +// xn--kpry57d ("Taiwan", Chinese, Traditional) : TW +// https://twnic.tw/dnservice_catag.php +台灣 + +// xn--kprw13d ("Taiwan", Chinese, Simplified) : TW +// http://www.twnic.net/english/dn/dn_07a.htm +台湾 + +// xn--nnx388a ("Taiwan", Chinese, variant) : TW +臺灣 + +// xn--j1amh ("ukr", Cyrillic) : UA +укр + +// xn--mgb2ddes 
("AlYemen", Arabic) : YE +اليمن + +// xxx : http://icmregistry.com +xxx + +// ye : http://www.y.net.ye/services/domain_name.htm +ye +com.ye +edu.ye +gov.ye +mil.ye +net.ye +org.ye + +// za : https://www.iana.org/domains/root/db/za.html +ac.za +agric.za +alt.za +co.za +edu.za +gov.za +grondar.za +law.za +mil.za +net.za +ngo.za +nic.za +nis.za +nom.za +org.za +school.za +tm.za +web.za + +// zm : https://zicta.zm/ +// Submitted by registry +zm +ac.zm +biz.zm +co.zm +com.zm +edu.zm +gov.zm +info.zm +mil.zm +net.zm +org.zm +sch.zm + +// zw : https://www.potraz.gov.zw/ +// Confirmed by registry 2017-01-25 +zw +ac.zw +co.zw +gov.zw +mil.zw +org.zw + +// newGTLDs + +// List of new gTLDs imported from https://www.icann.org/resources/registries/gtlds/v2/gtlds.json on 2025-07-09T15:20:49Z +// This list is auto-generated, don't edit it manually. +// aaa : American Automobile Association, Inc. +// https://www.iana.org/domains/root/db/aaa.html +aaa + +// aarp : AARP +// https://www.iana.org/domains/root/db/aarp.html +aarp + +// abb : ABB Ltd +// https://www.iana.org/domains/root/db/abb.html +abb + +// abbott : Abbott Laboratories, Inc. +// https://www.iana.org/domains/root/db/abbott.html +abbott + +// abbvie : AbbVie Inc. +// https://www.iana.org/domains/root/db/abbvie.html +abbvie + +// abc : Disney Enterprises, Inc. +// https://www.iana.org/domains/root/db/abc.html +abc + +// able : Able Inc. 
+// https://www.iana.org/domains/root/db/able.html +able + +// abogado : Registry Services, LLC +// https://www.iana.org/domains/root/db/abogado.html +abogado + +// abudhabi : Abu Dhabi Systems and Information Centre +// https://www.iana.org/domains/root/db/abudhabi.html +abudhabi + +// academy : Binky Moon, LLC +// https://www.iana.org/domains/root/db/academy.html +academy + +// accenture : Accenture plc +// https://www.iana.org/domains/root/db/accenture.html +accenture + +// accountant : dot Accountant Limited +// https://www.iana.org/domains/root/db/accountant.html +accountant + +// accountants : Binky Moon, LLC +// https://www.iana.org/domains/root/db/accountants.html +accountants + +// aco : ACO Severin Ahlmann GmbH & Co. KG +// https://www.iana.org/domains/root/db/aco.html +aco + +// actor : Dog Beach, LLC +// https://www.iana.org/domains/root/db/actor.html +actor + +// ads : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/ads.html +ads + +// adult : ICM Registry AD LLC +// https://www.iana.org/domains/root/db/adult.html +adult + +// aeg : Aktiebolaget Electrolux +// https://www.iana.org/domains/root/db/aeg.html +aeg + +// aetna : Aetna Life Insurance Company +// https://www.iana.org/domains/root/db/aetna.html +aetna + +// afl : Australian Football League +// https://www.iana.org/domains/root/db/afl.html +afl + +// africa : ZA Central Registry NPC trading as Registry.Africa +// https://www.iana.org/domains/root/db/africa.html +africa + +// agakhan : Fondation Aga Khan (Aga Khan Foundation) +// https://www.iana.org/domains/root/db/agakhan.html +agakhan + +// agency : Binky Moon, LLC +// https://www.iana.org/domains/root/db/agency.html +agency + +// aig : American International Group, Inc. +// https://www.iana.org/domains/root/db/aig.html +aig + +// airbus : Airbus S.A.S. 
+// https://www.iana.org/domains/root/db/airbus.html +airbus + +// airforce : Dog Beach, LLC +// https://www.iana.org/domains/root/db/airforce.html +airforce + +// airtel : Bharti Airtel Limited +// https://www.iana.org/domains/root/db/airtel.html +airtel + +// akdn : Fondation Aga Khan (Aga Khan Foundation) +// https://www.iana.org/domains/root/db/akdn.html +akdn + +// alibaba : Alibaba Group Holding Limited +// https://www.iana.org/domains/root/db/alibaba.html +alibaba + +// alipay : Alibaba Group Holding Limited +// https://www.iana.org/domains/root/db/alipay.html +alipay + +// allfinanz : Allfinanz Deutsche Vermögensberatung Aktiengesellschaft +// https://www.iana.org/domains/root/db/allfinanz.html +allfinanz + +// allstate : Allstate Fire and Casualty Insurance Company +// https://www.iana.org/domains/root/db/allstate.html +allstate + +// ally : Ally Financial Inc. +// https://www.iana.org/domains/root/db/ally.html +ally + +// alsace : Region Grand Est +// https://www.iana.org/domains/root/db/alsace.html +alsace + +// alstom : ALSTOM +// https://www.iana.org/domains/root/db/alstom.html +alstom + +// amazon : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/amazon.html +amazon + +// americanexpress : American Express Travel Related Services Company, Inc. +// https://www.iana.org/domains/root/db/americanexpress.html +americanexpress + +// americanfamily : AmFam, Inc. +// https://www.iana.org/domains/root/db/americanfamily.html +americanfamily + +// amex : American Express Travel Related Services Company, Inc. +// https://www.iana.org/domains/root/db/amex.html +amex + +// amfam : AmFam, Inc. 
+// https://www.iana.org/domains/root/db/amfam.html +amfam + +// amica : Amica Mutual Insurance Company +// https://www.iana.org/domains/root/db/amica.html +amica + +// amsterdam : Gemeente Amsterdam +// https://www.iana.org/domains/root/db/amsterdam.html +amsterdam + +// analytics : Campus IP LLC +// https://www.iana.org/domains/root/db/analytics.html +analytics + +// android : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/android.html +android + +// anquan : Beijing Qihu Keji Co., Ltd. +// https://www.iana.org/domains/root/db/anquan.html +anquan + +// anz : Australia and New Zealand Banking Group Limited +// https://www.iana.org/domains/root/db/anz.html +anz + +// aol : Yahoo Inc. +// https://www.iana.org/domains/root/db/aol.html +aol + +// apartments : Binky Moon, LLC +// https://www.iana.org/domains/root/db/apartments.html +apartments + +// app : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/app.html +app + +// apple : Apple Inc. +// https://www.iana.org/domains/root/db/apple.html +apple + +// aquarelle : Aquarelle.com +// https://www.iana.org/domains/root/db/aquarelle.html +aquarelle + +// arab : League of Arab States +// https://www.iana.org/domains/root/db/arab.html +arab + +// aramco : Aramco Services Company +// https://www.iana.org/domains/root/db/aramco.html +aramco + +// archi : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/archi.html +archi + +// army : Dog Beach, LLC +// https://www.iana.org/domains/root/db/army.html +army + +// art : UK Creative Ideas Limited +// https://www.iana.org/domains/root/db/art.html +art + +// arte : Association Relative à la Télévision Européenne G.E.I.E. +// https://www.iana.org/domains/root/db/arte.html +arte + +// asda : Asda Stores Limited +// https://www.iana.org/domains/root/db/asda.html +asda + +// associates : Binky Moon, LLC +// https://www.iana.org/domains/root/db/associates.html +associates + +// athleta : The Gap, Inc. 
+// https://www.iana.org/domains/root/db/athleta.html +athleta + +// attorney : Dog Beach, LLC +// https://www.iana.org/domains/root/db/attorney.html +attorney + +// auction : Dog Beach, LLC +// https://www.iana.org/domains/root/db/auction.html +auction + +// audi : AUDI Aktiengesellschaft +// https://www.iana.org/domains/root/db/audi.html +audi + +// audible : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/audible.html +audible + +// audio : XYZ.COM LLC +// https://www.iana.org/domains/root/db/audio.html +audio + +// auspost : Australian Postal Corporation +// https://www.iana.org/domains/root/db/auspost.html +auspost + +// author : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/author.html +author + +// auto : XYZ.COM LLC +// https://www.iana.org/domains/root/db/auto.html +auto + +// autos : XYZ.COM LLC +// https://www.iana.org/domains/root/db/autos.html +autos + +// aws : AWS Registry LLC +// https://www.iana.org/domains/root/db/aws.html +aws + +// axa : AXA Group Operations SAS +// https://www.iana.org/domains/root/db/axa.html +axa + +// azure : Microsoft Corporation +// https://www.iana.org/domains/root/db/azure.html +azure + +// baby : XYZ.COM LLC +// https://www.iana.org/domains/root/db/baby.html +baby + +// baidu : Baidu, Inc. +// https://www.iana.org/domains/root/db/baidu.html +baidu + +// banamex : Citigroup Inc. 
+// https://www.iana.org/domains/root/db/banamex.html +banamex + +// band : Dog Beach, LLC +// https://www.iana.org/domains/root/db/band.html +band + +// bank : fTLD Registry Services LLC +// https://www.iana.org/domains/root/db/bank.html +bank + +// bar : Punto 2012 Sociedad Anonima Promotora de Inversion de Capital Variable +// https://www.iana.org/domains/root/db/bar.html +bar + +// barcelona : Municipi de Barcelona +// https://www.iana.org/domains/root/db/barcelona.html +barcelona + +// barclaycard : Barclays Bank PLC +// https://www.iana.org/domains/root/db/barclaycard.html +barclaycard + +// barclays : Barclays Bank PLC +// https://www.iana.org/domains/root/db/barclays.html +barclays + +// barefoot : Gallo Vineyards, Inc. +// https://www.iana.org/domains/root/db/barefoot.html +barefoot + +// bargains : Binky Moon, LLC +// https://www.iana.org/domains/root/db/bargains.html +bargains + +// baseball : MLB Advanced Media DH, LLC +// https://www.iana.org/domains/root/db/baseball.html +baseball + +// basketball : Fédération Internationale de Basketball (FIBA) +// https://www.iana.org/domains/root/db/basketball.html +basketball + +// bauhaus : Werkhaus GmbH +// https://www.iana.org/domains/root/db/bauhaus.html +bauhaus + +// bayern : Bayern Connect GmbH +// https://www.iana.org/domains/root/db/bayern.html +bayern + +// bbc : British Broadcasting Corporation +// https://www.iana.org/domains/root/db/bbc.html +bbc + +// bbt : BB&T Corporation +// https://www.iana.org/domains/root/db/bbt.html +bbt + +// bbva : BANCO BILBAO VIZCAYA ARGENTARIA, S.A. +// https://www.iana.org/domains/root/db/bbva.html +bbva + +// bcg : The Boston Consulting Group, Inc. 
+// https://www.iana.org/domains/root/db/bcg.html +bcg + +// bcn : Municipi de Barcelona +// https://www.iana.org/domains/root/db/bcn.html +bcn + +// beats : Beats Electronics, LLC +// https://www.iana.org/domains/root/db/beats.html +beats + +// beauty : XYZ.COM LLC +// https://www.iana.org/domains/root/db/beauty.html +beauty + +// beer : Registry Services, LLC +// https://www.iana.org/domains/root/db/beer.html +beer + +// berlin : dotBERLIN GmbH & Co. KG +// https://www.iana.org/domains/root/db/berlin.html +berlin + +// best : BestTLD Pty Ltd +// https://www.iana.org/domains/root/db/best.html +best + +// bestbuy : BBY Solutions, Inc. +// https://www.iana.org/domains/root/db/bestbuy.html +bestbuy + +// bet : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/bet.html +bet + +// bharti : Bharti Enterprises (Holding) Private Limited +// https://www.iana.org/domains/root/db/bharti.html +bharti + +// bible : American Bible Society +// https://www.iana.org/domains/root/db/bible.html +bible + +// bid : dot Bid Limited +// https://www.iana.org/domains/root/db/bid.html +bid + +// bike : Binky Moon, LLC +// https://www.iana.org/domains/root/db/bike.html +bike + +// bing : Microsoft Corporation +// https://www.iana.org/domains/root/db/bing.html +bing + +// bingo : Binky Moon, LLC +// https://www.iana.org/domains/root/db/bingo.html +bingo + +// bio : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/bio.html +bio + +// black : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/black.html +black + +// blackfriday : Registry Services, LLC +// https://www.iana.org/domains/root/db/blackfriday.html +blackfriday + +// blockbuster : Dish DBS Corporation +// https://www.iana.org/domains/root/db/blockbuster.html +blockbuster + +// blog : Knock Knock WHOIS There, LLC +// https://www.iana.org/domains/root/db/blog.html +blog + +// bloomberg : Bloomberg IP Holdings LLC +// 
https://www.iana.org/domains/root/db/bloomberg.html +bloomberg + +// blue : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/blue.html +blue + +// bms : Bristol-Myers Squibb Company +// https://www.iana.org/domains/root/db/bms.html +bms + +// bmw : Bayerische Motoren Werke Aktiengesellschaft +// https://www.iana.org/domains/root/db/bmw.html +bmw + +// bnpparibas : BNP Paribas +// https://www.iana.org/domains/root/db/bnpparibas.html +bnpparibas + +// boats : XYZ.COM LLC +// https://www.iana.org/domains/root/db/boats.html +boats + +// boehringer : Boehringer Ingelheim International GmbH +// https://www.iana.org/domains/root/db/boehringer.html +boehringer + +// bofa : Bank of America Corporation +// https://www.iana.org/domains/root/db/bofa.html +bofa + +// bom : Núcleo de Informação e Coordenação do Ponto BR - NIC.br +// https://www.iana.org/domains/root/db/bom.html +bom + +// bond : ShortDot SA +// https://www.iana.org/domains/root/db/bond.html +bond + +// boo : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/boo.html +boo + +// book : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/book.html +book + +// booking : Booking.com B.V. +// https://www.iana.org/domains/root/db/booking.html +booking + +// bosch : Robert Bosch GMBH +// https://www.iana.org/domains/root/db/bosch.html +bosch + +// bostik : Bostik SA +// https://www.iana.org/domains/root/db/bostik.html +bostik + +// boston : Registry Services, LLC +// https://www.iana.org/domains/root/db/boston.html +boston + +// bot : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/bot.html +bot + +// boutique : Binky Moon, LLC +// https://www.iana.org/domains/root/db/boutique.html +boutique + +// box : Intercap Registry Inc. +// https://www.iana.org/domains/root/db/box.html +box + +// bradesco : Banco Bradesco S.A. 
+// https://www.iana.org/domains/root/db/bradesco.html +bradesco + +// bridgestone : Bridgestone Corporation +// https://www.iana.org/domains/root/db/bridgestone.html +bridgestone + +// broadway : Celebrate Broadway, Inc. +// https://www.iana.org/domains/root/db/broadway.html +broadway + +// broker : Dog Beach, LLC +// https://www.iana.org/domains/root/db/broker.html +broker + +// brother : Brother Industries, Ltd. +// https://www.iana.org/domains/root/db/brother.html +brother + +// brussels : DNS.be vzw +// https://www.iana.org/domains/root/db/brussels.html +brussels + +// build : Plan Bee LLC +// https://www.iana.org/domains/root/db/build.html +build + +// builders : Binky Moon, LLC +// https://www.iana.org/domains/root/db/builders.html +builders + +// business : Binky Moon, LLC +// https://www.iana.org/domains/root/db/business.html +business + +// buy : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/buy.html +buy + +// buzz : DOTSTRATEGY CO. +// https://www.iana.org/domains/root/db/buzz.html +buzz + +// bzh : Association www.bzh +// https://www.iana.org/domains/root/db/bzh.html +bzh + +// cab : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cab.html +cab + +// cafe : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cafe.html +cafe + +// cal : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/cal.html +cal + +// call : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/call.html +call + +// calvinklein : PVH gTLD Holdings LLC +// https://www.iana.org/domains/root/db/calvinklein.html +calvinklein + +// cam : Cam Connecting SARL +// https://www.iana.org/domains/root/db/cam.html +cam + +// camera : Binky Moon, LLC +// https://www.iana.org/domains/root/db/camera.html +camera + +// camp : Binky Moon, LLC +// https://www.iana.org/domains/root/db/camp.html +camp + +// canon : Canon Inc. 
+// https://www.iana.org/domains/root/db/canon.html +canon + +// capetown : ZA Central Registry NPC trading as ZA Central Registry +// https://www.iana.org/domains/root/db/capetown.html +capetown + +// capital : Binky Moon, LLC +// https://www.iana.org/domains/root/db/capital.html +capital + +// capitalone : Capital One Financial Corporation +// https://www.iana.org/domains/root/db/capitalone.html +capitalone + +// car : XYZ.COM LLC +// https://www.iana.org/domains/root/db/car.html +car + +// caravan : Caravan International, Inc. +// https://www.iana.org/domains/root/db/caravan.html +caravan + +// cards : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cards.html +cards + +// care : Binky Moon, LLC +// https://www.iana.org/domains/root/db/care.html +care + +// career : dotCareer LLC +// https://www.iana.org/domains/root/db/career.html +career + +// careers : Binky Moon, LLC +// https://www.iana.org/domains/root/db/careers.html +careers + +// cars : XYZ.COM LLC +// https://www.iana.org/domains/root/db/cars.html +cars + +// casa : Registry Services, LLC +// https://www.iana.org/domains/root/db/casa.html +casa + +// case : Digity, LLC +// https://www.iana.org/domains/root/db/case.html +case + +// cash : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cash.html +cash + +// casino : Binky Moon, LLC +// https://www.iana.org/domains/root/db/casino.html +casino + +// catering : Binky Moon, LLC +// https://www.iana.org/domains/root/db/catering.html +catering + +// catholic : Pontificium Consilium de Comunicationibus Socialibus (PCCS) (Pontifical Council for Social Communication) +// https://www.iana.org/domains/root/db/catholic.html +catholic + +// cba : COMMONWEALTH BANK OF AUSTRALIA +// https://www.iana.org/domains/root/db/cba.html +cba + +// cbn : The Christian Broadcasting Network, Inc. +// https://www.iana.org/domains/root/db/cbn.html +cbn + +// cbre : CBRE, Inc. 
+// https://www.iana.org/domains/root/db/cbre.html +cbre + +// center : Binky Moon, LLC +// https://www.iana.org/domains/root/db/center.html +center + +// ceo : XYZ.COM LLC +// https://www.iana.org/domains/root/db/ceo.html +ceo + +// cern : European Organization for Nuclear Research ("CERN") +// https://www.iana.org/domains/root/db/cern.html +cern + +// cfa : CFA Institute +// https://www.iana.org/domains/root/db/cfa.html +cfa + +// cfd : ShortDot SA +// https://www.iana.org/domains/root/db/cfd.html +cfd + +// chanel : Chanel International B.V. +// https://www.iana.org/domains/root/db/chanel.html +chanel + +// channel : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/channel.html +channel + +// charity : Public Interest Registry +// https://www.iana.org/domains/root/db/charity.html +charity + +// chase : JPMorgan Chase Bank, National Association +// https://www.iana.org/domains/root/db/chase.html +chase + +// chat : Binky Moon, LLC +// https://www.iana.org/domains/root/db/chat.html +chat + +// cheap : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cheap.html +cheap + +// chintai : CHINTAI Corporation +// https://www.iana.org/domains/root/db/chintai.html +chintai + +// christmas : XYZ.COM LLC +// https://www.iana.org/domains/root/db/christmas.html +christmas + +// chrome : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/chrome.html +chrome + +// church : Binky Moon, LLC +// https://www.iana.org/domains/root/db/church.html +church + +// cipriani : Hotel Cipriani Srl +// https://www.iana.org/domains/root/db/cipriani.html +cipriani + +// circle : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/circle.html +circle + +// cisco : Cisco Technology, Inc. +// https://www.iana.org/domains/root/db/cisco.html +cisco + +// citadel : Citadel Domain LLC +// https://www.iana.org/domains/root/db/citadel.html +citadel + +// citi : Citigroup Inc. 
+// https://www.iana.org/domains/root/db/citi.html +citi + +// citic : CITIC Group Corporation +// https://www.iana.org/domains/root/db/citic.html +citic + +// city : Binky Moon, LLC +// https://www.iana.org/domains/root/db/city.html +city + +// claims : Binky Moon, LLC +// https://www.iana.org/domains/root/db/claims.html +claims + +// cleaning : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cleaning.html +cleaning + +// click : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/click.html +click + +// clinic : Binky Moon, LLC +// https://www.iana.org/domains/root/db/clinic.html +clinic + +// clinique : The Estée Lauder Companies Inc. +// https://www.iana.org/domains/root/db/clinique.html +clinique + +// clothing : Binky Moon, LLC +// https://www.iana.org/domains/root/db/clothing.html +clothing + +// cloud : Aruba PEC S.p.A. +// https://www.iana.org/domains/root/db/cloud.html +cloud + +// club : Registry Services, LLC +// https://www.iana.org/domains/root/db/club.html +club + +// clubmed : Club Méditerranée S.A. 
+// https://www.iana.org/domains/root/db/clubmed.html +clubmed + +// coach : Binky Moon, LLC +// https://www.iana.org/domains/root/db/coach.html +coach + +// codes : Binky Moon, LLC +// https://www.iana.org/domains/root/db/codes.html +codes + +// coffee : Binky Moon, LLC +// https://www.iana.org/domains/root/db/coffee.html +coffee + +// college : XYZ.COM LLC +// https://www.iana.org/domains/root/db/college.html +college + +// cologne : dotKoeln GmbH +// https://www.iana.org/domains/root/db/cologne.html +cologne + +// commbank : COMMONWEALTH BANK OF AUSTRALIA +// https://www.iana.org/domains/root/db/commbank.html +commbank + +// community : Binky Moon, LLC +// https://www.iana.org/domains/root/db/community.html +community + +// company : Binky Moon, LLC +// https://www.iana.org/domains/root/db/company.html +company + +// compare : Registry Services, LLC +// https://www.iana.org/domains/root/db/compare.html +compare + +// computer : Binky Moon, LLC +// https://www.iana.org/domains/root/db/computer.html +computer + +// comsec : VeriSign, Inc. 
+// https://www.iana.org/domains/root/db/comsec.html +comsec + +// condos : Binky Moon, LLC +// https://www.iana.org/domains/root/db/condos.html +condos + +// construction : Binky Moon, LLC +// https://www.iana.org/domains/root/db/construction.html +construction + +// consulting : Dog Beach, LLC +// https://www.iana.org/domains/root/db/consulting.html +consulting + +// contact : Dog Beach, LLC +// https://www.iana.org/domains/root/db/contact.html +contact + +// contractors : Binky Moon, LLC +// https://www.iana.org/domains/root/db/contractors.html +contractors + +// cooking : Registry Services, LLC +// https://www.iana.org/domains/root/db/cooking.html +cooking + +// cool : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cool.html +cool + +// corsica : Collectivité de Corse +// https://www.iana.org/domains/root/db/corsica.html +corsica + +// country : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/country.html +country + +// coupon : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/coupon.html +coupon + +// coupons : Binky Moon, LLC +// https://www.iana.org/domains/root/db/coupons.html +coupons + +// courses : Registry Services, LLC +// https://www.iana.org/domains/root/db/courses.html +courses + +// cpa : American Institute of Certified Public Accountants +// https://www.iana.org/domains/root/db/cpa.html +cpa + +// credit : Binky Moon, LLC +// https://www.iana.org/domains/root/db/credit.html +credit + +// creditcard : Binky Moon, LLC +// https://www.iana.org/domains/root/db/creditcard.html +creditcard + +// creditunion : DotCooperation LLC +// https://www.iana.org/domains/root/db/creditunion.html +creditunion + +// cricket : dot Cricket Limited +// https://www.iana.org/domains/root/db/cricket.html +cricket + +// crown : Crown Equipment Corporation +// https://www.iana.org/domains/root/db/crown.html +crown + +// crs : Federated Co-operatives Limited +// https://www.iana.org/domains/root/db/crs.html +crs + +// cruise : Viking River Cruises (Bermuda) Ltd. +// https://www.iana.org/domains/root/db/cruise.html +cruise + +// cruises : Binky Moon, LLC +// https://www.iana.org/domains/root/db/cruises.html +cruises + +// cuisinella : SCHMIDT GROUPE S.A.S. +// https://www.iana.org/domains/root/db/cuisinella.html +cuisinella + +// cymru : Nominet UK +// https://www.iana.org/domains/root/db/cymru.html +cymru + +// cyou : ShortDot SA +// https://www.iana.org/domains/root/db/cyou.html +cyou + +// dad : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/dad.html +dad + +// dance : Dog Beach, LLC +// https://www.iana.org/domains/root/db/dance.html +dance + +// data : Dish DBS Corporation +// https://www.iana.org/domains/root/db/data.html +data + +// date : dot Date Limited +// https://www.iana.org/domains/root/db/date.html +date + +// dating : Binky Moon, LLC +// https://www.iana.org/domains/root/db/dating.html +dating + +// datsun : NISSAN MOTOR CO., LTD. 
+// https://www.iana.org/domains/root/db/datsun.html +datsun + +// day : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/day.html +day + +// dclk : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/dclk.html +dclk + +// dds : Registry Services, LLC +// https://www.iana.org/domains/root/db/dds.html +dds + +// deal : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/deal.html +deal + +// dealer : Intercap Registry Inc. +// https://www.iana.org/domains/root/db/dealer.html +dealer + +// deals : Binky Moon, LLC +// https://www.iana.org/domains/root/db/deals.html +deals + +// degree : Dog Beach, LLC +// https://www.iana.org/domains/root/db/degree.html +degree + +// delivery : Binky Moon, LLC +// https://www.iana.org/domains/root/db/delivery.html +delivery + +// dell : Dell Inc. +// https://www.iana.org/domains/root/db/dell.html +dell + +// deloitte : Deloitte Touche Tohmatsu +// https://www.iana.org/domains/root/db/deloitte.html +deloitte + +// delta : Delta Air Lines, Inc. +// https://www.iana.org/domains/root/db/delta.html +delta + +// democrat : Dog Beach, LLC +// https://www.iana.org/domains/root/db/democrat.html +democrat + +// dental : Binky Moon, LLC +// https://www.iana.org/domains/root/db/dental.html +dental + +// dentist : Dog Beach, LLC +// https://www.iana.org/domains/root/db/dentist.html +dentist + +// desi +// https://www.iana.org/domains/root/db/desi.html +desi + +// design : Registry Services, LLC +// https://www.iana.org/domains/root/db/design.html +design + +// dev : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/dev.html +dev + +// dhl : Deutsche Post AG +// https://www.iana.org/domains/root/db/dhl.html +dhl + +// diamonds : Binky Moon, LLC +// https://www.iana.org/domains/root/db/diamonds.html +diamonds + +// diet : XYZ.COM LLC +// https://www.iana.org/domains/root/db/diet.html +diet + +// digital : Binky Moon, LLC +// https://www.iana.org/domains/root/db/digital.html +digital + +// direct : Binky Moon, LLC +// https://www.iana.org/domains/root/db/direct.html +direct + +// directory : Binky Moon, LLC +// https://www.iana.org/domains/root/db/directory.html +directory + +// discount : Binky Moon, LLC +// https://www.iana.org/domains/root/db/discount.html +discount + +// discover : Discover Financial Services +// https://www.iana.org/domains/root/db/discover.html +discover + +// dish : Dish DBS Corporation +// https://www.iana.org/domains/root/db/dish.html +dish + +// diy : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/diy.html +diy + +// dnp : Dai Nippon Printing Co., Ltd. +// https://www.iana.org/domains/root/db/dnp.html +dnp + +// docs : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/docs.html +docs + +// doctor : Binky Moon, LLC +// https://www.iana.org/domains/root/db/doctor.html +doctor + +// dog : Binky Moon, LLC +// https://www.iana.org/domains/root/db/dog.html +dog + +// domains : Binky Moon, LLC +// https://www.iana.org/domains/root/db/domains.html +domains + +// dot : Dish DBS Corporation +// https://www.iana.org/domains/root/db/dot.html +dot + +// download : dot Support Limited +// https://www.iana.org/domains/root/db/download.html +download + +// drive : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/drive.html +drive + +// dtv : Dish DBS Corporation +// https://www.iana.org/domains/root/db/dtv.html +dtv + +// dubai : Dubai Smart Government Department +// https://www.iana.org/domains/root/db/dubai.html +dubai + +// dunlop : The Goodyear Tire & Rubber Company +// https://www.iana.org/domains/root/db/dunlop.html +dunlop + +// dupont : DuPont Specialty Products USA, LLC +// https://www.iana.org/domains/root/db/dupont.html +dupont + +// durban : ZA Central Registry NPC trading as ZA Central Registry +// https://www.iana.org/domains/root/db/durban.html +durban + +// dvag : Deutsche Vermögensberatung Aktiengesellschaft DVAG +// https://www.iana.org/domains/root/db/dvag.html +dvag + +// dvr : DISH Technologies L.L.C. +// https://www.iana.org/domains/root/db/dvr.html +dvr + +// earth : Interlink Systems Innovation Institute K.K. +// https://www.iana.org/domains/root/db/earth.html +earth + +// eat : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/eat.html +eat + +// eco : Big Room Inc. +// https://www.iana.org/domains/root/db/eco.html +eco + +// edeka : EDEKA Verband kaufmännischer Genossenschaften e.V. 
+// https://www.iana.org/domains/root/db/edeka.html +edeka + +// education : Binky Moon, LLC +// https://www.iana.org/domains/root/db/education.html +education + +// email : Binky Moon, LLC +// https://www.iana.org/domains/root/db/email.html +email + +// emerck : Merck KGaA +// https://www.iana.org/domains/root/db/emerck.html +emerck + +// energy : Binky Moon, LLC +// https://www.iana.org/domains/root/db/energy.html +energy + +// engineer : Dog Beach, LLC +// https://www.iana.org/domains/root/db/engineer.html +engineer + +// engineering : Binky Moon, LLC +// https://www.iana.org/domains/root/db/engineering.html +engineering + +// enterprises : Binky Moon, LLC +// https://www.iana.org/domains/root/db/enterprises.html +enterprises + +// epson : Seiko Epson Corporation +// https://www.iana.org/domains/root/db/epson.html +epson + +// equipment : Binky Moon, LLC +// https://www.iana.org/domains/root/db/equipment.html +equipment + +// ericsson : Telefonaktiebolaget L M Ericsson +// https://www.iana.org/domains/root/db/ericsson.html +ericsson + +// erni : ERNI Group Holding AG +// https://www.iana.org/domains/root/db/erni.html +erni + +// esq : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/esq.html +esq + +// estate : Binky Moon, LLC +// https://www.iana.org/domains/root/db/estate.html +estate + +// eurovision : European Broadcasting Union (EBU) +// https://www.iana.org/domains/root/db/eurovision.html +eurovision + +// eus : Puntueus Fundazioa +// https://www.iana.org/domains/root/db/eus.html +eus + +// events : Binky Moon, LLC +// https://www.iana.org/domains/root/db/events.html +events + +// exchange : Binky Moon, LLC +// https://www.iana.org/domains/root/db/exchange.html +exchange + +// expert : Binky Moon, LLC +// https://www.iana.org/domains/root/db/expert.html +expert + +// exposed : Binky Moon, LLC +// https://www.iana.org/domains/root/db/exposed.html +exposed + +// express : Binky Moon, LLC +// https://www.iana.org/domains/root/db/express.html +express + +// extraspace : Extra Space Storage LLC +// https://www.iana.org/domains/root/db/extraspace.html +extraspace + +// fage : Fage International S.A. +// https://www.iana.org/domains/root/db/fage.html +fage + +// fail : Binky Moon, LLC +// https://www.iana.org/domains/root/db/fail.html +fail + +// fairwinds : FairWinds Partners, LLC +// https://www.iana.org/domains/root/db/fairwinds.html +fairwinds + +// faith : dot Faith Limited +// https://www.iana.org/domains/root/db/faith.html +faith + +// family : Dog Beach, LLC +// https://www.iana.org/domains/root/db/family.html +family + +// fan : Dog Beach, LLC +// https://www.iana.org/domains/root/db/fan.html +fan + +// fans : ZDNS International Limited +// https://www.iana.org/domains/root/db/fans.html +fans + +// farm : Binky Moon, LLC +// https://www.iana.org/domains/root/db/farm.html +farm + +// farmers : Farmers Insurance Exchange +// https://www.iana.org/domains/root/db/farmers.html +farmers + +// fashion : Registry Services, LLC +// https://www.iana.org/domains/root/db/fashion.html +fashion + +// fast : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/fast.html +fast + +// fedex : Federal Express Corporation +// https://www.iana.org/domains/root/db/fedex.html +fedex + +// feedback : Top Level Spectrum, Inc. +// https://www.iana.org/domains/root/db/feedback.html +feedback + +// ferrari : Fiat Chrysler Automobiles N.V. +// https://www.iana.org/domains/root/db/ferrari.html +ferrari + +// ferrero : Ferrero Trading Lux S.A. +// https://www.iana.org/domains/root/db/ferrero.html +ferrero + +// fidelity : Fidelity Brokerage Services LLC +// https://www.iana.org/domains/root/db/fidelity.html +fidelity + +// fido : Rogers Communications Canada Inc. +// https://www.iana.org/domains/root/db/fido.html +fido + +// film : Motion Picture Domain Registry Pty Ltd +// https://www.iana.org/domains/root/db/film.html +film + +// final : Núcleo de Informação e Coordenação do Ponto BR - NIC.br +// https://www.iana.org/domains/root/db/final.html +final + +// finance : Binky Moon, LLC +// https://www.iana.org/domains/root/db/finance.html +finance + +// financial : Binky Moon, LLC +// https://www.iana.org/domains/root/db/financial.html +financial + +// fire : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/fire.html +fire + +// firestone : Bridgestone Licensing Services, Inc +// https://www.iana.org/domains/root/db/firestone.html +firestone + +// firmdale : Firmdale Holdings Limited +// https://www.iana.org/domains/root/db/firmdale.html +firmdale + +// fish : Binky Moon, LLC +// https://www.iana.org/domains/root/db/fish.html +fish + +// fishing : Registry Services, LLC +// https://www.iana.org/domains/root/db/fishing.html +fishing + +// fit : Registry Services, LLC +// https://www.iana.org/domains/root/db/fit.html +fit + +// fitness : Binky Moon, LLC +// https://www.iana.org/domains/root/db/fitness.html +fitness + +// flickr : Flickr, Inc. 
+// https://www.iana.org/domains/root/db/flickr.html +flickr + +// flights : Binky Moon, LLC +// https://www.iana.org/domains/root/db/flights.html +flights + +// flir : FLIR Systems, Inc. +// https://www.iana.org/domains/root/db/flir.html +flir + +// florist : Binky Moon, LLC +// https://www.iana.org/domains/root/db/florist.html +florist + +// flowers : XYZ.COM LLC +// https://www.iana.org/domains/root/db/flowers.html +flowers + +// fly : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/fly.html +fly + +// foo : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/foo.html +foo + +// food : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/food.html +food + +// football : Binky Moon, LLC +// https://www.iana.org/domains/root/db/football.html +football + +// ford : Ford Motor Company +// https://www.iana.org/domains/root/db/ford.html +ford + +// forex : Dog Beach, LLC +// https://www.iana.org/domains/root/db/forex.html +forex + +// forsale : Dog Beach, LLC +// https://www.iana.org/domains/root/db/forsale.html +forsale + +// forum : Waterford Limited +// https://www.iana.org/domains/root/db/forum.html +forum + +// foundation : Public Interest Registry +// https://www.iana.org/domains/root/db/foundation.html +foundation + +// fox : FOX Registry, LLC +// https://www.iana.org/domains/root/db/fox.html +fox + +// free : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/free.html +free + +// fresenius : Fresenius Immobilien-Verwaltungs-GmbH +// https://www.iana.org/domains/root/db/fresenius.html +fresenius + +// frl : FRLregistry B.V. 
+// https://www.iana.org/domains/root/db/frl.html +frl + +// frogans : OP3FT +// https://www.iana.org/domains/root/db/frogans.html +frogans + +// frontier : Frontier Communications Corporation +// https://www.iana.org/domains/root/db/frontier.html +frontier + +// ftr : Frontier Communications Corporation +// https://www.iana.org/domains/root/db/ftr.html +ftr + +// fujitsu : Fujitsu Limited +// https://www.iana.org/domains/root/db/fujitsu.html +fujitsu + +// fun : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/fun.html +fun + +// fund : Binky Moon, LLC +// https://www.iana.org/domains/root/db/fund.html +fund + +// furniture : Binky Moon, LLC +// https://www.iana.org/domains/root/db/furniture.html +furniture + +// futbol : Dog Beach, LLC +// https://www.iana.org/domains/root/db/futbol.html +futbol + +// fyi : Binky Moon, LLC +// https://www.iana.org/domains/root/db/fyi.html +fyi + +// gal : Asociación puntoGAL +// https://www.iana.org/domains/root/db/gal.html +gal + +// gallery : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gallery.html +gallery + +// gallo : Gallo Vineyards, Inc. +// https://www.iana.org/domains/root/db/gallo.html +gallo + +// gallup : Gallup, Inc. +// https://www.iana.org/domains/root/db/gallup.html +gallup + +// game : XYZ.COM LLC +// https://www.iana.org/domains/root/db/game.html +game + +// games : Dog Beach, LLC +// https://www.iana.org/domains/root/db/games.html +games + +// gap : The Gap, Inc. +// https://www.iana.org/domains/root/db/gap.html +gap + +// garden : Registry Services, LLC +// https://www.iana.org/domains/root/db/garden.html +garden + +// gay : Registry Services, LLC +// https://www.iana.org/domains/root/db/gay.html +gay + +// gbiz : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/gbiz.html +gbiz + +// gdn : Joint Stock Company "Navigation-information systems" +// https://www.iana.org/domains/root/db/gdn.html +gdn + +// gea : GEA Group Aktiengesellschaft +// https://www.iana.org/domains/root/db/gea.html +gea + +// gent : Easyhost BV +// https://www.iana.org/domains/root/db/gent.html +gent + +// genting : Resorts World Inc Pte. Ltd. +// https://www.iana.org/domains/root/db/genting.html +genting + +// george : Wal-Mart Stores, Inc. +// https://www.iana.org/domains/root/db/george.html +george + +// ggee : GMO Internet, Inc. +// https://www.iana.org/domains/root/db/ggee.html +ggee + +// gift : DotGift, LLC +// https://www.iana.org/domains/root/db/gift.html +gift + +// gifts : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gifts.html +gifts + +// gives : Public Interest Registry +// https://www.iana.org/domains/root/db/gives.html +gives + +// giving : Public Interest Registry +// https://www.iana.org/domains/root/db/giving.html +giving + +// glass : Binky Moon, LLC +// https://www.iana.org/domains/root/db/glass.html +glass + +// gle : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/gle.html +gle + +// global : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/global.html +global + +// globo : Globo Comunicação e Participações S.A +// https://www.iana.org/domains/root/db/globo.html +globo + +// gmail : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/gmail.html +gmail + +// gmbh : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gmbh.html +gmbh + +// gmo : GMO Internet, Inc. 
+// https://www.iana.org/domains/root/db/gmo.html +gmo + +// gmx : 1&1 Mail & Media GmbH +// https://www.iana.org/domains/root/db/gmx.html +gmx + +// godaddy : Go Daddy East, LLC +// https://www.iana.org/domains/root/db/godaddy.html +godaddy + +// gold : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gold.html +gold + +// goldpoint : YODOBASHI CAMERA CO.,LTD. +// https://www.iana.org/domains/root/db/goldpoint.html +goldpoint + +// golf : Binky Moon, LLC +// https://www.iana.org/domains/root/db/golf.html +golf + +// goo : NTT DOCOMO, INC. +// https://www.iana.org/domains/root/db/goo.html +goo + +// goodyear : The Goodyear Tire & Rubber Company +// https://www.iana.org/domains/root/db/goodyear.html +goodyear + +// goog : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/goog.html +goog + +// google : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/google.html +google + +// gop : Republican State Leadership Committee, Inc. +// https://www.iana.org/domains/root/db/gop.html +gop + +// got : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/got.html +got + +// grainger : Grainger Registry Services, LLC +// https://www.iana.org/domains/root/db/grainger.html +grainger + +// graphics : Binky Moon, LLC +// https://www.iana.org/domains/root/db/graphics.html +graphics + +// gratis : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gratis.html +gratis + +// green : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/green.html +green + +// gripe : Binky Moon, LLC +// https://www.iana.org/domains/root/db/gripe.html +gripe + +// grocery : Wal-Mart Stores, Inc. +// https://www.iana.org/domains/root/db/grocery.html +grocery + +// group : Binky Moon, LLC +// https://www.iana.org/domains/root/db/group.html +group + +// gucci : Guccio Gucci S.p.a. +// https://www.iana.org/domains/root/db/gucci.html +gucci + +// guge : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/guge.html +guge + +// guide : Binky Moon, LLC +// https://www.iana.org/domains/root/db/guide.html +guide + +// guitars : XYZ.COM LLC +// https://www.iana.org/domains/root/db/guitars.html +guitars + +// guru : Binky Moon, LLC +// https://www.iana.org/domains/root/db/guru.html +guru + +// hair : XYZ.COM LLC +// https://www.iana.org/domains/root/db/hair.html +hair + +// hamburg : Hamburg Top-Level-Domain GmbH +// https://www.iana.org/domains/root/db/hamburg.html +hamburg + +// hangout : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/hangout.html +hangout + +// haus : Dog Beach, LLC +// https://www.iana.org/domains/root/db/haus.html +haus + +// hbo : HBO Registry Services, Inc. +// https://www.iana.org/domains/root/db/hbo.html +hbo + +// hdfc : HDFC BANK LIMITED +// https://www.iana.org/domains/root/db/hdfc.html +hdfc + +// hdfcbank : HDFC BANK LIMITED +// https://www.iana.org/domains/root/db/hdfcbank.html +hdfcbank + +// health : Registry Services, LLC +// https://www.iana.org/domains/root/db/health.html +health + +// healthcare : Binky Moon, LLC +// https://www.iana.org/domains/root/db/healthcare.html +healthcare + +// help : Innovation service Limited +// https://www.iana.org/domains/root/db/help.html +help + +// helsinki : City of Helsinki +// https://www.iana.org/domains/root/db/helsinki.html +helsinki + +// here : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/here.html +here + +// hermes : HERMES INTERNATIONAL +// https://www.iana.org/domains/root/db/hermes.html +hermes + +// hiphop : Dot Hip Hop, LLC +// https://www.iana.org/domains/root/db/hiphop.html +hiphop + +// hisamitsu : Hisamitsu Pharmaceutical Co.,Inc. +// https://www.iana.org/domains/root/db/hisamitsu.html +hisamitsu + +// hitachi : Hitachi, Ltd. 
+// https://www.iana.org/domains/root/db/hitachi.html +hitachi + +// hiv : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/hiv.html +hiv + +// hkt : PCCW-HKT DataCom Services Limited +// https://www.iana.org/domains/root/db/hkt.html +hkt + +// hockey : Binky Moon, LLC +// https://www.iana.org/domains/root/db/hockey.html +hockey + +// holdings : Binky Moon, LLC +// https://www.iana.org/domains/root/db/holdings.html +holdings + +// holiday : Binky Moon, LLC +// https://www.iana.org/domains/root/db/holiday.html +holiday + +// homedepot : Home Depot Product Authority, LLC +// https://www.iana.org/domains/root/db/homedepot.html +homedepot + +// homegoods : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/homegoods.html +homegoods + +// homes : XYZ.COM LLC +// https://www.iana.org/domains/root/db/homes.html +homes + +// homesense : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/homesense.html +homesense + +// honda : Honda Motor Co., Ltd. +// https://www.iana.org/domains/root/db/honda.html +honda + +// horse : Registry Services, LLC +// https://www.iana.org/domains/root/db/horse.html +horse + +// hospital : Binky Moon, LLC +// https://www.iana.org/domains/root/db/hospital.html +hospital + +// host : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/host.html +host + +// hosting : XYZ.COM LLC +// https://www.iana.org/domains/root/db/hosting.html +hosting + +// hot : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/hot.html +hot + +// hotel : HOTEL Top-Level-Domain S.a.r.l +// https://www.iana.org/domains/root/db/hotel.html +hotel + +// hotels : Booking.com B.V. +// https://www.iana.org/domains/root/db/hotels.html +hotels + +// hotmail : Microsoft Corporation +// https://www.iana.org/domains/root/db/hotmail.html +hotmail + +// house : Binky Moon, LLC +// https://www.iana.org/domains/root/db/house.html +house + +// how : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/how.html +how + +// hsbc : HSBC Global Services (UK) Limited +// https://www.iana.org/domains/root/db/hsbc.html +hsbc + +// hughes : Hughes Satellite Systems Corporation +// https://www.iana.org/domains/root/db/hughes.html +hughes + +// hyatt : Hyatt GTLD, L.L.C. +// https://www.iana.org/domains/root/db/hyatt.html +hyatt + +// hyundai : Hyundai Motor Company +// https://www.iana.org/domains/root/db/hyundai.html +hyundai + +// ibm : International Business Machines Corporation +// https://www.iana.org/domains/root/db/ibm.html +ibm + +// icbc : Industrial and Commercial Bank of China Limited +// https://www.iana.org/domains/root/db/icbc.html +icbc + +// ice : IntercontinentalExchange, Inc. +// https://www.iana.org/domains/root/db/ice.html +ice + +// icu : ShortDot SA +// https://www.iana.org/domains/root/db/icu.html +icu + +// ieee : IEEE Global LLC +// https://www.iana.org/domains/root/db/ieee.html +ieee + +// ifm : ifm electronic gmbh +// https://www.iana.org/domains/root/db/ifm.html +ifm + +// ikano : Ikano S.A. +// https://www.iana.org/domains/root/db/ikano.html +ikano + +// imamat : Fondation Aga Khan (Aga Khan Foundation) +// https://www.iana.org/domains/root/db/imamat.html +imamat + +// imdb : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/imdb.html +imdb + +// immo : Binky Moon, LLC +// https://www.iana.org/domains/root/db/immo.html +immo + +// immobilien : Dog Beach, LLC +// https://www.iana.org/domains/root/db/immobilien.html +immobilien + +// inc : Intercap Registry Inc. +// https://www.iana.org/domains/root/db/inc.html +inc + +// industries : Binky Moon, LLC +// https://www.iana.org/domains/root/db/industries.html +industries + +// infiniti : NISSAN MOTOR CO., LTD. +// https://www.iana.org/domains/root/db/infiniti.html +infiniti + +// ing : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/ing.html +ing + +// ink : Registry Services, LLC +// https://www.iana.org/domains/root/db/ink.html +ink + +// institute : Binky Moon, LLC +// https://www.iana.org/domains/root/db/institute.html +institute + +// insurance : fTLD Registry Services LLC +// https://www.iana.org/domains/root/db/insurance.html +insurance + +// insure : Binky Moon, LLC +// https://www.iana.org/domains/root/db/insure.html +insure + +// international : Binky Moon, LLC +// https://www.iana.org/domains/root/db/international.html +international + +// intuit : Intuit Administrative Services, Inc. +// https://www.iana.org/domains/root/db/intuit.html +intuit + +// investments : Binky Moon, LLC +// https://www.iana.org/domains/root/db/investments.html +investments + +// ipiranga : Ipiranga Produtos de Petroleo S.A. +// https://www.iana.org/domains/root/db/ipiranga.html +ipiranga + +// irish : Binky Moon, LLC +// https://www.iana.org/domains/root/db/irish.html +irish + +// ismaili : Fondation Aga Khan (Aga Khan Foundation) +// https://www.iana.org/domains/root/db/ismaili.html +ismaili + +// ist : Istanbul Metropolitan Municipality +// https://www.iana.org/domains/root/db/ist.html +ist + +// istanbul : Istanbul Metropolitan Municipality +// https://www.iana.org/domains/root/db/istanbul.html +istanbul + +// itau : Itau Unibanco Holding S.A. +// https://www.iana.org/domains/root/db/itau.html +itau + +// itv : ITV Services Limited +// https://www.iana.org/domains/root/db/itv.html +itv + +// jaguar : Jaguar Land Rover Ltd +// https://www.iana.org/domains/root/db/jaguar.html +jaguar + +// java : Oracle Corporation +// https://www.iana.org/domains/root/db/java.html +java + +// jcb : JCB Co., Ltd. +// https://www.iana.org/domains/root/db/jcb.html +jcb + +// jeep : FCA US LLC. 
+// https://www.iana.org/domains/root/db/jeep.html +jeep + +// jetzt : Binky Moon, LLC +// https://www.iana.org/domains/root/db/jetzt.html +jetzt + +// jewelry : Binky Moon, LLC +// https://www.iana.org/domains/root/db/jewelry.html +jewelry + +// jio : Reliance Industries Limited +// https://www.iana.org/domains/root/db/jio.html +jio + +// jll : Jones Lang LaSalle Incorporated +// https://www.iana.org/domains/root/db/jll.html +jll + +// jmp : Matrix IP LLC +// https://www.iana.org/domains/root/db/jmp.html +jmp + +// jnj : Johnson & Johnson Services, Inc. +// https://www.iana.org/domains/root/db/jnj.html +jnj + +// joburg : ZA Central Registry NPC trading as ZA Central Registry +// https://www.iana.org/domains/root/db/joburg.html +joburg + +// jot : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/jot.html +jot + +// joy : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/joy.html +joy + +// jpmorgan : JPMorgan Chase Bank, National Association +// https://www.iana.org/domains/root/db/jpmorgan.html +jpmorgan + +// jprs : Japan Registry Services Co., Ltd. +// https://www.iana.org/domains/root/db/jprs.html +jprs + +// juegos : Dog Beach, LLC +// https://www.iana.org/domains/root/db/juegos.html +juegos + +// juniper : JUNIPER NETWORKS, INC. +// https://www.iana.org/domains/root/db/juniper.html +juniper + +// kaufen : Dog Beach, LLC +// https://www.iana.org/domains/root/db/kaufen.html +kaufen + +// kddi : KDDI CORPORATION +// https://www.iana.org/domains/root/db/kddi.html +kddi + +// kerryhotels : Kerry Trading Co. Limited +// https://www.iana.org/domains/root/db/kerryhotels.html +kerryhotels + +// kerryproperties : Kerry Trading Co. 
Limited +// https://www.iana.org/domains/root/db/kerryproperties.html +kerryproperties + +// kfh : Kuwait Finance House +// https://www.iana.org/domains/root/db/kfh.html +kfh + +// kia : KIA MOTORS CORPORATION +// https://www.iana.org/domains/root/db/kia.html +kia + +// kids : DotKids Foundation Limited +// https://www.iana.org/domains/root/db/kids.html +kids + +// kim : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/kim.html +kim + +// kindle : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/kindle.html +kindle + +// kitchen : Binky Moon, LLC +// https://www.iana.org/domains/root/db/kitchen.html +kitchen + +// kiwi : DOT KIWI LIMITED +// https://www.iana.org/domains/root/db/kiwi.html +kiwi + +// koeln : dotKoeln GmbH +// https://www.iana.org/domains/root/db/koeln.html +koeln + +// komatsu : Komatsu Ltd. +// https://www.iana.org/domains/root/db/komatsu.html +komatsu + +// kosher : Kosher Marketing Assets LLC +// https://www.iana.org/domains/root/db/kosher.html +kosher + +// kpmg : KPMG International Cooperative (KPMG International Genossenschaft) +// https://www.iana.org/domains/root/db/kpmg.html +kpmg + +// kpn : Koninklijke KPN N.V. +// https://www.iana.org/domains/root/db/kpn.html +kpn + +// krd : KRG Department of Information Technology +// https://www.iana.org/domains/root/db/krd.html +krd + +// kred : KredTLD Pty Ltd +// https://www.iana.org/domains/root/db/kred.html +kred + +// kuokgroup : Kerry Trading Co. Limited +// https://www.iana.org/domains/root/db/kuokgroup.html +kuokgroup + +// kyoto : Academic Institution: Kyoto Jyoho Gakuen +// https://www.iana.org/domains/root/db/kyoto.html +kyoto + +// lacaixa : Fundación Bancaria Caixa d’Estalvis i Pensions de Barcelona, “la Caixa” +// https://www.iana.org/domains/root/db/lacaixa.html +lacaixa + +// lamborghini : Automobili Lamborghini S.p.A. +// https://www.iana.org/domains/root/db/lamborghini.html +lamborghini + +// lamer : The Estée Lauder Companies Inc. 
+// https://www.iana.org/domains/root/db/lamer.html +lamer + +// land : Binky Moon, LLC +// https://www.iana.org/domains/root/db/land.html +land + +// landrover : Jaguar Land Rover Ltd +// https://www.iana.org/domains/root/db/landrover.html +landrover + +// lanxess : LANXESS Corporation +// https://www.iana.org/domains/root/db/lanxess.html +lanxess + +// lasalle : Jones Lang LaSalle Incorporated +// https://www.iana.org/domains/root/db/lasalle.html +lasalle + +// lat : XYZ.COM LLC +// https://www.iana.org/domains/root/db/lat.html +lat + +// latino : Dish DBS Corporation +// https://www.iana.org/domains/root/db/latino.html +latino + +// latrobe : La Trobe University +// https://www.iana.org/domains/root/db/latrobe.html +latrobe + +// law : Registry Services, LLC +// https://www.iana.org/domains/root/db/law.html +law + +// lawyer : Dog Beach, LLC +// https://www.iana.org/domains/root/db/lawyer.html +lawyer + +// lds : IRI Domain Management, LLC +// https://www.iana.org/domains/root/db/lds.html +lds + +// lease : Binky Moon, LLC +// https://www.iana.org/domains/root/db/lease.html +lease + +// leclerc : A.C.D. LEC Association des Centres Distributeurs Edouard Leclerc +// https://www.iana.org/domains/root/db/leclerc.html +leclerc + +// lefrak : LeFrak Organization, Inc. +// https://www.iana.org/domains/root/db/lefrak.html +lefrak + +// legal : Binky Moon, LLC +// https://www.iana.org/domains/root/db/legal.html +legal + +// lego : LEGO Juris A/S +// https://www.iana.org/domains/root/db/lego.html +lego + +// lexus : TOYOTA MOTOR CORPORATION +// https://www.iana.org/domains/root/db/lexus.html +lexus + +// lgbt : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/lgbt.html +lgbt + +// lidl : Schwarz Domains und Services GmbH & Co. 
KG +// https://www.iana.org/domains/root/db/lidl.html +lidl + +// life : Binky Moon, LLC +// https://www.iana.org/domains/root/db/life.html +life + +// lifeinsurance : American Council of Life Insurers +// https://www.iana.org/domains/root/db/lifeinsurance.html +lifeinsurance + +// lifestyle : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/lifestyle.html +lifestyle + +// lighting : Binky Moon, LLC +// https://www.iana.org/domains/root/db/lighting.html +lighting + +// like : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/like.html +like + +// lilly : Eli Lilly and Company +// https://www.iana.org/domains/root/db/lilly.html +lilly + +// limited : Binky Moon, LLC +// https://www.iana.org/domains/root/db/limited.html +limited + +// limo : Binky Moon, LLC +// https://www.iana.org/domains/root/db/limo.html +limo + +// lincoln : Ford Motor Company +// https://www.iana.org/domains/root/db/lincoln.html +lincoln + +// link : Nova Registry Ltd +// https://www.iana.org/domains/root/db/link.html +link + +// live : Dog Beach, LLC +// https://www.iana.org/domains/root/db/live.html +live + +// living : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/living.html +living + +// llc : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/llc.html +llc + +// llp : Intercap Registry Inc. 
+// https://www.iana.org/domains/root/db/llp.html +llp + +// loan : dot Loan Limited +// https://www.iana.org/domains/root/db/loan.html +loan + +// loans : Binky Moon, LLC +// https://www.iana.org/domains/root/db/loans.html +loans + +// locker : Orange Domains LLC +// https://www.iana.org/domains/root/db/locker.html +locker + +// locus : Locus Analytics LLC +// https://www.iana.org/domains/root/db/locus.html +locus + +// lol : XYZ.COM LLC +// https://www.iana.org/domains/root/db/lol.html +lol + +// london : Dot London Domains Limited +// https://www.iana.org/domains/root/db/london.html +london + +// lotte : Lotte Holdings Co., Ltd. +// https://www.iana.org/domains/root/db/lotte.html +lotte + +// lotto : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/lotto.html +lotto + +// love : Waterford Limited +// https://www.iana.org/domains/root/db/love.html +love + +// lpl : LPL Holdings, Inc. +// https://www.iana.org/domains/root/db/lpl.html +lpl + +// lplfinancial : LPL Holdings, Inc. +// https://www.iana.org/domains/root/db/lplfinancial.html +lplfinancial + +// ltd : Binky Moon, LLC +// https://www.iana.org/domains/root/db/ltd.html +ltd + +// ltda : InterNetX, Corp +// https://www.iana.org/domains/root/db/ltda.html +ltda + +// lundbeck : H. 
Lundbeck A/S +// https://www.iana.org/domains/root/db/lundbeck.html +lundbeck + +// luxe : Registry Services, LLC +// https://www.iana.org/domains/root/db/luxe.html +luxe + +// luxury : Luxury Partners, LLC +// https://www.iana.org/domains/root/db/luxury.html +luxury + +// madrid : Comunidad de Madrid +// https://www.iana.org/domains/root/db/madrid.html +madrid + +// maif : Mutuelle Assurance Instituteur France (MAIF) +// https://www.iana.org/domains/root/db/maif.html +maif + +// maison : Binky Moon, LLC +// https://www.iana.org/domains/root/db/maison.html +maison + +// makeup : XYZ.COM LLC +// https://www.iana.org/domains/root/db/makeup.html +makeup + +// man : MAN Truck & Bus SE +// https://www.iana.org/domains/root/db/man.html +man + +// management : Binky Moon, LLC +// https://www.iana.org/domains/root/db/management.html +management + +// mango : PUNTO FA S.L. +// https://www.iana.org/domains/root/db/mango.html +mango + +// map : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/map.html +map + +// market : Dog Beach, LLC +// https://www.iana.org/domains/root/db/market.html +market + +// marketing : Binky Moon, LLC +// https://www.iana.org/domains/root/db/marketing.html +marketing + +// markets : Dog Beach, LLC +// https://www.iana.org/domains/root/db/markets.html +markets + +// marriott : Marriott Worldwide Corporation +// https://www.iana.org/domains/root/db/marriott.html +marriott + +// marshalls : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/marshalls.html +marshalls + +// mattel : Mattel IT Services, Inc. +// https://www.iana.org/domains/root/db/mattel.html +mattel + +// mba : Binky Moon, LLC +// https://www.iana.org/domains/root/db/mba.html +mba + +// mckinsey : McKinsey Holdings, Inc. 
+// https://www.iana.org/domains/root/db/mckinsey.html +mckinsey + +// med : Medistry LLC +// https://www.iana.org/domains/root/db/med.html +med + +// media : Binky Moon, LLC +// https://www.iana.org/domains/root/db/media.html +media + +// meet : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/meet.html +meet + +// melbourne : The Crown in right of the State of Victoria, represented by its Department of State Development, Business and Innovation +// https://www.iana.org/domains/root/db/melbourne.html +melbourne + +// meme : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/meme.html +meme + +// memorial : Dog Beach, LLC +// https://www.iana.org/domains/root/db/memorial.html +memorial + +// men : Exclusive Registry Limited +// https://www.iana.org/domains/root/db/men.html +men + +// menu : Dot Menu Registry, LLC +// https://www.iana.org/domains/root/db/menu.html +menu + +// merck : Merck Registry Holdings, Inc. +// https://www.iana.org/domains/root/db/merck.html +merck + +// merckmsd : MSD Registry Holdings, Inc. +// https://www.iana.org/domains/root/db/merckmsd.html +merckmsd + +// miami : Registry Services, LLC +// https://www.iana.org/domains/root/db/miami.html +miami + +// microsoft : Microsoft Corporation +// https://www.iana.org/domains/root/db/microsoft.html +microsoft + +// mini : Bayerische Motoren Werke Aktiengesellschaft +// https://www.iana.org/domains/root/db/mini.html +mini + +// mint : Intuit Administrative Services, Inc. 
+// https://www.iana.org/domains/root/db/mint.html +mint + +// mit : Massachusetts Institute of Technology +// https://www.iana.org/domains/root/db/mit.html +mit + +// mitsubishi : Mitsubishi Corporation +// https://www.iana.org/domains/root/db/mitsubishi.html +mitsubishi + +// mlb : MLB Advanced Media DH, LLC +// https://www.iana.org/domains/root/db/mlb.html +mlb + +// mls : The Canadian Real Estate Association +// https://www.iana.org/domains/root/db/mls.html +mls + +// mma : MMA IARD +// https://www.iana.org/domains/root/db/mma.html +mma + +// mobile : Dish DBS Corporation +// https://www.iana.org/domains/root/db/mobile.html +mobile + +// moda : Dog Beach, LLC +// https://www.iana.org/domains/root/db/moda.html +moda + +// moe : Interlink Systems Innovation Institute K.K. +// https://www.iana.org/domains/root/db/moe.html +moe + +// moi : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/moi.html +moi + +// mom : XYZ.COM LLC +// https://www.iana.org/domains/root/db/mom.html +mom + +// monash : Monash University +// https://www.iana.org/domains/root/db/monash.html +monash + +// money : Binky Moon, LLC +// https://www.iana.org/domains/root/db/money.html +money + +// monster : XYZ.COM LLC +// https://www.iana.org/domains/root/db/monster.html +monster + +// mormon : IRI Domain Management, LLC +// https://www.iana.org/domains/root/db/mormon.html +mormon + +// mortgage : Dog Beach, LLC +// https://www.iana.org/domains/root/db/mortgage.html +mortgage + +// moscow : Foundation for Assistance for Internet Technologies and Infrastructure Development (FAITID) +// https://www.iana.org/domains/root/db/moscow.html +moscow + +// moto : Motorola Trademark Holdings, LLC +// https://www.iana.org/domains/root/db/moto.html +moto + +// motorcycles : XYZ.COM LLC +// https://www.iana.org/domains/root/db/motorcycles.html +motorcycles + +// mov : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/mov.html +mov + +// movie : Binky Moon, LLC +// https://www.iana.org/domains/root/db/movie.html +movie + +// msd : MSD Registry Holdings, Inc. +// https://www.iana.org/domains/root/db/msd.html +msd + +// mtn : MTN Dubai Limited +// https://www.iana.org/domains/root/db/mtn.html +mtn + +// mtr : MTR Corporation Limited +// https://www.iana.org/domains/root/db/mtr.html +mtr + +// music : DotMusic Limited +// https://www.iana.org/domains/root/db/music.html +music + +// nab : National Australia Bank Limited +// https://www.iana.org/domains/root/db/nab.html +nab + +// nagoya : GMO Registry, Inc. +// https://www.iana.org/domains/root/db/nagoya.html +nagoya + +// navy : Dog Beach, LLC +// https://www.iana.org/domains/root/db/navy.html +navy + +// nba : NBA REGISTRY, LLC +// https://www.iana.org/domains/root/db/nba.html +nba + +// nec : NEC Corporation +// https://www.iana.org/domains/root/db/nec.html +nec + +// netbank : COMMONWEALTH BANK OF AUSTRALIA +// https://www.iana.org/domains/root/db/netbank.html +netbank + +// netflix : Netflix, Inc. +// https://www.iana.org/domains/root/db/netflix.html +netflix + +// network : Binky Moon, LLC +// https://www.iana.org/domains/root/db/network.html +network + +// neustar : NeuStar, Inc. +// https://www.iana.org/domains/root/db/neustar.html +neustar + +// new : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/new.html +new + +// news : Dog Beach, LLC +// https://www.iana.org/domains/root/db/news.html +news + +// next : Next plc +// https://www.iana.org/domains/root/db/next.html +next + +// nextdirect : Next plc +// https://www.iana.org/domains/root/db/nextdirect.html +nextdirect + +// nexus : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/nexus.html +nexus + +// nfl : NFL Reg Ops LLC +// https://www.iana.org/domains/root/db/nfl.html +nfl + +// ngo : Public Interest Registry +// https://www.iana.org/domains/root/db/ngo.html +ngo + +// nhk : Japan Broadcasting Corporation (NHK) +// https://www.iana.org/domains/root/db/nhk.html +nhk + +// nico : DWANGO Co., Ltd. +// https://www.iana.org/domains/root/db/nico.html +nico + +// nike : NIKE, Inc. +// https://www.iana.org/domains/root/db/nike.html +nike + +// nikon : NIKON CORPORATION +// https://www.iana.org/domains/root/db/nikon.html +nikon + +// ninja : Dog Beach, LLC +// https://www.iana.org/domains/root/db/ninja.html +ninja + +// nissan : NISSAN MOTOR CO., LTD. +// https://www.iana.org/domains/root/db/nissan.html +nissan + +// nissay : Nippon Life Insurance Company +// https://www.iana.org/domains/root/db/nissay.html +nissay + +// nokia : Nokia Corporation +// https://www.iana.org/domains/root/db/nokia.html +nokia + +// norton : Gen Digital Inc. +// https://www.iana.org/domains/root/db/norton.html +norton + +// now : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/now.html +now + +// nowruz +// https://www.iana.org/domains/root/db/nowruz.html +nowruz + +// nowtv : Starbucks (HK) Limited +// https://www.iana.org/domains/root/db/nowtv.html +nowtv + +// nra : NRA Holdings Company, INC. +// https://www.iana.org/domains/root/db/nra.html +nra + +// nrw : Minds + Machines GmbH +// https://www.iana.org/domains/root/db/nrw.html +nrw + +// ntt : NIPPON TELEGRAPH AND TELEPHONE CORPORATION +// https://www.iana.org/domains/root/db/ntt.html +ntt + +// nyc : The City of New York by and through the New York City Department of Information Technology & Telecommunications +// https://www.iana.org/domains/root/db/nyc.html +nyc + +// obi : OBI Group Holding SE & Co. 
KGaA +// https://www.iana.org/domains/root/db/obi.html +obi + +// observer : Fegistry, LLC +// https://www.iana.org/domains/root/db/observer.html +observer + +// office : Microsoft Corporation +// https://www.iana.org/domains/root/db/office.html +office + +// okinawa : BRregistry, Inc. +// https://www.iana.org/domains/root/db/okinawa.html +okinawa + +// olayan : Competrol (Luxembourg) Sarl +// https://www.iana.org/domains/root/db/olayan.html +olayan + +// olayangroup : Competrol (Luxembourg) Sarl +// https://www.iana.org/domains/root/db/olayangroup.html +olayangroup + +// ollo : Dish DBS Corporation +// https://www.iana.org/domains/root/db/ollo.html +ollo + +// omega : The Swatch Group Ltd +// https://www.iana.org/domains/root/db/omega.html +omega + +// one : One.com A/S +// https://www.iana.org/domains/root/db/one.html +one + +// ong : Public Interest Registry +// https://www.iana.org/domains/root/db/ong.html +ong + +// onl : iRegistry GmbH +// https://www.iana.org/domains/root/db/onl.html +onl + +// online : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/online.html +online + +// ooo : INFIBEAM AVENUES LIMITED +// https://www.iana.org/domains/root/db/ooo.html +ooo + +// open : American Express Travel Related Services Company, Inc. +// https://www.iana.org/domains/root/db/open.html +open + +// oracle : Oracle Corporation +// https://www.iana.org/domains/root/db/oracle.html +oracle + +// orange : Orange Brand Services Limited +// https://www.iana.org/domains/root/db/orange.html +orange + +// organic : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/organic.html +organic + +// origins : The Estée Lauder Companies Inc. +// https://www.iana.org/domains/root/db/origins.html +origins + +// osaka : Osaka Registry Co., Ltd. +// https://www.iana.org/domains/root/db/osaka.html +osaka + +// otsuka : Otsuka Holdings Co., Ltd. 
+// https://www.iana.org/domains/root/db/otsuka.html +otsuka + +// ott : Dish DBS Corporation +// https://www.iana.org/domains/root/db/ott.html +ott + +// ovh : MédiaBC +// https://www.iana.org/domains/root/db/ovh.html +ovh + +// page : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/page.html +page + +// panasonic : Panasonic Holdings Corporation +// https://www.iana.org/domains/root/db/panasonic.html +panasonic + +// paris : City of Paris +// https://www.iana.org/domains/root/db/paris.html +paris + +// pars +// https://www.iana.org/domains/root/db/pars.html +pars + +// partners : Binky Moon, LLC +// https://www.iana.org/domains/root/db/partners.html +partners + +// parts : Binky Moon, LLC +// https://www.iana.org/domains/root/db/parts.html +parts + +// party : Blue Sky Registry Limited +// https://www.iana.org/domains/root/db/party.html +party + +// pay : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/pay.html +pay + +// pccw : PCCW Enterprises Limited +// https://www.iana.org/domains/root/db/pccw.html +pccw + +// pet : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/pet.html +pet + +// pfizer : Pfizer Inc. +// https://www.iana.org/domains/root/db/pfizer.html +pfizer + +// pharmacy : National Association of Boards of Pharmacy +// https://www.iana.org/domains/root/db/pharmacy.html +pharmacy + +// phd : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/phd.html +phd + +// philips : Koninklijke Philips N.V. 
+// https://www.iana.org/domains/root/db/philips.html +philips + +// phone : Dish DBS Corporation +// https://www.iana.org/domains/root/db/phone.html +phone + +// photo : Registry Services, LLC +// https://www.iana.org/domains/root/db/photo.html +photo + +// photography : Binky Moon, LLC +// https://www.iana.org/domains/root/db/photography.html +photography + +// photos : Binky Moon, LLC +// https://www.iana.org/domains/root/db/photos.html +photos + +// physio : PhysBiz Pty Ltd +// https://www.iana.org/domains/root/db/physio.html +physio + +// pics : XYZ.COM LLC +// https://www.iana.org/domains/root/db/pics.html +pics + +// pictet : Banque Pictet & Cie SA +// https://www.iana.org/domains/root/db/pictet.html +pictet + +// pictures : Binky Moon, LLC +// https://www.iana.org/domains/root/db/pictures.html +pictures + +// pid : Top Level Spectrum, Inc. +// https://www.iana.org/domains/root/db/pid.html +pid + +// pin : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/pin.html +pin + +// ping : Ping Registry Provider, Inc. +// https://www.iana.org/domains/root/db/ping.html +ping + +// pink : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/pink.html +pink + +// pioneer : Pioneer Corporation +// https://www.iana.org/domains/root/db/pioneer.html +pioneer + +// pizza : Binky Moon, LLC +// https://www.iana.org/domains/root/db/pizza.html +pizza + +// place : Binky Moon, LLC +// https://www.iana.org/domains/root/db/place.html +place + +// play : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/play.html +play + +// playstation : Sony Interactive Entertainment Inc. 
+// https://www.iana.org/domains/root/db/playstation.html +playstation + +// plumbing : Binky Moon, LLC +// https://www.iana.org/domains/root/db/plumbing.html +plumbing + +// plus : Binky Moon, LLC +// https://www.iana.org/domains/root/db/plus.html +plus + +// pnc : PNC Domain Co., LLC +// https://www.iana.org/domains/root/db/pnc.html +pnc + +// pohl : Deutsche Vermögensberatung Aktiengesellschaft DVAG +// https://www.iana.org/domains/root/db/pohl.html +pohl + +// poker : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/poker.html +poker + +// politie : Politie Nederland +// https://www.iana.org/domains/root/db/politie.html +politie + +// porn : ICM Registry PN LLC +// https://www.iana.org/domains/root/db/porn.html +porn + +// praxi : Praxi S.p.A. +// https://www.iana.org/domains/root/db/praxi.html +praxi + +// press : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/press.html +press + +// prime : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/prime.html +prime + +// prod : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/prod.html +prod + +// productions : Binky Moon, LLC +// https://www.iana.org/domains/root/db/productions.html +productions + +// prof : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/prof.html +prof + +// progressive : Progressive Casualty Insurance Company +// https://www.iana.org/domains/root/db/progressive.html +progressive + +// promo : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/promo.html +promo + +// properties : Binky Moon, LLC +// https://www.iana.org/domains/root/db/properties.html +properties + +// property : Digital Property Infrastructure Limited +// https://www.iana.org/domains/root/db/property.html +property + +// protection : XYZ.COM LLC +// https://www.iana.org/domains/root/db/protection.html +protection + +// pru : Prudential Financial, Inc. 
+// https://www.iana.org/domains/root/db/pru.html +pru + +// prudential : Prudential Financial, Inc. +// https://www.iana.org/domains/root/db/prudential.html +prudential + +// pub : Dog Beach, LLC +// https://www.iana.org/domains/root/db/pub.html +pub + +// pwc : PricewaterhouseCoopers LLP +// https://www.iana.org/domains/root/db/pwc.html +pwc + +// qpon : dotQPON LLC +// https://www.iana.org/domains/root/db/qpon.html +qpon + +// quebec : PointQuébec Inc +// https://www.iana.org/domains/root/db/quebec.html +quebec + +// quest : XYZ.COM LLC +// https://www.iana.org/domains/root/db/quest.html +quest + +// racing : Premier Registry Limited +// https://www.iana.org/domains/root/db/racing.html +racing + +// radio : European Broadcasting Union (EBU) +// https://www.iana.org/domains/root/db/radio.html +radio + +// read : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/read.html +read + +// realestate : dotRealEstate LLC +// https://www.iana.org/domains/root/db/realestate.html +realestate + +// realtor : Real Estate Domains LLC +// https://www.iana.org/domains/root/db/realtor.html +realtor + +// realty : Waterford Limited +// https://www.iana.org/domains/root/db/realty.html +realty + +// recipes : Binky Moon, LLC +// https://www.iana.org/domains/root/db/recipes.html +recipes + +// red : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/red.html +red + +// redstone : Redstone Haute Couture Co., Ltd. +// https://www.iana.org/domains/root/db/redstone.html +redstone + +// redumbrella : Travelers TLD, LLC +// https://www.iana.org/domains/root/db/redumbrella.html +redumbrella + +// rehab : Dog Beach, LLC +// https://www.iana.org/domains/root/db/rehab.html +rehab + +// reise : Binky Moon, LLC +// https://www.iana.org/domains/root/db/reise.html +reise + +// reisen : Binky Moon, LLC +// https://www.iana.org/domains/root/db/reisen.html +reisen + +// reit : National Association of Real Estate Investment Trusts, Inc. 
+// https://www.iana.org/domains/root/db/reit.html +reit + +// reliance : Reliance Industries Limited +// https://www.iana.org/domains/root/db/reliance.html +reliance + +// ren : ZDNS International Limited +// https://www.iana.org/domains/root/db/ren.html +ren + +// rent : XYZ.COM LLC +// https://www.iana.org/domains/root/db/rent.html +rent + +// rentals : Binky Moon, LLC +// https://www.iana.org/domains/root/db/rentals.html +rentals + +// repair : Binky Moon, LLC +// https://www.iana.org/domains/root/db/repair.html +repair + +// report : Binky Moon, LLC +// https://www.iana.org/domains/root/db/report.html +report + +// republican : Dog Beach, LLC +// https://www.iana.org/domains/root/db/republican.html +republican + +// rest : Punto 2012 Sociedad Anonima Promotora de Inversion de Capital Variable +// https://www.iana.org/domains/root/db/rest.html +rest + +// restaurant : Binky Moon, LLC +// https://www.iana.org/domains/root/db/restaurant.html +restaurant + +// review : dot Review Limited +// https://www.iana.org/domains/root/db/review.html +review + +// reviews : Dog Beach, LLC +// https://www.iana.org/domains/root/db/reviews.html +reviews + +// rexroth : Robert Bosch GMBH +// https://www.iana.org/domains/root/db/rexroth.html +rexroth + +// rich : iRegistry GmbH +// https://www.iana.org/domains/root/db/rich.html +rich + +// richardli : Pacific Century Asset Management (HK) Limited +// https://www.iana.org/domains/root/db/richardli.html +richardli + +// ricoh : Ricoh Company, Ltd. 
+// https://www.iana.org/domains/root/db/ricoh.html +ricoh + +// ril : Reliance Industries Limited +// https://www.iana.org/domains/root/db/ril.html +ril + +// rio : Empresa Municipal de Informática SA - IPLANRIO +// https://www.iana.org/domains/root/db/rio.html +rio + +// rip : Dog Beach, LLC +// https://www.iana.org/domains/root/db/rip.html +rip + +// rocks : Dog Beach, LLC +// https://www.iana.org/domains/root/db/rocks.html +rocks + +// rodeo : Registry Services, LLC +// https://www.iana.org/domains/root/db/rodeo.html +rodeo + +// rogers : Rogers Communications Canada Inc. +// https://www.iana.org/domains/root/db/rogers.html +rogers + +// room : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/room.html +room + +// rsvp : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/rsvp.html +rsvp + +// rugby : World Rugby Strategic Developments Limited +// https://www.iana.org/domains/root/db/rugby.html +rugby + +// ruhr : dotSaarland GmbH +// https://www.iana.org/domains/root/db/ruhr.html +ruhr + +// run : Binky Moon, LLC +// https://www.iana.org/domains/root/db/run.html +run + +// rwe : RWE AG +// https://www.iana.org/domains/root/db/rwe.html +rwe + +// ryukyu : BRregistry, Inc. +// https://www.iana.org/domains/root/db/ryukyu.html +ryukyu + +// saarland : dotSaarland GmbH +// https://www.iana.org/domains/root/db/saarland.html +saarland + +// safe : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/safe.html +safe + +// safety : Safety Registry Services, LLC. +// https://www.iana.org/domains/root/db/safety.html +safety + +// sakura : SAKURA Internet Inc. +// https://www.iana.org/domains/root/db/sakura.html +sakura + +// sale : Dog Beach, LLC +// https://www.iana.org/domains/root/db/sale.html +sale + +// salon : Binky Moon, LLC +// https://www.iana.org/domains/root/db/salon.html +salon + +// samsclub : Wal-Mart Stores, Inc. 
+// https://www.iana.org/domains/root/db/samsclub.html +samsclub + +// samsung : SAMSUNG SDS CO., LTD +// https://www.iana.org/domains/root/db/samsung.html +samsung + +// sandvik : Sandvik AB +// https://www.iana.org/domains/root/db/sandvik.html +sandvik + +// sandvikcoromant : Sandvik AB +// https://www.iana.org/domains/root/db/sandvikcoromant.html +sandvikcoromant + +// sanofi : Sanofi +// https://www.iana.org/domains/root/db/sanofi.html +sanofi + +// sap : SAP AG +// https://www.iana.org/domains/root/db/sap.html +sap + +// sarl : Binky Moon, LLC +// https://www.iana.org/domains/root/db/sarl.html +sarl + +// sas : Research IP LLC +// https://www.iana.org/domains/root/db/sas.html +sas + +// save : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/save.html +save + +// saxo : Saxo Bank A/S +// https://www.iana.org/domains/root/db/saxo.html +saxo + +// sbi : STATE BANK OF INDIA +// https://www.iana.org/domains/root/db/sbi.html +sbi + +// sbs : ShortDot SA +// https://www.iana.org/domains/root/db/sbs.html +sbs + +// scb : The Siam Commercial Bank Public Company Limited ("SCB") +// https://www.iana.org/domains/root/db/scb.html +scb + +// schaeffler : Schaeffler Technologies AG & Co. KG +// https://www.iana.org/domains/root/db/schaeffler.html +schaeffler + +// schmidt : SCHMIDT GROUPE S.A.S. +// https://www.iana.org/domains/root/db/schmidt.html +schmidt + +// scholarships : Scholarships.com, LLC +// https://www.iana.org/domains/root/db/scholarships.html +scholarships + +// school : Binky Moon, LLC +// https://www.iana.org/domains/root/db/school.html +school + +// schule : Binky Moon, LLC +// https://www.iana.org/domains/root/db/schule.html +schule + +// schwarz : Schwarz Domains und Services GmbH & Co. 
KG +// https://www.iana.org/domains/root/db/schwarz.html +schwarz + +// science : dot Science Limited +// https://www.iana.org/domains/root/db/science.html +science + +// scot : Dot Scot Registry Limited +// https://www.iana.org/domains/root/db/scot.html +scot + +// search : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/search.html +search + +// seat : SEAT, S.A. (Sociedad Unipersonal) +// https://www.iana.org/domains/root/db/seat.html +seat + +// secure : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/secure.html +secure + +// security : XYZ.COM LLC +// https://www.iana.org/domains/root/db/security.html +security + +// seek : Seek Limited +// https://www.iana.org/domains/root/db/seek.html +seek + +// select : Registry Services, LLC +// https://www.iana.org/domains/root/db/select.html +select + +// sener : Sener Ingeniería y Sistemas, S.A. +// https://www.iana.org/domains/root/db/sener.html +sener + +// services : Binky Moon, LLC +// https://www.iana.org/domains/root/db/services.html +services + +// seven : Seven West Media Ltd +// https://www.iana.org/domains/root/db/seven.html +seven + +// sew : SEW-EURODRIVE GmbH & Co KG +// https://www.iana.org/domains/root/db/sew.html +sew + +// sex : ICM Registry SX LLC +// https://www.iana.org/domains/root/db/sex.html +sex + +// sexy : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/sexy.html +sexy + +// sfr : Societe Francaise du Radiotelephone - SFR +// https://www.iana.org/domains/root/db/sfr.html +sfr + +// shangrila : Shangri‐La International Hotel Management Limited +// https://www.iana.org/domains/root/db/shangrila.html +shangrila + +// sharp : Sharp Corporation +// https://www.iana.org/domains/root/db/sharp.html +sharp + +// shell : Shell Information Technology International Inc +// https://www.iana.org/domains/root/db/shell.html +shell + +// shia +// https://www.iana.org/domains/root/db/shia.html +shia + +// shiksha : Identity Digital Domains 
Limited +// https://www.iana.org/domains/root/db/shiksha.html +shiksha + +// shoes : Binky Moon, LLC +// https://www.iana.org/domains/root/db/shoes.html +shoes + +// shop : GMO Registry, Inc. +// https://www.iana.org/domains/root/db/shop.html +shop + +// shopping : Binky Moon, LLC +// https://www.iana.org/domains/root/db/shopping.html +shopping + +// shouji : Beijing Qihu Keji Co., Ltd. +// https://www.iana.org/domains/root/db/shouji.html +shouji + +// show : Binky Moon, LLC +// https://www.iana.org/domains/root/db/show.html +show + +// silk : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/silk.html +silk + +// sina : Sina Corporation +// https://www.iana.org/domains/root/db/sina.html +sina + +// singles : Binky Moon, LLC +// https://www.iana.org/domains/root/db/singles.html +singles + +// site : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/site.html +site + +// ski : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/ski.html +ski + +// skin : XYZ.COM LLC +// https://www.iana.org/domains/root/db/skin.html +skin + +// sky : Sky UK Limited +// https://www.iana.org/domains/root/db/sky.html +sky + +// skype : Microsoft Corporation +// https://www.iana.org/domains/root/db/skype.html +skype + +// sling : DISH Technologies L.L.C. +// https://www.iana.org/domains/root/db/sling.html +sling + +// smart : Smart Communications, Inc. (SMART) +// https://www.iana.org/domains/root/db/smart.html +smart + +// smile : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/smile.html +smile + +// sncf : Société Nationale SNCF +// https://www.iana.org/domains/root/db/sncf.html +sncf + +// soccer : Binky Moon, LLC +// https://www.iana.org/domains/root/db/soccer.html +soccer + +// social : Dog Beach, LLC +// https://www.iana.org/domains/root/db/social.html +social + +// softbank : SoftBank Group Corp. 
+// https://www.iana.org/domains/root/db/softbank.html +softbank + +// software : Dog Beach, LLC +// https://www.iana.org/domains/root/db/software.html +software + +// sohu : Sohu.com Limited +// https://www.iana.org/domains/root/db/sohu.html +sohu + +// solar : Binky Moon, LLC +// https://www.iana.org/domains/root/db/solar.html +solar + +// solutions : Binky Moon, LLC +// https://www.iana.org/domains/root/db/solutions.html +solutions + +// song : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/song.html +song + +// sony : Sony Corporation +// https://www.iana.org/domains/root/db/sony.html +sony + +// soy : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/soy.html +soy + +// spa : Asia Spa and Wellness Promotion Council Limited +// https://www.iana.org/domains/root/db/spa.html +spa + +// space : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/space.html +space + +// sport : SportAccord +// https://www.iana.org/domains/root/db/sport.html +sport + +// spot : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/spot.html +spot + +// srl : InterNetX, Corp +// https://www.iana.org/domains/root/db/srl.html +srl + +// stada : STADA Arzneimittel AG +// https://www.iana.org/domains/root/db/stada.html +stada + +// staples : Staples, Inc. 
+// https://www.iana.org/domains/root/db/staples.html +staples + +// star : Star India Private Limited +// https://www.iana.org/domains/root/db/star.html +star + +// statebank : STATE BANK OF INDIA +// https://www.iana.org/domains/root/db/statebank.html +statebank + +// statefarm : State Farm Mutual Automobile Insurance Company +// https://www.iana.org/domains/root/db/statefarm.html +statefarm + +// stc : Saudi Telecom Company +// https://www.iana.org/domains/root/db/stc.html +stc + +// stcgroup : Saudi Telecom Company +// https://www.iana.org/domains/root/db/stcgroup.html +stcgroup + +// stockholm : Stockholms kommun +// https://www.iana.org/domains/root/db/stockholm.html +stockholm + +// storage : XYZ.COM LLC +// https://www.iana.org/domains/root/db/storage.html +storage + +// store : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/store.html +store + +// stream : dot Stream Limited +// https://www.iana.org/domains/root/db/stream.html +stream + +// studio : Dog Beach, LLC +// https://www.iana.org/domains/root/db/studio.html +studio + +// study : Registry Services, LLC +// https://www.iana.org/domains/root/db/study.html +study + +// style : Binky Moon, LLC +// https://www.iana.org/domains/root/db/style.html +style + +// sucks : Vox Populi Registry Ltd. 
+// https://www.iana.org/domains/root/db/sucks.html +sucks + +// supplies : Binky Moon, LLC +// https://www.iana.org/domains/root/db/supplies.html +supplies + +// supply : Binky Moon, LLC +// https://www.iana.org/domains/root/db/supply.html +supply + +// support : Binky Moon, LLC +// https://www.iana.org/domains/root/db/support.html +support + +// surf : Registry Services, LLC +// https://www.iana.org/domains/root/db/surf.html +surf + +// surgery : Binky Moon, LLC +// https://www.iana.org/domains/root/db/surgery.html +surgery + +// suzuki : SUZUKI MOTOR CORPORATION +// https://www.iana.org/domains/root/db/suzuki.html +suzuki + +// swatch : The Swatch Group Ltd +// https://www.iana.org/domains/root/db/swatch.html +swatch + +// swiss : Swiss Confederation +// https://www.iana.org/domains/root/db/swiss.html +swiss + +// sydney : State of New South Wales, Department of Premier and Cabinet +// https://www.iana.org/domains/root/db/sydney.html +sydney + +// systems : Binky Moon, LLC +// https://www.iana.org/domains/root/db/systems.html +systems + +// tab : Tabcorp Holdings Limited +// https://www.iana.org/domains/root/db/tab.html +tab + +// taipei : Taipei City Government +// https://www.iana.org/domains/root/db/taipei.html +taipei + +// talk : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/talk.html +talk + +// taobao : Alibaba Group Holding Limited +// https://www.iana.org/domains/root/db/taobao.html +taobao + +// target : Target Domain Holdings, LLC +// https://www.iana.org/domains/root/db/target.html +target + +// tatamotors : Tata Motors Ltd +// https://www.iana.org/domains/root/db/tatamotors.html +tatamotors + +// tatar : Limited Liability Company "Coordination Center of Regional Domain of Tatarstan Republic" +// https://www.iana.org/domains/root/db/tatar.html +tatar + +// tattoo : Registry Services, LLC +// https://www.iana.org/domains/root/db/tattoo.html +tattoo + +// tax : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tax.html +tax + +// taxi : Binky Moon, LLC +// https://www.iana.org/domains/root/db/taxi.html +taxi + +// tci +// https://www.iana.org/domains/root/db/tci.html +tci + +// tdk : TDK Corporation +// https://www.iana.org/domains/root/db/tdk.html +tdk + +// team : Binky Moon, LLC +// https://www.iana.org/domains/root/db/team.html +team + +// tech : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/tech.html +tech + +// technology : Binky Moon, LLC +// https://www.iana.org/domains/root/db/technology.html +technology + +// temasek : Temasek Holdings (Private) Limited +// https://www.iana.org/domains/root/db/temasek.html +temasek + +// tennis : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tennis.html +tennis + +// teva : Teva Pharmaceutical Industries Limited +// https://www.iana.org/domains/root/db/teva.html +teva + +// thd : Home Depot Product Authority, LLC +// https://www.iana.org/domains/root/db/thd.html +thd + +// theater : Binky Moon, LLC +// https://www.iana.org/domains/root/db/theater.html +theater + +// theatre : XYZ.COM LLC +// https://www.iana.org/domains/root/db/theatre.html +theatre + +// tiaa : Teachers Insurance and Annuity Association of America +// https://www.iana.org/domains/root/db/tiaa.html +tiaa + +// tickets : XYZ.COM LLC +// 
https://www.iana.org/domains/root/db/tickets.html +tickets + +// tienda : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tienda.html +tienda + +// tips : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tips.html +tips + +// tires : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tires.html +tires + +// tirol : punkt Tirol GmbH +// https://www.iana.org/domains/root/db/tirol.html +tirol + +// tjmaxx : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/tjmaxx.html +tjmaxx + +// tjx : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/tjx.html +tjx + +// tkmaxx : The TJX Companies, Inc. +// https://www.iana.org/domains/root/db/tkmaxx.html +tkmaxx + +// tmall : Alibaba Group Holding Limited +// https://www.iana.org/domains/root/db/tmall.html +tmall + +// today : Binky Moon, LLC +// https://www.iana.org/domains/root/db/today.html +today + +// tokyo : GMO Registry, Inc. +// https://www.iana.org/domains/root/db/tokyo.html +tokyo + +// tools : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tools.html +tools + +// top : .TOP Registry +// https://www.iana.org/domains/root/db/top.html +top + +// toray : Toray Industries, Inc. 
+// https://www.iana.org/domains/root/db/toray.html +toray + +// toshiba : TOSHIBA Corporation +// https://www.iana.org/domains/root/db/toshiba.html +toshiba + +// total : TotalEnergies SE +// https://www.iana.org/domains/root/db/total.html +total + +// tours : Binky Moon, LLC +// https://www.iana.org/domains/root/db/tours.html +tours + +// town : Binky Moon, LLC +// https://www.iana.org/domains/root/db/town.html +town + +// toyota : TOYOTA MOTOR CORPORATION +// https://www.iana.org/domains/root/db/toyota.html +toyota + +// toys : Binky Moon, LLC +// https://www.iana.org/domains/root/db/toys.html +toys + +// trade : Elite Registry Limited +// https://www.iana.org/domains/root/db/trade.html +trade + +// trading : Dog Beach, LLC +// https://www.iana.org/domains/root/db/trading.html +trading + +// training : Binky Moon, LLC +// https://www.iana.org/domains/root/db/training.html +training + +// travel : Dog Beach, LLC +// https://www.iana.org/domains/root/db/travel.html +travel + +// travelers : Travelers TLD, LLC +// https://www.iana.org/domains/root/db/travelers.html +travelers + +// travelersinsurance : Travelers TLD, LLC +// https://www.iana.org/domains/root/db/travelersinsurance.html +travelersinsurance + +// trust : Internet Naming Company LLC +// https://www.iana.org/domains/root/db/trust.html +trust + +// trv : Travelers TLD, LLC +// https://www.iana.org/domains/root/db/trv.html +trv + +// tube : Latin American Telecom LLC +// https://www.iana.org/domains/root/db/tube.html +tube + +// tui : TUI AG +// https://www.iana.org/domains/root/db/tui.html +tui + +// tunes : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/tunes.html +tunes + +// tushu : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/tushu.html +tushu + +// tvs : T V SUNDRAM IYENGAR & SONS LIMITED +// https://www.iana.org/domains/root/db/tvs.html +tvs + +// ubank : National Australia Bank Limited +// https://www.iana.org/domains/root/db/ubank.html +ubank + +// ubs : UBS AG +// https://www.iana.org/domains/root/db/ubs.html +ubs + +// unicom : China United Network Communications Corporation Limited +// https://www.iana.org/domains/root/db/unicom.html +unicom + +// university : Binky Moon, LLC +// https://www.iana.org/domains/root/db/university.html +university + +// uno : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/uno.html +uno + +// uol : UBN INTERNET LTDA. +// https://www.iana.org/domains/root/db/uol.html +uol + +// ups : UPS Market Driver, Inc. +// https://www.iana.org/domains/root/db/ups.html +ups + +// vacations : Binky Moon, LLC +// https://www.iana.org/domains/root/db/vacations.html +vacations + +// vana : D3 Registry LLC +// https://www.iana.org/domains/root/db/vana.html +vana + +// vanguard : The Vanguard Group, Inc. +// https://www.iana.org/domains/root/db/vanguard.html +vanguard + +// vegas : Dot Vegas, Inc. +// https://www.iana.org/domains/root/db/vegas.html +vegas + +// ventures : Binky Moon, LLC +// https://www.iana.org/domains/root/db/ventures.html +ventures + +// verisign : VeriSign, Inc. +// https://www.iana.org/domains/root/db/verisign.html +verisign + +// versicherung : tldbox GmbH +// https://www.iana.org/domains/root/db/versicherung.html +versicherung + +// vet : Dog Beach, LLC +// https://www.iana.org/domains/root/db/vet.html +vet + +// viajes : Binky Moon, LLC +// https://www.iana.org/domains/root/db/viajes.html +viajes + +// video : Dog Beach, LLC +// https://www.iana.org/domains/root/db/video.html +video + +// vig : VIENNA INSURANCE GROUP AG Wiener Versicherung Gruppe +// https://www.iana.org/domains/root/db/vig.html +vig + +// viking : Viking River Cruises (Bermuda) Ltd. 
+// https://www.iana.org/domains/root/db/viking.html +viking + +// villas : Binky Moon, LLC +// https://www.iana.org/domains/root/db/villas.html +villas + +// vin : Binky Moon, LLC +// https://www.iana.org/domains/root/db/vin.html +vin + +// vip : Registry Services, LLC +// https://www.iana.org/domains/root/db/vip.html +vip + +// virgin : Virgin Enterprises Limited +// https://www.iana.org/domains/root/db/virgin.html +virgin + +// visa : Visa Worldwide Pte. Limited +// https://www.iana.org/domains/root/db/visa.html +visa + +// vision : Binky Moon, LLC +// https://www.iana.org/domains/root/db/vision.html +vision + +// viva : Saudi Telecom Company +// https://www.iana.org/domains/root/db/viva.html +viva + +// vivo : Telefonica Brasil S.A. +// https://www.iana.org/domains/root/db/vivo.html +vivo + +// vlaanderen : DNS.be vzw +// https://www.iana.org/domains/root/db/vlaanderen.html +vlaanderen + +// vodka : Registry Services, LLC +// https://www.iana.org/domains/root/db/vodka.html +vodka + +// volvo : Volvo Holding Sverige Aktiebolag +// https://www.iana.org/domains/root/db/volvo.html +volvo + +// vote : Monolith Registry LLC +// https://www.iana.org/domains/root/db/vote.html +vote + +// voting : Valuetainment Corp. +// https://www.iana.org/domains/root/db/voting.html +voting + +// voto : Monolith Registry LLC +// https://www.iana.org/domains/root/db/voto.html +voto + +// voyage : Binky Moon, LLC +// https://www.iana.org/domains/root/db/voyage.html +voyage + +// wales : Nominet UK +// https://www.iana.org/domains/root/db/wales.html +wales + +// walmart : Wal-Mart Stores, Inc. +// https://www.iana.org/domains/root/db/walmart.html +walmart + +// walter : Sandvik AB +// https://www.iana.org/domains/root/db/walter.html +walter + +// wang : Zodiac Wang Limited +// https://www.iana.org/domains/root/db/wang.html +wang + +// wanggou : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/wanggou.html +wanggou + +// watch : Binky Moon, LLC +// https://www.iana.org/domains/root/db/watch.html +watch + +// watches : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/watches.html +watches + +// weather : International Business Machines Corporation +// https://www.iana.org/domains/root/db/weather.html +weather + +// weatherchannel : International Business Machines Corporation +// https://www.iana.org/domains/root/db/weatherchannel.html +weatherchannel + +// webcam : dot Webcam Limited +// https://www.iana.org/domains/root/db/webcam.html +webcam + +// weber : Saint-Gobain Weber SA +// https://www.iana.org/domains/root/db/weber.html +weber + +// website : Radix Technologies Inc SEZC +// https://www.iana.org/domains/root/db/website.html +website + +// wed +// https://www.iana.org/domains/root/db/wed.html +wed + +// wedding : Registry Services, LLC +// https://www.iana.org/domains/root/db/wedding.html +wedding + +// weibo : Sina Corporation +// https://www.iana.org/domains/root/db/weibo.html +weibo + +// weir : Weir Group IP Limited +// https://www.iana.org/domains/root/db/weir.html +weir + +// whoswho : Who's Who Registry +// https://www.iana.org/domains/root/db/whoswho.html +whoswho + +// wien : punkt.wien GmbH +// https://www.iana.org/domains/root/db/wien.html +wien + +// wiki : Registry Services, LLC +// https://www.iana.org/domains/root/db/wiki.html +wiki + +// williamhill : William Hill Organization Limited +// https://www.iana.org/domains/root/db/williamhill.html +williamhill + +// win : First Registry Limited +// https://www.iana.org/domains/root/db/win.html +win + +// windows : Microsoft Corporation +// https://www.iana.org/domains/root/db/windows.html +windows + +// wine : Binky Moon, LLC +// https://www.iana.org/domains/root/db/wine.html +wine + +// winners : The TJX Companies, Inc. 
+// https://www.iana.org/domains/root/db/winners.html +winners + +// wme : William Morris Endeavor Entertainment, LLC +// https://www.iana.org/domains/root/db/wme.html +wme + +// wolterskluwer : Wolters Kluwer N.V. +// https://www.iana.org/domains/root/db/wolterskluwer.html +wolterskluwer + +// woodside : Woodside Petroleum Limited +// https://www.iana.org/domains/root/db/woodside.html +woodside + +// work : Registry Services, LLC +// https://www.iana.org/domains/root/db/work.html +work + +// works : Binky Moon, LLC +// https://www.iana.org/domains/root/db/works.html +works + +// world : Binky Moon, LLC +// https://www.iana.org/domains/root/db/world.html +world + +// wow : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/wow.html +wow + +// wtc : World Trade Centers Association, Inc. +// https://www.iana.org/domains/root/db/wtc.html +wtc + +// wtf : Binky Moon, LLC +// https://www.iana.org/domains/root/db/wtf.html +wtf + +// xbox : Microsoft Corporation +// https://www.iana.org/domains/root/db/xbox.html +xbox + +// xerox : Xerox DNHC LLC +// https://www.iana.org/domains/root/db/xerox.html +xerox + +// xihuan : Beijing Qihu Keji Co., Ltd. +// https://www.iana.org/domains/root/db/xihuan.html +xihuan + +// xin : Elegant Leader Limited +// https://www.iana.org/domains/root/db/xin.html +xin + +// xn--11b4c3d : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--11b4c3d.html +कॉम + +// xn--1ck2e1b : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--1ck2e1b.html +セール + +// xn--1qqw23a : Guangzhou YU Wei Information Technology Co., Ltd. 
+// https://www.iana.org/domains/root/db/xn--1qqw23a.html +佛山 + +// xn--30rr7y : Excellent First Limited +// https://www.iana.org/domains/root/db/xn--30rr7y.html +慈善 + +// xn--3bst00m : Eagle Horizon Limited +// https://www.iana.org/domains/root/db/xn--3bst00m.html +集团 + +// xn--3ds443g : Beijing TLD Registry Technology Limited +// https://www.iana.org/domains/root/db/xn--3ds443g.html +在线 + +// xn--3pxu8k : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--3pxu8k.html +点看 + +// xn--42c2d9a : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--42c2d9a.html +คอม + +// xn--45q11c : Zodiac Gemini Ltd +// https://www.iana.org/domains/root/db/xn--45q11c.html +八卦 + +// xn--4gbrim : Helium TLDs Ltd +// https://www.iana.org/domains/root/db/xn--4gbrim.html +موقع + +// xn--55qw42g : China Organizational Name Administration Center +// https://www.iana.org/domains/root/db/xn--55qw42g.html +公益 + +// xn--55qx5d : China Internet Network Information Center (CNNIC) +// https://www.iana.org/domains/root/db/xn--55qx5d.html +公司 + +// xn--5su34j936bgsg : Shangri‐La International Hotel Management Limited +// https://www.iana.org/domains/root/db/xn--5su34j936bgsg.html +香格里拉 + +// xn--5tzm5g : Global Website TLD Asia Limited +// https://www.iana.org/domains/root/db/xn--5tzm5g.html +网站 + +// xn--6frz82g : Identity Digital Domains Limited +// https://www.iana.org/domains/root/db/xn--6frz82g.html +移动 + +// xn--6qq986b3xl : Tycoon Treasure Limited +// https://www.iana.org/domains/root/db/xn--6qq986b3xl.html +我爱你 + +// xn--80adxhks : Foundation for Assistance for Internet Technologies and Infrastructure Development (FAITID) +// https://www.iana.org/domains/root/db/xn--80adxhks.html +москва + +// xn--80aqecdr1a : Pontificium Consilium de Comunicationibus Socialibus (PCCS) (Pontifical Council for Social Communication) +// https://www.iana.org/domains/root/db/xn--80aqecdr1a.html +католик + +// xn--80asehdb : CORE Association +// 
https://www.iana.org/domains/root/db/xn--80asehdb.html +онлайн + +// xn--80aswg : CORE Association +// https://www.iana.org/domains/root/db/xn--80aswg.html +сайт + +// xn--8y0a063a : China United Network Communications Corporation Limited +// https://www.iana.org/domains/root/db/xn--8y0a063a.html +联通 + +// xn--9dbq2a : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--9dbq2a.html +קום + +// xn--9et52u : RISE VICTORY LIMITED +// https://www.iana.org/domains/root/db/xn--9et52u.html +时尚 + +// xn--9krt00a : Sina Corporation +// https://www.iana.org/domains/root/db/xn--9krt00a.html +微博 + +// xn--b4w605ferd : Temasek Holdings (Private) Limited +// https://www.iana.org/domains/root/db/xn--b4w605ferd.html +淡马锡 + +// xn--bck1b9a5dre4c : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--bck1b9a5dre4c.html +ファッション + +// xn--c1avg : Public Interest Registry +// https://www.iana.org/domains/root/db/xn--c1avg.html +орг + +// xn--c2br7g : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--c2br7g.html +नेट + +// xn--cck2b3b : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--cck2b3b.html +ストア + +// xn--cckwcxetd : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--cckwcxetd.html +アマゾン + +// xn--cg4bki : SAMSUNG SDS CO., LTD +// https://www.iana.org/domains/root/db/xn--cg4bki.html +삼성 + +// xn--czr694b : Internet DotTrademark Organisation Limited +// https://www.iana.org/domains/root/db/xn--czr694b.html +商标 + +// xn--czrs0t : Binky Moon, LLC +// https://www.iana.org/domains/root/db/xn--czrs0t.html +商店 + +// xn--czru2d : Zodiac Aquarius Limited +// https://www.iana.org/domains/root/db/xn--czru2d.html +商城 + +// xn--d1acj3b : The Foundation for Network Initiatives “The Smart Internet” +// https://www.iana.org/domains/root/db/xn--d1acj3b.html +дети + +// xn--eckvdtc9d : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/xn--eckvdtc9d.html +ポイント + +// xn--efvy88h : Guangzhou YU Wei Information Technology Co., Ltd. +// https://www.iana.org/domains/root/db/xn--efvy88h.html +新闻 + +// xn--fct429k : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--fct429k.html +家電 + +// xn--fhbei : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--fhbei.html +كوم + +// xn--fiq228c5hs : TLD REGISTRY LIMITED OY +// https://www.iana.org/domains/root/db/xn--fiq228c5hs.html +中文网 + +// xn--fiq64b : CITIC Group Corporation +// https://www.iana.org/domains/root/db/xn--fiq64b.html +中信 + +// xn--fjq720a : Binky Moon, LLC +// https://www.iana.org/domains/root/db/xn--fjq720a.html +娱乐 + +// xn--flw351e : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/xn--flw351e.html +谷歌 + +// xn--fzys8d69uvgm : PCCW Enterprises Limited +// https://www.iana.org/domains/root/db/xn--fzys8d69uvgm.html +電訊盈科 + +// xn--g2xx48c : Nawang Heli(Xiamen) Network Service Co., LTD. +// https://www.iana.org/domains/root/db/xn--g2xx48c.html +购物 + +// xn--gckr3f0f : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--gckr3f0f.html +クラウド + +// xn--gk3at1e : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--gk3at1e.html +通販 + +// xn--hxt814e : Zodiac Taurus Limited +// https://www.iana.org/domains/root/db/xn--hxt814e.html +网店 + +// xn--i1b6b1a6a2e : Public Interest Registry +// https://www.iana.org/domains/root/db/xn--i1b6b1a6a2e.html +संगठन + +// xn--imr513n : Internet DotTrademark Organisation Limited +// https://www.iana.org/domains/root/db/xn--imr513n.html +餐厅 + +// xn--io0a7i : China Internet Network Information Center (CNNIC) +// https://www.iana.org/domains/root/db/xn--io0a7i.html +网络 + +// xn--j1aef : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--j1aef.html +ком + +// xn--jlq480n2rg : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/xn--jlq480n2rg.html +亚马逊 + +// xn--jvr189m : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--jvr189m.html +食品 + +// xn--kcrx77d1x4a : Koninklijke Philips N.V. +// https://www.iana.org/domains/root/db/xn--kcrx77d1x4a.html +飞利浦 + +// xn--kput3i : Beijing RITT-Net Technology Development Co., Ltd +// https://www.iana.org/domains/root/db/xn--kput3i.html +手机 + +// xn--mgba3a3ejt : Aramco Services Company +// https://www.iana.org/domains/root/db/xn--mgba3a3ejt.html +ارامكو + +// xn--mgba7c0bbn0a : Competrol (Luxembourg) Sarl +// https://www.iana.org/domains/root/db/xn--mgba7c0bbn0a.html +العليان + +// xn--mgbab2bd : CORE Association +// https://www.iana.org/domains/root/db/xn--mgbab2bd.html +بازار + +// xn--mgbca7dzdo : Abu Dhabi Systems and Information Centre +// https://www.iana.org/domains/root/db/xn--mgbca7dzdo.html +ابوظبي + +// xn--mgbi4ecexp : Pontificium Consilium de Comunicationibus Socialibus (PCCS) (Pontifical Council for Social Communication) +// https://www.iana.org/domains/root/db/xn--mgbi4ecexp.html +كاثوليك + +// xn--mgbt3dhd +// https://www.iana.org/domains/root/db/xn--mgbt3dhd.html +همراه + +// xn--mk1bu44c : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--mk1bu44c.html +닷컴 + +// xn--mxtq1m : Net-Chinese Co., Ltd. +// https://www.iana.org/domains/root/db/xn--mxtq1m.html +政府 + +// xn--ngbc5azd : International Domain Registry Pty. Ltd. 
+// https://www.iana.org/domains/root/db/xn--ngbc5azd.html +شبكة + +// xn--ngbe9e0a : Kuwait Finance House +// https://www.iana.org/domains/root/db/xn--ngbe9e0a.html +بيتك + +// xn--ngbrx : League of Arab States +// https://www.iana.org/domains/root/db/xn--ngbrx.html +عرب + +// xn--nqv7f : Public Interest Registry +// https://www.iana.org/domains/root/db/xn--nqv7f.html +机构 + +// xn--nqv7fs00ema : Public Interest Registry +// https://www.iana.org/domains/root/db/xn--nqv7fs00ema.html +组织机构 + +// xn--nyqy26a : Stable Tone Limited +// https://www.iana.org/domains/root/db/xn--nyqy26a.html +健康 + +// xn--otu796d : Jiang Yu Liang Cai Technology Company Limited +// https://www.iana.org/domains/root/db/xn--otu796d.html +招聘 + +// xn--p1acf : Rusnames Limited +// https://www.iana.org/domains/root/db/xn--p1acf.html +рус + +// xn--pssy2u : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--pssy2u.html +大拿 + +// xn--q9jyb4c : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/xn--q9jyb4c.html +みんな + +// xn--qcka1pmc : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/xn--qcka1pmc.html +グーグル + +// xn--rhqv96g : Stable Tone Limited +// https://www.iana.org/domains/root/db/xn--rhqv96g.html +世界 + +// xn--rovu88b : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/xn--rovu88b.html +書籍 + +// xn--ses554g : KNET Co., Ltd. 
+// https://www.iana.org/domains/root/db/xn--ses554g.html +网址 + +// xn--t60b56a : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--t60b56a.html +닷넷 + +// xn--tckwe : VeriSign Sarl +// https://www.iana.org/domains/root/db/xn--tckwe.html +コム + +// xn--tiq49xqyj : Pontificium Consilium de Comunicationibus Socialibus (PCCS) (Pontifical Council for Social Communication) +// https://www.iana.org/domains/root/db/xn--tiq49xqyj.html +天主教 + +// xn--unup4y : Binky Moon, LLC +// https://www.iana.org/domains/root/db/xn--unup4y.html +游戏 + +// xn--vermgensberater-ctb : Deutsche Vermögensberatung Aktiengesellschaft DVAG +// https://www.iana.org/domains/root/db/xn--vermgensberater-ctb.html +vermögensberater + +// xn--vermgensberatung-pwb : Deutsche Vermögensberatung Aktiengesellschaft DVAG +// https://www.iana.org/domains/root/db/xn--vermgensberatung-pwb.html +vermögensberatung + +// xn--vhquv : Binky Moon, LLC +// https://www.iana.org/domains/root/db/xn--vhquv.html +企业 + +// xn--vuq861b : Beijing Tele-info Technology Co., Ltd. +// https://www.iana.org/domains/root/db/xn--vuq861b.html +信息 + +// xn--w4r85el8fhu5dnra : Kerry Trading Co. Limited +// https://www.iana.org/domains/root/db/xn--w4r85el8fhu5dnra.html +嘉里大酒店 + +// xn--w4rs40l : Kerry Trading Co. Limited +// https://www.iana.org/domains/root/db/xn--w4rs40l.html +嘉里 + +// xn--xhq521b : Guangzhou YU Wei Information Technology Co., Ltd. +// https://www.iana.org/domains/root/db/xn--xhq521b.html +广东 + +// xn--zfr164b : China Organizational Name Administration Center +// https://www.iana.org/domains/root/db/xn--zfr164b.html +政务 + +// xyz : XYZ.COM LLC +// https://www.iana.org/domains/root/db/xyz.html +xyz + +// yachts : XYZ.COM LLC +// https://www.iana.org/domains/root/db/yachts.html +yachts + +// yahoo : Yahoo Inc. +// https://www.iana.org/domains/root/db/yahoo.html +yahoo + +// yamaxun : Amazon Registry Services, Inc. 
+// https://www.iana.org/domains/root/db/yamaxun.html +yamaxun + +// yandex : YANDEX, LLC +// https://www.iana.org/domains/root/db/yandex.html +yandex + +// yodobashi : YODOBASHI CAMERA CO.,LTD. +// https://www.iana.org/domains/root/db/yodobashi.html +yodobashi + +// yoga : Registry Services, LLC +// https://www.iana.org/domains/root/db/yoga.html +yoga + +// yokohama : GMO Registry, Inc. +// https://www.iana.org/domains/root/db/yokohama.html +yokohama + +// you : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/you.html +you + +// youtube : Charleston Road Registry Inc. +// https://www.iana.org/domains/root/db/youtube.html +youtube + +// yun : Beijing Qihu Keji Co., Ltd. +// https://www.iana.org/domains/root/db/yun.html +yun + +// zappos : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/zappos.html +zappos + +// zara : Industria de Diseño Textil, S.A. (INDITEX, S.A.) +// https://www.iana.org/domains/root/db/zara.html +zara + +// zero : Amazon Registry Services, Inc. +// https://www.iana.org/domains/root/db/zero.html +zero + +// zip : Charleston Road Registry Inc. 
+// https://www.iana.org/domains/root/db/zip.html +zip + +// zone : Binky Moon, LLC +// https://www.iana.org/domains/root/db/zone.html +zone + +// zuerich : Kanton Zürich (Canton of Zurich) +// https://www.iana.org/domains/root/db/zuerich.html +zuerich + +// ===END ICANN DOMAINS=== + +// ===BEGIN PRIVATE DOMAINS=== + +// (Note: these are in alphabetical order by company name) + +// .KRD : https://nic.krd +co.krd +edu.krd + +// .pl domains (grandfathered) +art.pl +gliwice.pl +krakow.pl +poznan.pl +wroc.pl +zakopane.pl + +// .US +// Submitted by Ed Moore +lib.de.us + +// 12CHARS : https://12chars.com +// Submitted by Kenny Niehage +12chars.dev +12chars.it +12chars.pro + +// 1GB LLC : https://www.1gb.ua/ +// Submitted by 1GB LLC +cc.ua +inf.ua +ltd.ua + +// 611 blockchain domain name system : https://611project.net/ +611.to + +// A2 Hosting +// Submitted by Tyler Hall +a2hosted.com +cpserver.com + +// Acorn Labs : https://acorn.io +// Submitted by Craig Jellick +*.on-acorn.io + +// ActiveTrail : https://www.activetrail.biz/ +// Submitted by Ofer Kalaora +activetrail.biz + +// Adaptable.io : https://adaptable.io +// Submitted by Mark Terrel +adaptable.app + +// addr.tools : https://addr.tools/ +// Submitted by Brian Shea +myaddr.dev +myaddr.io +dyn.addr.tools +myaddr.tools + +// Adobe : https://www.adobe.com/ +// Submitted by Ian Boston and Lars Trieloff +adobeaemcloud.com +*.dev.adobeaemcloud.com +aem.live +hlx.live +adobeaemcloud.net +aem.network +aem.page +hlx.page +aem.reviews + +// Adobe Developer Platform : https://developer.adobe.com +// Submitted by Jesse MacFadyen +adobeio-static.net +adobeioruntime.net + +// Africa.com Web Solutions Ltd : https://registry.africa.com +// Submitted by Gavin Brown +africa.com + +// Agnat sp. z o.o. 
: https://domena.pl +// Submitted by Przemyslaw Plewa +beep.pl + +// Airkit : https://www.airkit.com/ +// Submitted by Grant Cooksey +airkitapps.com +airkitapps-au.com +airkitapps.eu + +// Aiven : https://aiven.io/ +// Submitted by Aiven Security Team +aiven.app +aivencloud.com + +// Akamai : https://www.akamai.com/ +// Submitted by Akamai Team +akadns.net +akamai.net +akamai-staging.net +akamaiedge.net +akamaiedge-staging.net +akamaihd.net +akamaihd-staging.net +akamaiorigin.net +akamaiorigin-staging.net +akamaized.net +akamaized-staging.net +edgekey.net +edgekey-staging.net +edgesuite.net +edgesuite-staging.net + +// alboto.ca : http://alboto.ca +// Submitted by Anton Avramov +barsy.ca + +// Alces Software Ltd : http://alces-software.com +// Submitted by Mark J. Titorenko +*.compute.estate +*.alces.network + +// Alibaba Cloud API Gateway +// Submitted by Alibaba Cloud Security +alibabacloudcs.com + +// all-inkl.com : https://all-inkl.com +// Submitted by Werner Kaltofen +kasserver.com + +// Altervista : https://www.altervista.org +// Submitted by Carlo Cannas +altervista.org + +// alwaysdata : https://www.alwaysdata.com +// Submitted by Cyril +alwaysdata.net + +// Amaze Software : https://amaze.co +// Submitted by Domain Admin +myamaze.net + +// Amazon : https://www.amazon.com/ +// Submitted by AWS Security +// Subsections of Amazon/subsidiaries will appear until "concludes" tag + +// Amazon API Gateway +// Submitted by AWS Security +// Reference: 6a4f5a95-8c7d-4077-a7af-9cf1abec0a53 +execute-api.cn-north-1.amazonaws.com.cn +execute-api.cn-northwest-1.amazonaws.com.cn +execute-api.af-south-1.amazonaws.com +execute-api.ap-east-1.amazonaws.com +execute-api.ap-northeast-1.amazonaws.com +execute-api.ap-northeast-2.amazonaws.com +execute-api.ap-northeast-3.amazonaws.com +execute-api.ap-south-1.amazonaws.com +execute-api.ap-south-2.amazonaws.com +execute-api.ap-southeast-1.amazonaws.com +execute-api.ap-southeast-2.amazonaws.com +execute-api.ap-southeast-3.amazonaws.com 
+execute-api.ap-southeast-4.amazonaws.com +execute-api.ap-southeast-5.amazonaws.com +execute-api.ca-central-1.amazonaws.com +execute-api.ca-west-1.amazonaws.com +execute-api.eu-central-1.amazonaws.com +execute-api.eu-central-2.amazonaws.com +execute-api.eu-north-1.amazonaws.com +execute-api.eu-south-1.amazonaws.com +execute-api.eu-south-2.amazonaws.com +execute-api.eu-west-1.amazonaws.com +execute-api.eu-west-2.amazonaws.com +execute-api.eu-west-3.amazonaws.com +execute-api.il-central-1.amazonaws.com +execute-api.me-central-1.amazonaws.com +execute-api.me-south-1.amazonaws.com +execute-api.sa-east-1.amazonaws.com +execute-api.us-east-1.amazonaws.com +execute-api.us-east-2.amazonaws.com +execute-api.us-gov-east-1.amazonaws.com +execute-api.us-gov-west-1.amazonaws.com +execute-api.us-west-1.amazonaws.com +execute-api.us-west-2.amazonaws.com + +// Amazon CloudFront +// Submitted by Donavan Miller +// Reference: 54144616-fd49-4435-8535-19c6a601bdb3 +cloudfront.net + +// Amazon Cognito +// Submitted by AWS Security +// Reference: eb4652f0-20f0-43f5-b323-e6cc6ae02ad7 +auth.af-south-1.amazoncognito.com +auth.ap-east-1.amazoncognito.com +auth.ap-northeast-1.amazoncognito.com +auth.ap-northeast-2.amazoncognito.com +auth.ap-northeast-3.amazoncognito.com +auth.ap-south-1.amazoncognito.com +auth.ap-south-2.amazoncognito.com +auth.ap-southeast-1.amazoncognito.com +auth.ap-southeast-2.amazoncognito.com +auth.ap-southeast-3.amazoncognito.com +auth.ap-southeast-4.amazoncognito.com +auth.ap-southeast-5.amazoncognito.com +auth.ca-central-1.amazoncognito.com +auth.ca-west-1.amazoncognito.com +auth.eu-central-1.amazoncognito.com +auth.eu-central-2.amazoncognito.com +auth.eu-north-1.amazoncognito.com +auth.eu-south-1.amazoncognito.com +auth.eu-south-2.amazoncognito.com +auth.eu-west-1.amazoncognito.com +auth.eu-west-2.amazoncognito.com +auth.eu-west-3.amazoncognito.com +auth.il-central-1.amazoncognito.com +auth.me-central-1.amazoncognito.com +auth.me-south-1.amazoncognito.com 
+auth.sa-east-1.amazoncognito.com +auth.us-east-1.amazoncognito.com +auth-fips.us-east-1.amazoncognito.com +auth.us-east-2.amazoncognito.com +auth-fips.us-east-2.amazoncognito.com +auth-fips.us-gov-east-1.amazoncognito.com +auth-fips.us-gov-west-1.amazoncognito.com +auth.us-west-1.amazoncognito.com +auth-fips.us-west-1.amazoncognito.com +auth.us-west-2.amazoncognito.com +auth-fips.us-west-2.amazoncognito.com + +// Amazon EC2 +// Submitted by Luke Wells +// Reference: 4c38fa71-58ac-4768-99e5-689c1767e537 +*.compute.amazonaws.com.cn +*.compute.amazonaws.com +*.compute-1.amazonaws.com +us-east-1.amazonaws.com + +// Amazon EMR +// Submitted by AWS Security +// Reference: 82f43f9f-bbb8-400e-8349-854f5a62f20d +emrappui-prod.cn-north-1.amazonaws.com.cn +emrnotebooks-prod.cn-north-1.amazonaws.com.cn +emrstudio-prod.cn-north-1.amazonaws.com.cn +emrappui-prod.cn-northwest-1.amazonaws.com.cn +emrnotebooks-prod.cn-northwest-1.amazonaws.com.cn +emrstudio-prod.cn-northwest-1.amazonaws.com.cn +emrappui-prod.af-south-1.amazonaws.com +emrnotebooks-prod.af-south-1.amazonaws.com +emrstudio-prod.af-south-1.amazonaws.com +emrappui-prod.ap-east-1.amazonaws.com +emrnotebooks-prod.ap-east-1.amazonaws.com +emrstudio-prod.ap-east-1.amazonaws.com +emrappui-prod.ap-northeast-1.amazonaws.com +emrnotebooks-prod.ap-northeast-1.amazonaws.com +emrstudio-prod.ap-northeast-1.amazonaws.com +emrappui-prod.ap-northeast-2.amazonaws.com +emrnotebooks-prod.ap-northeast-2.amazonaws.com +emrstudio-prod.ap-northeast-2.amazonaws.com +emrappui-prod.ap-northeast-3.amazonaws.com +emrnotebooks-prod.ap-northeast-3.amazonaws.com +emrstudio-prod.ap-northeast-3.amazonaws.com +emrappui-prod.ap-south-1.amazonaws.com +emrnotebooks-prod.ap-south-1.amazonaws.com +emrstudio-prod.ap-south-1.amazonaws.com +emrappui-prod.ap-south-2.amazonaws.com +emrnotebooks-prod.ap-south-2.amazonaws.com +emrstudio-prod.ap-south-2.amazonaws.com +emrappui-prod.ap-southeast-1.amazonaws.com +emrnotebooks-prod.ap-southeast-1.amazonaws.com 
+emrstudio-prod.ap-southeast-1.amazonaws.com +emrappui-prod.ap-southeast-2.amazonaws.com +emrnotebooks-prod.ap-southeast-2.amazonaws.com +emrstudio-prod.ap-southeast-2.amazonaws.com +emrappui-prod.ap-southeast-3.amazonaws.com +emrnotebooks-prod.ap-southeast-3.amazonaws.com +emrstudio-prod.ap-southeast-3.amazonaws.com +emrappui-prod.ap-southeast-4.amazonaws.com +emrnotebooks-prod.ap-southeast-4.amazonaws.com +emrstudio-prod.ap-southeast-4.amazonaws.com +emrappui-prod.ca-central-1.amazonaws.com +emrnotebooks-prod.ca-central-1.amazonaws.com +emrstudio-prod.ca-central-1.amazonaws.com +emrappui-prod.ca-west-1.amazonaws.com +emrnotebooks-prod.ca-west-1.amazonaws.com +emrstudio-prod.ca-west-1.amazonaws.com +emrappui-prod.eu-central-1.amazonaws.com +emrnotebooks-prod.eu-central-1.amazonaws.com +emrstudio-prod.eu-central-1.amazonaws.com +emrappui-prod.eu-central-2.amazonaws.com +emrnotebooks-prod.eu-central-2.amazonaws.com +emrstudio-prod.eu-central-2.amazonaws.com +emrappui-prod.eu-north-1.amazonaws.com +emrnotebooks-prod.eu-north-1.amazonaws.com +emrstudio-prod.eu-north-1.amazonaws.com +emrappui-prod.eu-south-1.amazonaws.com +emrnotebooks-prod.eu-south-1.amazonaws.com +emrstudio-prod.eu-south-1.amazonaws.com +emrappui-prod.eu-south-2.amazonaws.com +emrnotebooks-prod.eu-south-2.amazonaws.com +emrstudio-prod.eu-south-2.amazonaws.com +emrappui-prod.eu-west-1.amazonaws.com +emrnotebooks-prod.eu-west-1.amazonaws.com +emrstudio-prod.eu-west-1.amazonaws.com +emrappui-prod.eu-west-2.amazonaws.com +emrnotebooks-prod.eu-west-2.amazonaws.com +emrstudio-prod.eu-west-2.amazonaws.com +emrappui-prod.eu-west-3.amazonaws.com +emrnotebooks-prod.eu-west-3.amazonaws.com +emrstudio-prod.eu-west-3.amazonaws.com +emrappui-prod.il-central-1.amazonaws.com +emrnotebooks-prod.il-central-1.amazonaws.com +emrstudio-prod.il-central-1.amazonaws.com +emrappui-prod.me-central-1.amazonaws.com +emrnotebooks-prod.me-central-1.amazonaws.com +emrstudio-prod.me-central-1.amazonaws.com 
+emrappui-prod.me-south-1.amazonaws.com +emrnotebooks-prod.me-south-1.amazonaws.com +emrstudio-prod.me-south-1.amazonaws.com +emrappui-prod.sa-east-1.amazonaws.com +emrnotebooks-prod.sa-east-1.amazonaws.com +emrstudio-prod.sa-east-1.amazonaws.com +emrappui-prod.us-east-1.amazonaws.com +emrnotebooks-prod.us-east-1.amazonaws.com +emrstudio-prod.us-east-1.amazonaws.com +emrappui-prod.us-east-2.amazonaws.com +emrnotebooks-prod.us-east-2.amazonaws.com +emrstudio-prod.us-east-2.amazonaws.com +emrappui-prod.us-gov-east-1.amazonaws.com +emrnotebooks-prod.us-gov-east-1.amazonaws.com +emrstudio-prod.us-gov-east-1.amazonaws.com +emrappui-prod.us-gov-west-1.amazonaws.com +emrnotebooks-prod.us-gov-west-1.amazonaws.com +emrstudio-prod.us-gov-west-1.amazonaws.com +emrappui-prod.us-west-1.amazonaws.com +emrnotebooks-prod.us-west-1.amazonaws.com +emrstudio-prod.us-west-1.amazonaws.com +emrappui-prod.us-west-2.amazonaws.com +emrnotebooks-prod.us-west-2.amazonaws.com +emrstudio-prod.us-west-2.amazonaws.com + +// Amazon Managed Workflows for Apache Airflow +// Submitted by AWS Security +// Reference: f5ea5d0a-ec6a-4f23-ac1c-553fbff13f5c +*.cn-north-1.airflow.amazonaws.com.cn +*.cn-northwest-1.airflow.amazonaws.com.cn +*.af-south-1.airflow.amazonaws.com +*.ap-east-1.airflow.amazonaws.com +*.ap-northeast-1.airflow.amazonaws.com +*.ap-northeast-2.airflow.amazonaws.com +*.ap-northeast-3.airflow.amazonaws.com +*.ap-south-1.airflow.amazonaws.com +*.ap-south-2.airflow.amazonaws.com +*.ap-southeast-1.airflow.amazonaws.com +*.ap-southeast-2.airflow.amazonaws.com +*.ap-southeast-3.airflow.amazonaws.com +*.ap-southeast-4.airflow.amazonaws.com +*.ca-central-1.airflow.amazonaws.com +*.ca-west-1.airflow.amazonaws.com +*.eu-central-1.airflow.amazonaws.com +*.eu-central-2.airflow.amazonaws.com +*.eu-north-1.airflow.amazonaws.com +*.eu-south-1.airflow.amazonaws.com +*.eu-south-2.airflow.amazonaws.com +*.eu-west-1.airflow.amazonaws.com +*.eu-west-2.airflow.amazonaws.com 
+*.eu-west-3.airflow.amazonaws.com +*.il-central-1.airflow.amazonaws.com +*.me-central-1.airflow.amazonaws.com +*.me-south-1.airflow.amazonaws.com +*.sa-east-1.airflow.amazonaws.com +*.us-east-1.airflow.amazonaws.com +*.us-east-2.airflow.amazonaws.com +*.us-west-1.airflow.amazonaws.com +*.us-west-2.airflow.amazonaws.com + +// Amazon S3 +// Submitted by AWS Security +// Reference: ada5c9df-55e1-4195-a1ce-732d6c81e357 +s3.dualstack.cn-north-1.amazonaws.com.cn +s3-accesspoint.dualstack.cn-north-1.amazonaws.com.cn +s3-website.dualstack.cn-north-1.amazonaws.com.cn +s3.cn-north-1.amazonaws.com.cn +s3-accesspoint.cn-north-1.amazonaws.com.cn +s3-deprecated.cn-north-1.amazonaws.com.cn +s3-object-lambda.cn-north-1.amazonaws.com.cn +s3-website.cn-north-1.amazonaws.com.cn +s3.dualstack.cn-northwest-1.amazonaws.com.cn +s3-accesspoint.dualstack.cn-northwest-1.amazonaws.com.cn +s3.cn-northwest-1.amazonaws.com.cn +s3-accesspoint.cn-northwest-1.amazonaws.com.cn +s3-object-lambda.cn-northwest-1.amazonaws.com.cn +s3-website.cn-northwest-1.amazonaws.com.cn +s3.dualstack.af-south-1.amazonaws.com +s3-accesspoint.dualstack.af-south-1.amazonaws.com +s3-website.dualstack.af-south-1.amazonaws.com +s3.af-south-1.amazonaws.com +s3-accesspoint.af-south-1.amazonaws.com +s3-object-lambda.af-south-1.amazonaws.com +s3-website.af-south-1.amazonaws.com +s3.dualstack.ap-east-1.amazonaws.com +s3-accesspoint.dualstack.ap-east-1.amazonaws.com +s3.ap-east-1.amazonaws.com +s3-accesspoint.ap-east-1.amazonaws.com +s3-object-lambda.ap-east-1.amazonaws.com +s3-website.ap-east-1.amazonaws.com +s3.dualstack.ap-northeast-1.amazonaws.com +s3-accesspoint.dualstack.ap-northeast-1.amazonaws.com +s3-website.dualstack.ap-northeast-1.amazonaws.com +s3.ap-northeast-1.amazonaws.com +s3-accesspoint.ap-northeast-1.amazonaws.com +s3-object-lambda.ap-northeast-1.amazonaws.com +s3-website.ap-northeast-1.amazonaws.com +s3.dualstack.ap-northeast-2.amazonaws.com +s3-accesspoint.dualstack.ap-northeast-2.amazonaws.com 
+s3-website.dualstack.ap-northeast-2.amazonaws.com +s3.ap-northeast-2.amazonaws.com +s3-accesspoint.ap-northeast-2.amazonaws.com +s3-object-lambda.ap-northeast-2.amazonaws.com +s3-website.ap-northeast-2.amazonaws.com +s3.dualstack.ap-northeast-3.amazonaws.com +s3-accesspoint.dualstack.ap-northeast-3.amazonaws.com +s3-website.dualstack.ap-northeast-3.amazonaws.com +s3.ap-northeast-3.amazonaws.com +s3-accesspoint.ap-northeast-3.amazonaws.com +s3-object-lambda.ap-northeast-3.amazonaws.com +s3-website.ap-northeast-3.amazonaws.com +s3.dualstack.ap-south-1.amazonaws.com +s3-accesspoint.dualstack.ap-south-1.amazonaws.com +s3-website.dualstack.ap-south-1.amazonaws.com +s3.ap-south-1.amazonaws.com +s3-accesspoint.ap-south-1.amazonaws.com +s3-object-lambda.ap-south-1.amazonaws.com +s3-website.ap-south-1.amazonaws.com +s3.dualstack.ap-south-2.amazonaws.com +s3-accesspoint.dualstack.ap-south-2.amazonaws.com +s3-website.dualstack.ap-south-2.amazonaws.com +s3.ap-south-2.amazonaws.com +s3-accesspoint.ap-south-2.amazonaws.com +s3-object-lambda.ap-south-2.amazonaws.com +s3-website.ap-south-2.amazonaws.com +s3.dualstack.ap-southeast-1.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-1.amazonaws.com +s3-website.dualstack.ap-southeast-1.amazonaws.com +s3.ap-southeast-1.amazonaws.com +s3-accesspoint.ap-southeast-1.amazonaws.com +s3-object-lambda.ap-southeast-1.amazonaws.com +s3-website.ap-southeast-1.amazonaws.com +s3.dualstack.ap-southeast-2.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-2.amazonaws.com +s3-website.dualstack.ap-southeast-2.amazonaws.com +s3.ap-southeast-2.amazonaws.com +s3-accesspoint.ap-southeast-2.amazonaws.com +s3-object-lambda.ap-southeast-2.amazonaws.com +s3-website.ap-southeast-2.amazonaws.com +s3.dualstack.ap-southeast-3.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-3.amazonaws.com +s3-website.dualstack.ap-southeast-3.amazonaws.com +s3.ap-southeast-3.amazonaws.com +s3-accesspoint.ap-southeast-3.amazonaws.com 
+s3-object-lambda.ap-southeast-3.amazonaws.com +s3-website.ap-southeast-3.amazonaws.com +s3.dualstack.ap-southeast-4.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-4.amazonaws.com +s3-website.dualstack.ap-southeast-4.amazonaws.com +s3.ap-southeast-4.amazonaws.com +s3-accesspoint.ap-southeast-4.amazonaws.com +s3-object-lambda.ap-southeast-4.amazonaws.com +s3-website.ap-southeast-4.amazonaws.com +s3.dualstack.ap-southeast-5.amazonaws.com +s3-accesspoint.dualstack.ap-southeast-5.amazonaws.com +s3-website.dualstack.ap-southeast-5.amazonaws.com +s3.ap-southeast-5.amazonaws.com +s3-accesspoint.ap-southeast-5.amazonaws.com +s3-deprecated.ap-southeast-5.amazonaws.com +s3-object-lambda.ap-southeast-5.amazonaws.com +s3-website.ap-southeast-5.amazonaws.com +s3.dualstack.ca-central-1.amazonaws.com +s3-accesspoint.dualstack.ca-central-1.amazonaws.com +s3-accesspoint-fips.dualstack.ca-central-1.amazonaws.com +s3-fips.dualstack.ca-central-1.amazonaws.com +s3-website.dualstack.ca-central-1.amazonaws.com +s3.ca-central-1.amazonaws.com +s3-accesspoint.ca-central-1.amazonaws.com +s3-accesspoint-fips.ca-central-1.amazonaws.com +s3-fips.ca-central-1.amazonaws.com +s3-object-lambda.ca-central-1.amazonaws.com +s3-website.ca-central-1.amazonaws.com +s3.dualstack.ca-west-1.amazonaws.com +s3-accesspoint.dualstack.ca-west-1.amazonaws.com +s3-accesspoint-fips.dualstack.ca-west-1.amazonaws.com +s3-fips.dualstack.ca-west-1.amazonaws.com +s3-website.dualstack.ca-west-1.amazonaws.com +s3.ca-west-1.amazonaws.com +s3-accesspoint.ca-west-1.amazonaws.com +s3-accesspoint-fips.ca-west-1.amazonaws.com +s3-fips.ca-west-1.amazonaws.com +s3-object-lambda.ca-west-1.amazonaws.com +s3-website.ca-west-1.amazonaws.com +s3.dualstack.eu-central-1.amazonaws.com +s3-accesspoint.dualstack.eu-central-1.amazonaws.com +s3-website.dualstack.eu-central-1.amazonaws.com +s3.eu-central-1.amazonaws.com +s3-accesspoint.eu-central-1.amazonaws.com +s3-object-lambda.eu-central-1.amazonaws.com 
+s3-website.eu-central-1.amazonaws.com +s3.dualstack.eu-central-2.amazonaws.com +s3-accesspoint.dualstack.eu-central-2.amazonaws.com +s3-website.dualstack.eu-central-2.amazonaws.com +s3.eu-central-2.amazonaws.com +s3-accesspoint.eu-central-2.amazonaws.com +s3-object-lambda.eu-central-2.amazonaws.com +s3-website.eu-central-2.amazonaws.com +s3.dualstack.eu-north-1.amazonaws.com +s3-accesspoint.dualstack.eu-north-1.amazonaws.com +s3.eu-north-1.amazonaws.com +s3-accesspoint.eu-north-1.amazonaws.com +s3-object-lambda.eu-north-1.amazonaws.com +s3-website.eu-north-1.amazonaws.com +s3.dualstack.eu-south-1.amazonaws.com +s3-accesspoint.dualstack.eu-south-1.amazonaws.com +s3-website.dualstack.eu-south-1.amazonaws.com +s3.eu-south-1.amazonaws.com +s3-accesspoint.eu-south-1.amazonaws.com +s3-object-lambda.eu-south-1.amazonaws.com +s3-website.eu-south-1.amazonaws.com +s3.dualstack.eu-south-2.amazonaws.com +s3-accesspoint.dualstack.eu-south-2.amazonaws.com +s3-website.dualstack.eu-south-2.amazonaws.com +s3.eu-south-2.amazonaws.com +s3-accesspoint.eu-south-2.amazonaws.com +s3-object-lambda.eu-south-2.amazonaws.com +s3-website.eu-south-2.amazonaws.com +s3.dualstack.eu-west-1.amazonaws.com +s3-accesspoint.dualstack.eu-west-1.amazonaws.com +s3-website.dualstack.eu-west-1.amazonaws.com +s3.eu-west-1.amazonaws.com +s3-accesspoint.eu-west-1.amazonaws.com +s3-deprecated.eu-west-1.amazonaws.com +s3-object-lambda.eu-west-1.amazonaws.com +s3-website.eu-west-1.amazonaws.com +s3.dualstack.eu-west-2.amazonaws.com +s3-accesspoint.dualstack.eu-west-2.amazonaws.com +s3.eu-west-2.amazonaws.com +s3-accesspoint.eu-west-2.amazonaws.com +s3-object-lambda.eu-west-2.amazonaws.com +s3-website.eu-west-2.amazonaws.com +s3.dualstack.eu-west-3.amazonaws.com +s3-accesspoint.dualstack.eu-west-3.amazonaws.com +s3-website.dualstack.eu-west-3.amazonaws.com +s3.eu-west-3.amazonaws.com +s3-accesspoint.eu-west-3.amazonaws.com +s3-object-lambda.eu-west-3.amazonaws.com +s3-website.eu-west-3.amazonaws.com 
+s3.dualstack.il-central-1.amazonaws.com +s3-accesspoint.dualstack.il-central-1.amazonaws.com +s3-website.dualstack.il-central-1.amazonaws.com +s3.il-central-1.amazonaws.com +s3-accesspoint.il-central-1.amazonaws.com +s3-object-lambda.il-central-1.amazonaws.com +s3-website.il-central-1.amazonaws.com +s3.dualstack.me-central-1.amazonaws.com +s3-accesspoint.dualstack.me-central-1.amazonaws.com +s3-website.dualstack.me-central-1.amazonaws.com +s3.me-central-1.amazonaws.com +s3-accesspoint.me-central-1.amazonaws.com +s3-object-lambda.me-central-1.amazonaws.com +s3-website.me-central-1.amazonaws.com +s3.dualstack.me-south-1.amazonaws.com +s3-accesspoint.dualstack.me-south-1.amazonaws.com +s3.me-south-1.amazonaws.com +s3-accesspoint.me-south-1.amazonaws.com +s3-object-lambda.me-south-1.amazonaws.com +s3-website.me-south-1.amazonaws.com +s3.amazonaws.com +s3-1.amazonaws.com +s3-ap-east-1.amazonaws.com +s3-ap-northeast-1.amazonaws.com +s3-ap-northeast-2.amazonaws.com +s3-ap-northeast-3.amazonaws.com +s3-ap-south-1.amazonaws.com +s3-ap-southeast-1.amazonaws.com +s3-ap-southeast-2.amazonaws.com +s3-ca-central-1.amazonaws.com +s3-eu-central-1.amazonaws.com +s3-eu-north-1.amazonaws.com +s3-eu-west-1.amazonaws.com +s3-eu-west-2.amazonaws.com +s3-eu-west-3.amazonaws.com +s3-external-1.amazonaws.com +s3-fips-us-gov-east-1.amazonaws.com +s3-fips-us-gov-west-1.amazonaws.com +mrap.accesspoint.s3-global.amazonaws.com +s3-me-south-1.amazonaws.com +s3-sa-east-1.amazonaws.com +s3-us-east-2.amazonaws.com +s3-us-gov-east-1.amazonaws.com +s3-us-gov-west-1.amazonaws.com +s3-us-west-1.amazonaws.com +s3-us-west-2.amazonaws.com +s3-website-ap-northeast-1.amazonaws.com +s3-website-ap-southeast-1.amazonaws.com +s3-website-ap-southeast-2.amazonaws.com +s3-website-eu-west-1.amazonaws.com +s3-website-sa-east-1.amazonaws.com +s3-website-us-east-1.amazonaws.com +s3-website-us-gov-west-1.amazonaws.com +s3-website-us-west-1.amazonaws.com +s3-website-us-west-2.amazonaws.com 
+s3.dualstack.sa-east-1.amazonaws.com +s3-accesspoint.dualstack.sa-east-1.amazonaws.com +s3-website.dualstack.sa-east-1.amazonaws.com +s3.sa-east-1.amazonaws.com +s3-accesspoint.sa-east-1.amazonaws.com +s3-object-lambda.sa-east-1.amazonaws.com +s3-website.sa-east-1.amazonaws.com +s3.dualstack.us-east-1.amazonaws.com +s3-accesspoint.dualstack.us-east-1.amazonaws.com +s3-accesspoint-fips.dualstack.us-east-1.amazonaws.com +s3-fips.dualstack.us-east-1.amazonaws.com +s3-website.dualstack.us-east-1.amazonaws.com +s3.us-east-1.amazonaws.com +s3-accesspoint.us-east-1.amazonaws.com +s3-accesspoint-fips.us-east-1.amazonaws.com +s3-deprecated.us-east-1.amazonaws.com +s3-fips.us-east-1.amazonaws.com +s3-object-lambda.us-east-1.amazonaws.com +s3-website.us-east-1.amazonaws.com +s3.dualstack.us-east-2.amazonaws.com +s3-accesspoint.dualstack.us-east-2.amazonaws.com +s3-accesspoint-fips.dualstack.us-east-2.amazonaws.com +s3-fips.dualstack.us-east-2.amazonaws.com +s3-website.dualstack.us-east-2.amazonaws.com +s3.us-east-2.amazonaws.com +s3-accesspoint.us-east-2.amazonaws.com +s3-accesspoint-fips.us-east-2.amazonaws.com +s3-deprecated.us-east-2.amazonaws.com +s3-fips.us-east-2.amazonaws.com +s3-object-lambda.us-east-2.amazonaws.com +s3-website.us-east-2.amazonaws.com +s3.dualstack.us-gov-east-1.amazonaws.com +s3-accesspoint.dualstack.us-gov-east-1.amazonaws.com +s3-accesspoint-fips.dualstack.us-gov-east-1.amazonaws.com +s3-fips.dualstack.us-gov-east-1.amazonaws.com +s3.us-gov-east-1.amazonaws.com +s3-accesspoint.us-gov-east-1.amazonaws.com +s3-accesspoint-fips.us-gov-east-1.amazonaws.com +s3-fips.us-gov-east-1.amazonaws.com +s3-object-lambda.us-gov-east-1.amazonaws.com +s3-website.us-gov-east-1.amazonaws.com +s3.dualstack.us-gov-west-1.amazonaws.com +s3-accesspoint.dualstack.us-gov-west-1.amazonaws.com +s3-accesspoint-fips.dualstack.us-gov-west-1.amazonaws.com +s3-fips.dualstack.us-gov-west-1.amazonaws.com +s3.us-gov-west-1.amazonaws.com +s3-accesspoint.us-gov-west-1.amazonaws.com 
+s3-accesspoint-fips.us-gov-west-1.amazonaws.com +s3-fips.us-gov-west-1.amazonaws.com +s3-object-lambda.us-gov-west-1.amazonaws.com +s3-website.us-gov-west-1.amazonaws.com +s3.dualstack.us-west-1.amazonaws.com +s3-accesspoint.dualstack.us-west-1.amazonaws.com +s3-accesspoint-fips.dualstack.us-west-1.amazonaws.com +s3-fips.dualstack.us-west-1.amazonaws.com +s3-website.dualstack.us-west-1.amazonaws.com +s3.us-west-1.amazonaws.com +s3-accesspoint.us-west-1.amazonaws.com +s3-accesspoint-fips.us-west-1.amazonaws.com +s3-fips.us-west-1.amazonaws.com +s3-object-lambda.us-west-1.amazonaws.com +s3-website.us-west-1.amazonaws.com +s3.dualstack.us-west-2.amazonaws.com +s3-accesspoint.dualstack.us-west-2.amazonaws.com +s3-accesspoint-fips.dualstack.us-west-2.amazonaws.com +s3-fips.dualstack.us-west-2.amazonaws.com +s3-website.dualstack.us-west-2.amazonaws.com +s3.us-west-2.amazonaws.com +s3-accesspoint.us-west-2.amazonaws.com +s3-accesspoint-fips.us-west-2.amazonaws.com +s3-deprecated.us-west-2.amazonaws.com +s3-fips.us-west-2.amazonaws.com +s3-object-lambda.us-west-2.amazonaws.com +s3-website.us-west-2.amazonaws.com + +// Amazon SageMaker Ground Truth +// Submitted by AWS Security +// Reference: 98dbfde4-7802-48c3-8751-b60f204e0d9c +labeling.ap-northeast-1.sagemaker.aws +labeling.ap-northeast-2.sagemaker.aws +labeling.ap-south-1.sagemaker.aws +labeling.ap-southeast-1.sagemaker.aws +labeling.ap-southeast-2.sagemaker.aws +labeling.ca-central-1.sagemaker.aws +labeling.eu-central-1.sagemaker.aws +labeling.eu-west-1.sagemaker.aws +labeling.eu-west-2.sagemaker.aws +labeling.us-east-1.sagemaker.aws +labeling.us-east-2.sagemaker.aws +labeling.us-west-2.sagemaker.aws + +// Amazon SageMaker Notebook Instances +// Submitted by AWS Security +// Reference: b5ea56df-669e-43cc-9537-14aa172f5dfc +notebook.af-south-1.sagemaker.aws +notebook.ap-east-1.sagemaker.aws +notebook.ap-northeast-1.sagemaker.aws +notebook.ap-northeast-2.sagemaker.aws +notebook.ap-northeast-3.sagemaker.aws 
+notebook.ap-south-1.sagemaker.aws +notebook.ap-south-2.sagemaker.aws +notebook.ap-southeast-1.sagemaker.aws +notebook.ap-southeast-2.sagemaker.aws +notebook.ap-southeast-3.sagemaker.aws +notebook.ap-southeast-4.sagemaker.aws +notebook.ca-central-1.sagemaker.aws +notebook-fips.ca-central-1.sagemaker.aws +notebook.ca-west-1.sagemaker.aws +notebook-fips.ca-west-1.sagemaker.aws +notebook.eu-central-1.sagemaker.aws +notebook.eu-central-2.sagemaker.aws +notebook.eu-north-1.sagemaker.aws +notebook.eu-south-1.sagemaker.aws +notebook.eu-south-2.sagemaker.aws +notebook.eu-west-1.sagemaker.aws +notebook.eu-west-2.sagemaker.aws +notebook.eu-west-3.sagemaker.aws +notebook.il-central-1.sagemaker.aws +notebook.me-central-1.sagemaker.aws +notebook.me-south-1.sagemaker.aws +notebook.sa-east-1.sagemaker.aws +notebook.us-east-1.sagemaker.aws +notebook-fips.us-east-1.sagemaker.aws +notebook.us-east-2.sagemaker.aws +notebook-fips.us-east-2.sagemaker.aws +notebook.us-gov-east-1.sagemaker.aws +notebook-fips.us-gov-east-1.sagemaker.aws +notebook.us-gov-west-1.sagemaker.aws +notebook-fips.us-gov-west-1.sagemaker.aws +notebook.us-west-1.sagemaker.aws +notebook-fips.us-west-1.sagemaker.aws +notebook.us-west-2.sagemaker.aws +notebook-fips.us-west-2.sagemaker.aws +notebook.cn-north-1.sagemaker.com.cn +notebook.cn-northwest-1.sagemaker.com.cn + +// Amazon SageMaker Studio +// Submitted by AWS Security +// Reference: 475f237e-ab88-4041-9f41-7cfccdf66aeb +studio.af-south-1.sagemaker.aws +studio.ap-east-1.sagemaker.aws +studio.ap-northeast-1.sagemaker.aws +studio.ap-northeast-2.sagemaker.aws +studio.ap-northeast-3.sagemaker.aws +studio.ap-south-1.sagemaker.aws +studio.ap-southeast-1.sagemaker.aws +studio.ap-southeast-2.sagemaker.aws +studio.ap-southeast-3.sagemaker.aws +studio.ca-central-1.sagemaker.aws +studio.eu-central-1.sagemaker.aws +studio.eu-central-2.sagemaker.aws +studio.eu-north-1.sagemaker.aws +studio.eu-south-1.sagemaker.aws +studio.eu-south-2.sagemaker.aws 
+studio.eu-west-1.sagemaker.aws +studio.eu-west-2.sagemaker.aws +studio.eu-west-3.sagemaker.aws +studio.il-central-1.sagemaker.aws +studio.me-central-1.sagemaker.aws +studio.me-south-1.sagemaker.aws +studio.sa-east-1.sagemaker.aws +studio.us-east-1.sagemaker.aws +studio.us-east-2.sagemaker.aws +studio.us-gov-east-1.sagemaker.aws +studio-fips.us-gov-east-1.sagemaker.aws +studio.us-gov-west-1.sagemaker.aws +studio-fips.us-gov-west-1.sagemaker.aws +studio.us-west-1.sagemaker.aws +studio.us-west-2.sagemaker.aws +studio.cn-north-1.sagemaker.com.cn +studio.cn-northwest-1.sagemaker.com.cn + +// Amazon SageMaker with MLflow +// Submited by: AWS Security +// Reference: c19f92b3-a82a-452d-8189-831b572eea7e +*.experiments.sagemaker.aws + +// Analytics on AWS +// Submitted by AWS Security +// Reference: 955f9f40-a495-4e73-ae85-67b77ac9cadd +analytics-gateway.ap-northeast-1.amazonaws.com +analytics-gateway.ap-northeast-2.amazonaws.com +analytics-gateway.ap-south-1.amazonaws.com +analytics-gateway.ap-southeast-1.amazonaws.com +analytics-gateway.ap-southeast-2.amazonaws.com +analytics-gateway.eu-central-1.amazonaws.com +analytics-gateway.eu-west-1.amazonaws.com +analytics-gateway.us-east-1.amazonaws.com +analytics-gateway.us-east-2.amazonaws.com +analytics-gateway.us-west-2.amazonaws.com + +// AWS Amplify +// Submitted by AWS Security +// Reference: c35bed18-6f4f-424f-9298-5756f2f7d72b +amplifyapp.com + +// AWS App Runner +// Submitted by AWS Security +// Reference: 6828c008-ba5d-442f-ade5-48da4e7c2316 +*.awsapprunner.com + +// AWS Cloud9 +// Submitted by: AWS Security +// Reference: 30717f72-4007-4f0f-8ed4-864c6f2efec9 +webview-assets.aws-cloud9.af-south-1.amazonaws.com +vfs.cloud9.af-south-1.amazonaws.com +webview-assets.cloud9.af-south-1.amazonaws.com +webview-assets.aws-cloud9.ap-east-1.amazonaws.com +vfs.cloud9.ap-east-1.amazonaws.com +webview-assets.cloud9.ap-east-1.amazonaws.com +webview-assets.aws-cloud9.ap-northeast-1.amazonaws.com 
+vfs.cloud9.ap-northeast-1.amazonaws.com +webview-assets.cloud9.ap-northeast-1.amazonaws.com +webview-assets.aws-cloud9.ap-northeast-2.amazonaws.com +vfs.cloud9.ap-northeast-2.amazonaws.com +webview-assets.cloud9.ap-northeast-2.amazonaws.com +webview-assets.aws-cloud9.ap-northeast-3.amazonaws.com +vfs.cloud9.ap-northeast-3.amazonaws.com +webview-assets.cloud9.ap-northeast-3.amazonaws.com +webview-assets.aws-cloud9.ap-south-1.amazonaws.com +vfs.cloud9.ap-south-1.amazonaws.com +webview-assets.cloud9.ap-south-1.amazonaws.com +webview-assets.aws-cloud9.ap-southeast-1.amazonaws.com +vfs.cloud9.ap-southeast-1.amazonaws.com +webview-assets.cloud9.ap-southeast-1.amazonaws.com +webview-assets.aws-cloud9.ap-southeast-2.amazonaws.com +vfs.cloud9.ap-southeast-2.amazonaws.com +webview-assets.cloud9.ap-southeast-2.amazonaws.com +webview-assets.aws-cloud9.ca-central-1.amazonaws.com +vfs.cloud9.ca-central-1.amazonaws.com +webview-assets.cloud9.ca-central-1.amazonaws.com +webview-assets.aws-cloud9.eu-central-1.amazonaws.com +vfs.cloud9.eu-central-1.amazonaws.com +webview-assets.cloud9.eu-central-1.amazonaws.com +webview-assets.aws-cloud9.eu-north-1.amazonaws.com +vfs.cloud9.eu-north-1.amazonaws.com +webview-assets.cloud9.eu-north-1.amazonaws.com +webview-assets.aws-cloud9.eu-south-1.amazonaws.com +vfs.cloud9.eu-south-1.amazonaws.com +webview-assets.cloud9.eu-south-1.amazonaws.com +webview-assets.aws-cloud9.eu-west-1.amazonaws.com +vfs.cloud9.eu-west-1.amazonaws.com +webview-assets.cloud9.eu-west-1.amazonaws.com +webview-assets.aws-cloud9.eu-west-2.amazonaws.com +vfs.cloud9.eu-west-2.amazonaws.com +webview-assets.cloud9.eu-west-2.amazonaws.com +webview-assets.aws-cloud9.eu-west-3.amazonaws.com +vfs.cloud9.eu-west-3.amazonaws.com +webview-assets.cloud9.eu-west-3.amazonaws.com +webview-assets.aws-cloud9.il-central-1.amazonaws.com +vfs.cloud9.il-central-1.amazonaws.com +webview-assets.aws-cloud9.me-south-1.amazonaws.com +vfs.cloud9.me-south-1.amazonaws.com 
+webview-assets.cloud9.me-south-1.amazonaws.com +webview-assets.aws-cloud9.sa-east-1.amazonaws.com +vfs.cloud9.sa-east-1.amazonaws.com +webview-assets.cloud9.sa-east-1.amazonaws.com +webview-assets.aws-cloud9.us-east-1.amazonaws.com +vfs.cloud9.us-east-1.amazonaws.com +webview-assets.cloud9.us-east-1.amazonaws.com +webview-assets.aws-cloud9.us-east-2.amazonaws.com +vfs.cloud9.us-east-2.amazonaws.com +webview-assets.cloud9.us-east-2.amazonaws.com +webview-assets.aws-cloud9.us-west-1.amazonaws.com +vfs.cloud9.us-west-1.amazonaws.com +webview-assets.cloud9.us-west-1.amazonaws.com +webview-assets.aws-cloud9.us-west-2.amazonaws.com +vfs.cloud9.us-west-2.amazonaws.com +webview-assets.cloud9.us-west-2.amazonaws.com + +// AWS Directory Service +// Submitted by AWS Security +// Reference: a13203e8-42dc-4045-a0d2-2ee67bed1068 +awsapps.com + +// AWS Elastic Beanstalk +// Submitted by AWS Security +// Reference: bb5a965c-dec3-4967-aa22-e306ad064797 +cn-north-1.eb.amazonaws.com.cn +cn-northwest-1.eb.amazonaws.com.cn +elasticbeanstalk.com +af-south-1.elasticbeanstalk.com +ap-east-1.elasticbeanstalk.com +ap-northeast-1.elasticbeanstalk.com +ap-northeast-2.elasticbeanstalk.com +ap-northeast-3.elasticbeanstalk.com +ap-south-1.elasticbeanstalk.com +ap-southeast-1.elasticbeanstalk.com +ap-southeast-2.elasticbeanstalk.com +ap-southeast-3.elasticbeanstalk.com +ca-central-1.elasticbeanstalk.com +eu-central-1.elasticbeanstalk.com +eu-north-1.elasticbeanstalk.com +eu-south-1.elasticbeanstalk.com +eu-west-1.elasticbeanstalk.com +eu-west-2.elasticbeanstalk.com +eu-west-3.elasticbeanstalk.com +il-central-1.elasticbeanstalk.com +me-south-1.elasticbeanstalk.com +sa-east-1.elasticbeanstalk.com +us-east-1.elasticbeanstalk.com +us-east-2.elasticbeanstalk.com +us-gov-east-1.elasticbeanstalk.com +us-gov-west-1.elasticbeanstalk.com +us-west-1.elasticbeanstalk.com +us-west-2.elasticbeanstalk.com + +// (AWS) Elastic Load Balancing +// Submitted by Luke Wells +// Reference: 
12a3d528-1bac-4433-a359-a395867ffed2 +*.elb.amazonaws.com.cn +*.elb.amazonaws.com + +// AWS Global Accelerator +// Submitted by Daniel Massaguer +// Reference: d916759d-a08b-4241-b536-4db887383a6a +awsglobalaccelerator.com + +// AWS re:Post Private +// Submitted by AWS Security +// Reference: 83385945-225f-416e-9aa0-ad0632bfdcee +*.private.repost.aws + +// AWS Transfer Family web apps +// Submitted by AWS Security +// Reference: 67e9cfe6-ac57-49c7-b197-6652711c8e8d +transfer-webapp.ap-northeast-1.on.aws +transfer-webapp.ap-southeast-1.on.aws +transfer-webapp.ap-southeast-2.on.aws +transfer-webapp.eu-central-1.on.aws +transfer-webapp.eu-north-1.on.aws +transfer-webapp.eu-west-1.on.aws +transfer-webapp.us-east-1.on.aws +transfer-webapp.us-east-2.on.aws +transfer-webapp.us-west-2.on.aws + +// eero +// Submitted by Yue Kang +// Reference: 264afe70-f62c-4c02-8ab9-b5281ed24461 +eero.online +eero-stage.online + +// concludes Amazon + +// Apigee : https://apigee.com/ +// Submitted by Apigee Security Team +apigee.io + +// Apis Networks : https://apisnetworks.com +// Submitted by Matt Saladna +panel.dev + +// Apphud : https://apphud.com +// Submitted by Alexander Selivanov +siiites.com + +// Appspace : https://www.appspace.com +// Submitted by Appspace Security Team +appspacehosted.com +appspaceusercontent.com + +// Appudo UG (haftungsbeschränkt) : https://www.appudo.com +// Submitted by Alexander Hochbaum +appudo.net + +// Appwrite : https://appwrite.io +// Submitted by Steven Nguyen +appwrite.global +*.appwrite.run + +// Aptible : https://www.aptible.com/ +// Submitted by Thomas Orozco +on-aptible.com + +// Aquapal : https://aquapal.net/ +// Submitted by Aki Ueno +f5.si + +// ArvanCloud EdgeCompute +// Submitted by ArvanCloud CDN +arvanedge.ir + +// ASEINet : https://www.aseinet.com/ +// Submitted by Asei SEKIGUCHI +user.aseinet.ne.jp +gv.vc +d.gv.vc + +// Asociación Amigos de la Informática "Euskalamiga" : http://encounter.eus/ +// Submitted by Hector Martin 
+user.party.eus + +// Association potager.org : https://potager.org/ +// Submitted by Lunar +pimienta.org +poivron.org +potager.org +sweetpepper.org + +// ASUSTOR Inc. : http://www.asustor.com +// Submitted by Vincent Tseng +myasustor.com + +// Atlassian : https://atlassian.com +// Submitted by Sam Smyth +cdn.prod.atlassian-dev.net + +// Authentick UG (haftungsbeschränkt) : https://authentick.net +// Submitted by Lukas Reschke +translated.page + +// AVM : https://avm.de +// Submitted by Andreas Weise +myfritz.link +myfritz.net + +// AVStack Pte. Ltd. : https://avstack.io +// Submitted by Jasper Hugo +onavstack.net + +// AW AdvisorWebsites.com Software Inc : https://advisorwebsites.com +// Submitted by James Kennedy +*.awdev.ca +*.advisor.ws + +// AZ.pl sp. z.o.o : https://az.pl +// Submitted by Krzysztof Wolski +ecommerce-shop.pl + +// b-data GmbH : https://www.b-data.io +// Submitted by Olivier Benz +b-data.io + +// Balena : https://www.balena.io +// Submitted by Petros Angelatos +balena-devices.com + +// BASE, Inc. : https://binc.jp +// Submitted by Yuya NAGASAWA +base.ec +official.ec +buyshop.jp +fashionstore.jp +handcrafted.jp +kawaiishop.jp +supersale.jp +theshop.jp +shopselect.net +base.shop + +// BeagleBoard.org Foundation : https://beagleboard.org +// Submitted by Jason Kridner +beagleboard.io + +// Beget Ltd +// Submitted by Lev Nekrasov +*.beget.app + +// Besties : https://besties.house +// Submitted by Hazel Cora +pages.gay + +// BinaryLane : http://www.binarylane.com +// Submitted by Nathan O'Sullivan +bnr.la + +// Bitbucket : http://bitbucket.org +// Submitted by Andy Ortlieb +bitbucket.io + +// Blackbaud, Inc. : https://www.blackbaud.com +// Submitted by Paul Crowder +blackbaudcdn.net + +// Blatech : http://www.blatech.net +// Submitted by Luke Bratch +of.je + +// Block, Inc. 
: https://block.xyz +// Submitted by Jonathan Boice +square.site + +// Blue Bite, LLC : https://bluebite.com +// Submitted by Joshua Weiss +bluebite.io + +// Boomla : https://boomla.com +// Submitted by Tibor Halter +boomla.net + +// Boutir : https://www.boutir.com +// Submitted by Eric Ng Ka Ka +boutir.com + +// Boxfuse : https://boxfuse.com +// Submitted by Axel Fontaine +boxfuse.io + +// bplaced : https://www.bplaced.net/ +// Submitted by Miroslav Bozic +square7.ch +bplaced.com +bplaced.de +square7.de +bplaced.net +square7.net + +// Brave : https://brave.com +// Submitted by Andrea Brancaleoni +brave.app +*.s.brave.app +brave.io +*.s.brave.io + +// Brendly : https://brendly.rs +// Submitted by Dusan Radovanovic +shop.brendly.hr +shop.brendly.rs + +// BrowserSafetyMark +// Submitted by Dave Tharp +browsersafetymark.io + +// BRS Media : https://brsmedia.com/ +// Submitted by Gavin Brown +radio.am +radio.fm + +// Bubble : https://bubble.io/ +// Submitted by Merlin Zhao +cdn.bubble.io +bubbleapps.io + +// Bytemark Hosting : https://www.bytemark.co.uk +// Submitted by Paul Cammish +uk0.bigv.io +dh.bytemark.co.uk +vm.bytemark.co.uk + +// Caf.js Labs LLC : https://www.cafjs.com +// Submitted by Antonio Lain +cafjs.com + +// Canva Pty Ltd : https://canva.com/ +// Submitted by Joel Aquilina +canva-apps.cn +my.canvasite.cn +canva-apps.com +my.canva.site + +// Carrd : https://carrd.co +// Submitted by AJ +drr.ac +uwu.ai +carrd.co +crd.co +ju.mp + +// CDDO : https://www.gov.uk/guidance/get-an-api-domain-on-govuk +// Submitted by Jamie Tanna +api.gov.uk + +// CDN77.com : http://www.cdn77.com +// Submitted by Jan Krpes +cdn77-storage.com +rsc.contentproxy9.cz +r.cdn77.net +cdn77-ssl.net +c.cdn77.org +rsc.cdn77.org +ssl.origin.cdn77-secure.org + +// CentralNic : https://teaminternet.com/ +// Submitted by registry +za.bz +br.com +cn.com +de.com +eu.com +jpn.com +mex.com +ru.com +sa.com +uk.com +us.com +za.com +com.de +gb.net +hu.net +jp.net +se.net +uk.net +ae.org +com.se + +// 
Cityhost LLC : https://cityhost.ua +// Submitted by Maksym Rivtin +cx.ua + +// Civilized Discourse Construction Kit, Inc. : https://www.discourse.org/ +// Submitted by Rishabh Nambiar & Michael Brown +discourse.group +discourse.team + +// Clerk : https://www.clerk.dev +// Submitted by Colin Sidoti +clerk.app +clerkstage.app +*.lcl.dev +*.lclstage.dev +*.stg.dev +*.stgstage.dev + +// Clever Cloud : https://www.clever-cloud.com/ +// Submitted by Quentin Adam +cleverapps.cc +*.services.clever-cloud.com +cleverapps.io +cleverapps.tech + +// ClickRising : https://clickrising.com/ +// Submitted by Umut Gumeli +clickrising.net + +// Cloud DNS Ltd : http://www.cloudns.net +// Submitted by Aleksander Hristov & Boyan Peychev +cloudns.asia +cloudns.be +cloud-ip.biz +cloudns.biz +cloudns.cc +cloudns.ch +cloudns.cl +cloudns.club +dnsabr.com +ip-ddns.com +cloudns.cx +cloudns.eu +cloudns.in +cloudns.info +ddns-ip.net +dns-cloud.net +dns-dynamic.net +cloudns.nz +cloudns.org +ip-dynamic.org +cloudns.ph +cloudns.pro +cloudns.pw +cloudns.us + +// Cloud66 : https://www.cloud66.com/ +// Submitted by Khash Sajadi +c66.me +cloud66.ws + +// CloudAccess.net : https://www.cloudaccess.net/ +// Submitted by Pawel Panek +jdevcloud.com +wpdevcloud.com +cloudaccess.host +freesite.host +cloudaccess.net + +// Cloudbees, Inc. : https://www.cloudbees.com/ +// Submitted by Mohideen Shajith +cloudbeesusercontent.io + +// Cloudera, Inc. : https://www.cloudera.com/ +// Submitted by Kedarnath Waikar +*.cloudera.site + +// Cloudflare, Inc. 
: https://www.cloudflare.com/ +// Submitted by Cloudflare Team +cf-ipfs.com +cloudflare-ipfs.com +trycloudflare.com +pages.dev +r2.dev +workers.dev +cloudflare.net +cdn.cloudflare.net +cdn.cloudflareanycast.net +cdn.cloudflarecn.net +cdn.cloudflareglobal.net + +// cloudscale.ch AG : https://www.cloudscale.ch/ +// Submitted by Gaudenz Steinlin +cust.cloudscale.ch +objects.lpg.cloudscale.ch +objects.rma.cloudscale.ch +lpg.objectstorage.ch +rma.objectstorage.ch + +// Clovyr : https://clovyr.io +// Submitted by Patrick Nielsen +wnext.app + +// CNPY : https://cnpy.gdn +// Submitted by Angelo Gladding +cnpy.gdn + +// Co & Co : https://co-co.nl/ +// Submitted by Govert Versluis +*.otap.co + +// co.ca : http://registry.co.ca/ +co.ca + +// co.com Registry, LLC : https://registry.co.com +// Submitted by Gavin Brown +co.com + +// Codeberg e. V. : https://codeberg.org +// Submitted by Moritz Marquardt +codeberg.page + +// CodeSandbox B.V. : https://codesandbox.io +// Submitted by Ives van Hoorne +csb.app +preview.csb.app + +// CoDNS B.V. +co.nl +co.no + +// Cognition AI, Inc. : https://cognition.ai +// Submitted by Philip Papurt +*.devinapps.com + +// Combell.com : https://www.combell.com +// Submitted by Thomas Wouters +webhosting.be +hosting-cluster.nl + +// Contentful GmbH : https://www.contentful.com +// Submitted by Contentful Developer Experience Team +ctfcloud.net + +// Convex : https://convex.dev/ +// Submitted by James Cowling +convex.app +convex.cloud +convex.site + +// Coordination Center for TLD RU and XN--P1AI : https://cctld.ru/en/domains/domens_ru/reserved/ +// Submitted by George Georgievsky +ac.ru +edu.ru +gov.ru +int.ru +mil.ru + +// COSIMO GmbH : http://www.cosimo.de +// Submitted by Rene Marticke +dyn.cosidns.de +dnsupdater.de +dynamisches-dns.de +internet-dns.de +l-o-g-i-n.de +dynamic-dns.info +feste-ip.net +knx-server.net +static-access.net + +// Craft Docs Ltd : https://www.craft.do/ +// Submitted by Zsombor Fuszenecker +craft.me + +// Craynic, s.r.o. 
: http://www.craynic.com/ +// Submitted by Ales Krajnik +realm.cz + +// Crisp IM SAS : https://crisp.chat/ +// Submitted by Baptiste Jamin +on.crisp.email + +// Cryptonomic : https://cryptonomic.net/ +// Submitted by Andrew Cady +*.cryptonomic.net + +// cyber_Folks S.A. : https://cyberfolks.pl +// Submitted by Bartlomiej Kida +cfolks.pl + +// cyon GmbH : https://www.cyon.ch/ +// Submitted by Dominic Luechinger +cyon.link +cyon.site + +// Dansk.net : http://www.dansk.net/ +// Submitted by Anani Voule +biz.dk +co.dk +firm.dk +reg.dk +store.dk + +// dappnode.io : https://dappnode.io/ +// Submitted by Abel Boldu / DAppNode Team +dyndns.dappnode.io + +// Dark, Inc. : https://darklang.com +// Submitted by Paul Biggar +builtwithdark.com +darklang.io + +// DataDetect, LLC. : https://datadetect.com +// Submitted by Andrew Banchich +demo.datadetect.com +instance.datadetect.com + +// Datawire, Inc : https://www.datawire.io +// Submitted by Richard Li +edgestack.me + +// Datto, Inc. : https://www.datto.com/ +// Submitted by Philipp Heckel +dattolocal.com +dattorelay.com +dattoweb.com +mydatto.com +dattolocal.net +mydatto.net + +// ddnss.de : https://www.ddnss.de/ +// Submitted by Robert Niedziela +ddnss.de +dyn.ddnss.de +dyndns.ddnss.de +dyn-ip24.de +dyndns1.de +home-webserver.de +dyn.home-webserver.de +myhome-server.de +ddnss.org + +// Debian : https://www.debian.org/ +// Submitted by Peter Palfrader / Debian Sysadmin Team +debian.net + +// Definima : http://www.definima.com/ +// Submitted by Maxence Bitterli +definima.io +definima.net + +// Deno Land Inc : https://deno.com/ +// Submitted by Luca Casonato +deno.dev +deno-staging.dev +deno.net + +// deSEC : https://desec.io/ +// Submitted by Peter Thomassen +dedyn.io + +// Deta : https://www.deta.sh/ +// Submitted by Aavash Shrestha +deta.app +deta.dev + +// Dfinity Foundation: https://dfinity.org/ +// Submitted by Dfinity Team +caffeine.ai +id.ai +icp-api.io +icp0.io +*.raw.icp0.io +icp1.io +*.raw.icp1.io +caffeine.site + +// 
dhosting.pl Sp. z o.o. : https://dhosting.pl/ +// Submitted by Michal Kokoszkiewicz +dfirma.pl +dkonto.pl +you2.pl + +// DigitalOcean App Platform : https://www.digitalocean.com/products/app-platform/ +// Submitted by Braxton Huggins +ondigitalocean.app + +// DigitalOcean Spaces : https://www.digitalocean.com/products/spaces/ +// Submitted by Robin H. Johnson +*.digitaloceanspaces.com + +// DigitalPlat : https://www.digitalplat.org/ +// Submitted by Edward Hsing +qzz.io +us.kg +xx.kg +dpdns.org + +// Discord Inc : https://discord.com +// Submitted by Sahn Lam +discordsays.com +discordsez.com + +// DNS Africa Ltd : https://dns.business +// Submitted by Calvin Browne +jozi.biz + +// DNShome : https://www.dnshome.de/ +// Submitted by Norbert Auler +dnshome.de + +// DotArai : https://www.dotarai.com/ +// Submitted by Atsadawat Netcharadsang +online.th +shop.th + +// DrayTek Corp. : https://www.draytek.com/ +// Submitted by Paul Fang +drayddns.com + +// DreamCommerce : https://shoper.pl/ +// Submitted by Konrad Kotarba +shoparena.pl + +// DreamHost : http://www.dreamhost.com/ +// Submitted by Andrew Farmer +dreamhosters.com + +// Dreamyoungs, Inc. 
: https://durumis.com +// Submitted by Infra Team +durumis.com + +// Drobo : http://www.drobo.com/ +// Submitted by Ricardo Padilha +mydrobo.com + +// DuckDNS : http://www.duckdns.org/ +// Submitted by Richard Harper +duckdns.org + +// dy.fi : http://dy.fi/ +// Submitted by Heikki Hannikainen +dy.fi +tunk.org + +// DynDNS.com : http://www.dyndns.com/services/dns/dyndns/ +dyndns.biz +for-better.biz +for-more.biz +for-some.biz +for-the.biz +selfip.biz +webhop.biz +ftpaccess.cc +game-server.cc +myphotos.cc +scrapping.cc +blogdns.com +cechire.com +dnsalias.com +dnsdojo.com +doesntexist.com +dontexist.com +doomdns.com +dyn-o-saur.com +dynalias.com +dyndns-at-home.com +dyndns-at-work.com +dyndns-blog.com +dyndns-free.com +dyndns-home.com +dyndns-ip.com +dyndns-mail.com +dyndns-office.com +dyndns-pics.com +dyndns-remote.com +dyndns-server.com +dyndns-web.com +dyndns-wiki.com +dyndns-work.com +est-a-la-maison.com +est-a-la-masion.com +est-le-patron.com +est-mon-blogueur.com +from-ak.com +from-al.com +from-ar.com +from-ca.com +from-ct.com +from-dc.com +from-de.com +from-fl.com +from-ga.com +from-hi.com +from-ia.com +from-id.com +from-il.com +from-in.com +from-ks.com +from-ky.com +from-ma.com +from-md.com +from-mi.com +from-mn.com +from-mo.com +from-ms.com +from-mt.com +from-nc.com +from-nd.com +from-ne.com +from-nh.com +from-nj.com +from-nm.com +from-nv.com +from-oh.com +from-ok.com +from-or.com +from-pa.com +from-pr.com +from-ri.com +from-sc.com +from-sd.com +from-tn.com +from-tx.com +from-ut.com +from-va.com +from-vt.com +from-wa.com +from-wi.com +from-wv.com +from-wy.com +getmyip.com +gotdns.com +hobby-site.com +homelinux.com +homeunix.com +iamallama.com +is-a-anarchist.com +is-a-blogger.com +is-a-bookkeeper.com +is-a-bulls-fan.com +is-a-caterer.com +is-a-chef.com +is-a-conservative.com +is-a-cpa.com +is-a-cubicle-slave.com +is-a-democrat.com +is-a-designer.com +is-a-doctor.com +is-a-financialadvisor.com +is-a-geek.com +is-a-green.com +is-a-guru.com +is-a-hard-worker.com 
+is-a-hunter.com +is-a-landscaper.com +is-a-lawyer.com +is-a-liberal.com +is-a-libertarian.com +is-a-llama.com +is-a-musician.com +is-a-nascarfan.com +is-a-nurse.com +is-a-painter.com +is-a-personaltrainer.com +is-a-photographer.com +is-a-player.com +is-a-republican.com +is-a-rockstar.com +is-a-socialist.com +is-a-student.com +is-a-teacher.com +is-a-techie.com +is-a-therapist.com +is-an-accountant.com +is-an-actor.com +is-an-actress.com +is-an-anarchist.com +is-an-artist.com +is-an-engineer.com +is-an-entertainer.com +is-certified.com +is-gone.com +is-into-anime.com +is-into-cars.com +is-into-cartoons.com +is-into-games.com +is-leet.com +is-not-certified.com +is-slick.com +is-uberleet.com +is-with-theband.com +isa-geek.com +isa-hockeynut.com +issmarterthanyou.com +likes-pie.com +likescandy.com +neat-url.com +saves-the-whales.com +selfip.com +sells-for-less.com +sells-for-u.com +servebbs.com +simple-url.com +space-to-rent.com +teaches-yoga.com +writesthisblog.com +ath.cx +fuettertdasnetz.de +isteingeek.de +istmein.de +lebtimnetz.de +leitungsen.de +traeumtgerade.de +barrel-of-knowledge.info +barrell-of-knowledge.info +dyndns.info +for-our.info +groks-the.info +groks-this.info +here-for-more.info +knowsitall.info +selfip.info +webhop.info +forgot.her.name +forgot.his.name +at-band-camp.net +blogdns.net +broke-it.net +buyshouses.net +dnsalias.net +dnsdojo.net +does-it.net +dontexist.net +dynalias.net +dynathome.net +endofinternet.net +from-az.net +from-co.net +from-la.net +from-ny.net +gets-it.net +ham-radio-op.net +homeftp.net +homeip.net +homelinux.net +homeunix.net +in-the-band.net +is-a-chef.net +is-a-geek.net +isa-geek.net +kicks-ass.net +office-on-the.net +podzone.net +scrapper-site.net +selfip.net +sells-it.net +servebbs.net +serveftp.net +thruhere.net +webhop.net +merseine.nu +mine.nu +shacknet.nu +blogdns.org +blogsite.org +boldlygoingnowhere.org +dnsalias.org +dnsdojo.org +doesntexist.org +dontexist.org +doomdns.org +dvrdns.org +dynalias.org +dyndns.org 
+go.dyndns.org +home.dyndns.org +endofinternet.org +endoftheinternet.org +from-me.org +game-host.org +gotdns.org +hobby-site.org +homedns.org +homeftp.org +homelinux.org +homeunix.org +is-a-bruinsfan.org +is-a-candidate.org +is-a-celticsfan.org +is-a-chef.org +is-a-geek.org +is-a-knight.org +is-a-linux-user.org +is-a-patsfan.org +is-a-soxfan.org +is-found.org +is-lost.org +is-saved.org +is-very-bad.org +is-very-evil.org +is-very-good.org +is-very-nice.org +is-very-sweet.org +isa-geek.org +kicks-ass.org +misconfused.org +podzone.org +readmyblog.org +selfip.org +sellsyourhome.org +servebbs.org +serveftp.org +servegame.org +stuff-4-sale.org +webhop.org +better-than.tv +dyndns.tv +on-the-web.tv +worse-than.tv +is-by.us +land-4-sale.us +stuff-4-sale.us +dyndns.ws +mypets.ws + +// Dynu.com : https://www.dynu.com/ +// Submitted by Sue Ye +ddnsfree.com +ddnsgeek.com +giize.com +gleeze.com +kozow.com +loseyourip.com +ooguy.com +theworkpc.com +casacam.net +dynu.net +accesscam.org +camdvr.org +freeddns.org +mywire.org +webredirect.org +myddns.rocks + +// dynv6 : https://dynv6.com +// Submitted by Dominik Menke +dynv6.net + +// E4YOU spol. s.r.o. : https://e4you.cz/ +// Submitted by Vladimir Dudr +e4.cz + +// Easypanel : https://easypanel.io +// Submitted by Andrei Canta +easypanel.app +easypanel.host + +// EasyWP : https://www.easywp.com +// Submitted by +*.ewp.live + +// eDirect Corp. : https://hosting.url.com.tw/ +// Submitted by C.S. chang +twmail.cc +twmail.net +twmail.org +mymailer.com.tw +url.tw + +// Electromagnetic Field : https://www.emfcamp.org +// Submitted by +at.emf.camp + +// Elefunc, Inc. : https://elefunc.com +// Submitted by Cetin Sert +rt.ht + +// Elementor : Elementor Ltd. 
+// Submitted by Anton Barkan +elementor.cloud +elementor.cool + +// En root‽ : https://en-root.org +// Submitted by Emmanuel Raviart +en-root.fr + +// Enalean SAS : https://www.enalean.com +// Submitted by Enalean Security Team +mytuleap.com +tuleap-partners.com + +// Encoretivity AB : https://encore.cloud +// Submitted by André Eriksson +encr.app +frontend.encr.app +encoreapi.com +lp.dev +api.lp.dev +objects.lp.dev + +// encoway GmbH : https://www.encoway.de +// Submitted by Marcel Daus +eu.encoway.cloud + +// EU.org : https://eu.org/ +// Submitted by Pierre Beyssac +eu.org +al.eu.org +asso.eu.org +at.eu.org +au.eu.org +be.eu.org +bg.eu.org +ca.eu.org +cd.eu.org +ch.eu.org +cn.eu.org +cy.eu.org +cz.eu.org +de.eu.org +dk.eu.org +edu.eu.org +ee.eu.org +es.eu.org +fi.eu.org +fr.eu.org +gr.eu.org +hr.eu.org +hu.eu.org +ie.eu.org +il.eu.org +in.eu.org +int.eu.org +is.eu.org +it.eu.org +jp.eu.org +kr.eu.org +lt.eu.org +lu.eu.org +lv.eu.org +me.eu.org +mk.eu.org +mt.eu.org +my.eu.org +net.eu.org +ng.eu.org +nl.eu.org +no.eu.org +nz.eu.org +pl.eu.org +pt.eu.org +ro.eu.org +ru.eu.org +se.eu.org +si.eu.org +sk.eu.org +tr.eu.org +uk.eu.org +us.eu.org + +// Eurobyte : https://eurobyte.ru +// Submitted by Evgeniy Subbotin +eurodir.ru + +// Evennode : http://www.evennode.com/ +// Submitted by Michal Kralik +eu-1.evennode.com +eu-2.evennode.com +eu-3.evennode.com +eu-4.evennode.com +us-1.evennode.com +us-2.evennode.com +us-3.evennode.com +us-4.evennode.com + +// Evervault : https://evervault.com +// Submitted by Hannah Neary +relay.evervault.app +relay.evervault.dev + +// Expo : https://expo.dev/ +// Submitted by James Ide +expo.app +staging.expo.app + +// Fabrica Technologies, Inc. 
: https://www.fabrica.dev/ +// Submitted by Eric Jiang +onfabrica.com + +// FAITID : https://faitid.org/ +// Submitted by Maxim Alzoba +// https://www.flexireg.net/stat_info +ru.net +adygeya.ru +bashkiria.ru +bir.ru +cbg.ru +com.ru +dagestan.ru +grozny.ru +kalmykia.ru +kustanai.ru +marine.ru +mordovia.ru +msk.ru +mytis.ru +nalchik.ru +nov.ru +pyatigorsk.ru +spb.ru +vladikavkaz.ru +vladimir.ru +abkhazia.su +adygeya.su +aktyubinsk.su +arkhangelsk.su +armenia.su +ashgabad.su +azerbaijan.su +balashov.su +bashkiria.su +bryansk.su +bukhara.su +chimkent.su +dagestan.su +east-kazakhstan.su +exnet.su +georgia.su +grozny.su +ivanovo.su +jambyl.su +kalmykia.su +kaluga.su +karacol.su +karaganda.su +karelia.su +khakassia.su +krasnodar.su +kurgan.su +kustanai.su +lenug.su +mangyshlak.su +mordovia.su +msk.su +murmansk.su +nalchik.su +navoi.su +north-kazakhstan.su +nov.su +obninsk.su +penza.su +pokrovsk.su +sochi.su +spb.su +tashkent.su +termez.su +togliatti.su +troitsk.su +tselinograd.su +tula.su +tuva.su +vladikavkaz.su +vladimir.su +vologda.su + +// Fancy Bits, LLC : http://getchannels.com +// Submitted by Aman Gupta +channelsdvr.net +u.channelsdvr.net + +// Fastly Inc. : http://www.fastly.com/ +// Submitted by Fastly Security +edgecompute.app +fastly-edge.com +fastly-terrarium.com +freetls.fastly.net +map.fastly.net +a.prod.fastly.net +global.prod.fastly.net +a.ssl.fastly.net +b.ssl.fastly.net +global.ssl.fastly.net +fastlylb.net +map.fastlylb.net + +// Fastmail : https://www.fastmail.com/ +// Submitted by Marc Bradshaw +*.user.fm + +// FASTVPS EESTI OU : https://fastvps.ru/ +// Submitted by Likhachev Vasiliy +fastvps-server.com +fastvps.host +myfast.host +fastvps.site +myfast.space + +// FearWorks Media Ltd. 
: https://fearworksmedia.co.uk +// Submitted by Keith Fairley +conn.uk +copro.uk +hosp.uk + +// Fedora : https://fedoraproject.org/ +// Submitted by Patrick Uiterwijk +fedorainfracloud.org +fedorapeople.org +cloud.fedoraproject.org +app.os.fedoraproject.org +app.os.stg.fedoraproject.org + +// Fermax : https://fermax.com/ +// Submitted by Koen Van Isterdael +mydobiss.com + +// FH Muenster : https://www.fh-muenster.de +// Submitted by Robin Naundorf +fh-muenster.io + +// Figma : https://www.figma.com +// Submitted by Nick Frost +figma.site +preview.site + +// Filegear Inc. : https://www.filegear.com +// Submitted by Jason Zhu +filegear.me + +// Firebase, Inc. +// Submitted by Chris Raynor +firebaseapp.com + +// FlashDrive : https://flashdrive.io +// Submitted by Eric Chan +fldrv.com + +// Fleek Labs Inc : https://fleek.xyz +// Submitted by Parsa Ghadimi +on-fleek.app + +// FlutterFlow : https://flutterflow.io +// Submitted by Anton Emelyanov +flutterflow.app + +// fly.io : https://fly.io +// Submitted by Kurt Mackey +fly.dev +shw.io +edgeapp.net + +// Forgerock : https://www.forgerock.com +// Submitted by Roderick Parr +forgeblocks.com +id.forgerock.io + +// FoundryLabs, Inc : https://e2b.dev/ +// Submitted by Jiri Sveceny +e2b.app + +// Framer : https://www.framer.com +// Submitted by Koen Rouwhorst +framer.ai +framer.app +framercanvas.com +framer.media +framer.photos +framer.website +framer.wiki + +// Frederik Braun : https://frederik-braun.com +// Submitted by Frederik Braun +*.0e.vc + +// Freebox : http://www.freebox.fr +// Submitted by Romain Fliedel +freebox-os.com +freeboxos.com +fbx-os.fr +fbxos.fr +freebox-os.fr +freeboxos.fr + +// freedesktop.org : https://www.freedesktop.org +// Submitted by Daniel Stone +freedesktop.org + +// freemyip.com : https://freemyip.com +// Submitted by Cadence +freemyip.com + +// Frusky MEDIA&PR : https://www.frusky.de +// Submitted by Victor Pupynin +*.frusky.de + +// FunkFeuer - Verein zur Förderung freier Netze : 
https://www.funkfeuer.at +// Submitted by Daniel A. Maierhofer +wien.funkfeuer.at + +// Future Versatile Group. : https://www.fvg-on.net/ +// T.Kabu +daemon.asia +dix.asia +mydns.bz +0am.jp +0g0.jp +0j0.jp +0t0.jp +mydns.jp +pgw.jp +wjg.jp +keyword-on.net +live-on.net +server-on.net +mydns.tw +mydns.vc + +// Futureweb GmbH : https://www.futureweb.at +// Submitted by Andreas Schnederle-Wagner +*.futurecms.at +*.ex.futurecms.at +*.in.futurecms.at +futurehosting.at +futuremailing.at +*.ex.ortsinfo.at +*.kunden.ortsinfo.at +*.statics.cloud + +// GCom Internet : https://www.gcom.net.au +// Submitted by Leo Julius +aliases121.com + +// GDS : https://www.gov.uk/service-manual/technology/managing-domain-names +// Submitted by Stephen Ford +campaign.gov.uk +service.gov.uk +independent-commission.uk +independent-inquest.uk +independent-inquiry.uk +independent-panel.uk +independent-review.uk +public-inquiry.uk +royal-commission.uk + +// Gehirn Inc. : https://www.gehirn.co.jp/ +// Submitted by Kohei YOSHIDA +gehirn.ne.jp +usercontent.jp + +// Gentlent, Inc. : https://www.gentlent.com +// Submitted by Tom Klein +gentapps.com +gentlentapis.com +cdn-edges.net + +// GignoSystemJapan : http://gsj.bz +// Submitted by GignoSystemJapan +gsj.bz + +// GitHub, Inc. +// Submitted by Patrick Toomey +github.app +githubusercontent.com +githubpreview.dev +github.io + +// GitLab, Inc. : https://about.gitlab.com/ +// Submitted by Alex Hanselka +gitlab.io + +// Gitplac.si : https://gitplac.si +// Submitted by Aljaž Starc +gitapp.si +gitpage.si + +// Glitch, Inc : https://glitch.com +// Submitted by Mads Hartmann +glitch.me + +// Global NOG Alliance : https://nogalliance.org/ +// Submitted by Sander Steffann +nog.community + +// Globe Hosting SRL : https://www.globehosting.com/ +// Submitted by Gavin Brown +co.ro +shop.ro + +// GMO Pepabo, Inc. 
: https://pepabo.com/ +// Submitted by Hosting Div +lolipop.io +angry.jp +babyblue.jp +babymilk.jp +backdrop.jp +bambina.jp +bitter.jp +blush.jp +boo.jp +boy.jp +boyfriend.jp +but.jp +candypop.jp +capoo.jp +catfood.jp +cheap.jp +chicappa.jp +chillout.jp +chips.jp +chowder.jp +chu.jp +ciao.jp +cocotte.jp +coolblog.jp +cranky.jp +cutegirl.jp +daa.jp +deca.jp +deci.jp +digick.jp +egoism.jp +fakefur.jp +fem.jp +flier.jp +floppy.jp +fool.jp +frenchkiss.jp +girlfriend.jp +girly.jp +gloomy.jp +gonna.jp +greater.jp +hacca.jp +heavy.jp +her.jp +hiho.jp +hippy.jp +holy.jp +hungry.jp +icurus.jp +itigo.jp +jellybean.jp +kikirara.jp +kill.jp +kilo.jp +kuron.jp +littlestar.jp +lolipopmc.jp +lolitapunk.jp +lomo.jp +lovepop.jp +lovesick.jp +main.jp +mods.jp +mond.jp +mongolian.jp +moo.jp +namaste.jp +nikita.jp +nobushi.jp +noor.jp +oops.jp +parallel.jp +parasite.jp +pecori.jp +peewee.jp +penne.jp +pepper.jp +perma.jp +pigboat.jp +pinoko.jp +punyu.jp +pupu.jp +pussycat.jp +pya.jp +raindrop.jp +readymade.jp +sadist.jp +schoolbus.jp +secret.jp +staba.jp +stripper.jp +sub.jp +sunnyday.jp +thick.jp +tonkotsu.jp +under.jp +upper.jp +velvet.jp +verse.jp +versus.jp +vivian.jp +watson.jp +weblike.jp +whitesnow.jp +zombie.jp +heteml.net + +// GoDaddy Registry : https://registry.godaddy +// Submitted by Rohan Durrant +graphic.design + +// GoIP DNS Services : http://www.goip.de +// Submitted by Christian Poulter +goip.de + +// Google, Inc. 
+// Submitted by Shannon McCabe +*.hosted.app +*.run.app +*.mtls.run.app +web.app +*.0emm.com +appspot.com +*.r.appspot.com +blogspot.com +codespot.com +googleapis.com +googlecode.com +pagespeedmobilizer.com +withgoogle.com +withyoutube.com +*.gateway.dev +cloud.goog +translate.goog +*.usercontent.goog +cloudfunctions.net + +// Goupile : https://goupile.fr +// Submitted by Niels Martignene +goupile.fr + +// GOV.UK Pay : https://www.payments.service.gov.uk/ +// Submitted by Richard Baker +pymnt.uk + +// GOV.UK Platform as a Service : https://www.cloud.service.gov.uk/ +// Submitted by Tom Whitwell +cloudapps.digital +london.cloudapps.digital + +// Government of the Netherlands : https://www.government.nl +// Submitted by +gov.nl + +// Grafana Labs : https://grafana.com/ +// Submitted by Platform Engineering +grafana-dev.net + +// GrayJay Web Solutions Inc. : https://grayjaysports.ca +// Submitted by Matt Yamkowy +grayjayleagues.com + +// GünstigBestellen : https://günstigbestellen.de +// Submitted by Furkan Akkoc +günstigbestellen.de +günstigliefern.de + +// Häkkinen.fi : https://www.häkkinen.fi/ +// Submitted by Eero Häkkinen +häkkinen.fi + +// Hashbang : https://hashbang.sh +hashbang.sh + +// Hasura : https://hasura.io +// Submitted by Shahidh K Muhammed +hasura.app +hasura-app.io + +// Hatena Co., Ltd. 
: https://hatena.co.jp +// Submitted by Masato Nakamura +hatenablog.com +hatenadiary.com +hateblo.jp +hatenablog.jp +hatenadiary.jp +hatenadiary.org + +// Heilbronn University of Applied Sciences - Faculty Informatics (GitLab Pages) : https://www.hs-heilbronn.de +// Submitted by Richard Zowalla +pages.it.hs-heilbronn.de +pages-research.it.hs-heilbronn.de + +// HeiyuSpace : https://lazycat.cloud +// Submitted by Xia Bin +heiyu.space + +// Helio Networks : https://heliohost.org +// Submitted by Ben Frede +helioho.st +heliohost.us + +// Hepforge : https://www.hepforge.org +// Submitted by David Grellscheid +hepforge.org + +// Heroku : https://www.heroku.com/ +// Submitted by Shumon Huque +herokuapp.com + +// Heyflow : https://www.heyflow.com +// Submitted by Mirko Nitschke +heyflow.page +heyflow.site + +// Hibernating Rhinos +// Submitted by Oren Eini +ravendb.cloud +ravendb.community +development.run +ravendb.run + +// home.pl S.A. : https://home.pl +// Submitted by Krzysztof Wolski +homesklep.pl + +// Homebase : https://homebase.id/ +// Submitted by Jason Babo +*.kin.one +*.id.pub +*.kin.pub + +// Hoplix : https://www.hoplix.com +// Submitted by Danilo De Franco +hoplix.shop + +// HOSTBIP REGISTRY : https://www.hostbip.com/ +// Submitted by Atanunu Igbunuroghene +orx.biz +biz.gl +biz.ng +co.biz.ng +dl.biz.ng +go.biz.ng +lg.biz.ng +on.biz.ng +col.ng +firm.ng +gen.ng +ltd.ng +ngo.ng +plc.ng + +// HostyHosting : https://hostyhosting.com +hostyhosting.io + +// Hugging Face : https://huggingface.co +// Submitted by Eliott Coyac +hf.space +static.hf.space + +// Hypernode B.V. : https://www.hypernode.com/ +// Submitted by Cipriano Groenendal +hypernode.io + +// I-O DATA DEVICE, INC. : http://www.iodata.com/ +// Submitted by Yuji Minagawa +iobb.net + +// i-registry s.r.o. 
: http://www.i-registry.cz/ +// Submitted by Martin Semrad +co.cz + +// Ici la Lune : http://www.icilalune.com/ +// Submitted by Simon Morvan +*.moonscale.io +moonscale.net + +// iDOT Services Limited : http://www.domain.gr.com +// Submitted by Gavin Brown +gr.com + +// iki.fi +// Submitted by Hannu Aronsson +iki.fi + +// iliad italia : https://www.iliad.it +// Submitted by Marios Makassikis +ibxos.it +iliadboxos.it + +// Incsub, LLC : https://incsub.com/ +// Submitted by Aaron Edwards +smushcdn.com +wphostedmail.com +wpmucdn.com +tempurl.host +wpmudev.host + +// Individual Network Berlin e.V. : https://www.in-berlin.de/ +// Submitted by Christian Seitz +dyn-berlin.de +in-berlin.de +in-brb.de +in-butter.de +in-dsl.de +in-vpn.de +in-dsl.net +in-vpn.net +in-dsl.org +in-vpn.org + +// Inferno Communications : https://inferno.co.uk +// Submitted by Connor McFarlane +oninferno.net + +// info.at : http://www.info.at/ +biz.at +info.at + +// info.cx : http://info.cx +// Submitted by June Slater +info.cx + +// Interlegis : http://www.interlegis.leg.br +// Submitted by Gabriel Ferreira +ac.leg.br +al.leg.br +am.leg.br +ap.leg.br +ba.leg.br +ce.leg.br +df.leg.br +es.leg.br +go.leg.br +ma.leg.br +mg.leg.br +ms.leg.br +mt.leg.br +pa.leg.br +pb.leg.br +pe.leg.br +pi.leg.br +pr.leg.br +rj.leg.br +rn.leg.br +ro.leg.br +rr.leg.br +rs.leg.br +sc.leg.br +se.leg.br +sp.leg.br +to.leg.br + +// intermetrics GmbH : https://pixolino.com/ +// Submitted by Wolfgang Schwarz +pixolino.com + +// Internet-Pro, LLP : https://netangels.ru/ +// Submitted by Vasiliy Sheredeko +na4u.ru + +// Inventor Services : https://inventor.gg/ +// Submitted by Inventor Team +botdash.app +botdash.dev +botdash.gg +botdash.net +botda.sh +botdash.xyz + +// IONOS SE : https://www.ionos.com/ +// IONOS Group SE : https://www.ionos-group.com/ +// Submitted by Henrik Willert +apps-1and1.com +live-website.com +webspace-host.com +apps-1and1.net +websitebuilder.online +app-ionos.space + +// iopsys software solutions AB : 
https://iopsys.eu/ +// Submitted by Roman Azarenko +iopsys.se + +// IPFS Project : https://ipfs.tech/ +// Submitted by Interplanetary Shipyard +*.inbrowser.dev +*.dweb.link +*.inbrowser.link + +// IPiFony Systems, Inc. : https://www.ipifony.com/ +// Submitted by Matthew Hardeman +ipifony.net + +// ir.md : https://nic.ir.md +// Submitted by Ali Soizi +ir.md + +// is-a-good.dev : https://is-a-good.dev +// Submitted by William Harrison +is-a-good.dev + +// is-a.dev : https://is-a.dev +// Submitted by William Harrison +is-a.dev + +// IServ GmbH : https://iserv.de +// Submitted by Kim Brodowski +iservschule.de +mein-iserv.de +schuldock.de +schulplattform.de +schulserver.de +test-iserv.de +iserv.dev +iserv.host + +// Jelastic, Inc. : https://jelastic.com/ +// Submitted by Ihor Kolodyuk +mel.cloudlets.com.au +cloud.interhostsolutions.be +alp1.ae.flow.ch +appengine.flow.ch +es-1.axarnet.cloud +diadem.cloud +vip.jelastic.cloud +jele.cloud +it1.eur.aruba.jenv-aruba.cloud +it1.jenv-aruba.cloud +keliweb.cloud +cs.keliweb.cloud +oxa.cloud +tn.oxa.cloud +uk.oxa.cloud +primetel.cloud +uk.primetel.cloud +ca.reclaim.cloud +uk.reclaim.cloud +us.reclaim.cloud +ch.trendhosting.cloud +de.trendhosting.cloud +jele.club +dopaas.com +paas.hosted-by-previder.com +rag-cloud.hosteur.com +rag-cloud-ch.hosteur.com +jcloud.ik-server.com +jcloud-ver-jpc.ik-server.com +demo.jelastic.com +paas.massivegrid.com +jed.wafaicloud.com +ryd.wafaicloud.com +j.scaleforce.com.cy +jelastic.dogado.eu +fi.cloudplatform.fi +demo.datacenter.fi +paas.datacenter.fi +jele.host +mircloud.host +paas.beebyte.io +sekd1.beebyteapp.io +jele.io +jc.neen.it +jcloud.kz +cloudjiffy.net +fra1-de.cloudjiffy.net +west1-us.cloudjiffy.net +jls-sto1.elastx.net +jls-sto2.elastx.net +jls-sto3.elastx.net +fr-1.paas.massivegrid.net +lon-1.paas.massivegrid.net +lon-2.paas.massivegrid.net +ny-1.paas.massivegrid.net +ny-2.paas.massivegrid.net +sg-1.paas.massivegrid.net +jelastic.saveincloud.net +nordeste-idc.saveincloud.net 
+j.scaleforce.net +sdscloud.pl +unicloud.pl +mircloud.ru +enscaled.sg +jele.site +jelastic.team +orangecloud.tn +j.layershift.co.uk +phx.enscaled.us +mircloud.us + +// Jino : https://www.jino.ru +// Submitted by Sergey Ulyashin +myjino.ru +*.hosting.myjino.ru +*.landing.myjino.ru +*.spectrum.myjino.ru +*.vps.myjino.ru + +// Jotelulu S.L. : https://jotelulu.com +// Submitted by Daniel Fariña +jotelulu.cloud + +// JouwWeb B.V. : https://www.jouwweb.nl +// Submitted by Camilo Sperberg +webadorsite.com +jouwweb.site + +// Joyent : https://www.joyent.com/ +// Submitted by Brian Bennett +*.cns.joyent.com +*.triton.zone + +// JS.ORG : http://dns.js.org +// Submitted by Stefan Keim +js.org + +// KaasHosting : http://www.kaashosting.nl/ +// Submitted by Wouter Bakker +kaas.gg +khplay.nl + +// Kapsi : https://kapsi.fi +// Submitted by Tomi Juntunen +kapsi.fi + +// Katholieke Universiteit Leuven : https://www.kuleuven.be +// Submitted by Abuse KU Leuven +ezproxy.kuleuven.be +kuleuven.cloud + +// Keyweb AG : https://www.keyweb.de +// Submitted by Martin Dannehl +keymachine.de + +// KingHost : https://king.host +// Submitted by Felipe Keller Braz +kinghost.net +uni5.net + +// KnightPoint Systems, LLC : http://www.knightpoint.com/ +// Submitted by Roy Keene +knightpoint.systems + +// KoobinEvent, SL : https://www.koobin.com +// Submitted by Iván Oliva +koobin.events + +// Krellian Ltd. : https://krellian.com +// Submitted by Ben Francis +webthings.io +krellian.net + +// KUROKU LTD : https://kuroku.ltd/ +// Submitted by DisposaBoy +oya.to + +// Laravel Holdings, Inc. : https://laravel.com +// Submitted by André Valentin +laravel.cloud + +// LCube - Professional hosting e.K. 
: https://www.lcube-webhosting.de +// Submitted by Lars Laehn +git-repos.de +lcube-server.de +svn-repos.de + +// Leadpages : https://www.leadpages.net +// Submitted by Greg Dallavalle +leadpages.co +lpages.co +lpusercontent.com + +// Liara : https://liara.ir +// Submitted by Amirhossein Badinloo +liara.run +iran.liara.run + +// libp2p project : https://libp2p.io +// Submitted by Interplanetary Shipyard +libp2p.direct + +// Libre IT Ltd : https://libre.nz +// Submitted by Tomas Maggio +runcontainers.dev + +// Lifetime Hosting : https://Lifetime.Hosting/ +// Submitted by Mike Fillator +co.business +co.education +co.events +co.financial +co.network +co.place +co.technology + +// linkyard ldt : https://www.linkyard.ch/ +// Submitted by Mario Siegenthaler +linkyard-cloud.ch +linkyard.cloud + +// Linode : https://linode.com +// Submitted by +members.linode.com +*.nodebalancer.linode.com +*.linodeobjects.com +ip.linodeusercontent.com + +// LiquidNet Ltd : http://www.liquidnetlimited.com/ +// Submitted by Victor Velchev +we.bs + +// Listen53 : https://www.l53.net +// Submitted by Gerry Keh +filegear-sg.me +ggff.net + +// Localcert : https://localcert.dev +// Submitted by Lann Martin +*.user.localcert.dev + +// LocalCert : https://localcert.net +// Submitted by William Harrison +localcert.net + +// Localtonet : https://localtonet.com/ +// Submitted by Burak Isleyici +localtonet.com +*.localto.net + +// Lodz University of Technology LODMAN regional domains : https://www.man.lodz.pl/dns +// Submitted by Piotr Wilk +lodz.pl +pabianice.pl +plock.pl +sieradz.pl +skierniewice.pl +zgierz.pl + +// Log'in Line : https://www.loginline.com/ +// Submitted by Rémi Mach +loginline.app +loginline.dev +loginline.io +loginline.services +loginline.site + +// Lõhmus Family, The : https://lohmus.me/ +// Submitted by Heiki Lõhmus +lohmus.me + +// Lokalized : https://lokalized.nl +// Submitted by Noah Taheij +servers.run + +// Lovable : https://lovable.dev +// Submitted by Fabian Hedin 
+lovable.app +lovableproject.com + +// LubMAN UMCS Sp. z o.o : https://lubman.pl/ +// Submitted by Ireneusz Maliszewski +krasnik.pl +leczna.pl +lubartow.pl +lublin.pl +poniatowa.pl +swidnik.pl + +// Lug.org.uk : https://lug.org.uk +// Submitted by Jon Spriggs +glug.org.uk +lug.org.uk +lugs.org.uk + +// Lukanet Ltd : https://lukanet.com +// Submitted by Anton Avramov +barsy.bg +barsy.club +barsycenter.com +barsyonline.com +barsy.de +barsy.dev +barsy.eu +barsy.gr +barsy.in +barsy.info +barsy.io +barsy.me +barsy.menu +barsyonline.menu +barsy.mobi +barsy.net +barsy.online +barsy.org +barsy.pro +barsy.pub +barsy.ro +barsy.rs +barsy.shop +barsyonline.shop +barsy.site +barsy.store +barsy.support +barsy.uk +barsy.co.uk +barsyonline.co.uk + +// Lutra : https://lutra.ai +// Submitted by Joshua Newman +*.lutrausercontent.com + +// Luyani Inc. : https://luyani.com/ +// Submitted by Umut Gumeli +luyani.app +luyani.net + +// Magento Commerce +// Submitted by Damien Tournoud +*.magentosite.cloud + +// Mail.Ru Group : https://hb.cldmail.ru +// Submitted by Ilya Zaretskiy +hb.cldmail.ru + +// MathWorks : https://www.mathworks.com/ +// Submitted by Emily Reed +matlab.cloud +modelscape.com +mwcloudnonprod.com +polyspace.com + +// May First - People Link : https://mayfirst.org/ +// Submitted by Jamie McClelland +mayfirst.info +mayfirst.org + +// Maze Play : https://www.mazeplay.com +// Submitted by Adam Humpherys +mazeplay.com + +// McHost : https://mchost.ru +// Submitted by Evgeniy Subbotin +mcdir.me +mcdir.ru +vps.mcdir.ru +mcpre.ru + +// Mediatech : https://mediatech.by +// Submitted by Evgeniy Kozhuhovskiy +mediatech.by +mediatech.dev + +// Medicom Health : https://medicomhealth.com +// Submitted by Michael Olson +hra.health + +// MedusaJS, Inc : https://medusajs.com/ +// Submitted by Stevche Radevski +medusajs.app + +// Memset hosting : https://www.memset.com +// Submitted by Tom Whitwell +miniserver.com +memset.net + +// Messerli Informatik AG : https://www.messerli.ch/ +// 
Submitted by Ruben Schmidmeister +messerli.app + +// Meta Platforms, Inc. : https://meta.com/ +// Submitted by Jacob Cordero +atmeta.com +apps.fbsbx.com + +// MetaCentrum, CESNET z.s.p.o. : https://www.metacentrum.cz/en/ +// Submitted by Zdeněk Šustr and Radim Janča +*.cloud.metacentrum.cz +custom.metacentrum.cz +flt.cloud.muni.cz +usr.cloud.muni.cz + +// Meteor Development Group : https://www.meteor.com/hosting +// Submitted by Pierre Carrier +meteorapp.com +eu.meteorapp.com + +// Michau Enterprises Limited : http://www.co.pl/ +co.pl + +// Microsoft Corporation : http://microsoft.com +// Submitted by Public Suffix List Admin +// Managed by Corporate Domains +// Microsoft Azure : https://home.azure +*.azurecontainer.io +azure-api.net +azure-mobile.net +azureedge.net +azurefd.net +azurestaticapps.net +1.azurestaticapps.net +2.azurestaticapps.net +3.azurestaticapps.net +4.azurestaticapps.net +5.azurestaticapps.net +6.azurestaticapps.net +7.azurestaticapps.net +centralus.azurestaticapps.net +eastasia.azurestaticapps.net +eastus2.azurestaticapps.net +westeurope.azurestaticapps.net +westus2.azurestaticapps.net +azurewebsites.net +cloudapp.net +trafficmanager.net +blob.core.windows.net +servicebus.windows.net + +// MikroTik : https://mikrotik.com +// Submitted by MikroTik SysAdmin Team +routingthecloud.com +sn.mynetname.net +routingthecloud.net +routingthecloud.org + +// minion.systems : http://minion.systems +// Submitted by Robert Böttinger +csx.cc + +// Mittwald CM Service GmbH & Co. 
KG : https://mittwald.de +// Submitted by Marco Rieger +mydbserver.com +webspaceconfig.de +mittwald.info +mittwaldserver.info +typo3server.info +project.space + +// MODX Systems LLC : https://modx.com +// Submitted by Elizabeth Southwell +modx.dev + +// Mozilla Foundation : https://mozilla.org/ +// Submitted by glob +bmoattachments.org + +// MSK-IX : https://www.msk-ix.ru/ +// Submitted by Khannanov Roman +net.ru +org.ru +pp.ru + +// Mythic Beasts : https://www.mythic-beasts.com +// Submitted by Paul Cammish +hostedpi.com +caracal.mythic-beasts.com +customer.mythic-beasts.com +fentiger.mythic-beasts.com +lynx.mythic-beasts.com +ocelot.mythic-beasts.com +oncilla.mythic-beasts.com +onza.mythic-beasts.com +sphinx.mythic-beasts.com +vs.mythic-beasts.com +x.mythic-beasts.com +yali.mythic-beasts.com +cust.retrosnub.co.uk + +// Nabu Casa : https://www.nabucasa.com +// Submitted by Paulus Schoutsen +ui.nabu.casa + +// Net at Work Gmbh : https://www.netatwork.de +// Submitted by Jan Jaeschke +cloud.nospamproxy.com +o365.cloud.nospamproxy.com + +// Net libre : https://www.netlib.re +// Submitted by Philippe PITTOLI +netlib.re + +// Netfy Domains : https://netfy.domains +// Submitted by Suranga Ranasinghe +netfy.app + +// Netlify : https://www.netlify.com +// Submitted by Jessica Parsons +netlify.app + +// Neustar Inc. +// Submitted by Trung Tran +4u.com + +// NFSN, Inc. : https://www.NearlyFreeSpeech.NET/ +// Submitted by Jeff Wheelhouse +nfshost.com + +// NFT.Storage : https://nft.storage/ +// Submitted by Vasco Santos or +ipfs.nftstorage.link + +// NGO.US Registry : https://nic.ngo.us +// Submitted by Alstra Solutions Ltd. 
Networking Team +ngo.us + +// ngrok : https://ngrok.com/ +// Submitted by Alan Shreve +ngrok.app +ngrok-free.app +ngrok.dev +ngrok-free.dev +ngrok.io +ap.ngrok.io +au.ngrok.io +eu.ngrok.io +in.ngrok.io +jp.ngrok.io +sa.ngrok.io +us.ngrok.io +ngrok.pizza +ngrok.pro + +// Nicolaus Copernicus University in Torun - MSK TORMAN : https://www.man.torun.pl +torun.pl + +// Nimbus Hosting Ltd. : https://www.nimbushosting.co.uk/ +// Submitted by Nicholas Ford +nh-serv.co.uk +nimsite.uk + +// No-IP.com : https://noip.com/ +// Submitted by Deven Reza +mmafan.biz +myftp.biz +no-ip.biz +no-ip.ca +fantasyleague.cc +gotdns.ch +3utilities.com +blogsyte.com +ciscofreak.com +damnserver.com +ddnsking.com +ditchyourip.com +dnsiskinky.com +dynns.com +geekgalaxy.com +health-carereform.com +homesecuritymac.com +homesecuritypc.com +myactivedirectory.com +mysecuritycamera.com +myvnc.com +net-freaks.com +onthewifi.com +point2this.com +quicksytes.com +securitytactics.com +servebeer.com +servecounterstrike.com +serveexchange.com +serveftp.com +servegame.com +servehalflife.com +servehttp.com +servehumour.com +serveirc.com +servemp3.com +servep2p.com +servepics.com +servequake.com +servesarcasm.com +stufftoread.com +unusualperson.com +workisboring.com +dvrcam.info +ilovecollege.info +no-ip.info +brasilia.me +ddns.me +dnsfor.me +hopto.me +loginto.me +noip.me +webhop.me +bounceme.net +ddns.net +eating-organic.net +mydissent.net +myeffect.net +mymediapc.net +mypsx.net +mysecuritycamera.net +nhlfan.net +no-ip.net +pgafan.net +privatizehealthinsurance.net +redirectme.net +serveblog.net +serveminecraft.net +sytes.net +cable-modem.org +collegefan.org +couchpotatofries.org +hopto.org +mlbfan.org +myftp.org +mysecuritycamera.org +nflfan.org +no-ip.org +read-books.org +ufcfan.org +zapto.org +no-ip.co.uk +golffan.us +noip.us +pointto.us + +// NodeArt : https://nodeart.io +// Submitted by Konstantin Nosov +stage.nodeart.io + +// Noop : https://noop.app +// Submitted by Nathaniel Schweinberg +*.developer.app 
+noop.app + +// Northflank Ltd. : https://northflank.com/ +// Submitted by Marco Suter +*.northflank.app +*.build.run +*.code.run +*.database.run +*.migration.run + +// Noticeable : https://noticeable.io +// Submitted by Laurent Pellegrino +noticeable.news + +// Notion Labs, Inc : https://www.notion.so/ +// Submitted by Jess Yao +notion.site + +// Now-DNS : https://now-dns.com +// Submitted by Steve Russell +dnsking.ch +mypi.co +myiphost.com +forumz.info +soundcast.me +tcp4.me +dnsup.net +hicam.net +now-dns.net +ownip.net +vpndns.net +dynserv.org +now-dns.org +x443.pw +ntdll.top +freeddns.us + +// nsupdate.info : https://www.nsupdate.info/ +// Submitted by Thomas Waldmann +nsupdate.info +nerdpol.ovh + +// NYC.mn : https://dot.nyc.mn/ +// Submitted by NYC.mn Subdomain Service +nyc.mn + +// O3O.Foundation : https://o3o.foundation/ +// Submitted by the prvcy.page Registry Team +prvcy.page + +// Obl.ong : https://obl.ong +// Submitted by Reese Armstrong +obl.ong + +// Observable, Inc. : https://observablehq.com +// Submitted by Mike Bostock +observablehq.cloud +static.observableusercontent.com + +// OMG.LOL : https://omg.lol +// Submitted by Adam Newbold +omg.lol + +// Omnibond Systems, LLC. 
: https://www.omnibond.com +// Submitted by Cole Estep +cloudycluster.net + +// OmniWe Limited : https://omniwe.com +// Submitted by Vicary Archangel +omniwe.site + +// One.com : https://www.one.com/ +// Submitted by Jacob Bunk Nielsen +123webseite.at +123website.be +simplesite.com.br +123website.ch +simplesite.com +123webseite.de +123hjemmeside.dk +123miweb.es +123kotisivu.fi +123siteweb.fr +simplesite.gr +123homepage.it +123website.lu +123website.nl +123hjemmeside.no +service.one +simplesite.pl +123paginaweb.pt +123minsida.se + +// ONID : https://get.onid.ca +// Submitted by ONID Engineering Team +onid.ca + +// Open Domains : https://open-domains.net +// Submitted by William Harrison +is-a-fullstack.dev +is-cool.dev +is-not-a.dev +localplayer.dev +is-local.org + +// Open Social : https://www.getopensocial.com/ +// Submitted by Alexander Varwijk +opensocial.site + +// OpenAI : https://openai.com +// Submitted by Thomas Shadwell +*.oaiusercontent.com + +// OpenCraft GmbH : http://opencraft.com/ +// Submitted by Sven Marnach +opencraft.hosting + +// OpenHost : https://registry.openhost.uk +// Submitted by OpenHost Registry Team +16-b.it +32-b.it +64-b.it + +// OpenResearch GmbH : https://openresearch.com/ +// Submitted by Philipp Schmid +orsites.com + +// Opera Software, A.S.A. +// Submitted by Yngve Pettersen +operaunite.com + +// Oracle Dyn : https://cloud.oracle.com/home https://dyn.com/dns/ +// Submitted by Gregory Drake +// Note: This is intended to also include customer-oci.com due to wildcards implicitly including the current label +*.customer-oci.com +*.oci.customer-oci.com +*.ocp.customer-oci.com +*.ocs.customer-oci.com +*.oraclecloudapps.com +*.oraclegovcloudapps.com +*.oraclegovcloudapps.uk + +// Orange : https://www.orange.com +// Submitted by Alexandre Linte +tech.orange + +// OsSav Technology Ltd. : https://ossav.com/ +// Submitted by OsSav Technology Ltd. 
+// https://nic.can.re +can.re + +// Oursky Limited : https://authgear.com/ +// Submitted by Authgear Team & Skygear Developer +authgear-staging.com +authgearapps.com +skygearapp.com + +// OutSystems +// Submitted by Duarte Santos +outsystemscloud.com + +// OVHcloud : https://ovhcloud.com +// Submitted by Vincent Cassé +*.hosting.ovh.net +*.webpaas.ovh.net + +// OwnProvider GmbH : http://www.ownprovider.com +// Submitted by Jan Moennich +ownprovider.com +own.pm + +// OwO : https://whats-th.is/ +// Submitted by Dean Sheather +*.owo.codes + +// OX : http://www.ox.rs +// Submitted by Adam Grand +ox.rs + +// oy.lc +// Submitted by Charly Coste +oy.lc + +// Pagefog : https://pagefog.com/ +// Submitted by Derek Myers +pgfog.com + +// PageXL : https://pagexl.com +// Submitted by Yann Guichard +pagexl.com + +// Pantheon Systems, Inc. : https://pantheon.io/ +// Submitted by Gary Dylina +gotpantheon.com +pantheonsite.io + +// Paywhirl, Inc : https://paywhirl.com/ +// Submitted by Daniel Netzer +*.paywhirl.com + +// pcarrier.ca Software Inc : https://pcarrier.ca/ +// Submitted by Pierre Carrier +*.xmit.co +xmit.dev +madethis.site +srv.us +gh.srv.us +gl.srv.us + +// PE Ulyanov Kirill Sergeevich : https://airy.host +// Submitted by Kirill Ulyanov +lk3.ru + +// Peplink | Pepwave : http://peplink.com/ +// Submitted by Steve Leung +mypep.link + +// Perspecta : https://perspecta.com/ +// Submitted by Kenneth Van Alstyne +perspecta.cloud + +// Planet-Work : https://www.planet-work.com/ +// Submitted by Frédéric VANNIÈRE +on-web.fr + +// Platform.sh : https://platform.sh +// Submitted by Nikola Kotur +*.upsun.app +upsunapp.com +ent.platform.sh +eu.platform.sh +us.platform.sh +*.platformsh.site +*.tst.site + +// Platter : https://platter.dev +// Submitted by Patrick Flor +platter-app.dev +platterp.us + +// Pley AB : https://www.pley.com/ +// Submitted by Henning Pohl +pley.games + +// Porter : https://porter.run/ +// Submitted by Rudraksh MK +onporter.run + +// Positive Codes 
Technology Company : http://co.bn/faq.html +// Submitted by Zulfais +co.bn + +// Postman, Inc : https://postman.com +// Submitted by Rahul Dhawan +postman-echo.com +pstmn.io +mock.pstmn.io +httpbin.org + +// prequalifyme.today : https://prequalifyme.today +// Submitted by DeepakTiwari deepak@ivylead.io +prequalifyme.today + +// prgmr.com : https://prgmr.com/ +// Submitted by Sarah Newman +xen.prgmr.com + +// priv.at : http://www.nic.priv.at/ +// Submitted by registry +priv.at + +// PROJECT ELIV : https://eliv.kr/ +// Submitted by PROJECT ELIV Domain Team +c01.kr +eliv-cdn.kr +eliv-dns.kr +mmv.kr +vki.kr + +// project-study : https://project-study.com +// Submitted by yumenewa +dev.project-study.com + +// Protonet GmbH : http://protonet.io +// Submitted by Martin Meier +protonet.io + +// Publication Presse Communication SARL : https://ppcom.fr +// Submitted by Yaacov Akiba Slama +chirurgiens-dentistes-en-france.fr +byen.site + +// pubtls.org : https://www.pubtls.org +// Submitted by Kor Nielsen +pubtls.org + +// PythonAnywhere LLP : https://www.pythonanywhere.com +// Submitted by Giles Thomas +pythonanywhere.com +eu.pythonanywhere.com + +// QA2 +// Submitted by Daniel Dent : https://www.danieldent.com/ +qa2.com + +// QCX +// Submitted by Cassandra Beelen +qcx.io +*.sys.qcx.io + +// QNAP System Inc : https://www.qnap.com +// Submitted by Nick Chang +myqnapcloud.cn +alpha-myqnapcloud.com +dev-myqnapcloud.com +mycloudnas.com +mynascloud.com +myqnapcloud.com + +// QOTO, Org. 
+// Submitted by Jeffrey Phillips Freeman +qoto.io + +// Qualifio : https://qualifio.com/ +// Submitted by Xavier De Cock +qualifioapp.com + +// Quality Unit : https://qualityunit.com +// Submitted by Vasyl Tsalko +ladesk.com + +// QuickBackend : https://www.quickbackend.com +// Submitted by Dani Biro +qbuser.com + +// Quip : https://quip.com +// Submitted by Patrick Linehan +*.quipelements.com + +// Qutheory LLC : http://qutheory.io +// Submitted by Jonas Schwartz +vapor.cloud +vaporcloud.io + +// Rackmaze LLC : https://www.rackmaze.com +// Submitted by Kirill Pertsev +rackmaze.com +rackmaze.net + +// Rad Web Hosting : https://radwebhosting.com +// Submitted by Scott Claeys +cloudsite.builders +myradweb.net +servername.us + +// Radix FZC : http://domains.in.net +// Submitted by Gavin Brown +web.in +in.net + +// Raidboxes GmbH : https://raidboxes.de +// Submitted by Auke Tembrink +myrdbx.io +site.rb-hosting.io + +// Rancher Labs, Inc : https://rancher.com +// Submitted by Vincent Fiduccia +*.on-rancher.cloud +*.on-k3s.io +*.on-rio.io + +// RavPage : https://www.ravpage.co.il +// Submitted by Roni Horowitz +ravpage.co.il + +// Read The Docs, Inc : https://www.readthedocs.org +// Submitted by David Fischer +readthedocs-hosted.com +readthedocs.io + +// Red Hat, Inc. 
OpenShift : https://openshift.redhat.com/ +// Submitted by Tim Kramer +rhcloud.com + +// Redgate Software : https://red-gate.com +// Submitted by Andrew Farries +instances.spawn.cc + +// Render : https://render.com +// Submitted by Anurag Goel +onrender.com +app.render.com + +// Repl.it : https://repl.it +// Submitted by Lincoln Bergeson +replit.app +id.replit.app +firewalledreplit.co +id.firewalledreplit.co +repl.co +id.repl.co +replit.dev +archer.replit.dev +bones.replit.dev +canary.replit.dev +global.replit.dev +hacker.replit.dev +id.replit.dev +janeway.replit.dev +kim.replit.dev +kira.replit.dev +kirk.replit.dev +odo.replit.dev +paris.replit.dev +picard.replit.dev +pike.replit.dev +prerelease.replit.dev +reed.replit.dev +riker.replit.dev +sisko.replit.dev +spock.replit.dev +staging.replit.dev +sulu.replit.dev +tarpit.replit.dev +teams.replit.dev +tucker.replit.dev +wesley.replit.dev +worf.replit.dev +repl.run + +// Resin.io : https://resin.io +// Submitted by Tim Perry +resindevice.io +devices.resinstaging.io + +// RethinkDB : https://www.rethinkdb.com/ +// Submitted by Chris Kastorff +hzc.io + +// Rico Developments Limited : https://adimo.co +// Submitted by Colin Brown +adimo.co.uk + +// Riseup Networks : https://riseup.net +// Submitted by Micah Anderson +itcouldbewor.se + +// Roar Domains LLC : https://roar.basketball/ +// Submitted by Gavin Brown +aus.basketball +nz.basketball + +// ROBOT PAYMENT INC. 
: https://www.robotpayment.co.jp/ +// Submitted by Kentaro Takamori +subsc-pay.com +subsc-pay.net + +// Rochester Institute of Technology : http://www.rit.edu/ +// Submitted by Jennifer Herting +git-pages.rit.edu + +// Rocky Enterprise Software Foundation : https://resf.org +// Submitted by Neil Hanlon +rocky.page + +// Ruhr University Bochum : https://www.ruhr-uni-bochum.de/ +// Submitted by Andreas Jobs +rub.de +ruhr-uni-bochum.de +io.noc.ruhr-uni-bochum.de + +// Rusnames Limited : http://rusnames.ru/ +// Submitted by Sergey Zotov +биз.рус +ком.рус +крым.рус +мир.рус +мск.рус +орг.рус +самара.рус +сочи.рус +спб.рус +я.рус + +// Russian Academy of Sciences +// Submitted by Tech Support +ras.ru + +// Sakura Frp : https://www.natfrp.com +// Submitted by Bobo Liu +nyat.app + +// SAKURA Internet Inc. : https://www.sakura.ad.jp/ +// Submitted by Internet Service Department +180r.com +dojin.com +sakuratan.com +sakuraweb.com +x0.com +2-d.jp +bona.jp +crap.jp +daynight.jp +eek.jp +flop.jp +halfmoon.jp +jeez.jp +matrix.jp +mimoza.jp +ivory.ne.jp +mail-box.ne.jp +mints.ne.jp +mokuren.ne.jp +opal.ne.jp +sakura.ne.jp +sumomo.ne.jp +topaz.ne.jp +netgamers.jp +nyanta.jp +o0o0.jp +rdy.jp +rgr.jp +rulez.jp +s3.isk01.sakurastorage.jp +s3.isk02.sakurastorage.jp +saloon.jp +sblo.jp +skr.jp +tank.jp +uh-oh.jp +undo.jp +rs.webaccel.jp +user.webaccel.jp +websozai.jp +xii.jp +squares.net +jpn.org +kirara.st +x0.to +from.tv +sakura.tv + +// Salesforce.com, Inc. : https://salesforce.com/ +// Submitted by Salesforce Public Suffix List Team +*.builder.code.com +*.dev-builder.code.com +*.stg-builder.code.com +*.001.test.code-builder-stg.platform.salesforce.com +*.d.crm.dev +*.w.crm.dev +*.wa.crm.dev +*.wb.crm.dev +*.wc.crm.dev +*.wd.crm.dev +*.we.crm.dev +*.wf.crm.dev + +// Sandstorm Development Group, Inc. 
: https://sandcats.io/ +// Submitted by Asheesh Laroia +sandcats.io + +// SBE network solutions GmbH : https://www.sbe.de/ +// Submitted by Norman Meilick +logoip.com +logoip.de + +// Scaleway : https://www.scaleway.com/ +// Submitted by Scaleway PSL Maintainer +fr-par-1.baremetal.scw.cloud +fr-par-2.baremetal.scw.cloud +nl-ams-1.baremetal.scw.cloud +cockpit.fr-par.scw.cloud +ddl.fr-par.scw.cloud +dtwh.fr-par.scw.cloud +fnc.fr-par.scw.cloud +functions.fnc.fr-par.scw.cloud +ifr.fr-par.scw.cloud +k8s.fr-par.scw.cloud +nodes.k8s.fr-par.scw.cloud +kafk.fr-par.scw.cloud +mgdb.fr-par.scw.cloud +rdb.fr-par.scw.cloud +s3.fr-par.scw.cloud +s3-website.fr-par.scw.cloud +scbl.fr-par.scw.cloud +whm.fr-par.scw.cloud +priv.instances.scw.cloud +pub.instances.scw.cloud +k8s.scw.cloud +cockpit.nl-ams.scw.cloud +ddl.nl-ams.scw.cloud +dtwh.nl-ams.scw.cloud +ifr.nl-ams.scw.cloud +k8s.nl-ams.scw.cloud +nodes.k8s.nl-ams.scw.cloud +kafk.nl-ams.scw.cloud +mgdb.nl-ams.scw.cloud +rdb.nl-ams.scw.cloud +s3.nl-ams.scw.cloud +s3-website.nl-ams.scw.cloud +scbl.nl-ams.scw.cloud +whm.nl-ams.scw.cloud +cockpit.pl-waw.scw.cloud +ddl.pl-waw.scw.cloud +dtwh.pl-waw.scw.cloud +ifr.pl-waw.scw.cloud +k8s.pl-waw.scw.cloud +nodes.k8s.pl-waw.scw.cloud +kafk.pl-waw.scw.cloud +mgdb.pl-waw.scw.cloud +rdb.pl-waw.scw.cloud +s3.pl-waw.scw.cloud +s3-website.pl-waw.scw.cloud +scbl.pl-waw.scw.cloud +scalebook.scw.cloud +smartlabeling.scw.cloud +dedibox.fr + +// schokokeks.org GbR : https://schokokeks.org/ +// Submitted by Hanno Böck +schokokeks.net + +// Scottish Government : https://www.gov.scot +// Submitted by Martin Ellis +gov.scot +service.gov.scot + +// Scry Security : http://www.scrysec.com +// Submitted by Shante Adam +scrysec.com + +// Scrypted : https://scrypted.app +// Submitted by Koushik Dutta +client.scrypted.io + +// Securepoint GmbH : https://www.securepoint.de +// Submitted by Erik Anders +firewall-gateway.com +firewall-gateway.de +my-gateway.de +my-router.de +spdns.de +spdns.eu +firewall-gateway.net 
+my-firewall.org +myfirewall.org +spdns.org + +// Seidat : https://www.seidat.com +// Submitted by Artem Kondratev +seidat.net + +// Sellfy : https://sellfy.com +// Submitted by Yuriy Romadin +sellfy.store + +// Sendmsg : https://www.sendmsg.co.il +// Submitted by Assaf Stern +minisite.ms + +// Senseering GmbH : https://www.senseering.de +// Submitted by Felix Mönckemeyer +senseering.net + +// Servebolt AS : https://servebolt.com +// Submitted by Daniel Kjeserud +servebolt.cloud + +// Service Online LLC : http://drs.ua/ +// Submitted by Serhii Bulakh +biz.ua +co.ua +pp.ua + +// Shanghai Accounting Society : https://www.sasf.org.cn +// Submitted by Information Administration +as.sh.cn + +// Sheezy.Art : https://sheezy.art +// Submitted by Nyoom +sheezy.games + +// Shopblocks : http://www.shopblocks.com/ +// Submitted by Alex Bowers +myshopblocks.com + +// Shopify : https://www.shopify.com +// Submitted by Alex Richter +myshopify.com + +// Shopit : https://www.shopitcommerce.com/ +// Submitted by Craig McMahon +shopitsite.com + +// shopware AG : https://shopware.com +// Submitted by Jens Küper +shopware.shop +shopware.store + +// Siemens Mobility GmbH +// Submitted by Oliver Graebner +mo-siemens.io + +// SinaAppEngine : http://sae.sina.com.cn/ +// Submitted by SinaAppEngine +1kapp.com +appchizi.com +applinzi.com +sinaapp.com +vipsinaapp.com + +// Siteleaf : https://www.siteleaf.com/ +// Submitted by Skylar Challand +siteleaf.net + +// Small Technology Foundation : https://small-tech.org +// Submitted by Aral Balkan +small-web.org + +// Smallregistry by Promopixel SARL : https://www.smallregistry.net +// Former AFNIC's SLDs +// Submitted by Jérôme Lipowicz +aeroport.fr +avocat.fr +chambagri.fr +chirurgiens-dentistes.fr +experts-comptables.fr +medecin.fr +notaires.fr +pharmacien.fr +port.fr +veterinaire.fr + +// Smoove.io : https://www.smoove.io/ +// Submitted by Dan Kozak +vp4.me + +// Snowflake Inc : https://www.snowflake.com/ +// Submitted by Sam Haar 
+*.snowflake.app +*.privatelink.snowflake.app +streamlit.app +streamlitapp.com + +// Snowplow Analytics : https://snowplowanalytics.com/ +// Submitted by Ian Streeter +try-snowplow.com + +// Software Consulting Michal Zalewski : https://www.mafelo.com +// Submitted by Michal Zalewski +mafelo.net + +// Sony Interactive Entertainment LLC : https://sie.com/ +// Submitted by David Coles +playstation-cloud.com + +// SourceHut : https://sourcehut.org +// Submitted by Drew DeVault +srht.site + +// SourceLair PC : https://www.sourcelair.com +// Submitted by Antonis Kalipetis +apps.lair.io +*.stolos.io + +// sourceWAY GmbH : https://sourceway.de +// Submitted by Richard Reiber +4.at +my.at +my.de +*.nxa.eu +nx.gw + +// SpeedPartner GmbH : https://www.speedpartner.de/ +// Submitted by Stefan Neufeind +customer.speedpartner.de + +// Spreadshop (sprd.net AG) : https://www.spreadshop.com/ +// Submitted by Martin Breest +myspreadshop.at +myspreadshop.com.au +myspreadshop.be +myspreadshop.ca +myspreadshop.ch +myspreadshop.com +myspreadshop.de +myspreadshop.dk +myspreadshop.es +myspreadshop.fi +myspreadshop.fr +myspreadshop.ie +myspreadshop.it +myspreadshop.net +myspreadshop.nl +myspreadshop.no +myspreadshop.pl +myspreadshop.se +myspreadshop.co.uk + +// StackBlitz : https://stackblitz.com +// Submitted by Dominic Elm +w-corp-staticblitz.com +w-credentialless-staticblitz.com +w-staticblitz.com + +// Stackhero : https://www.stackhero.io +// Submitted by Adrien Gillon +stackhero-network.com + +// STACKIT GmbH & Co. 
KG : https://www.stackit.de/en/ +// Submitted by STACKIT-DNS Team (Simon Stier) +runs.onstackit.cloud +stackit.gg +stackit.rocks +stackit.run +stackit.zone + +// Staclar : https://staclar.com +// Submitted by Q Misell +// Submitted by Matthias Merkel +musician.io +novecore.site + +// Standard Library : https://stdlib.com +// Submitted by Jacob Lee +api.stdlib.com + +// stereosense GmbH : https://www.involve.me +// Submitted by Florian Burmann +feedback.ac +forms.ac +assessments.cx +calculators.cx +funnels.cx +paynow.cx +quizzes.cx +researched.cx +tests.cx +surveys.so + +// Storacha Network : https://storacha.network +// Submitted by Alan Shaw +ipfs.storacha.link +ipfs.w3s.link + +// Storebase : https://www.storebase.io +// Submitted by Tony Schirmer +storebase.store + +// Storipress : https://storipress.com +// Submitted by Benno Liu +storipress.app + +// Storj Labs Inc. : https://storj.io/ +// Submitted by Philip Hutchins +storj.farm + +// Strapi : https://strapi.io/ +// Submitted by Florent Baldino +strapiapp.com +media.strapiapp.com + +// Strategic System Consulting (eApps Hosting) : https://www.eapps.com/ +// Submitted by Alex Oancea +vps-host.net +atl.jelastic.vps-host.net +njs.jelastic.vps-host.net +ric.jelastic.vps-host.net + +// Streak : https://streak.com +// Submitted by Blake Kadatz +streak-link.com +streaklinks.com +streakusercontent.com + +// Student-Run Computing Facility : https://www.srcf.net/ +// Submitted by Edwin Balani +soc.srcf.net +user.srcf.net + +// Studenten Net Twente : http://www.snt.utwente.nl/ +// Submitted by Silke Hofstra +utwente.io + +// Sub 6 Limited : http://www.sub6.com +// Submitted by Dan Miller +temp-dns.com + +// Supabase : https://supabase.io +// Submitted by Inian Parameshwaran +supabase.co +supabase.in +supabase.net + +// Syncloud : https://syncloud.org +// Submitted by Boris Rybalkin +syncloud.it + +// Synology, Inc. 
: https://www.synology.com/ +// Submitted by Rony Weng +dscloud.biz +direct.quickconnect.cn +dsmynas.com +familyds.com +diskstation.me +dscloud.me +i234.me +myds.me +synology.me +dscloud.mobi +dsmynas.net +familyds.net +dsmynas.org +familyds.org +direct.quickconnect.to +vpnplus.to + +// Tabit Technologies Ltd. : https://tabit.cloud/ +// Submitted by Oren Agiv +mytabit.com +mytabit.co.il +tabitorder.co.il + +// TAIFUN Software AG : http://taifun-software.de +// Submitted by Bjoern Henke +taifun-dns.de + +// Tailscale Inc. : https://www.tailscale.com +// Submitted by David Anderson +ts.net +*.c.ts.net + +// TASK geographical domains : https://task.gda.pl/en/services/for-entrepreneurs/ +gda.pl +gdansk.pl +gdynia.pl +med.pl +sopot.pl + +// Tave Creative Corp : https://tave.com/ +// Submitted by Adrian Ziemkowski +taveusercontent.com + +// tawk.to, Inc : https://www.tawk.to +// Submitted by tawk.to developer team +p.tawk.email +p.tawkto.email + +// Tche.br : https://tche.br +// Submitted by Bruno Lorensi +tche.br + +// team.blue : https://team.blue +// Submitted by Cedric Dubois +site.tb-hosting.com + +// Teckids e.V. : https://www.teckids.org +// Submitted by Dominik George +edugit.io +s3.teckids.org + +// Telebit : https://telebit.cloud +// Submitted by AJ ONeal +telebit.app +telebit.io +*.telebit.xyz + +// Thingdust AG : https://thingdust.com/ +// Submitted by Adrian Imboden +*.firenet.ch +*.svc.firenet.ch +reservd.com +thingdustdata.com +cust.dev.thingdust.io +reservd.dev.thingdust.io +cust.disrec.thingdust.io +reservd.disrec.thingdust.io +cust.prod.thingdust.io +cust.testing.thingdust.io +reservd.testing.thingdust.io + +// ticket i/O GmbH : https://ticket.io +// Submitted by Christian Franke +tickets.io + +// Tlon.io : https://tlon.io +// Submitted by Mark Staarink +arvo.network +azimuth.network +tlon.network + +// Tor Project, Inc. 
: https://torproject.org +// Submitted by Antoine Beaupré +torproject.net +pages.torproject.net + +// TownNews.com : http://www.townnews.com +// Submitted by Dustin Ward +townnews-staging.com + +// TrafficPlex GmbH : https://www.trafficplex.de/ +// Submitted by Phillipp Röll +12hp.at +2ix.at +4lima.at +lima-city.at +12hp.ch +2ix.ch +4lima.ch +lima-city.ch +trafficplex.cloud +de.cool +12hp.de +2ix.de +4lima.de +lima-city.de +1337.pictures +clan.rip +lima-city.rocks +webspace.rocks +lima.zone + +// TransIP : https://www.transip.nl +// Submitted by Rory Breuk and Cedric Dubois +*.transurl.be +*.transurl.eu +site.transip.me +*.transurl.nl + +// TuxFamily : http://tuxfamily.org +// Submitted by TuxFamily administrators +tuxfamily.org + +// TwoDNS : https://www.twodns.de/ +// Submitted by TwoDNS-Support +dd-dns.de +dray-dns.de +draydns.de +dyn-vpn.de +dynvpn.de +mein-vigor.de +my-vigor.de +my-wan.de +syno-ds.de +synology-diskstation.de +synology-ds.de +diskstation.eu +diskstation.org + +// Typedream : https://typedream.com +// Submitted by Putri Karunia +typedream.app + +// Typeform : https://www.typeform.com +// Submitted by Typeform +pro.typeform.com + +// Uberspace : https://uberspace.de +// Submitted by Moritz Werner +*.uberspace.de +uber.space + +// UDR Limited : http://www.udr.hk.com +// Submitted by registry +hk.com +inc.hk +ltd.hk +hk.org + +// UK Intis Telecom LTD : https://it.com +// Submitted by ITComdomains +it.com + +// Unison Computing, PBC : https://unison.cloud +// Submitted by Simon Højberg +unison-services.cloud + +// United Gameserver GmbH : https://united-gameserver.de +// Submitted by Stefan Schwarz +virtual-user.de +virtualuser.de + +// United States Writing Corporation : https://uswriting.co +// Submitted by Andrew Sampson +obj.ag + +// UNIVERSAL DOMAIN REGISTRY : https://www.udr.org.yt/ +// see also: whois -h whois.udr.org.yt help +// Submitted by Atanunu Igbunuroghene +name.pm +sch.tf +biz.wf +sch.wf +org.yt + +// University of Banja Luka : 
https://unibl.org +// Domains for Republic of Srpska administrative entity. +// Submitted by Marko Ivanovic +rs.ba + +// University of Bielsko-Biala regional domain : http://dns.bielsko.pl/ +// Submitted by Marcin +bielsko.pl + +// urown.net : https://urown.net +// Submitted by Hostmaster +urown.cloud +dnsupdate.info + +// US REGISTRY LLC : http://us.org +// Submitted by Gavin Brown +us.org + +// V.UA Domain Registry: https://www.v.ua/ +// Submitted by Serhii Rostilo +v.ua + +// Val Town, Inc : https://val.town/ +// Submitted by Tom MacWright +val.run +web.val.run + +// Vercel, Inc : https://vercel.com/ +// Submitted by Laurens Duijvesteijn +vercel.app +v0.build +vercel.dev +vusercontent.net +vercel.run +now.sh + +// VeryPositive SIA : http://very.lv +// Submitted by Danko Aleksejevs +2038.io + +// Virtual-Info : https://www.virtual-info.info/ +// Submitted by Adnan RIHAN +v-info.info + +// VistaBlog : https://vistablog.ir/ +// Submitted by Hossein Piri +vistablog.ir + +// Viva Republica, Inc. : https://toss.im/ +// Submitted by Deus Team +deus-canvas.com + +// Voorloper.com : https://voorloper.com +// Submitted by Nathan van Bakel +voorloper.cloud + +// Vultr Objects : https://www.vultr.com/products/object-storage/ +// Submitted by Niels Maumenee +*.vultrobjects.com + +// Waffle Computer Inc., Ltd. : https://docs.waffleinfo.com +// Submitted by Masayuki Note +wafflecell.com + +// Webflow, Inc. 
: https://www.webflow.com +// Submitted by Webflow Security Team +webflow.io +webflowtest.io + +// WebHare bv : https://www.webhare.com/ +// Submitted by Arnold Hendriks +*.webhare.dev + +// WebHotelier Technologies Ltd : https://www.webhotelier.net/ +// Submitted by Apostolos Tsakpinis +bookonline.app +hotelwithflight.com +reserve-online.com +reserve-online.net + +// WebPros International, LLC : https://webpros.com/ +// Submitted by Nicolas Rochelemagne +cprapid.com +pleskns.com +wp2.host +pdns.page +plesk.page +cpanel.site +wpsquared.site + +// WebWaddle Ltd : https://webwaddle.com/ +// Submitted by Merlin Glander +*.wadl.top + +// Western Digital Technologies, Inc : https://www.wdc.com +// Submitted by Jung Jin +remotewd.com + +// Whatbox Inc. : https://whatbox.ca/ +// Submitted by Anthony Ryan +box.ca + +// WIARD Enterprises : https://wiardweb.com +// Submitted by Kidd Hustle +pages.wiardweb.com + +// Wikimedia Labs : https://wikitech.wikimedia.org +// Submitted by Arturo Borrero Gonzalez +toolforge.org +wmcloud.org +wmflabs.org + +// William Harrison : https://wharrison.com.au +// Submitted by William Harrison +wdh.app +hrsn.dev + +// Windsurf : https://windsurf.com +// Submitted by Douglas Chen +windsurf.app +windsurf.build + +// WISP : https://wisp.gg +// Submitted by Stepan Fedotov +panel.gg +daemon.panel.gg + +// Wix.com, Inc. 
: https://www.wix.com +// Submitted by Shahar Talmi / Alon Kochba +wixsite.com +wixstudio.com +editorx.io +wixstudio.io +wix.run + +// Wizard Zines : https://wizardzines.com +// Submitted by Julia Evans +messwithdns.com + +// WoltLab GmbH : https://www.woltlab.com +// Submitted by Tim Düsterhus +woltlab-demo.com +myforum.community +community-pro.de +diskussionsbereich.de +community-pro.net +meinforum.net + +// Woods Valldata : https://www.woodsvalldata.co.uk/ +// Submitted by Chris Whittle +affinitylottery.org.uk +raffleentry.org.uk +weeklylottery.org.uk + +// WP Engine : https://wpengine.com/ +// Submitted by Michael Smith +// Submitted by Brandon DuRette +wpenginepowered.com +js.wpenginepowered.com + +// XenonCloud GbR : https://xenoncloud.net +// Submitted by Julian Uphoff +half.host + +// XnBay Technology : http://www.xnbay.com/ +// Submitted by XnBay Developer +xnbay.com +u2.xnbay.com +u2-local.xnbay.com + +// XS4ALL Internet bv : https://www.xs4all.nl/ +// Submitted by Daniel Mostertman +cistron.nl +demon.nl +xs4all.space + +// Yandex.Cloud LLC : https://cloud.yandex.com +// Submitted by Alexander Lodin +yandexcloud.net +storage.yandexcloud.net +website.yandexcloud.net + +// YesCourse Pty Ltd : https://yescourse.com +// Submitted by Atul Bhouraskar +official.academy + +// Yola : https://www.yola.com/ +// Submitted by Stefano Rivera +yolasite.com + +// Yunohost : https://yunohost.org +// Submitted by Valentin Grimaud +ynh.fr +nohost.me +noho.st + +// ZaNiC : http://www.za.net/ +// Submitted by registry +za.net +za.org + +// ZAP-Hosting GmbH & Co. 
KG : https://zap-hosting.com +// Submitted by Julian Alker +zap.cloud + +// Zeabur : https://zeabur.com/ +// Submitted by Zeabur Team +zeabur.app + +// Zerops : https://zerops.io/ +// Submitted by Zerops Team +*.zerops.app + +// Zine EOOD : https://zine.bg/ +// Submitted by Martin Angelov +bss.design + +// Zitcom A/S : https://www.zitcom.dk +// Submitted by Emil Stahl +basicserver.io +virtualserver.io +enterprisecloud.nu + +// Zone.ID: https://zone.id +// Submitted by Gx1.org +zone.id + +// ===END PRIVATE DOMAINS=== diff --git a/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java b/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java new file mode 100644 index 00000000..a716df82 --- /dev/null +++ b/src/test/java/org/archive/extract/RealCDXExtractorOutputTest.java @@ -0,0 +1,29 @@ +package org.archive.extract; + +import java.net.URI; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + + +public class RealCDXExtractorOutputTest { + + @Test + public void testEscapeResolvedUrl() throws Exception { + String context = "http://www.uni-giessen.de/cms/studium/dateien/informationberatung/merkblattpdf"; + String spec = "http://fss.plone.uni-giessen.de/fß/studium/dateien/informationberatung/merkblattpdf/file/Mérkblatt zur Gestaltung von Nachteilsausgleichen.pdf?föo=bar#änchor"; + String escaped = RealCDXExtractorOutput.resolve(context, spec); + assertTrue(escaped.indexOf(" ") < 0); + URI parsed = new URI(escaped); + assertEquals("änchor", parsed.getFragment()); + } + + @Test + public void testNoDoubleEscaping() throws Exception { + String spec = "https://www.google.com/search?q=java+escape+url+spaces&ie=utf-8&oe=utf-8"; + String resolved = RealCDXExtractorOutput.resolve(spec, spec); + assertTrue(spec.equals(resolved)); + } +} diff --git a/src/test/java/org/archive/format/dns/DNSResponseParserTest.java 
b/src/test/java/org/archive/format/dns/DNSResponseParserTest.java index 27d0fdad..73e1fda8 100644 --- a/src/test/java/org/archive/format/dns/DNSResponseParserTest.java +++ b/src/test/java/org/archive/format/dns/DNSResponseParserTest.java @@ -3,15 +3,15 @@ import java.io.ByteArrayInputStream; import java.io.IOException; -import org.archive.format.dns.DNSParseException; -import org.archive.format.dns.DNSRecord; -import org.archive.format.dns.DNSResponse; -import org.archive.format.dns.DNSResponseParser; +import org.junit.jupiter.api.Test; -import junit.framework.TestCase; +import static java.nio.charset.StandardCharsets.UTF_8; -public class DNSResponseParserTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class DNSResponseParserTest { DNSResponseParser parser = new DNSResponseParser(); + @Test public void testParse() throws DNSParseException, IOException { verifyResults("20110328212258\nfarm6.static.flickr.a06.yahoodns.net.\t300\tIN\tA\t98.136.170.121\n", "20110328212258",new String[][] {{"farm6.static.flickr.a06.yahoodns.net.","300","IN","A","98.136.170.121"}}); @@ -22,7 +22,7 @@ public void testParse() throws DNSParseException, IOException { } private void verifyResults(String res, String date, String d[][]) throws DNSParseException, IOException { ByteArrayInputStream is = - new ByteArrayInputStream(res.getBytes("UTF-8")); + new ByteArrayInputStream(res.getBytes(UTF_8)); DNSResponse response = new DNSResponse(); parser.parse(is, response); verifyResults(response,date,d); diff --git a/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java b/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java index 95c7e96f..6f218ebb 100644 --- a/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java +++ b/src/test/java/org/archive/format/gzip/GZIPMemberSeriesTest.java @@ -9,9 +9,6 @@ import org.archive.util.ByteOp; import org.archive.util.IAUtils; import org.archive.util.TestUtils; -import 
org.archive.format.gzip.GZIPFormatException; -import org.archive.format.gzip.GZIPMemberSeries; -import org.archive.format.gzip.GZIPSeriesMember; import org.archive.streamcontext.ByteArrayWrappedStream; import org.archive.streamcontext.SimpleStream; import org.archive.streamcontext.Stream; @@ -19,10 +16,13 @@ import com.google.common.io.ByteStreams; import com.google.common.primitives.Bytes; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class GZIPMemberSeriesTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; +public class GZIPMemberSeriesTest { + + @Test public void testSingle() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); @@ -38,6 +38,7 @@ public void testSingle() throws IndexOutOfBoundsException, FileNotFoundException assertNull(s.getNextMember()); } + @Test public void testSingleEmpty() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("empty.gz"); @@ -59,6 +60,7 @@ public void testSingleEmpty() throws IndexOutOfBoundsException, FileNotFoundExce assertTrue(s.gotEOF()); } + @Test public void testDouble() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); @@ -81,14 +83,14 @@ public void testDouble() throws IndexOutOfBoundsException, FileNotFoundException assertNull(s.getNextMember()); } - + @Test public void testSingleCRCStrict() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); byte abcd[] = ByteStreams.toByteArray(is); byte oldb = abcd[abcd.length-1]; abcd[abcd.length-1] = (byte) (abcd[abcd.length-1] + 1); - assertFalse(oldb == abcd[abcd.length-1]); + assertNotEquals(oldb, abcd[abcd.length - 1]); ByteArrayInputStream bais = new ByteArrayInputStream(abcd); Stream stream = new SimpleStream(bais); @@ 
-117,14 +119,15 @@ public void testSingleCRCStrict() throws IndexOutOfBoundsException, FileNotFound } assertNotNull(e); } - + + @Test public void testSingleCRCLAX() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); byte abcd[] = ByteStreams.toByteArray(is); byte oldb = abcd[abcd.length-1]; abcd[abcd.length-1] = (byte) (abcd[abcd.length-1] + 1); - assertFalse(oldb == abcd[abcd.length-1]); + assertNotEquals(oldb, abcd[abcd.length - 1]); ByteArrayInputStream bais = new ByteArrayInputStream(abcd); Stream stream = new SimpleStream(bais); @@ -154,7 +157,8 @@ public void testSingleCRCLAX() throws IndexOutOfBoundsException, FileNotFoundExc assertNull(e); assertNull(s.getNextMember()); } - + + @Test public void testDoubleCRC1LAX() throws IndexOutOfBoundsException, FileNotFoundException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); @@ -162,7 +166,7 @@ public void testDoubleCRC1LAX() throws IndexOutOfBoundsException, FileNotFoundEx byte abcdorig[] = ByteOp.copy(abcd); byte oldb = abcd[abcd.length-1]; abcd[abcd.length-1] = (byte) (abcd[abcd.length-1] + 1); - assertFalse(oldb == abcd[abcd.length-1]); + assertNotEquals(oldb, abcd[abcd.length - 1]); byte both[] = Bytes.concat(abcd,abcdorig); @@ -195,7 +199,8 @@ public void testDoubleCRC1LAX() throws IndexOutOfBoundsException, FileNotFoundEx assertNotNull(m); TestUtils.assertStreamEquals(m,"abcd".getBytes(IAUtils.UTF8)); } - + + @Test public void testSingleDeflateError() throws IndexOutOfBoundsException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); @@ -240,7 +245,7 @@ public void testSingleDeflateError() throws IndexOutOfBoundsException, IOExcepti assertNull(m); } - + @Test public void testDoubleDeflateError() throws IndexOutOfBoundsException, IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); @@ -290,7 +295,8 @@ public void testDoubleDeflateError() throws 
IndexOutOfBoundsException, IOExcepti assertFalse(s.gotIOError()); } - + + @Test public void testDoubleBiggerDeflateErrOnFirst() throws IOException { String resource = "double-single-inflate-error.gz"; InputStream is = getClass().getResourceAsStream(resource); @@ -333,7 +339,8 @@ public void testDoubleBiggerDeflateErrOnFirst() throws IOException { } - + + @Test public void testAutoSkip() throws IOException { InputStream is = getClass().getResourceAsStream("abcd.gz"); byte abcd[] = ByteStreams.toByteArray(is); @@ -374,6 +381,10 @@ public void testAutoSkip() throws IOException { assertNull(m); assertTrue(s.gotEOF()); } - + @Test + public void testWgetProblem() throws IndexOutOfBoundsException, FileNotFoundException, IOException { + InputStream is = getClass().getResourceAsStream("IAH-urls-wget.warc.gz"); + new GZIPDecoder().parseHeader(is); + } } diff --git a/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java b/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java index 5cd75ccf..45bc18e4 100644 --- a/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java +++ b/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java @@ -7,13 +7,14 @@ import org.archive.util.IAUtils; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class GZIPMemberWriterTest extends TestCase { +public class GZIPMemberWriterTest { + @Test public void testWrite() throws IOException { - String outPath = "/tmp/tmp.gz"; - GZIPMemberWriter gzw = new GZIPMemberWriter(new FileOutputStream(new File(outPath))); + File outFile = File.createTempFile("tmp", ".gz"); + GZIPMemberWriter gzw = new GZIPMemberWriter(new FileOutputStream(outFile)); gzw.write(new ByteArrayInputStream("Here is record 1".getBytes(IAUtils.UTF8))); gzw.write(new ByteArrayInputStream("Here is record 2".getBytes(IAUtils.UTF8))); } diff --git a/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java b/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java 
index cfadbd79..13658bcb 100644 --- a/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java +++ b/src/test/java/org/archive/format/gzip/zipnum/ZipNumWriterTest.java @@ -10,32 +10,36 @@ import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import java.nio.charset.Charset; +import java.util.Locale; import org.archive.format.gzip.GZIPMemberSeries; import org.archive.format.gzip.GZIPSeriesMember; import org.archive.streamcontext.SimpleStream; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class ZipNumWriterTest extends TestCase { +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class ZipNumWriterTest { + + @Test public void testAddRecord() throws IOException { - Charset UTF8 = Charset.forName("UTF-8"); - File main = File.createTempFile("test-znw",".main"); + File main = File.createTempFile("test-znw",".main"); File summ = File.createTempFile("test-znw",".summ"); main.deleteOnExit(); summ.deleteOnExit(); - System.out.format("Summ: %s\n", summ.getAbsolutePath()); + System.out.format(Locale.ROOT, "Summ: %s\n", summ.getAbsolutePath()); int limit = 10; ZipNumWriter znw = new ZipNumWriter(new FileOutputStream(main,false), new FileOutputStream(summ,false), limit); for(int i = 0; i < 1000; i++) { - znw.addRecord(String.format("%06d\n",i).getBytes(UTF8)); + znw.addRecord(String.format(Locale.ROOT,"%06d\n",i).getBytes(UTF_8)); } znw.close(); InputStreamReader isr = - new InputStreamReader(new FileInputStream(summ),UTF8); + new InputStreamReader(new FileInputStream(summ), UTF_8); BufferedReader br = new BufferedReader(isr); String line = null; int count = 0; diff --git a/src/test/java/org/archive/format/http/HttpRequestMessageParserTest.java b/src/test/java/org/archive/format/http/HttpRequestMessageParserTest.java index 50df9dde..9a5d69af 100644 --- 
a/src/test/java/org/archive/format/http/HttpRequestMessageParserTest.java +++ b/src/test/java/org/archive/format/http/HttpRequestMessageParserTest.java @@ -3,16 +3,16 @@ import java.io.ByteArrayInputStream; import java.io.IOException; -import org.archive.format.http.HttpConstants; -import org.archive.format.http.HttpParseException; -import org.archive.format.http.HttpRequestMessage; -import org.archive.format.http.HttpRequestMessageParser; import org.archive.util.IAUtils; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class HttpRequestMessageParserTest extends TestCase implements HttpConstants { +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class HttpRequestMessageParserTest implements HttpConstants { HttpRequestMessageParser parser = new HttpRequestMessageParser(); + + @Test public void testParse() throws IOException { assertParse("GET / HTTP/1.0\r\n", METHOD_GET, "/", VERSION_0); assertParse("GET / HTTP/1.1\r\n", METHOD_GET, "/", VERSION_1); diff --git a/src/test/java/org/archive/format/http/HttpResponseParserTest.java b/src/test/java/org/archive/format/http/HttpResponseParserTest.java index 2850fe44..631d67c7 100644 --- a/src/test/java/org/archive/format/http/HttpResponseParserTest.java +++ b/src/test/java/org/archive/format/http/HttpResponseParserTest.java @@ -5,16 +5,14 @@ import org.archive.util.IAUtils; import org.archive.util.TestUtils; -import org.archive.format.http.HttpHeader; -import org.archive.format.http.HttpHeaders; -import org.archive.format.http.HttpParseException; -import org.archive.format.http.HttpResponse; -import org.archive.format.http.HttpResponseParser; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class HttpResponseParserTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; +public class HttpResponseParserTest { + + @Test public void testParse() throws IOException { HttpResponseParser parser = new HttpResponseParser(); @@ -38,4 
+36,49 @@ public void testParse() throws IOException { } + @Test + public void testParseWithLf() throws IOException { + + HttpResponseParser parser = new HttpResponseParser(); + String message = "200 OK\nContent-Type: text/plain\n\nHi there"; + try { + HttpResponse response = + parser.parse(new ByteArrayInputStream(message.getBytes(IAUtils.UTF8))); + assertNotNull(response); + HttpHeaders headers = response.getHeaders(); + assertNotNull(headers); + assertEquals(1,headers.size()); + + } catch (HttpParseException e) { + e.printStackTrace(); + fail(); + } + + } + + @Test + public void testParseEmptyHeaderField() throws IOException { + + HttpResponseParser parser = new HttpResponseParser(); + String message = "200 OK\r\nContent-Type: text/plain\r\nServer: \r\n\r\nHi there"; + try { + HttpResponse response = + parser.parse(new ByteArrayInputStream(message.getBytes(IAUtils.UTF8))); + assertNotNull(response); + HttpHeaders headers = response.getHeaders(); + assertNotNull(headers); + assertEquals(2, headers.size()); + HttpHeader header = headers.get(1); + assertEquals("Server",header.getName()); + System.err.println(header.getValue()); + assertFalse("text/plain".equals(header.getValue())); + TestUtils.assertStreamEquals(response, "Hi there".getBytes(IAUtils.UTF8)); + + } catch (HttpParseException e) { + e.printStackTrace(); + fail(); + } + + } + } diff --git a/src/test/java/org/archive/format/json/CompoundORJSONPathSpecTest.java b/src/test/java/org/archive/format/json/CompoundORJSONPathSpecTest.java index 57c21965..ef8c2fa0 100644 --- a/src/test/java/org/archive/format/json/CompoundORJSONPathSpecTest.java +++ b/src/test/java/org/archive/format/json/CompoundORJSONPathSpecTest.java @@ -6,11 +6,12 @@ import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class CompoundORJSONPathSpecTest extends TestCase { +public class CompoundORJSONPathSpecTest { String json1S = "{\"a\":\"A\"}"; String json2S = 
"{\"b\":\"B\"}"; + @Test public void testExtract() throws JSONException { JSONObject json1 = new JSONObject(json1S); JSONObject json2 = new JSONObject(json2S); diff --git a/src/test/java/org/archive/format/json/JSONPathSpecFactoryTest.java b/src/test/java/org/archive/format/json/JSONPathSpecFactoryTest.java index ab999dca..257cb112 100644 --- a/src/test/java/org/archive/format/json/JSONPathSpecFactoryTest.java +++ b/src/test/java/org/archive/format/json/JSONPathSpecFactoryTest.java @@ -4,9 +4,9 @@ import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class JSONPathSpecFactoryTest extends TestCase { +public class JSONPathSpecFactoryTest { String json1S = "{\"a\":\"A\"}"; String json2S = "{\"b\":\"B\"}"; @@ -14,6 +14,7 @@ public class JSONPathSpecFactoryTest extends TestCase { String json4S = "{\"b\":[{\"x\":\"x1\", \"y\":\"y1\"},{\"x\":\"x2\", \"y\":\"y2\"}]}"; + @Test public void testGet() throws JSONException { JSONObject json1 = new JSONObject(json1S); JSONObject json2 = new JSONObject(json2S); diff --git a/src/test/java/org/archive/format/json/JSONViewTest.java b/src/test/java/org/archive/format/json/JSONViewTest.java index 20bd4fe6..6d199025 100644 --- a/src/test/java/org/archive/format/json/JSONViewTest.java +++ b/src/test/java/org/archive/format/json/JSONViewTest.java @@ -1,33 +1,38 @@ package org.archive.format.json; +import java.util.Locale; + import org.archive.util.TestUtils; import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class JSONViewTest extends TestCase { +public class JSONViewTest { public int getInt(byte b[]) { return b[0] & 0xff; } - + + @Test public void testBytes() throws JSONException { JSONObject o = new JSONObject(); o.append("name1", "val\\rue1"); String json = o.toString(); - System.out.format("once: (%s)\n",json); + System.out.format(Locale.ROOT, "once: (%s)\n", json); 
JSONObject o2 = new JSONObject(json); - System.out.format("twice: (%s)\n",o2.toString()); + System.out.format(Locale.ROOT, "twice: (%s)\n", o2.toString()); byte b[] = new byte[2]; for(int i = 0; i < 256; i++) { b[0] = (byte) i; int gi = getInt(b); - System.out.format("I(%d) gi(%d)\n",i,gi); + System.out.format(Locale.ROOT, "I(%d) gi(%d)\n", i, gi); } } + + @Test public void testApply() throws JSONException { String json1S = "{\"url\":\"a\",\"link\":[{\"zz\":\"1\",\"qq\":\"qa\"},{\"zz2\":\"2\",\"qq\":\"qb\"},{\"zz\":\"3\",\"qq\":\"qc\"},{\"zz\":\"4\"}]}"; JSONObject json1 = new JSONObject(json1S); diff --git a/src/test/java/org/archive/format/json/SimpleJSONPathSpecTest.java b/src/test/java/org/archive/format/json/SimpleJSONPathSpecTest.java index a703b49a..640a5a80 100644 --- a/src/test/java/org/archive/format/json/SimpleJSONPathSpecTest.java +++ b/src/test/java/org/archive/format/json/SimpleJSONPathSpecTest.java @@ -4,15 +4,16 @@ import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class SimpleJSONPathSpecTest extends TestCase { +public class SimpleJSONPathSpecTest { String json1 = "{\"a\": { \"b\": \"Foo\" }}"; String json2 = "{\"a\": { \"b\": [{\"a\":\"1\"},{\"a\":\"2\"}] }}"; String json3 = "{\"a\": { \"b\": {\"A\":\"11\",\"B\":\"22\"} }}"; String json4 = "{\"a\": { \"b\": [{\"A\":\"11\",\"B\":\"22\"},{\"A\":\"33\",\"B\":\"44\"}] }}"; + @Test public void testExtract() throws JSONException { JSONObject json = new JSONObject(json1); JSONPathSpec spec = new SimpleJSONPathSpec("a.b"); diff --git a/src/test/java/org/archive/format/text/html/CDATALexerTest.java b/src/test/java/org/archive/format/text/html/CDATALexerTest.java index 481a3eda..7c9f24f3 100644 --- a/src/test/java/org/archive/format/text/html/CDATALexerTest.java +++ b/src/test/java/org/archive/format/text/html/CDATALexerTest.java @@ -1,17 +1,18 @@ package org.archive.format.text.html; -import 
org.archive.format.text.html.CDATALexer; -import org.archive.format.text.html.NodeUtils; import org.htmlparser.Node; import org.htmlparser.lexer.Page; -//import org.htmlparser.nodes.RemarkNode; import org.htmlparser.nodes.TagNode; import org.htmlparser.nodes.TextNode; import org.htmlparser.util.ParserException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class CDATALexerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; + +import java.util.Locale; + +public class CDATALexerTest { CDATALexer l; Node n; private CDATALexer makeLexer(String html) { @@ -19,7 +20,8 @@ private CDATALexer makeLexer(String html) { t.setPage(new Page(html)); return t; } - + + @Test public void testNextNode() throws ParserException { l = makeLexer("blem"); n = l.nextNode(); @@ -35,6 +37,7 @@ public void testNextNode() throws ParserException { assertNull(l.nextNode()); } + @Test public void testInJS() throws ParserException { l = makeLexer(""); assertFalse(l.inCSS()); @@ -54,6 +57,7 @@ public void testInJS() throws ParserException { assertTrue(NodeUtils.isCloseTagNodeNamed(n, "SCRIPT")); } + @Test public void testInCSS() throws ParserException { l = makeLexer(""); assertFalse(l.inCSS()); @@ -100,7 +104,7 @@ public void testInJSComment() throws ParserException { } private void assertJSContentWorks(String js) throws ParserException { - String html = String.format("",js); + String html = String.format(Locale.ROOT,"",js); l = makeLexer(html); assertFalse(l.inCSS()); assertFalse(l.inJS()); diff --git a/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java b/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java new file mode 100644 index 00000000..791a1148 --- /dev/null +++ b/src/test/java/org/archive/io/ArchiveReaderFactoryTest.java @@ -0,0 +1,101 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. 
+ * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.io; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.Iterator; + +import org.apache.commons.lang3.StringUtils; +import org.archive.io.arc.ARCWriterTest; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class ArchiveReaderFactoryTest { + @TempDir + File tempDir; + + /** + * Test local file as URL + * @throws IOException + */ + @Test + public void testGetFileURL() throws IOException { + File arc = ARCWriterTest.createARCFile(tempDir, true); + ArchiveReader reader = null; + try { + reader = ArchiveReaderFactory. 
+ get(new URL("file:////" + arc.getAbsolutePath())); + for (Iterator i = reader.iterator(); i.hasNext();) { + ArchiveRecord r = (ArchiveRecord)i.next(); + assertTrue(StringUtils.isNotBlank(r.getHeader().getMimetype()),"mime unread"); + } + } finally { + if (reader != null) { + reader.close(); + } + } + } + + /** + * Test local file as File + * @throws IOException + */ + @Test + public void testGetFile() throws IOException { + File arc = ARCWriterTest.createARCFile(tempDir, true); + ArchiveReader reader = null; + try { + reader = ArchiveReaderFactory.get(arc.getAbsoluteFile()); + for (Iterator i = reader.iterator(); i.hasNext();) { + ArchiveRecord r = (ArchiveRecord)i.next(); + assertTrue(StringUtils.isNotBlank(r.getHeader().getMimetype()),"mime unread"); + } + } finally { + if (reader != null) { + reader.close(); + } + } + } + + /** + * Test local file as String path + * @throws IOException + */ + @Test + public void testGetPath() throws IOException { + File arc = ARCWriterTest.createARCFile(tempDir, true); + ArchiveReader reader = null; + try { + reader = ArchiveReaderFactory.get(arc.getAbsoluteFile().getAbsolutePath()); + for (Iterator i = reader.iterator(); i.hasNext();) { + ArchiveRecord r = (ArchiveRecord)i.next(); + assertTrue(StringUtils.isNotBlank(r.getHeader().getMimetype()),"mime unread"); + } + } finally { + if (reader != null) { + reader.close(); + } + } + } +} diff --git a/src/test/java/org/archive/io/BufferedSeekInputStreamTest.java b/src/test/java/org/archive/io/BufferedSeekInputStreamTest.java new file mode 100644 index 00000000..f7e8e0b2 --- /dev/null +++ b/src/test/java/org/archive/io/BufferedSeekInputStreamTest.java @@ -0,0 +1,70 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. 
+ * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.archive.io; + +import org.junit.jupiter.api.Test; + +import java.util.Random; + +import static org.junit.jupiter.api.Assertions.assertEquals; + + +/** + * Unit test for BufferedSeekInputStream. The tests do some random + * repositioning in the stream to make sure the buffer is always valid. + * + * @author pjack + */ +public class BufferedSeekInputStreamTest { + + + private static byte[] TEST_DATA = makeTestData(); + + @Test + public void testPosition() throws Exception { + Random random = new Random(); + ArraySeekInputStream asis = new ArraySeekInputStream(TEST_DATA); + BufferedSeekInputStream bsis = new BufferedSeekInputStream(asis, 11); + for (int i = 0; i < TEST_DATA.length; i++) { + byte b = (byte)bsis.read(); + assertEquals(TEST_DATA[i], b); + } + for (int i = 0; i < 1000; i++) { + int index = random.nextInt(TEST_DATA.length); + bsis.position(index); + char expected = (char)((int)TEST_DATA[index] & 0xFF); + char read = (char)(bsis.read() & 0xFF); + assertEquals(expected, read); + } + } + + + private static byte[] makeTestData() { + String s = "If the dull substance of my flesh were thought\n" + + "Injurious distance could not stop my way\n" + + "For then, despite of space, I would be brought\n" + + "From limits far remote where thou dost stay.\n"; + byte[] r = new byte[s.length()]; + for (int i = 0; i < r.length; i++) { + r[i] = (byte)s.charAt(i); +// r[i] = 
(byte)s.charAt(i); + } + return r; + } +} diff --git a/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java new file mode 100644 index 00000000..5d31b890 --- /dev/null +++ b/src/test/java/org/archive/io/HeaderedArchiveRecordTest.java @@ -0,0 +1,215 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.io; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.archive.format.http.HttpHeader; +import org.archive.io.arc.ARCRecord; +import org.archive.io.warc.WARCRecord; +import org.junit.jupiter.api.Test; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class HeaderedArchiveRecordTest { + private static final String HTTPHEADER = "HTTP/1.1 200 OK\r\n" + + "Last-Modified: Sun, 28 Aug 2005 14:10:55 GMT\r\n" + + "Content-Length: 108\r\n" + "Connection: close\r\n" + + "Content-Type: text/html\r\n" + "\r\n"; + private static final String BODY = "\r\n" + " \r\n" + + " Neue Seite 1\r\n" + " \r\n" + + " \r\n" + " \r\n" + ""; + + @Test + public void testParseHttpHeadersInWARC() throws IOException { + final String url = "http://foo.maths.uq.edu.au/index.html"; + // final String warcHeader = "WARC/0.10 000000000486 response " + + // url + " 20070315152520 " + + // "urn:uuid:d8b342a8-dba4-4d7f-a551-1d8184f2ff58 " + + // "application/http; msgtype=response\r\n" + + // "Checksum: sha1:IT6YEX5WHKK57GOEHV2YHTTXEP5KPM6A\r\n" + + // "IP-Address: 80.150.6.184\r\n" + + // "\r\n"; + + final String warcHeader = "WARC/0.12\r\n" + + "MIME-Version: 1.0\r\n" + + "WARC-Record-Type: response\r\n" + + "WARC-Target-URI: http://foo.maths.uq.edu.au/index.html\r\n" + + "WARC-Date: 2006-09-19T17:20:24Z\r\n" + + "WARC-Digest: sha1:IT6YEX5WHKK57GOEHV2YHTTXEP5KPM6A\r\n" + + "WARC-IP-Address: 80.150.6.184\r\n" + + "Content-ID: \r\n" + + "Content-Type: application/http; msgtype=response\r\n" + + "Content-Length: " + (HTTPHEADER.length() + BODY.length()) + "\r\n" + + "\r\n"; + + final String hdr = warcHeader + HTTPHEADER + BODY; + + WARCRecord r = new WARCRecord(new ByteArrayInputStream(hdr.getBytes(UTF_8)), + 
"READER_IDENTIFIER", 0, false, true); + HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true); + + har.skipHttpHeader(); + + byte[] b = new byte[BODY.length()]; + har.read(b); + String bodyRead = new String(b, UTF_8); + assertEquals(BODY, bodyRead); + assertHeaderCorrectlyParsed(har.getContentHeaders()); + assertEquals(har.getHeader().getUrl(), url, + "failed to retrieve Url from metadata"); + } + + public void testParseHttpHeadersInARC() throws IOException { + final int len = HTTPHEADER.length() + BODY.length(); + final int contentLength = BODY.length(); + final String url = "http://www.ly.gov.tw:80/accpart.htm"; + final String hdr = HTTPHEADER + BODY; + // Interesting difference between ARCRecord and WARCRecord is that the + // stream passed the ARCRecord is supposed to be just past the + // ARCRecord metadata line where as stream passed WARCRecord is at + // record start. TODO: Add to ARCRecord constructor that doesn't + // take an ArchiveRecordHeader but rather parses it from the stream. + ArchiveRecordHeader arh = new ArchiveRecordHeader() { + public int getContentBegin() { + // TODO: In ARCs, this is where http headers end and + // the content begins. Need to reconcile for generic + // HeaderedArchiveRecord processing. In this context, it + // makes sense setting it to zero -- HeaderedArchiveRecord + // will then figure it out. 
+ return 0; + } + + public String getDate() { + return null; + } + + public String getDigest() { + return null; + } + + public Set getHeaderFieldKeys() { + return null; + } + + public Map getHeaderFields() { + return null; + } + + public Object getHeaderValue(String key) { + return null; + } + + public long getLength() { + return len; + } + + public long getContentLength() { + return contentLength; + } + + public String getMimetype() { + return null; + } + + public long getOffset() { + return 0; + } + + public String getReaderIdentifier() { + return null; + } + + public String getRecordIdentifier() { + return null; + } + + public String getUrl() { + return url; + } + + public String getVersion() { + return null; + } + + }; + ARCRecord r = new ARCRecord(new ByteArrayInputStream(hdr.getBytes(UTF_8)), + arh, 0, false, true, false); + + HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true); + har.skipHttpHeader(); + byte[] b = new byte[BODY.length()]; + har.read(b); + String bodyRead = new String(b, UTF_8); + assertEquals(BODY, bodyRead); + assertHeaderCorrectlyParsed(har.getContentHeaders()); + } + + @Test + public void testEasierParseHttpHeadersInARC() throws IOException { + final String url = "http://www.archive.org/index.htm"; + final String arcHeader = url + + " 192.168.0.1 20070515111004 text/html 167568\n"; + final String hdr = arcHeader + HTTPHEADER + BODY; + + ARCRecord r = new ARCRecord(new ByteArrayInputStream(hdr.getBytes(UTF_8)), + "READER_IDENTIFIER", 0, false, true, false); + + HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true); + har.skipHttpHeader(); + byte[] b = new byte[BODY.length()]; + har.read(b); + String bodyRead = new String(b, UTF_8); + assertEquals(BODY, bodyRead); + assertHeaderCorrectlyParsed(har.getContentHeaders()); + assertEquals(har.getHeader().getUrl(), url, "failed to retrieve Url from metadata"); + } + + private void assertHeaderCorrectlyParsed(HttpHeader[] headers) { + final List orgHeaders = 
Arrays.asList(HTTPHEADER.split("\r\n")); + assertEquals(orgHeaders.size(), headers.length + 1, + "not all HTTP header entries have been retrieved"); + + for (HttpHeader header : headers) { + assertTrue(orgHeaders.contains(header.getName() + ": " + + header.getValue())); + } + } + + @Test + public void testNoheaderWARC() throws IOException { + String b = "hello world"; + String c = "WARC/0.12\r\nContent-Type: text/plain\r\n" + + "Content-Length: " + b.length() + "\r\n\r\n" + b; + org.archive.io.warc.WARCRecord r = new org.archive.io.warc.WARCRecord( + new ByteArrayInputStream(c.getBytes(UTF_8)), "READER_IDENTIFIER", 0, + false, true); + HeaderedArchiveRecord har = new HeaderedArchiveRecord(r, true); + assertTrue(har.isStrict()); + } +} diff --git a/src/test/java/org/archive/io/RecordingInputStreamTest.java b/src/test/java/org/archive/io/RecordingInputStreamTest.java new file mode 100644 index 00000000..74e92024 --- /dev/null +++ b/src/test/java/org/archive/io/RecordingInputStreamTest.java @@ -0,0 +1,145 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.archive.io; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.PipedInputStream; +import java.io.PipedOutputStream; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + + +/** + * Test cases for RecordingInputStream. + * + * @author gojomo + */ +public class RecordingInputStreamTest { + @TempDir + File tempDir; + + /** + * Test readFullyOrUntil soft (no exception) and hard (exception) + * length cutoffs, timeout, and rate-throttling. + * + * @throws IOException + * @throws InterruptedException + * @throws RecorderTimeoutException + */ + @Test + public void testReadFullyOrUntil() throws RecorderTimeoutException, IOException, InterruptedException + { + RecordingInputStream ris = new RecordingInputStream(16384, (new File( + tempDir, "testReadFullyOrUntil").getAbsolutePath())); + ByteArrayInputStream bais = new ByteArrayInputStream( + "abcdefghijklmnopqrstuvwxyz".getBytes(UTF_8)); + // test soft max + ris.open(bais); + ris.setLimits(10,0,0); + ris.readFullyOrUntil(7); + ris.close(); + ReplayInputStream res = ris.getReplayInputStream(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + res.readFullyTo(baos); + assertEquals("abcdefg", new String(baos.toByteArray(), UTF_8), + "soft max cutoff"); + // test hard max + bais.reset(); + baos.reset(); + ris.open(bais); + boolean exceptionThrown = false; + try { + ris.setLimits(10,0,0); + ris.readFullyOrUntil(13); + } catch (RecorderLengthExceededException ex) { + exceptionThrown = true; + } + assertTrue(exceptionThrown,"hard max exception"); + ris.close(); + res = ris.getReplayInputStream(); + res.readFullyTo(baos); + assertEquals("abcdefghijk", new String(baos.toByteArray(), UTF_8), + "hard max 
cutoff"); + // test timeout + PipedInputStream pin = new PipedInputStream(); + PipedOutputStream pout = new PipedOutputStream(pin); + ris.open(pin); + exceptionThrown = false; + trickle("abcdefghijklmnopqrstuvwxyz".getBytes(UTF_8),pout); + int timeout = 200; + try { + ris.setLimits(0, timeout,0); + ris.readFullyOrUntil(0); + } catch (RecorderTimeoutException ex) { + exceptionThrown = true; + } + assertTrue(exceptionThrown,"timeout exception"); + ris.close(); + // test rate limit + bais = new ByteArrayInputStream(new byte[1024*2*5]); + ris.open(bais); + long startTime = System.currentTimeMillis(); + ris.setLimits(0,0,2); + ris.readFullyOrUntil(0); + long endTime = System.currentTimeMillis(); + long duration = endTime - startTime; + assertTrue(duration>= timeout,"read too fast: "+duration); + ris.close(); + } + + protected void trickle(final byte[] bytes, final PipedOutputStream pout) { + new Thread() { + public void run() { + try { + for (int i = 0; i < bytes.length; i++) { + Thread.sleep(200); + pout.write(bytes[i]); + } + pout.close(); + } catch (IOException e) { + // do nothing + } catch (Exception e) { + System.err.print(e); + } + } + }.start(); + + } + + @Test + public void testAsOutputStream() throws IOException { + RecordingInputStream ris = new RecordingInputStream(16384, (new File( + tempDir, "testAsOutputStream").getAbsolutePath())); + ris.open(null); + ris.asOutputStream().write("hello".getBytes(UTF_8)); + ris.close(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ris.getReplayInputStream().readFullyTo(baos); + assertEquals("hello", baos.toString(UTF_8.name())); + } +} diff --git a/src/test/java/org/archive/io/RecordingOutputStreamTest.java b/src/test/java/org/archive/io/RecordingOutputStreamTest.java new file mode 100644 index 00000000..0dba910e --- /dev/null +++ b/src/test/java/org/archive/io/RecordingOutputStreamTest.java @@ -0,0 +1,360 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). 
+ * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.archive.io; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; + +import org.archive.util.Base32; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + + +/** + * Test cases for RecordingOutputStream. + * + * @author stack + */ +public class RecordingOutputStreamTest { + /** + * Size of buffer used in tests. + */ + private static final int BUFFER_SIZE = 5; + + /** + * How many bytes to write in total when testing RecordingOutputStream. + */ + private static final int WRITE_TOTAL = 10; + + @TempDir + File tempDir; + + + /** + * Test reusing instance of RecordingOutputStream. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording. 
+ */ + @Test + public void testReuse() + throws IOException + { + final String BASENAME = "testReuse"; + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(tempDir, BASENAME + "Bkg.txt")).getAbsolutePath()); + for (int i = 0; i < 3; i++) + { + reuse(BASENAME, ros, i); + } + } + + private void reuse(String baseName, RecordingOutputStream ros, int index) + throws IOException + { + final String BASENAME = baseName + Integer.toString(index); + File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Do again to test that I can get a new ReplayInputStream on same + // RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Method to test for void write(int). + * + * Uses small buffer size and small write size. Test mark and reset too. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording. + */ + @Test + public void testWriteint() + throws IOException + { + final String BASENAME = "testWriteint"; + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(tempDir, BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeIntRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Do again to test that I can get a new ReplayInputStream on same + // RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Method to test for void write(byte []). + * + * Uses small buffer size and small write size. + * + * @throws IOException Failed open of backing file or opening of + * input streams verifying recording. 
+ */ + @Test + public void testWritebytearray() + throws IOException + { + final String BASENAME = "testWritebytearray"; + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(tempDir, BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + // Do again to test that I can get a new ReplayInputStream on same + // RecordingOutputStream. + verifyRecording(ros, f, WRITE_TOTAL); + } + + /** + * Test mark and reset. + * @throws IOException + */ + @Test + public void testMarkReset() throws IOException + { + final String BASENAME = "testMarkReset"; + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(tempDir, BASENAME + "Backing.txt")).getAbsolutePath()); + File f = writeByteRecordedFile(ros, BASENAME, WRITE_TOTAL); + verifyRecording(ros, f, WRITE_TOTAL); + ReplayInputStream ris = ros.getReplayInputStream(); + ris.mark(10 /*Arbitrary value*/); + // Read from the stream. + ris.read(); + ris.read(); + ris.read(); + // Reset it. It should be back at zero. + ris.reset(); + assertEquals(0, ris.read(), "Reset to zero"); + assertEquals(1, ris.read(), "Reset to zero char 1"); + assertEquals(2, ris.read(), "Reset to zero char 2"); + // Mark stream. Here. Next character should be '3'. + ris.mark(10 /* Arbitrary value*/); + ris.read(); + ris.read(); + ris.reset(); + assertEquals(3, ris.read(), "Reset to zero char 3"); + } + + /** + * Record a file write. + * + * Write a file w/ characters that start at null and ascend to + * filesize. Record the writing w/ passed ros + * recordingoutputstream. Return the file recorded as result of method. + * The file output stream that is recorded is named + * basename + ".txt". + * + *

    This method writes a character at a time. + * + * @param ros RecordingOutputStream to record with. + * @param basename Basename of file. + * @param size How many characters to write. + * @return The file the recorded bytes were written to. + */ + private File writeIntRecordedFile(RecordingOutputStream ros, + String basename, int size) + throws IOException + { + File f = new File(tempDir, basename + ".txt"); + FileOutputStream fos = new FileOutputStream(f); + ros.open(fos); + for (int i = 0; i < size; i++) + { + ros.write(i); + } + ros.close(); + fos.close(); + assertEquals(size, ros.getResponseContentLength(), + "Content-Length test"); + return f; + } + + /** + * Record a file byte array write. + * + * Write a file w/ characters that start at null and ascend to + * filesize. Record the writing w/ passed ros + * recordingoutputstream. Return the file recorded as result of method. + * The file output stream that is recorded is named + * basename + ".txt". + * + *

    This method writes using a byte array. + * + * @param ros RecordingOutputStream to record with. + * @param basename Basename of file. + * @param size How many characters to write. + * @return The file the recorded bytes were written to. + */ + private File writeByteRecordedFile(RecordingOutputStream ros, + String basename, int size) + throws IOException + { + File f = new File(tempDir, basename + ".txt"); + FileOutputStream fos = new FileOutputStream(f); + ros.open(fos); + byte [] b = new byte[size]; + for (int i = 0; i < size; i++) + { + b[i] = (byte)i; + } + ros.write(b); + ros.close(); + fos.close(); + assertEquals(size, ros.getResponseContentLength(), + "Content-Length test"); + return f; + } + + /** + * Verify what was written is both in the file written to and in the + * recording stream. + * + * @param ros Stream to check. + * @param f File that was recorded. Stream should have its content + * exactly. + * @param size Amount of bytes written. + * + * @exception IOException Failure reading streams. 
+ */ + private void verifyRecording(RecordingOutputStream ros, File f, + int size) throws IOException + { + assertEquals(size, f.length(), "Recorded file size."); + FileInputStream fis = new FileInputStream(f); + assertNotNull(fis, "FileInputStream not null"); + ReplayInputStream ris = ros.getReplayInputStream(); + assertNotNull(ris, "ReplayInputStream not null"); + for (int i = 0; i < size; i++) + { + assertEquals(i, ris.read(), + "ReplayInputStream content verification"); + assertEquals(i, fis.read(), + "Recorded file content verification"); + } + assertEquals(-1, ris.read(), "ReplayInputStream at EOF"); + fis.close(); + ris.close(); + } + + @Test + public void testMessageBodyBegin() throws IOException { + final String BASENAME = "testMessageBodyBegin"; + RecordingOutputStream ros = new RecordingOutputStream(BUFFER_SIZE, + (new File(tempDir, BASENAME + "Backing.txt")).getAbsolutePath()); + ros.setSha1Digest(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\nabcdefghij".getBytes(UTF_8)); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\r\n\r\nabcdefghij".getBytes(UTF_8)); + assertEquals(14, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r\nabcdefghij".getBytes(UTF_8)); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes(UTF_8)); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes(UTF_8)); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", 
Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes(UTF_8)); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\r\nabcdefghij".getBytes(UTF_8)); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r".getBytes(UTF_8)); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes(UTF_8)); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes(UTF_8)); + ros.write('\n'); + assertEquals(-1, ros.getMessageBodyBegin()); + ros.write("\nabcdefghij".getBytes(UTF_8)); + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes(UTF_8)); + ros.write('\n'); + ros.write('\n'); + for (int b: "abcdefghij".getBytes(UTF_8)) { + ros.write(b); + } + assertEquals(12, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789".getBytes(UTF_8)); + ros.write('\n'); + ros.write('\r'); + ros.write('\n'); + for (int b: "abcdefghij".getBytes(UTF_8)) { + ros.write(b); + } + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n".getBytes(UTF_8)); + ros.write('\n'); + ros.write("abcdefghij".getBytes(UTF_8)); + assertEquals(12, ros.getMessageBodyBegin()); + 
assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + + ros.open(new ByteArrayOutputStream()); + ros.write("0123456789\n\r".getBytes(UTF_8)); + ros.write('\n'); + ros.write("abcdefghij".getBytes(UTF_8)); + assertEquals(13, ros.getMessageBodyBegin()); + assertEquals("22GBTIFDIW36VN4NLYI6TEOAE3WGBW3D", Base32.encode(ros.getDigestValue())); + ros.close(); + } +} diff --git a/src/test/java/org/archive/io/ReplayCharSequenceTest.java b/src/test/java/org/archive/io/ReplayCharSequenceTest.java new file mode 100644 index 00000000..3935837b --- /dev/null +++ b/src/test/java/org/archive/io/ReplayCharSequenceTest.java @@ -0,0 +1,398 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.io; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.text.NumberFormat; +import java.util.Date; +import java.util.Locale; +import java.util.Random; +import java.util.logging.Logger; + +import org.archive.util.FileUtils; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static java.nio.charset.StandardCharsets.US_ASCII; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Test ReplayCharSequences. + * + * @author stack, gojomo + * @version $Revision$, $Date$ + */ +public class ReplayCharSequenceTest { + /** + * Logger. + */ + private static Logger logger = + Logger.getLogger("org.archive.io.ReplayCharSequenceFactoryTest"); + + + private static final int SEQUENCE_LENGTH = 127; + private static final int MULTIPLIER = 3; + private static final int BUFFER_SIZE = SEQUENCE_LENGTH * MULTIPLIER; + private static final int INCREMENT = 1; + + /** + * Buffer of regular content. + */ + private byte [] regularBuffer = null; + + @TempDir + File tempDir; + + @BeforeEach + protected void setUp() throws Exception + { + this.regularBuffer = + fillBufferWithRegularContent(new byte [BUFFER_SIZE]); + } + + @Test + public void testShiftjis() throws IOException { + + // Here's the bytes for the JIS encoding of the Japanese form of Nihongo + byte[] bytes_nihongo = { + (byte) 0x1B, (byte) 0x24, (byte) 0x42, (byte) 0x46, + (byte) 0x7C, (byte) 0x4B, (byte) 0x5C, (byte) 0x38, + (byte) 0x6C, (byte) 0x1B, (byte) 0x28, (byte) 0x42, + (byte) 0x1B, (byte) 0x28, (byte) 0x42 }; + final String ENCODING = "SJIS"; + // Here is nihongo converted to JVM encoding. 
+ String nihongo = new String(bytes_nihongo, ENCODING); + + RecordingOutputStream ros = writeTestStream( + bytes_nihongo,MULTIPLIER, + "testShiftjis",MULTIPLIER); + // TODO: check for existence of overflow file? + ReplayCharSequence rcs = getReplayCharSequence(ros,Charset.forName(ENCODING)); + + // Now check that start of the rcs comes back in as nihongo string. + String rcsStr = rcs.subSequence(0, nihongo.length()).toString(); + assertEquals(nihongo, rcsStr, "Nihongo " + nihongo + " does not equal converted string" + + " from rcs " + rcsStr); + // And assert next string is also properly nihongo. + if (rcs.length() >= (nihongo.length() * 2)) { + rcsStr = rcs.subSequence(nihongo.length(), + nihongo.length() + nihongo.length()).toString(); + assertEquals(nihongo, rcsStr, "Nihongo " + nihongo + " does not equal converted " + + " string from rcs (2nd time)" + rcsStr); + } + } + + @Test + public void testGetReplayCharSequenceByteZeroOffset() throws IOException { + + RecordingOutputStream ros = writeTestStream( + regularBuffer,MULTIPLIER, + "testGetReplayCharSequenceByteZeroOffset",MULTIPLIER); + ReplayCharSequence rcs = getReplayCharSequence(ros); + + for (int i = 0; i < MULTIPLIER; i++) { + accessingCharacters(rcs); + } + } + + private ReplayCharSequence getReplayCharSequence(RecordingOutputStream ros) throws IOException { + return getReplayCharSequence(ros,null); + } + + private ReplayCharSequence getReplayCharSequence(RecordingOutputStream ros, Charset charset) throws IOException { + return new GenericReplayCharSequence(ros.getReplayInputStream(), + ros.getBufferLength()/2, ros.backingFilename, charset); + } + + @Test + public void testGetReplayCharSequenceMultiByteZeroOffset() + throws IOException { + + RecordingOutputStream ros = writeTestStream( + regularBuffer,MULTIPLIER, + "testGetReplayCharSequenceMultiByteZeroOffset",MULTIPLIER); + ReplayCharSequence rcs = getReplayCharSequence(ros, UTF_8); + + for (int i = 0; i < MULTIPLIER; i++) { + accessingCharacters(rcs); 
+ } + } + + @Test + public void testReplayCharSequenceByteToString() throws IOException { + String fileContent = "Some file content"; + byte [] buffer = fileContent.getBytes(UTF_8); + RecordingOutputStream ros = writeTestStream( + buffer,1, + "testReplayCharSequenceByteToString.txt",0); + ReplayCharSequence rcs = getReplayCharSequence(ros); + String result = rcs.toString(); + assertEquals(fileContent, result,"Strings don't match"); + } + + private String toHexString(String str) + { + if (str != null) { + StringBuilder buf = new StringBuilder("{ "); + buf.append(Integer.toString(str.charAt(0), 16)); + for (int i = 1; i < str.length(); i++) { + buf.append(", "); + buf.append(Integer.toString(str.charAt(i), 16)); + } + buf.append(" }"); + return buf.toString(); + } + else + return "null"; + } + + @Test + public void testSingleByteEncodings() throws IOException { + byte[] bytes = { + (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64, + (byte) 0x7d, (byte) 0x7e, (byte) 0x7f, (byte) 0x80, + (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84, + (byte) 0xfc, (byte) 0xfd, (byte) 0xfe, (byte) 0xff }; + + String latin1String = new String(bytes, "latin1"); + RecordingOutputStream ros = writeTestStream( + bytes, 1, "testSingleByteEncodings-latin1.txt", 0); + ReplayCharSequence rcs = getReplayCharSequence(ros, ISO_8859_1); + String result = rcs.toString(); + logger.fine("latin1[0] " + toHexString(latin1String)); + logger.fine("latin1[1] " + toHexString(result)); + assertEquals(result, latin1String, "latin1 strings don't match"); + + String w1252String = new String(bytes, "windows-1252"); + ros = writeTestStream( + bytes, 1, "testSingleByteEncodings-windows-1252.txt", 0); + rcs = getReplayCharSequence(ros,Charset.forName("windows-1252")); + result = rcs.toString(); + logger.fine("windows-1252[0] " + toHexString(w1252String)); + logger.fine("windows-1252[1] " + toHexString(result)); + assertEquals(result, w1252String, "windows-1252 strings don't match"); + + String asciiString = 
new String(bytes, StandardCharsets.US_ASCII); + ros = writeTestStream( + bytes, 1, "testSingleByteEncodings-ascii.txt", 0); + rcs = getReplayCharSequence(ros, StandardCharsets.US_ASCII); + result = rcs.toString(); + logger.fine("ascii[0] " + toHexString(asciiString)); + logger.fine("ascii[1] " + toHexString(result)); + assertEquals(result, asciiString, "ascii strings don't match"); + } + + @Test + public void testReplayCharSequenceByteToStringOverflow() throws IOException { + String fileContent = "Some file content. "; // ascii + byte [] buffer = fileContent.getBytes(UTF_8); + RecordingOutputStream ros = writeTestStream( + buffer,1, + "testReplayCharSequenceByteToStringOverflow.txt",1); + String expectedContent = fileContent+fileContent; + + // The string is ascii which is a subset of both these encodings. Use + // both encodings because they exercise different code paths. UTF-8 is + // decoded to UTF-16 while windows-1252 is memory mapped directly. See + // GenericReplayCharSequence + ReplayCharSequence rcsUtf8 = getReplayCharSequence(ros, UTF_8); + ReplayCharSequence rcs1252 = getReplayCharSequence(ros, Charset.forName("windows-1252")); + + String result = rcsUtf8.toString(); + assertEquals(expectedContent, result, "Strings don't match"); + + result = rcs1252.toString(); + assertEquals(expectedContent, result, "Strings don't match"); + } + + @Test + public void testReplayCharSequenceByteToStringMulti() throws IOException { + String fileContent = "Some file content"; + byte [] buffer = fileContent.getBytes(StandardCharsets.UTF_8); + final int MULTIPLICAND = 10; + StringBuilder sb = + new StringBuilder(MULTIPLICAND * fileContent.length()); + for (int i = 0; i < MULTIPLICAND; i++) { + sb.append(fileContent); + } + String expectedResult = sb.toString(); + RecordingOutputStream ros = writeTestStream( + buffer,1, + "testReplayCharSequenceByteToStringMulti.txt",MULTIPLICAND-1); + for (int i = 0; i < 3; i++) { + ReplayCharSequence rcs = getReplayCharSequence(ros, UTF_8); 
+ String result = rcs.toString(); + assertEquals(result, expectedResult, "Strings don't match"); + rcs.close(); + System.gc(); + System.runFinalization(); + } + } + + @Test + @Disabled + public void xestHugeReplayCharSequence() throws IOException { + String fileContent = "01234567890123456789"; + byte[] buffer = fileContent.getBytes(US_ASCII); + + long reps = (long) Integer.MAX_VALUE / (long) buffer.length + 1000000l; + + logger.info("writing " + (reps * buffer.length) + + " bytes to testHugeReplayCharSequence.txt"); + RecordingOutputStream ros = writeTestStream(buffer, 0, + "testHugeReplayCharSequence.txt", reps); + ReplayCharSequence rcs = getReplayCharSequence(ros, US_ASCII); + + if (reps * fileContent.length() > (long) Integer.MAX_VALUE) { + assertEquals(Integer.MAX_VALUE, rcs.length(), "ReplayCharSequence has wrong length (length()=" + + rcs.length() + ") (should be " + Integer.MAX_VALUE + ")"); + } else { + assertEquals(rcs.length(), reps * (long) fileContent.length(), + "ReplayCharSequence has wrong length (length()=" + + rcs.length() + ") (should be " + + (reps * fileContent.length()) + ")"); + } + + // boundary cases or something + for (int index : new int[] { 0, rcs.length() / 4, rcs.length() / 2, + rcs.length() - 1, rcs.length() / 4 }) { + // logger.info("testing char at index=" + + // NumberFormat.getInstance().format(index)); + assertEquals(fileContent.charAt(index % fileContent.length()), + rcs.charAt(index), "Characters don't match (index=" + + NumberFormat.getInstance(Locale.ROOT).format(index) + ")"); + } + + // check that out of bounds indices throw exception + for (int n : new int[] { -1, Integer.MIN_VALUE, rcs.length() + 1 }) { + try { + String message = "rcs.charAt(" + n + ")=" + rcs.charAt(n) + + " ?!? 
-- expected IndexOutOfBoundsException"; + logger.severe(message); + fail(message); + } catch (IndexOutOfBoundsException e) { + logger.info("got expected exception: " + e); + } + } + + // check some characters at random spots & kinda stress test the + // system's memory mapping facility + Random rand = new Random(0); // seed so we get the same ones each time + for (int i = 0; i < 5000; i++) { + int index = rand.nextInt(rcs.length()); + // logger.info(i + ". testing char at index=" + + // NumberFormat.getInstance().format(index)); + assertEquals(fileContent.charAt(index % fileContent.length()), + rcs.charAt(index), "Characters don't match (index=" + + NumberFormat.getInstance(Locale.ROOT).format(index) + ")"); + } + } + + /** + * Accessing characters test. + * + * Checks that characters in the rcs are in sequence. + * + * @param rcs The ReplayCharSequence to try out. + */ + private void accessingCharacters(CharSequence rcs) { + long timestamp = (new Date()).getTime(); + int seeks = 0; + for (int i = (INCREMENT * 2); (i + INCREMENT) < rcs.length(); + i += INCREMENT) { + checkCharacter(rcs, i); + seeks++; + for (int j = i - INCREMENT; j < i; j++) { + checkCharacter(rcs, j); + seeks++; + } + } + // Note that printing out below breaks cruisecontrols drawing + // of the xml unit test results because it outputs disallowed + // xml characters. + logger.fine(rcs + " seeks count " + seeks + " in " + + ((new Date().getTime()) - timestamp) + " milliseconds."); + } + + /** + * Check the character read. + * + * Throws assertion if not expected result. + * + * @param rcs ReplayCharSequence to read from. + * @param i Character offset. 
+ */ + private void checkCharacter(CharSequence rcs, int i) { + int c = rcs.charAt(i); + assertEquals((c % SEQUENCE_LENGTH), (i % SEQUENCE_LENGTH), "Character " + Integer.toString(c) + " at offset " + i + + " unexpected."); + } + + /** + * @param baseName + * @return RecordingOutputStream + * @throws IOException + */ + private RecordingOutputStream writeTestStream(byte[] content, + int memReps, String baseName, long fileReps) throws IOException { + String backingFilename = FileUtils.maybeRelative(tempDir,baseName).getAbsolutePath(); + RecordingOutputStream ros = new RecordingOutputStream( + content.length * memReps, + backingFilename); + ros.open(); + ros.markMessageBodyBegin(); + for(long i = 0; i < (memReps+fileReps); i++) { + // fill buffer (repeat MULTIPLIER times) and + // overflow to disk (also MULTIPLIER times) + ros.write(content); + } + ros.close(); + return ros; + } + + + /** + * Fill a buffer w/ regular progression of single-byte + * (and <= 127) characters. + * @param buffer Buffer to fill. + * @return The buffer we filled. + */ + private byte [] fillBufferWithRegularContent(byte [] buffer) { + int index = 0; + for (int i = 0; i < buffer.length; i++) { + buffer[i] = (byte) (index & 0x00ff); + index++; + if (index >= SEQUENCE_LENGTH) { + // Reset the index. + index = 0; + } + } + return buffer; + } +} diff --git a/src/test/java/org/archive/io/RepositionableInputStreamTest.java b/src/test/java/org/archive/io/RepositionableInputStreamTest.java new file mode 100644 index 00000000..4aad11b9 --- /dev/null +++ b/src/test/java/org/archive/io/RepositionableInputStreamTest.java @@ -0,0 +1,79 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.archive.io; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class RepositionableInputStreamTest { + private File testFile; + private static final String LINE = "0123456789abcdefghijklmnopqrstuv"; + @TempDir + File tempDir; + + @BeforeEach + protected void setUp() throws Exception { + this.testFile = new File(tempDir, this.getClass().getName()); + PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(testFile), UTF_8)); + for (int i = 0; i < 100; i++) { + pw.print(LINE); + } + pw.close(); + } + + @Test + public void testname() throws Exception { + // Make buffer awkward size so we run into buffers spanning issues. 
+ RepositionableInputStream ris = + new RepositionableInputStream(new FileInputStream(this.testFile), + 57); + int c = ris.read(); + assertEquals(1, ris.position()); + ris.read(); + ris.position(0); + assertEquals(0, ris.position()); + int c1 = ris.read(); + assertEquals(c, c1); + ris.position(0); + byte [] bytes = new byte[LINE.length()]; + long offset = 0; + for (int i = 0; i < 10; i++) { + ris.read(bytes, 0, LINE.length()); + assertEquals(LINE, new String(bytes, UTF_8)); + offset += LINE.length(); + assertEquals(offset, ris.position()); + } + long p = ris.position(); + ris.position(p - LINE.length()); + assertEquals(p - LINE.length(), ris.position()); + c = ris.read(); + assertEquals(c, c1); + } +} diff --git a/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java new file mode 100644 index 00000000..25d5218e --- /dev/null +++ b/src/test/java/org/archive/io/arc/ARCReaderFactoryTest.java @@ -0,0 +1,60 @@ +package org.archive.io.arc; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.RandomAccessFile; + +import org.archive.io.ArchiveReader; +import org.archive.io.ArchiveRecord; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * + * Based on https://github.com/iipc/openwayback/pull/104/files + * + * @author csr@statsbiblioteket.dk (Colin Rosenthal) + * + */ +public class ARCReaderFactoryTest { + + private File testfile1 = new File("src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc"); + + /** + * Test reading uncompressed arcfile for issue + * https://github.com/iipc/openwayback/issues/101 + * @throws Exception + */ + @Test + public void testGetResource() throws Exception { + this.offsetResourceTest(testfile1, 1515, "http://www.archive.org/robots.txt" ); + this.offsetResourceTest(testfile1, 
36420, "http://www.archive.org/services/collection-rss.php" ); + } + + private void offsetResourceTest( File testfile, long offset, String uri ) throws Exception { + RandomAccessFile raf = new RandomAccessFile(testfile, "r"); + raf.seek(offset); + InputStream is = new FileInputStream(raf.getFD()); + String fPath = testfile.getAbsolutePath(); + ArchiveReader reader = ARCReaderFactory.get(fPath, is, false); + // This one works: + //ArchiveReader reader = ARCReaderFactory.get(testfile, offset); + ArchiveRecord record = reader.get(); + + final String url = record.getHeader().getUrl(); + assertEquals(uri, url, "URL of record is not as expected."); + + final long position = record.getPosition(); + final long recordLength = record.getHeader().getLength(); + assertTrue(position <= recordLength, "Position " + position + " is after end of record " + recordLength); + + // Clean up: + if( raf != null ) + raf.close(); + } + +} diff --git a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java new file mode 100644 index 00000000..f6820337 --- /dev/null +++ b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java @@ -0,0 +1,129 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.io.arc; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.nio.file.Path; +import java.util.Arrays; + +import org.archive.io.WriterPool; +import org.archive.io.WriterPoolMember; +import org.archive.io.WriterPoolSettings; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.archive.format.arc.ARCConstants.*; + +/** + * Test ARCWriterPool + */ +@SuppressWarnings("deprecation") +public class ARCWriterPoolTest { + @TempDir + Path tempDir; + + @Test + public void testARCWriterPool() + throws Exception { + final int MAX_ACTIVE = 3; + final int MAX_WAIT_MILLISECONDS = 100; + WriterPool pool = new ARCWriterPool(getSettings(true), + MAX_ACTIVE, MAX_WAIT_MILLISECONDS); + WriterPoolMember [] writers = new WriterPoolMember[MAX_ACTIVE]; + final String CONTENT = "Any old content"; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(CONTENT.getBytes(UTF_8)); + for (int i = 0; i < MAX_ACTIVE; i++) { + writers[i] = pool.borrowFile(); + assertEquals(i + 1, pool.getNumActive(), "Number active"); + ((ARCWriter)writers[i]).write("http://one.two.three", "no-type", + "0.0.0.0", 1234567890, CONTENT.length(), baos); + } + + // Pool is maxed out. 
New behavior is that additional requests + // block as long as necessary -- so no longer testing for timeout/ + // exception + + for (int i = (MAX_ACTIVE - 1); i >= 0; i--) { + pool.returnFile(writers[i]); + assertEquals(i, pool.getNumActive(), "Number active"); + assertEquals(MAX_ACTIVE - pool.getNumActive(), pool.getNumIdle(), + "Number idle"); + } + pool.close(); + } + + @Test + public void testInvalidate() throws Exception { + final int MAX_ACTIVE = 3; + final int MAX_WAIT_MILLISECONDS = 100; + WriterPool pool = new ARCWriterPool(getSettings(true), + MAX_ACTIVE, MAX_WAIT_MILLISECONDS); + WriterPoolMember [] writers = new WriterPoolMember[MAX_ACTIVE]; + final String CONTENT = "Any old content"; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(CONTENT.getBytes(UTF_8)); + for (int i = 0; i < MAX_ACTIVE; i++) { + writers[i] = pool.borrowFile(); + assertEquals(i + 1, pool.getNumActive(), "Number active"); + ((ARCWriter)writers[i]).write("http://one.two.three", "no-type", + "0.0.0.0", 1234567890, CONTENT.length(), baos); + } + + WriterPoolMember writer2Invalidate = writers[pool.getNumActive() - 1]; + writers[pool.getNumActive() - 1] = null; + pool.invalidateFile(writer2Invalidate); + for (int i = 0; i < (MAX_ACTIVE - 1); i++) { + if (writers[i] == null) { + continue; + } + pool.returnFile(writers[i]); + } + + for (int i = 0; i < MAX_ACTIVE; i++) { + writers[i] = pool.borrowFile(); + assertEquals(i + 1, pool.getNumActive(), "Number active"); + ((ARCWriter)writers[i]).write("http://one.two.three", "no-type", + "0.0.0.0", 1234567890, CONTENT.length(), baos); + } + for (int i = (MAX_ACTIVE - 1); i >= 0; i--) { + pool.returnFile(writers[i]); + assertEquals(i, pool.getNumActive(), "Number active"); + assertEquals(MAX_ACTIVE - pool.getNumActive(), pool.getNumIdle(), + "Number idle"); + } + pool.close(); + } + + private WriterPoolSettings getSettings(final boolean isCompressed) { + File [] files = {tempDir.toFile()}; + return new 
WriterPoolSettingsData( + "TEST", + "${prefix}-${timestamp17}-${serialno}-${heritrix.hostname}", + DEFAULT_MAX_ARC_FILE_SIZE, + isCompressed, + Arrays.asList(files), + null); + } +} diff --git a/src/test/java/org/archive/io/arc/ARCWriterTest.java b/src/test/java/org/archive/io/arc/ARCWriterTest.java new file mode 100644 index 00000000..f6c48462 --- /dev/null +++ b/src/test/java/org/archive/io/arc/ARCWriterTest.java @@ -0,0 +1,713 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.io.arc; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintStream; +import java.util.Arrays; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.input.NullInputStream; +import org.apache.commons.io.output.NullOutputStream; +import org.archive.io.ArchiveRecord; +import org.archive.io.ArchiveRecordHeader; +import org.archive.io.ReplayInputStream; +import org.archive.io.WriterPoolMember; +import org.archive.io.WriterPoolSettings; +import org.archive.util.ArchiveUtils; + +import com.google.common.io.Closeables; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import static org.junit.jupiter.api.Assertions.*; + +import static org.archive.format.arc.ARCConstants.*; + +/** + * Test ARCWriter class. + * + * This code exercises ARCWriter AND ARCReader. First it writes ARCs w/ + * ARCWriter. Then it validates what was written w/ ARCReader. + * + * @author stack + */ +public class ARCWriterTest { + /** + * Utility class for writing bad ARCs (with trailing junk) + */ + public class CorruptibleARCWriter extends ARCWriter { + byte[] endJunk = null; + + public CorruptibleARCWriter(AtomicInteger serial_no, WriterPoolSettings settings) { + super(serial_no, settings); + } + + @Override + protected void postWriteRecordTasks() throws IOException { + if (endJunk != null) { + this.write(endJunk); + } + super.postWriteRecordTasks(); + } + + public void setEndJunk(byte[] b) throws IOException { + this.endJunk = b; + } + } + + /** + * Suffix to use for ARC files made by JUNIT. 
+ */ + private static final String SUFFIX = "JUNIT"; + + private static final String SOME_URL = "http://www.archive.org/test/"; + + + private static final AtomicInteger SERIAL_NO = new AtomicInteger(); + + @TempDir + File tempDir; + + protected static String getContent(int index) { + return getContent(Integer.toString(index)); + } + + protected static String getContent() { + return getContent(null); + } + + protected static String getContent(String indexStr) { + String page = (indexStr != null)? "Page #" + indexStr: "Some Page"; + return "HTTP/1.1 200 OK\r\n" + + "Content-Type: text/html\r\n\r\n" + + "" + page + + "" + + "" + page + + ""; + } + + @SuppressWarnings("deprecation") + protected int writeRandomHTTPRecord(ARCWriter arcWriter, int index) + throws IOException { + String indexStr = Integer.toString(index); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + // Start the record with an arbitrary 14-digit date per RFC2540 + String now = ArchiveUtils.get14DigitDate(); + int recordLength = 0; + byte[] record = (getContent(indexStr)).getBytes(UTF_8); + recordLength += record.length; + baos.write(record); + // Add the newline between records back in + baos.write("\n".getBytes(UTF_8)); + recordLength += 1; + arcWriter.write("http://www.one.net/id=" + indexStr, "text/html", + "0.1.2.3", Long.parseLong(now), recordLength, baos); + return recordLength; + } + + private File writeRecords(String baseName, boolean compress, + long maxSize, int recordCount) + throws IOException { + File [] files = {tempDir}; + ARCWriter arcWriter = + new ARCWriter( + SERIAL_NO, + new WriterPoolSettingsData( + baseName, + "${prefix}-"+SUFFIX, + maxSize, + compress, + Arrays.asList(files), + null)); + assertNotNull(arcWriter); + for (int i = 0; i < recordCount; i++) { + writeRandomHTTPRecord(arcWriter, i); + } + arcWriter.close(); + assertTrue(arcWriter.getFile().exists(), + "Doesn't exist: " + + arcWriter.getFile().getAbsolutePath()); + return arcWriter.getFile(); + } + + private 
void validate(File arcFile, int recordCount) + throws FileNotFoundException, IOException { + ARCReader reader = ARCReaderFactory.get(arcFile); + assertNotNull(reader); + List metaDatas = null; + if (recordCount == -1) { + metaDatas = reader.validate(); + } else { + metaDatas = reader.validate(recordCount); + } + reader.close(); + // Now, run through each of the records doing absolute get going from + // the end to start. Reopen the arc so no context between this test + // and the previous. + + for (int i = metaDatas.size() - 1; i >= 0; i--) { + reader = ARCReaderFactory.get(arcFile); + ARCRecordMetaData meta = (ARCRecordMetaData)metaDatas.get(i); + ArchiveRecord r = reader.get(meta.getOffset()); + String mimeType = r.getHeader().getMimetype(); + assertTrue(mimeType != null && mimeType.length() > 0, + "Record is bogus"); + reader.close(); + } + assertEquals(recordCount,metaDatas.size(), "Metadata count not as expected"); + for (Iterator i = metaDatas.iterator(); i.hasNext();) { + ARCRecordMetaData r = (ARCRecordMetaData)i.next(); + assertTrue(r.getLength() > 0, "Record is empty"); + } + } + + @Test + public void testCheckARCFileSize() + throws IOException { + runCheckARCFileSizeTest("checkARCFileSize", false); + } + + @Test + public void testCheckARCFileSizeCompressed() + throws IOException { + runCheckARCFileSizeTest("checkARCFileSize", true); + } + + @Test + public void testWriteRecord() throws IOException { + final int recordCount = 2; + File arcFile = writeRecords("writeRecord", false, + DEFAULT_MAX_ARC_FILE_SIZE, recordCount); + validate(arcFile, recordCount + 1); // Header record. + } + + @Test + public void testRandomAccess() throws IOException { + final int recordCount = 3; + File arcFile = writeRecords("writeRecord", true, + DEFAULT_MAX_ARC_FILE_SIZE, recordCount); + ARCReader reader = ARCReaderFactory.get(arcFile); + // Get to second record. Get its offset for later use. 
+ boolean readFirst = false; + String url = null; + long offset = -1; + long totalRecords = 0; + boolean readSecond = false; + for (final Iterator i = reader.iterator(); i.hasNext(); totalRecords++) { + ARCRecord ar = (ARCRecord)i.next(); + if (!readFirst) { + readFirst = true; + continue; + } + if (!readSecond) { + url = ar.getMetaData().getUrl(); + offset = ar.getMetaData().getOffset(); + readSecond = true; + } + } + reader.close(); + + reader = ARCReaderFactory.get(arcFile, offset); + ArchiveRecord ar = reader.get(); + assertEquals(ar.getHeader().getUrl(), url); + ar.close(); + reader.close(); + + // Get reader again. See how iterator works with offset + reader = ARCReaderFactory.get(arcFile, offset); + int count = 0; + for (final Iterator i = reader.iterator(); i.hasNext(); i.next()) { + count++; + } + reader.close(); + assertEquals(totalRecords - 1, count); + } + + @Test + public void testWriteRecordCompressed() throws IOException { + final int recordCount = 2; + File arcFile = writeRecords("writeRecordCompressed", true, + DEFAULT_MAX_ARC_FILE_SIZE, recordCount); + validate(arcFile, recordCount + 1 /*Header record*/); + } + + public void testWriteGiantRecord() throws IOException { + PrintStream dummyStream = new PrintStream(new NullOutputStream(), false, UTF_8.name()); + ARCWriter arcWriter = + new ARCWriter( + SERIAL_NO, + dummyStream, + new File("dummy"), + new WriterPoolSettingsData( + "", + "", + -1, + false, + null, + null)); + assertNotNull(arcWriter); + + // Start the record with an arbitrary 14-digit date per RFC2540 + long now = System.currentTimeMillis(); + long recordLength = org.apache.commons.io.FileUtils.ONE_GB * 3; + + arcWriter.write("dummy:uri", "application/octet-stream", + "0.1.2.3", now, recordLength, new NullInputStream(recordLength)); + arcWriter.close(); + } + + private void runCheckARCFileSizeTest(String baseName, boolean compress) + throws FileNotFoundException, IOException { + File f = writeRecords(baseName, compress, 1024, 15); + 
validate(f, 15+1); + } + + protected CorruptibleARCWriter createARCWriter(String name, boolean compress) { + File [] files = {tempDir}; + return new CorruptibleARCWriter( + SERIAL_NO, + new WriterPoolSettingsData( + name, + "${prefix}-"+SUFFIX, + DEFAULT_MAX_ARC_FILE_SIZE, + compress, + Arrays.asList(files), + null)); + } + + protected static ByteArrayInputStream getBais(String str) + throws IOException { + return new ByteArrayInputStream(str.getBytes(UTF_8)); + } + + /** + * Writes a record, suppressing normal length-checks (so that + * intentionally malformed records may be written). + */ + protected static void writeRecord(ARCWriter writer, String url, + String type, int len, ByteArrayInputStream bais) + throws IOException { + writer.write(url, type, "192.168.1.1", (new Date()).getTime(), len, + bais, false); + } + + protected int iterateRecords(ARCReader r) + throws IOException { + int count = 0; + for (Iterator i = r.iterator(); i.hasNext();) { + ARCRecord rec = (ARCRecord)i.next(); + rec.close(); + if (count != 0) { + assertTrue(rec.getMetaData().getUrl().startsWith(SOME_URL), + "Unexpected URL " + rec.getMetaData().getUrl()); + } + count++; + } + return count; + } + + protected CorruptibleARCWriter createArcWithOneRecord(String name, + boolean compressed) + throws IOException { + CorruptibleARCWriter writer = createARCWriter(name, compressed); + String content = getContent(); + writeRecord(writer, SOME_URL, "text/html", + content.length(), getBais(content)); + return writer; + } + + @Test + public void testSpaceInURL() { + String eMessage = null; + try { + holeyUrl("testSpaceInURL", false, " "); + } catch (IOException e) { + eMessage = e.getMessage(); + } + assertTrue(eMessage.startsWith("Metadata line doesn't match"), + "Didn't get expected exception: " + eMessage); + } + + @Test + public void testTabInURL() { + String eMessage = null; + try { + holeyUrl("testTabInURL", false, "\t"); + } catch (IOException e) { + eMessage = e.getMessage(); + } + 
assertTrue(eMessage.startsWith("Metadata line doesn't match"), + "Didn't get expected exception: " + eMessage); + } + + protected void holeyUrl(String name, boolean compress, String urlInsert) + throws IOException { + ARCWriter writer = null; + try { + writer = createArcWithOneRecord(name, compress); + // Add some bytes on the end to mess up the record. + String content = getContent(); + writeRecord(writer, SOME_URL + urlInsert + "/index.html", "text/html", + content.length(), getBais(content)); + } finally { + Closeables.close(writer, true); + } + } + +// If uncompressed, length has to be right or parse will fail. +// +// public void testLengthTooShort() throws IOException { +// lengthTooShort("testLengthTooShort-" + PREFIX, false); +// } + + @Test + public void testLengthTooShortCompressed() throws IOException { + lengthTooShort("testLengthTooShortCompressed", true, false); + } + + @Test + public void testLengthTooShortCompressedStrict() + throws IOException { + String eMessage = null; + try { + lengthTooShort("testLengthTooShortCompressedStrict", + true, true); + } catch (RuntimeException e) { + eMessage = e.getMessage(); + } + assertTrue(eMessage.startsWith("java.io.IOException: Record STARTING at"), + "Didn't get expected exception: " + eMessage); + } + + protected void lengthTooShort(String name, boolean compress, boolean strict) + throws IOException { + CorruptibleARCWriter writer = null; + try { + writer = createArcWithOneRecord(name, compress); + // Add some bytes on the end to mess up the record. + String content = getContent(); + ByteArrayInputStream bais = getBais(content+"SOME TRAILING BYTES"); + writeRecord(writer, SOME_URL, "text/html", + content.length(), bais); + writer.setEndJunk("SOME TRAILING BYTES".getBytes(UTF_8)); + writeRecord(writer, SOME_URL, "text/html", + content.length(), getBais(content)); + } finally { + Closeables.close(writer, true); + } + + // Catch System.err into a byte stream. 
+ ByteArrayOutputStream os = new ByteArrayOutputStream(); + PrintStream origErr = System.err; + ARCReader r = null; + try { + System.setErr(new PrintStream(os, false, UTF_8.name())); + + r = ARCReaderFactory.get(writer.getFile()); + r.setStrict(strict); + int count = iterateRecords(r); + assertTrue(count == 4, "Count wrong " + count); + + // Make sure we get the warning string which complains about the + // trailing bytes. + String err = os.toString(UTF_8.name()); + assertTrue(err.startsWith("WARNING") && + (err.indexOf("Record STARTING at") > 0), "No message " + err); + r.close(); + } finally { + Closeables.close(r, true); + System.setErr(origErr); + } + } + +// If uncompressed, length has to be right or parse will fail. +// +// public void testLengthTooLong() +// throws IOException { +// lengthTooLong("testLengthTooLongCompressed-" + PREFIX, +// false, false); +// } + + @Test + public void testLengthTooLongCompressed() + throws IOException { + lengthTooLong("testLengthTooLongCompressed", + true, false); + } + + @Test + public void testLengthTooLongCompressedStrict() { + String eMessage = null; + try { + lengthTooLong("testLengthTooLongCompressed", + true, true); + } catch (IOException e) { + eMessage = e.getMessage(); + } + assertTrue(eMessage.startsWith("Premature EOF before end-of-record"), + "Didn't get expected exception: " + eMessage); + } + + protected void lengthTooLong(String name, boolean compress, + boolean strict) + throws IOException { + ARCWriter writer = createArcWithOneRecord(name, compress); + // Add a record with a length that is too long. + String content = getContent(); + writeRecord(writer, SOME_URL+"2", "text/html", + content.length() + 10, getBais(content)); + writeRecord(writer, SOME_URL+"3", "text/html", + content.length(), getBais(content)); + writer.close(); + + // Catch System.err. 
+ ByteArrayOutputStream os = new ByteArrayOutputStream(); + + PrintStream origErr = System.err; + ARCReader r = null; + try { + System.setErr(new PrintStream(os, false, UTF_8.name())); + + r = ARCReaderFactory.get(writer.getFile()); + r.setStrict(strict); + int count = iterateRecords(r); + assertTrue(count == 4, "Count wrong " + count); + + // Make sure we get the warning string which complains about the + // trailing bytes. + String err = os.toString(UTF_8.name()); + assertTrue(err.startsWith("WARNING Premature EOF before end-of-record"), + "No message " + err); + } finally { + Closeables.close(r, true); + System.setErr(origErr); + } + } + + @Test + public void testGapError() throws IOException { + ARCWriter writer = createArcWithOneRecord("testGapError", true); + String content = getContent(); + // Make a 'weird' RIS that returns bad 'remaining' length + // awhen remaining should be 0 + ReplayInputStream ris = new ReplayInputStream(content.getBytes(UTF_8), + content.length(), null) { + public long remaining() { + return (super.remaining()==0) ? -1 : super.remaining(); + } + }; + String message = null; + try { + writer.write(SOME_URL, "text/html", "192.168.1.1", + (new Date()).getTime(), content.length(), ris); + } catch (IOException e) { + message = e.getMessage(); + } finally { + IOUtils.closeQuietly(ris); + } + writer.close(); + assertTrue(message != null && + message.indexOf("Gap between expected and actual") >= 0, + "No gap when should be"); + } + + /** + * Write an arc file for other tests to use. + * @param arcdir Directory to write to. + * @param compress True if file should be compressed. + * @return ARC written. 
+ * @throws IOException + */ + public static File createARCFile(File arcdir, boolean compress) + throws IOException { + File [] files = {arcdir}; + ARCWriter writer = new ARCWriter(SERIAL_NO, + new WriterPoolSettingsData( + "", + "test", + DEFAULT_MAX_ARC_FILE_SIZE, + compress, + Arrays.asList(files), + null)); + String content = getContent(); + writeRecord(writer, SOME_URL, "text/html", content.length(), + getBais(content)); + writer.close(); + return writer.getFile(); + } + +// public void testSpeed() throws IOException { +// ARCWriter writer = createArcWithOneRecord("speed", true); +// // Add a record with a length that is too long. +// String content = getContent(); +// final int count = 100000; +// logger.info("Starting speed write of " + count + " records."); +// for (int i = 0; i < count; i++) { +// writeRecord(writer, SOME_URL, "text/html", content.length(), +// getBaos(content)); +// } +// writer.close(); +// logger.info("Finished speed write test."); +// } + + @Test + public void testValidateMetaLine() throws Exception { + final String line = "http://www.aandw.net/images/walden2.png " + + "128.197.34.86 20060111174224 image/png 2160"; + ARCWriter w = createARCWriter("testValidateMetaLine", true); + try { + w.validateMetaLine(line); + w.validateMetaLine(line + LINE_SEPARATOR); + w.validateMetaLine(line + "\\r\\n"); + } finally { + w.close(); + } + } + + @Test + public void testArcRecordOffsetReads() throws Exception { + ARCReader r = getSingleRecordReader("testArcRecordInBufferStream"); + ARCRecord ar = getSingleRecord(r); + // Now try getting some random set of bytes out of it + // at an odd offset (used to fail because we were + // doing bad math to find where in buffer to read). 
+ final byte[] buffer = new byte[17]; + final int maxRead = 4; + int totalRead = 0; + while (totalRead < maxRead) { + totalRead = totalRead + + ar.read(buffer, 13 + totalRead, maxRead - totalRead); + assertTrue(totalRead > 0); + } + r.close(); + } + + // available should always be >= 0; extra read()s should all give EOF + @Test + public void testArchiveRecordAvailableConsistent() throws Exception { + // first test reading byte-at-a-time via no-param read() + ARCReader r = getSingleRecordReader("testArchiveRecordAvailableConsistent"); + ARCRecord record = getSingleRecord(r); + int c = record.read(); + while(c>=0) { + c = record.read(); + } + // consecutive reads after EOR should always give -1, still show zero available() + for (int i=0; i<5; i++) { + assertTrue(record.available()>=0, "available negative:"+record.available()); + assertEquals(-1, record.read()); + } + r.close(); + } + + // should always give -1 on repeated reads past EOR + @Test + public void testArchiveRecordEORConsistent() throws Exception { + ARCReader r = getSingleRecordReader("testArchiveRecordEORConsistent"); + ARCRecord record = getSingleRecord(r); + this.readToEOS(record); + // consecutive reads after EOR should always give -1 + for (int i=0; i<5; i++) { + assertEquals(-1, record.read(new byte[1])); + } + r.close(); + } + + // should not throw premature EOF when wrapped with BufferedInputStream + // [HER-1450] showed this was the case using Apache Tika + @Test + public void testArchiveRecordMarkSupport() throws Exception { + ARCReader r = getSingleRecordReader("testArchiveRecordMarkSupport"); + ARCRecord record = getSingleRecord(r); + record.setStrict(true); + // ensure mark support + InputStream stream = new BufferedInputStream(record); + if (stream.markSupported()) { + for (int i=0; i<3; i++) { + this.readToEOS(stream); + stream.mark(stream.available()); + stream.reset(); + } + stream.close(); + } + r.close(); + } + + /** + * Test a particular style of using the reader iterator. 
(Should + * possibly be on a reader-centric test class, but the best setup + * functionality is here.) + * + * @throws IOException + */ + @Test + public void testReadIterator() throws IOException { + final int recordCount = 3; + File arcFile = writeRecords("writeRecord", true, + DEFAULT_MAX_ARC_FILE_SIZE, recordCount); + ARCReader reader = ARCReaderFactory.get(arcFile); + Iterator it = reader.iterator(); + while (it.hasNext()) { + ArchiveRecord next = it.next(); + next.close(); + } + reader.close(); + } + + protected void readToEOS(InputStream in) throws Exception { + byte [] buf = new byte[1024]; + int read = 0; + while (read >= 0) { + read = in.read(buf); + // System.out.println("readToEOS read " + read + " bytes"); + } + } + + protected ARCReader getSingleRecordReader(String name) throws Exception { + // Get an ARC with one record. + WriterPoolMember w = createArcWithOneRecord(name, true); + w.close(); + // Get reader on said ARC. + ARCReader r = ARCReaderFactory.get(w.getFile()); + return r; + } + + protected ARCRecord getSingleRecord(ARCReader r) { + final Iterator i = r.iterator(); + // Skip first ARC meta record. + i.next(); + i.hasNext(); + // Now we're at first and only record in ARC. 
+ return (ARCRecord) i.next(); + } +} diff --git a/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java b/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java new file mode 100644 index 00000000..c6617559 --- /dev/null +++ b/src/test/java/org/archive/io/warc/WARCReaderFactoryTest.java @@ -0,0 +1,37 @@ +package org.archive.io.warc; + +import java.io.FileInputStream; +import java.io.IOException; + +import org.archive.format.warc.WARCConstants; +import org.archive.format.warc.WARCConstants.WARCRecordType; +import org.archive.io.ArchiveReader; +import org.archive.io.ArchiveRecord; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class WARCReaderFactoryTest { + + // Test files: + String[] files = new String[] { + "src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz", + "src/test/resources/org/archive/format/warc/IAH-urls-wget.warc" + }; + + @Test + public void testGetStringInputstreamBoolean() throws IOException { + // Check the test files can be opened: + for( String file : files ) { + FileInputStream is = new FileInputStream(file); + ArchiveReader ar = WARCReaderFactory.get(file, is, true); + ArchiveRecord r = ar.get(); + String type = (String) r.getHeader().getHeaderValue(WARCConstants.HEADER_KEY_TYPE); + // Check the first record comes out as a 'warcinfo' record. + assertEquals(WARCRecordType.warcinfo.name(), type); + } + } + + +} diff --git a/src/test/java/org/archive/io/warc/WARCWriterTest.java b/src/test/java/org/archive/io/warc/WARCWriterTest.java new file mode 100644 index 00000000..d2684fa4 --- /dev/null +++ b/src/test/java/org/archive/io/warc/WARCWriterTest.java @@ -0,0 +1,529 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. 
+ * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.io.warc; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; + +import org.archive.io.ArchiveRecord; +import org.archive.io.ArchiveRecordHeader; +import org.archive.io.UTF8Bytes; +import org.archive.io.WriterPoolMember; +import org.archive.uid.RecordIDGenerator; +import org.archive.uid.UUIDGenerator; +import org.archive.util.ArchiveUtils; +import org.archive.util.anvl.ANVLRecord; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import static org.junit.jupiter.api.Assertions.*; + +import static org.archive.format.warc.WARCConstants.*; + +/** + * Test Writer and Reader. + * @author stack + * @version $Date: 2006-08-29 19:35:48 -0700 (Tue, 29 Aug 2006) $ $Version$ + */ +public class WARCWriterTest { + + private static final AtomicInteger SERIAL_NO = new AtomicInteger(); + + RecordIDGenerator generator = new UUIDGenerator(); + + /** + * Prefix to use for ARC files made by JUNIT. 
+ */ + private static final String SUFFIX = "JUNIT"; + + private static final String SOME_URL = "http://www.archive.org/test/"; + + @TempDir + File tempDir; + + @SuppressWarnings("unchecked") + @Test + public void testCheckHeaderLineValue() throws Exception { + WARCWriter writer = new WARCWriter( + SERIAL_NO, + new WARCWriterPoolSettingsData( + "","test",1,false,Collections.EMPTY_LIST,Collections.EMPTY_LIST,generator)); + writer.checkHeaderValue("one"); + IllegalArgumentException exception = null; + try { + writer.checkHeaderValue("with space"); + } catch(IllegalArgumentException e) { + exception = e; + } + assertNotNull(exception); + exception = null; + try { + writer.checkHeaderValue("with\0x0000controlcharacter"); + } catch(IllegalArgumentException e) { + exception = e; + } + writer.close(); + assertNotNull(exception); + } + + @SuppressWarnings("unchecked") + @Test + public void testMimetypes() throws IOException { + WARCWriter writer = new WARCWriter(SERIAL_NO, + new WARCWriterPoolSettingsData( + "m","testM",1,false,Collections.EMPTY_LIST,Collections.EMPTY_LIST,generator)); + writer.checkHeaderLineMimetypeParameter("text/xml"); + writer.checkHeaderLineMimetypeParameter("text/xml+rdf"); + assertEquals("text/plain; charset=SHIFT-JIS", writer.checkHeaderLineMimetypeParameter( + "text/plain; charset=SHIFT-JIS")); + assertEquals("multipart/mixed; boundary=\"simple boundary\"", + writer.checkHeaderLineMimetypeParameter( + "multipart/mixed; \r\n boundary=\"simple boundary\"")); + } + + @Test + public void testWriteRecord() throws IOException { + File [] files = {tempDir}; + + // Write uncompressed. + WARCWriter writer = + new WARCWriter(SERIAL_NO, new WARCWriterPoolSettingsData( + this.getClass().getName(), "templateWR1", -1, false, Arrays.asList(files), null, generator)); + + writeFile(writer); + writer.close(); + + // Write compressed. 
+ writer = new WARCWriter(SERIAL_NO, new WARCWriterPoolSettingsData( + this.getClass().getName(), "templateWR2", -1, true, Arrays.asList(files), null, generator)); + + writeFile(writer); + writer.close(); + } + + private void writeFile(final WARCWriter writer) + throws IOException { + try { + writeWarcinfoRecord(writer); + writeBasicRecords(writer); + } finally { + writer.close(); + writer.getFile().delete(); + } + } + + private void writeWarcinfoRecord(WARCWriter writer) + throws IOException { + WARCRecordInfo recordInfo = new WARCRecordInfo(); + recordInfo.setType(WARCRecordType.warcinfo); + recordInfo.setUrl(null); + recordInfo.setCreate14DigitDate(ArchiveUtils.getLog14Date()); + recordInfo.setMimetype(ANVLRecord.MIMETYPE); + recordInfo.setExtraHeaders(null); + recordInfo.setEnforceLength(true); + + ANVLRecord meta = new ANVLRecord(); + meta.addLabelValue("size", "1G"); + meta.addLabelValue("operator", "igor"); + byte [] bytes = meta.getUTF8Bytes(); + recordInfo.setContentStream(new ByteArrayInputStream(bytes)); + recordInfo.setContentLength((long) bytes.length); + + final URI recordid = writer.generateRecordId(TYPE, WARCRecordType.warcinfo.toString()); + recordInfo.setRecordId(recordid); + + writer.writeRecord(recordInfo); + } + + protected void writeBasicRecords(final WARCWriter writer) + throws IOException { + WARCRecordInfo recordInfo = new WARCRecordInfo(); + recordInfo.setType(WARCRecordType.metadata); + recordInfo.setUrl("http://www.archive.org/"); + recordInfo.setCreate14DigitDate(ArchiveUtils.get14DigitDate()); + recordInfo.setMimetype("no/type"); + recordInfo.setEnforceLength(true); + + ANVLRecord headerFields = new ANVLRecord(); + headerFields.addLabelValue("x", "y"); + headerFields.addLabelValue("a", "b"); + recordInfo.setExtraHeaders(headerFields); + + URI rid = (new UUIDGenerator()).getQualifiedRecordID(TYPE, WARCRecordType.metadata.toString()); + recordInfo.setRecordId(rid); + + final String content = "Any old content."; + for (int i = 0; i < 10; 
i++) { + String body = i + ". " + content; + byte [] bodyBytes = body.getBytes(UTF8Bytes.UTF8); + recordInfo.setContentStream(new ByteArrayInputStream(bodyBytes)); + recordInfo.setContentLength((long)bodyBytes.length); + writer.writeRecord(recordInfo); + } + } + + /** + * @return Generic HTML Content. + */ + protected static String getContent() { + return getContent(null); + } + + /** + * @return Generic HTML Content with mention of passed indexStr + * in title and body. + */ + protected static String getContent(String indexStr) { + String page = (indexStr != null)? "Page #" + indexStr: "Some Page"; + return "HTTP/1.1 200 OK\r\n" + + "Content-Type: text/html\r\n\r\n" + + "" + page + + "" + + "" + page + + ""; + } + + /** + * Write random HTML Record. + * @param w Where to write. + * @param index An index to put into content. + * @return Length of record written. + * @throws IOException + */ + protected int writeRandomHTTPRecord(WARCWriter w, int index) + throws IOException { + WARCRecordInfo recordInfo = new WARCRecordInfo(); + recordInfo.setType(WARCRecordType.resource); + recordInfo.setCreate14DigitDate(ArchiveUtils.get14DigitDate()); + recordInfo.setMimetype("text/html; charset=UTF-8"); + recordInfo.setRecordId(w.generateRecordId(null)); + recordInfo.setEnforceLength(true); + + String indexStr = Integer.toString(index); + recordInfo.setUrl("http://www.one.net/id=" + indexStr); + + byte[] record = (getContent(indexStr)).getBytes(UTF_8); + recordInfo.setContentLength((long) record.length); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(record); + recordInfo.setContentStream(new ByteArrayInputStream(baos.toByteArray())); + + // Add named fields for ip, checksum, and relate the metadata + // and request to the resource field. + recordInfo.addExtraHeader(NAMED_FIELD_IP_LABEL, "127.0.0.1"); + + w.writeRecord(recordInfo); + return record.length; + } + + /** + * Fill a WARC with HTML Records. + * @param baseName WARC basename. 
+ * @param compress Whether to compress or not. + * @param maxSize Maximum WARC size. + * @param recordCount How many records. + * @return The written file. + * @throws IOException + */ + private File writeRecords(String baseName, boolean compress, + int maxSize, int recordCount) + throws IOException { + File [] files = {tempDir}; + WARCWriter w = new WARCWriter(SERIAL_NO, new WARCWriterPoolSettingsData( + baseName + '-' + SUFFIX, "${prefix}", maxSize, compress, Arrays.asList(files), null, generator)); + + assertNotNull(w); + for (int i = 0; i < recordCount; i++) { + writeRandomHTTPRecord(w, i); + } + w.close(); + assertTrue(w.getFile().exists(), + "Doesn't exist: " + w.getFile().getAbsolutePath()); + return w.getFile(); + } + + /** + * Run validation of passed file. + * @param f File to validate. + * @param recordCount Expected count of records. + * @throws FileNotFoundException + * @throws IOException + */ + private void validate(File f, int recordCount) + throws FileNotFoundException, IOException { + WARCReader reader = WARCReaderFactory.get(f); + assertNotNull(reader); + List headers = null; + if (recordCount == -1) { + headers = reader.validate(); + } else { + headers = reader.validate(recordCount); + } + reader.close(); + + // Now, run through each of the records doing absolute get going from + // the end to start. Reopen the arc so no context between this test + // and the previous. 
+ + for (int i = headers.size() - 1; i >= 0; i--) { + reader = WARCReaderFactory.get(f); + ArchiveRecordHeader h = (ArchiveRecordHeader)headers.get(i); + ArchiveRecord r = reader.get(h.getOffset()); + String mimeType = r.getHeader().getMimetype(); + assertTrue(mimeType != null && mimeType.length() > 0, + "Record is bogus"); + reader.close(); + } + + assertTrue(headers.size() == recordCount, "Metadatas not equal"); + for (Iterator i = headers.iterator(); i.hasNext();) { + ArchiveRecordHeader r = (ArchiveRecordHeader)i.next(); + assertTrue(r.getLength() > 0, "Record is empty"); + } + } + + @Test + public void testWriteRecords() throws IOException { + final int recordCount = 2; + File f = writeRecords("writeRecords", false, DEFAULT_MAX_WARC_FILE_SIZE, + recordCount); + validate(f, recordCount + 1); // Header record. + } + + @Test + public void testRandomAccess() throws IOException { + final int recordCount = 3; + File f = writeRecords("randomAccess", true, DEFAULT_MAX_WARC_FILE_SIZE, + recordCount); + WARCReader reader = WARCReaderFactory.get(f); + // Get to second record. Get its offset for later use. + boolean readFirst = false; + String url = null; + long offset = -1; + long totalRecords = 0; + boolean readSecond = false; + for (final Iterator i = reader.iterator(); i.hasNext(); + totalRecords++) { + WARCRecord ar = (WARCRecord)i.next(); + if (!readFirst) { + readFirst = true; + continue; + } + if (!readSecond) { + url = ar.getHeader().getUrl(); + offset = ar.getHeader().getOffset(); + readSecond = true; + } + } + reader.close(); + + reader = WARCReaderFactory.get(f, offset); + ArchiveRecord ar = reader.get(); + assertEquals(ar.getHeader().getUrl(), url); + ar.close(); + reader.close(); + + // Get reader again. 
See how iterator works with offset + reader = WARCReaderFactory.get(f, offset); + int count = 0; + for (final Iterator i = reader.iterator(); i.hasNext(); i.next()) { + count++; + } + reader.close(); + assertEquals(totalRecords - 1, count); + } + + @Test + public void testWriteRecordCompressed() throws IOException { + final int recordCount = 2; + File arcFile = writeRecords("writeRecordCompressed", true, + DEFAULT_MAX_WARC_FILE_SIZE, recordCount); + validate(arcFile, recordCount + 1 /*Header record*/); + } + + protected WARCWriter createWARCWriter(String name, + boolean compress) { + File [] files = {tempDir}; + return new WARCWriter(SERIAL_NO, + new WARCWriterPoolSettingsData( + name, + "${prefix}-"+SUFFIX, + DEFAULT_MAX_WARC_FILE_SIZE, + compress, + Arrays.asList(files), + null, + generator)); + } + + protected static ByteArrayOutputStream getBaos(String str) + throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + baos.write(str.getBytes(UTF_8)); + return baos; + } + + protected static void writeRecord(WARCWriter w, String url, + String mimetype, int len, ByteArrayOutputStream baos) + throws IOException { + WARCRecordInfo recordInfo = new WARCRecordInfo(); + recordInfo.setType(WARCRecordType.resource); + recordInfo.setUrl(url); + recordInfo.setCreate14DigitDate(ArchiveUtils.get14DigitDate()); + recordInfo.setMimetype(mimetype); + recordInfo.setRecordId(w.generateRecordId(null)); + recordInfo.setExtraHeaders(null); + recordInfo.setContentStream(new ByteArrayInputStream(baos.toByteArray())); + recordInfo.setContentLength((long) len); + recordInfo.setEnforceLength(true); + + w.writeRecord(recordInfo); + } + + protected int iterateRecords(WARCReader r) + throws IOException { + int count = 0; + for (Iterator i = r.iterator(); i.hasNext();) { + ArchiveRecord ar = i.next(); + ar.close(); + if (count != 0) { + assertTrue(ar.getHeader().getUrl().equals(SOME_URL), + "Unexpected URL " + ar.getHeader().getUrl()); + } + count++; + } + return 
count; + } + + protected WARCWriter createWithOneRecord(String name, + boolean compressed) + throws IOException { + WARCWriter writer = createWARCWriter(name, compressed); + String content = getContent(); + writeRecord(writer, SOME_URL, "text/html", + content.length(), getBaos(content)); + return writer; + } + + @Test + public void testSpaceInURL() throws IOException { + long bytesWritten = holeyUrl("testSpaceInURL", false, " "); + assertEquals(0,bytesWritten,"Unexpected successful writing occurred"); + } + + @Test + public void testTabInURL() throws IOException { + long bytesWritten = holeyUrl("testTabInURL", false, "\t"); + assertEquals(0,bytesWritten,"Unexpected successful writing occurred"); + } + + protected long holeyUrl(String name, boolean compress, String urlInsert) + throws IOException { + WARCWriter writer = createWithOneRecord(name, compress); + // Add some bytes on the end to mess up the record. + long startPos = writer.getPosition(); + String content = getContent(); + ByteArrayOutputStream baos = getBaos(content); + writeRecord(writer, SOME_URL + urlInsert + "/index.html", "text/html", + content.length(), baos); + long endPos = writer.getPosition(); + writer.close(); + return endPos-startPos; + } + + /** + * Write an arc file for other tests to use. + * @param arcdir Directory to write to. + * @param compress True if file should be compressed. + * @return ARC written. 
+ * @throws IOException + */ + public static File createWARCFile(File arcdir, boolean compress) + throws IOException { + File [] files = {arcdir}; + WARCWriter writer = + new WARCWriter(SERIAL_NO, + new WARCWriterPoolSettingsData( + "", + "test", + DEFAULT_MAX_WARC_FILE_SIZE, + compress, + Arrays.asList(files), + null, + new UUIDGenerator())); + String content = getContent(); + writeRecord(writer, SOME_URL, "text/html", content.length(), + getBaos(content)); + writer.close(); + return writer.getFile(); + } + +// public void testSpeed() throws IOException { +// ARCWriter writer = createArcWithOneRecord("speed", true); +// // Add a record with a length that is too long. +// String content = getContent(); +// final int count = 100000; +// logger.info("Starting speed write of " + count + " records."); +// for (int i = 0; i < count; i++) { +// writeRecord(writer, SOME_URL, "text/html", content.length(), +// getBaos(content)); +// } +// writer.close(); +// logger.info("Finished speed write test."); +// } + + @Test + public void testArcRecordOffsetReads() throws Exception { + // Get an ARC with one record. + WriterPoolMember w = + createWithOneRecord("testArcRecordInBufferStream", true); + w.close(); + // Get reader on said ARC. + WARCReader r = WARCReaderFactory.get(w.getFile()); + final Iterator i = r.iterator(); + // Skip first ARC meta record. + ArchiveRecord ar = i.next(); + i.hasNext(); + // Now we're at first and only record in ARC. + ar = (WARCRecord) i.next(); + // Now try getting some random set of bytes out of it + // at an odd offset (used to fail because we were + // doing bad math to find where in buffer to read). 
+ final byte[] buffer = new byte[17]; + final int maxRead = 4; + int totalRead = 0; + while (totalRead < maxRead) { + totalRead = totalRead + + ar.read(buffer, 13 + totalRead, maxRead - totalRead); + assertTrue(totalRead > 0); + } + } +} diff --git a/src/test/java/org/archive/net/PublicSuffixesTest.java b/src/test/java/org/archive/net/PublicSuffixesTest.java index b88acb6d..96b6772c 100644 --- a/src/test/java/org/archive/net/PublicSuffixesTest.java +++ b/src/test/java/org/archive/net/PublicSuffixesTest.java @@ -19,14 +19,15 @@ package org.archive.net; -import java.io.PrintWriter; -import java.io.StringWriter; +import java.io.*; import java.util.ArrayList; import java.util.regex.Matcher; - -import junit.framework.TestCase; +import java.util.regex.Pattern; import org.archive.net.PublicSuffixes.Node; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; /** * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches @@ -34,9 +35,11 @@ * * @author gojomo */ -public class PublicSuffixesTest extends TestCase { +public class PublicSuffixesTest { // test of low level implementation - + private final String NL = System.getProperty("line.separator"); + + @Test public void testCompare() { Node n = new Node("hoge"); assertTrue(n.compareTo('a') > 0); @@ -45,7 +48,7 @@ public void testCompare() { assertEquals(-1, n.compareTo(new Node("*,"))); assertEquals(-1, n.compareTo(new Node("!muga,"))); assertEquals(-1, n.compareTo(new Node(""))); - + n = new Node("*,"); assertEquals(1, n.compareTo('a')); assertEquals(0, n.compareTo('*')); @@ -53,7 +56,7 @@ public void testCompare() { assertEquals(0, n.compareTo(new Node("*,"))); assertEquals(1, n.compareTo(new Node("!muga,"))); assertEquals(-1, n.compareTo(new Node(""))); - + n = new Node("!hoge"); assertEquals(1, n.compareTo('a')); assertEquals(-1, n.compareTo('*')); @@ -61,133 +64,221 @@ public void testCompare() { assertEquals(-1, n.compareTo(new Node("*,"))); assertEquals(0, 
n.compareTo(new Node("!muga,"))); assertEquals(-1, n.compareTo(new Node(""))); - + n = new Node(""); assertEquals(1, n.compareTo('a')); assertEquals(1, n.compareTo('*')); assertEquals(1, n.compareTo('!')); assertEquals(0, n.compareTo(new Node(""))); } - + protected String dump(Node alt) { StringWriter w = new StringWriter(); PublicSuffixes.dump(alt, 0, new PrintWriter(w)); return w.toString(); } + + @Test public void testTrie1() { Node alt = new Node(null, new ArrayList()); alt.addBranch("ac,"); // specifically, should not have empty string as match. - assertEquals("(null)\n" + - " \"ac,\"\n", dump(alt)); + assertEquals("(null)" + NL + " \"ac,\"" + NL, dump(alt)); alt.addBranch("ac,com,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"\"\n", dump(alt)); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"\"" + NL, dump(alt)); alt.addBranch("ac,edu,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"edu,\"\n" + - " \"\"\n", dump(alt)); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"edu,\"" + NL + + " \"\"" + NL, dump(alt)); } + + @Test public void testTrie2() { Node alt = new Node(null, new ArrayList()); alt.addBranch("ac,"); alt.addBranch("*,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"*,\"\n", dump(alt)); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"*,\"" + NL, dump(alt)); } + @Test public void testTrie3() { Node alt = new Node(null, new ArrayList()); alt.addBranch("ac,"); alt.addBranch("ac,!hoge,"); alt.addBranch("ac,*,"); // exception goes first. 
- assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"!hoge,\"\n" + - " \"*,\"\n" + - " \"\"\n", dump(alt)); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"!hoge,\"" + NL + + " \"*,\"" + NL + + " \"\"" + NL, dump(alt)); + } + + @Test + public void testTrie4() { + StringBuilder sb = new StringBuilder(); + sb.append("us-east-1.amazonaws.com\n"); + sb.append("execute-api.us-east-1.amazonaws.com\n"); + // Test regex build ordering of branches. Second entry is a superset of the first + + StringReader reader = new StringReader(sb.toString()); + String regex = PublicSuffixes.getTopmostAssignedSurtPrefixRegex(new BufferedReader(reader)); + assertEquals("(?ix)^\n" + + "(?:com,amazonaws,us-east-1,(?:execute-api,|)|[-\\w\\u00C0-\\u017F]+,)\n" + + "([-\\w\\u00C0-\\u017F]+,)", regex); + } + + @Test + public void testTrie5() { + StringBuilder sb = new StringBuilder(); + sb.append("execute-api.us-east-1.amazonaws.com\n"); + sb.append("us-east-1.amazonaws.com\n"); + // Test regex build ordering of branches. Second entry is a proper subset of the first + + StringReader reader = new StringReader(sb.toString()); + String regex = PublicSuffixes.getTopmostAssignedSurtPrefixRegex(new BufferedReader(reader)); + assertEquals("(?ix)^\n" + + "(?:com,amazonaws,us-east-1,(?:execute-api,|)|[-\\w\\u00C0-\\u017F]+,)\n" + + "([-\\w\\u00C0-\\u017F]+,)", regex); + } + @Test + public void testTrie6() { + StringBuilder sb = new StringBuilder(); + sb.append("va.it\n"); + sb.append("val-daosta.it\n"); + sb.append("vald-aosta.it\n"); + sb.append("valled-aosta.it\n"); + sb.append("vallée-aoste.it\n"); + // Test input that breaks without proper unicode handling. 
+ + StringReader reader = new StringReader(sb.toString()); + String regex = PublicSuffixes.getTopmostAssignedSurtPrefixRegex(new BufferedReader(reader)); + assertEquals("(?ix)^\n" + + "(?:it,va(?:,|l(?:-daosta,|d-aosta,|l(?:ed-aosta,|ée-aoste,)))|[-\\w\\u00C0-\\u017F]+,)\n" + + "([-\\w\\u00C0-\\u017F]+,)", regex); + + Matcher m = Pattern.compile(regex).matcher(""); + matchPrefix("it,va,example","it,va,", m); + matchPrefix("it,va,","it,va,", m); + matchPrefix("it,val-daosta,www","it,val-daosta,", m); + matchPrefix("it,val-daosta,","it,val-daosta,", m); + matchPrefix("it,vald-aosta,www","it,vald-aosta,", m); + matchPrefix("it,vald-aosta,","it,vald-aosta,", m); + matchPrefix("it,valled-aosta,www","it,valled-aosta,", m); + matchPrefix("it,valled-aosta,","it,valled-aosta,", m); + matchPrefix("it,vallze-aoste,","it,vallze-aoste,", m); + matchPrefix("it,vallze-aoste,www,222","it,vallze-aoste,", m); + } + @Test + public void testTrie7() { + StringBuilder sb = new StringBuilder(); + sb.append("*.fk\n"); + sb.append("com.fm\n"); + sb.append("edu.fm\n"); + sb.append("fm\n"); + // Test condition that generates duplicate branches f -> m, + + StringReader reader = new StringReader(sb.toString()); + String regex = PublicSuffixes.getTopmostAssignedSurtPrefixRegex(new BufferedReader(reader)); + assertEquals("(?ix)^\n" + + "(?:f(?:k,[-\\w\\u00C0-\\u017F]+,|m,(?:com,|edu,))|[-\\w\\u00C0-\\u017F]+,)\n" + + "([-\\w\\u00C0-\\u017F]+,)", regex); + + Matcher m = Pattern.compile(regex).matcher(""); + matchPrefix("fm,edu,www","fm,edu,", m); + matchPrefix("fm,edu,","fm,edu,", m); + matchPrefix("fm,example,www","fm,example,", m); + matchPrefix("fm,example,","fm,example,", m); } // test of higher-level functionality - Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() .matcher(""); + @Test public void testBasics() { - matchPrefix("com,example,www,", "com,example,"); - matchPrefix("com,example,", "com,example,"); - matchPrefix("org,archive,www,", "org,archive,"); - 
matchPrefix("org,archive,", "org,archive,"); - matchPrefix("fr,yahoo,www,", "fr,yahoo,"); - matchPrefix("fr,yahoo,", "fr,yahoo,"); - matchPrefix("au,com,foobar,www,", "au,com,foobar,"); - matchPrefix("au,com,foobar,", "au,com,foobar,"); - matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); - matchPrefix("uk,co,virgin,", "uk,co,virgin,"); - matchPrefix("au,com,example,www,", "au,com,example,"); - matchPrefix("au,com,example,", "au,com,example,"); + matchPrefix("com,example,www,", "com,example,", m); + matchPrefix("com,example,", "com,example,", m); + matchPrefix("org,archive,www,", "org,archive,", m); + matchPrefix("org,archive,", "org,archive,", m); + matchPrefix("fr,yahoo,www,", "fr,yahoo,", m); + matchPrefix("fr,yahoo,", "fr,yahoo,", m); + matchPrefix("au,com,foobar,www,", "au,com,foobar,", m); + matchPrefix("au,com,foobar,", "au,com,foobar,", m); + matchPrefix("uk,co,virgin,www,", "uk,co,virgin,", m); + matchPrefix("uk,co,virgin,", "uk,co,virgin,", m); + matchPrefix("au,com,example,www,", "au,com,example,", m); + matchPrefix("au,com,example,", "au,com,example,", m); matchPrefix("jp,yokohama,public,assigned,www,", - "jp,yokohama,public,assigned,"); - matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); + "jp,yokohama,public,assigned,", m); + matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,", m); } + @Test public void testDomainWithDash() { - matchPrefix("de,bad-site,www", "de,bad-site,"); + matchPrefix("de,bad-site,www", "de,bad-site,", m); } - + + @Test public void testDomainWithNumbers() { - matchPrefix("de,archive4u,www", "de,archive4u,"); + matchPrefix("de,archive4u,www", "de,archive4u,", m); } - + + @Test public void testIPV4() { - assertEquals("unexpected reduction", - "1.2.3.4", - PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); + assertEquals("1.2.3.4", + PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4"), + "unexpected reduction"); } - + + @Test public void testIPV6() { - assertEquals("unexpected 
reduction", - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + assertEquals("[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", PublicSuffixes.reduceSurtToAssignmentLevel( - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]"), + "unexpected reduction"); } - + + @Test public void testExceptions() { - matchPrefix("uk,bl,www,", "uk,bl,"); - matchPrefix("uk,bl,", "uk,bl,"); - matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); - matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); + matchPrefix("uk,bl,www,", "uk,bl,", m); + matchPrefix("uk,bl,", "uk,bl,", m); + matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,", m); + matchPrefix("jp,tokyo,city,", "jp,tokyo,city,", m); } + @Test public void testFakeTLD() { // we assume any new/unknonwn TLD should be assumed as 2-level; // this is preferable for our grouping purpose but might not be // for a cookie-assigning browser (original purpose of publicsuffixlist) - matchPrefix("zzz,example,www,", "zzz,example,"); + matchPrefix("zzz,example,www,", "zzz,example,", m); } + @Test public void testUnsegmentedHostname() { m.reset("example"); - assertFalse("unexpected match found in 'example'", m.find()); + assertFalse(m.find(), "unexpected match found in 'example'"); } + @Test public void testTopmostAssignedCaching() { - assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); - assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); + assertSame(PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),"topmostAssignedSurtPrefixPattern not cached"); + assertSame(PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),"topmostAssignedSurtPrefixRegex not cached"); } - + // TODO: test UTF domains? 
- protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { + protected void matchPrefix(String surtDomain, String expectedAssignedPrefix, Matcher m) { m.reset(surtDomain); - assertTrue("expected match not found in '" + surtDomain, m.find()); - assertEquals("expected match not found", expectedAssignedPrefix, m - .group()); + assertTrue(m.find(), "expected match not found in '" + surtDomain); + assertEquals(expectedAssignedPrefix, m.group(), "expected match not found"); } } diff --git a/src/test/java/org/archive/resource/MetaDataTest.java b/src/test/java/org/archive/resource/MetaDataTest.java new file mode 100644 index 00000000..88b8cd10 --- /dev/null +++ b/src/test/java/org/archive/resource/MetaDataTest.java @@ -0,0 +1,191 @@ +package org.archive.resource; + +import java.io.IOException; + +import org.archive.extract.ExtractingResourceFactoryMapper; +import org.archive.extract.ExtractingResourceProducer; +import org.archive.extract.ProducerUtils; +import org.archive.extract.ResourceFactoryMapper; +import org.archive.format.json.JSONUtils; +import org.json.JSONArray; +import org.json.JSONObject; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class MetaDataTest { + + private static String[] testFilePaths = { + "src/test/resources/org/archive/format/warc/IAH-urls-wget.warc", + "src/test/resources/org/archive/format/warc/mutliple-headers.warc" + }; + + private static JSONObject obj = new JSONObject("{\"foo\":\"bar\",\"hello\":\"world\"}"); + + private MetaData putMetaData(MetaData m) { + m.putBoolean("boolean-1", false); + m.putBoolean("boolean-2", true); + m.put("boolean-3", true); + m.put("boolean-1", true); // append + + m.put("double-1", 0.5d); + m.put("double-2", 2.5d); + m.put("double-3", 3.5d); + m.put("double-1", 1.5d); // append + + m.put("int-1", 0); + m.put("int-2", 2); + m.put("int-3", 3); + m.put("int-1", 1); // append + + // choose JSON "numbers" which are forced into a Java long 
(too big for an integer) + m.putLong("long-1", 0xffffffffL + 0L); + m.putLong("long-2", 0xffffffffL + 2L); + m.put("long-3", 0xffffffffL + 3L); + m.put("long-1", 0xffffffffL + 1L); // append + + m.putString("string-1", "0"); + m.putString("string-2", "2"); + m.put("string-3", "3"); + m.put("string-1", "1"); // append + + m.putOpt("obj-1", obj); + m.put("obj-1", obj); // append + m.put("obj-2", obj); + m.putOpt("obj-2", null); // do nothing because value is null + + return m; + } + + private void verifyMultiValuedMetaData(MetaData m) { + // boolean + assertEquals(JSONArray.class, m.get("boolean-1").getClass()); + assertFalse(((JSONArray) m.get("boolean-1")).getBoolean(0)); + assertTrue(((JSONArray) m.get("boolean-1")).getBoolean(1)); + assertTrue(m.getBoolean("boolean-2")); + assertTrue(m.getBoolean("boolean-3")); + assertEquals(Boolean.class, m.get("boolean-3").getClass()); + assertTrue(m.optBoolean("boolean-3", false)); + assertFalse(m.optBoolean("boolean-99", false)); + + // double + assertEquals(JSONArray.class, m.get("double-1").getClass()); + assertEquals(0.5d, ((JSONArray) m.get("double-1")).getDouble(0)); + assertEquals(1.5d, ((JSONArray) m.get("double-1")).getDouble(1)); + assertEquals(2.5d, m.getDouble("double-2")); + assertEquals(3.5d, m.getDouble("double-3")); + // could be Double or BigDecimal, depending on the Java version + // assertEquals(Double.class, m.get("double-3").getClass()); + assertEquals(3.5d, m.optDouble("double-3")); + assertEquals(99.5d, m.optDouble("double-99", 99.5d)); + + // int + assertEquals(JSONArray.class, m.get("int-1").getClass()); + assertEquals(0, ((JSONArray) m.get("int-1")).getInt(0)); + assertEquals(1, ((JSONArray) m.get("int-1")).getInt(1)); + assertEquals(2, m.getInt("int-2")); + assertEquals(3, m.getInt("int-3")); + assertEquals(Integer.class, m.get("int-3").getClass()); + assertEquals(3, m.optInt("int-3")); + assertEquals(99, m.optInt("int-99", 99)); + + // long + assertEquals(JSONArray.class, 
m.get("long-1").getClass()); + assertEquals(0xffffffffL + 0L, ((JSONArray) m.get("long-1")).getLong(0)); + assertEquals(0xffffffffL + 1L, ((JSONArray) m.get("long-1")).getLong(1)); + assertEquals(0xffffffffL + 2L, m.getLong("long-2")); + assertEquals(0xffffffffL + 3L, m.getLong("long-3")); + assertEquals(Long.class, m.get("long-3").getClass()); + assertEquals(0xffffffffL + 3L, m.optLong("long-3")); + assertEquals(0xffffffffL + 99L, m.optLong("long-99", 0xffffffffL + 99L)); + + // String + assertEquals(JSONArray.class, m.get("string-1").getClass()); + assertEquals("0", ((JSONArray) m.get("string-1")).getString(0)); + assertEquals("1", ((JSONArray) m.get("string-1")).getString(1)); + assertEquals("2", m.getString("string-2")); + assertEquals("3", m.getString("string-3")); + assertEquals(String.class, m.get("string-3").getClass()); + assertEquals("3", m.optString("string-3")); + assertEquals("99", m.optString("string-99", "99")); + + // Object + assertEquals(JSONArray.class, m.get("obj-1").getClass()); + assertEquals(JSONObject.class, ((JSONArray) m.get("obj-1")).get(0).getClass()); + assertEquals(JSONObject.class, ((JSONArray) m.get("obj-1")).get(1).getClass()); + assertEquals("bar", ((JSONObject) ((JSONArray) m.get("obj-1")).get(0)).get("foo")); + assertEquals("world", ((JSONObject) ((JSONArray) m.get("obj-1")).get(0)).get("hello")); + assertEquals("bar", ((JSONObject) ((JSONArray) m.get("obj-1")).get(1)).get("foo")); + assertEquals("world", ((JSONObject) ((JSONArray) m.get("obj-1")).get(1)).get("hello")); + assertEquals(JSONObject.class, m.get("obj-2").getClass()); + assertEquals("bar", ((JSONObject) m.get("obj-2")).get("foo")); + assertEquals("world", ((JSONObject) m.get("obj-2")).get("hello")); + } + + @Test + public void testMultiValued() { + MetaData m = new MetaData(); + m = putMetaData(m); + verifyMultiValuedMetaData(m); + + // test (de)serialization + m = new MetaData(m.toString(2)); + verifyMultiValuedMetaData(m); + } + + private MetaData 
readNextWARCResponseAsMetaData(String filePath) throws IOException, ResourceParseException { + ResourceProducer producer = ProducerUtils.getProducer(filePath); + ResourceFactoryMapper mapper = new ExtractingResourceFactoryMapper(); + ExtractingResourceProducer exProducer = new ExtractingResourceProducer(producer, mapper); + Resource r = exProducer.getNext(); + while (r != null) { + MetaData top = r.getMetaData().getTopMetaData(); + JSONObject warcHeaders = JSONUtils.extractObject(top, "Envelope.WARC-Header-Metadata"); + if (warcHeaders.has("WARC-Type") && "response".equals(warcHeaders.getString("WARC-Type"))) { + return top; + } + r = exProducer.getNext(); + } + return null; + } + + /** + * Verify that in the legacy test file all WARC and HTTP headers are + * single-valued, i.e. {@linkplain String}s. + */ + @Test + public void testSingleHeaders() throws IOException, ResourceParseException { + MetaData m = readNextWARCResponseAsMetaData(testFilePaths[0]); + + JSONObject warcHeaders = JSONUtils.extractObject(m, "Envelope.WARC-Header-Metadata"); + JSONObject httpHeaders = JSONUtils.extractObject(m, "Envelope.Payload-Metadata.HTTP-Response-Metadata.Headers"); + + for (Object header : warcHeaders.keySet()) { + assertEquals(String.class, warcHeaders.get(header.toString()).getClass()); + } + + for (Object header : httpHeaders.keySet()) { + assertEquals(String.class, httpHeaders.get(header.toString()).getClass()); + } + } + + @Test + public void testMultipleHeaders() throws IOException, ResourceParseException { + MetaData m = readNextWARCResponseAsMetaData(testFilePaths[1]); + + JSONObject warcHeaders = JSONUtils.extractObject(m, "Envelope.WARC-Header-Metadata"); + JSONObject httpHeaders = JSONUtils.extractObject(m, "Envelope.Payload-Metadata.HTTP-Response-Metadata.Headers"); + + assertEquals("https://www.example.com/index.html/", warcHeaders.getString("WARC-Target-URI")); + assertEquals(JSONArray.class, warcHeaders.get("WARC-Protocol").getClass()); + assertEquals(2, 
((JSONArray) warcHeaders.get("WARC-Protocol")).length()); + assertEquals("h2", ((JSONArray) warcHeaders.get("WARC-Protocol")).get(0)); + + assertEquals("108", httpHeaders.getString("Content-Length")); + assertEquals(JSONArray.class, httpHeaders.get("x-powered-by").getClass()); + assertEquals(2, ((JSONArray) httpHeaders.get("x-powered-by")).length()); + assertEquals("PHP/8.3.11", ((JSONArray) httpHeaders.get("x-powered-by")).get(0)); + assertEquals("PleskLin", ((JSONArray) httpHeaders.get("x-powered-by")).get(1)); + } +} diff --git a/src/test/java/org/archive/resource/arc/ARCResourceTest.java b/src/test/java/org/archive/resource/arc/ARCResourceTest.java new file mode 100644 index 00000000..e92d07be --- /dev/null +++ b/src/test/java/org/archive/resource/arc/ARCResourceTest.java @@ -0,0 +1,50 @@ +package org.archive.resource.arc; + + +import static org.archive.resource.ResourceConstants.PAYLOAD_LENGTH; +import static org.archive.resource.ResourceConstants.PAYLOAD_SLOP_BYTES; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; + +import org.archive.extract.ExtractingResourceFactoryMapper; +import org.archive.extract.ExtractingResourceProducer; +import org.archive.extract.ProducerUtils; +import org.archive.extract.ResourceFactoryMapper; +import org.archive.resource.Resource; +import org.archive.resource.ResourceParseException; +import org.archive.resource.ResourceProducer; +import org.archive.util.StreamCopy; + +import org.json.JSONObject; + +import org.junit.jupiter.api.Test; + +public class ARCResourceTest { + + @Test + public void testARCResource() throws ResourceParseException, IOException { + String testFileName = "../../format/arc/IAH-20080430204825-00000-blackbook-truncated.arc"; + ResourceProducer producer = ProducerUtils.getProducer(getClass().getResource(testFileName).getPath()); + ResourceFactoryMapper mapper = new ExtractingResourceFactoryMapper(); + ExtractingResourceProducer extractor = new 
ExtractingResourceProducer(producer, mapper); + + Resource resource = extractor.getNext(); + + while (resource != null) { + JSONObject payloadMD = resource.getMetaData().getTopMetaData().getJSONObject("Envelope") + .getJSONObject("Payload-Metadata"); + System.err.println(payloadMD); + + if (payloadMD.has(PAYLOAD_LENGTH)) { + assertTrue(payloadMD.getLong(PAYLOAD_LENGTH) != -1); + } + if (payloadMD.has(PAYLOAD_SLOP_BYTES)) { + // does not occur with the tested ARC file + } + + StreamCopy.readToEOF(resource.getInputStream()); + resource = extractor.getNext(); + } + } +} diff --git a/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java b/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java index 24b6c18a..e34d4e6f 100644 --- a/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java +++ b/src/test/java/org/archive/resource/html/ExtractingParseObserverTest.java @@ -1,15 +1,37 @@ package org.archive.resource.html; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.logging.Logger; + +import org.archive.extract.ExtractingResourceFactoryMapper; +import org.archive.extract.ExtractingResourceProducer; +import org.archive.extract.ProducerUtils; +import org.archive.extract.ResourceFactoryMapper; import org.archive.resource.MetaData; +import org.archive.resource.Resource; +import org.archive.resource.ResourceParseException; +import org.archive.resource.ResourceProducer; import org.htmlparser.nodes.TextNode; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Multimap; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; -public class ExtractingParseObserverTest extends TestCase { +public class ExtractingParseObserverTest { + private static final Logger LOG = + 
Logger.getLogger(ExtractingParseObserverTest.class.getName()); + + @Test public void testHandleStyleNodeExceptions() throws Exception { String[] tests = { "some css", @@ -19,7 +41,9 @@ public void testHandleStyleNodeExceptions() throws Exception { "url (' ')", "url('\")", "url(')", - "url('\"')" + "url('\"')", + "url('\\\"\"')", + "url(''''')" }; boolean except = false; HTMLMetaData md = new HTMLMetaData(new MetaData()); @@ -29,7 +53,7 @@ public void testHandleStyleNodeExceptions() throws Exception { TextNode tn = new TextNode(css); epo.handleStyleNode(tn); } catch(Exception e) { - System.err.format("And the winner is....(%s)\n", css); + System.err.format(Locale.ROOT, "And the winner is....(%s)\n", css); e.printStackTrace(); except = true; throw e; @@ -37,6 +61,8 @@ public void testHandleStyleNodeExceptions() throws Exception { assertFalse(except); } } + + @Test public void testHandleStyleNode() throws Exception { String[][] tests = { {""}, @@ -45,31 +71,37 @@ public void testHandleStyleNode() throws Exception { {"url(\"foo.gif\")","foo.gif"}, {"url(\\\"foo.gif\\\")","foo.gif"}, {"url(\\'foo.gif\\')","foo.gif"}, - - }; + {"url(''foo.gif'')","foo.gif"}, + {"url( foo.gif )","foo.gif"}, + {"url('''')"}, + {"url('foo.gif'')","foo.gif"}, + }; for(String[] testa : tests) { checkExtract(testa); } - // boolean except = false; -// HTMLMetaData md = new HTMLMetaData(new MetaData()); -// ExtractingParseObserver epo = new ExtractingParseObserver(md); -// for(String css : tests) { -// try { -// TextNode tn = new TextNode(css); -// epo.handleStyleNode(tn); -// } catch(Exception e) { -// System.err.format("And the winner is....(%s)\n", css); -// e.printStackTrace(); -// except = true; -// throw e; -// } -// assertFalse(except); -// } } + + /** + * Test whether the pattern matcher does extract nothing and also does not + * not hang-up if an overlong CSS link is truncated. 
+ */ + @Test + public void testHandleStyleNodeNoHangupTruncated() throws Exception { + StringBuilder sb = new StringBuilder(); + sb.append("url("); + for (int i = 0; i < 500000; i++) + sb.append('\''); + sb.append("foo.gif"); + for (int i = 0; i < 499000; i++) + sb.append('\''); + String[] test = new String[1]; + test[0] = sb.toString(); + checkExtract(test); + } + private void checkExtract(String[] data) throws JSONException { // System.err.format("CSS(%s) want[0](%s)\n",css,want[0]); String css = data[0]; - boolean except = false; HTMLMetaData md = new HTMLMetaData(new MetaData()); ExtractingParseObserver epo = new ExtractingParseObserver(md); try { @@ -87,12 +119,167 @@ private void checkExtract(String[] data) throws JSONException { assertTrue(o instanceof JSONObject); JSONObject jo = (JSONObject) o; - assertEquals(data[i],jo.getString("href")); + assertEquals(data[i], jo.getString("href"), + "CSS link extraction failed for <" + css + ">"); } } else { - assertNull(a); + assertNull(a, "Expected no extracted link for <" + css + ">"); } } + private void checkLink(Multimap links, String url, String path) { + assertTrue(links.containsKey(url), "Link with URL " + url + " not found"); + assertTrue(links.get(url).contains(path), "Wrong path " + path + " for " + url); + } + + private void checkLinks(Resource resource, String[][] expectedLinks) { + assertNotNull(resource); + assertInstanceOf(HTMLResource.class, resource, "Wrong instance type of Resource: " + resource.getClass()); + MetaData md = resource.getMetaData(); + LOG.info(md.toString()); + Multimap links = ArrayListMultimap.create(); + JSONObject head = md.optJSONObject("Head"); + if (head != null) { + // + String baseUrl = (String) head.opt("Base"); + if (baseUrl != null) { + links.put(baseUrl, "__base__"); + } + // + JSONArray metas = head.optJSONArray("Metas"); + if (metas != null) { + for (int i = 0; i < metas.length(); i++) { + JSONObject o = (JSONObject) metas.optJSONObject(i); + String httpEquiv = 
o.optString("http-equiv"); + if (httpEquiv != null && httpEquiv.equalsIgnoreCase("Refresh")) { + String metaRefreshTarget = o.optString("content"); + if (metaRefreshTarget != null) { + metaRefreshTarget = metaRefreshTarget.replaceFirst("(?i)(?:^\\d+\\s*;)?\\s*url=", ""); + links.put(metaRefreshTarget, "__meta_refresh__"); + } + } + } + } + } + // extract outlinks + List linkArrays = new ArrayList(); + if (md.optJSONArray("Links") != null) { + linkArrays.add(md.optJSONArray("Links")); + } + try { + if (md.getJSONObject("Head") != null && md.getJSONObject("Head").getJSONArray("Link") != null) { + linkArrays.add(md.getJSONObject("Head").getJSONArray("Link")); + } + } catch (JSONException e1) { + } + for (JSONArray ldata : linkArrays) { + for (int i = 0; i < ldata.length(); i++) { + JSONObject o = (JSONObject) ldata.optJSONObject(i); + try { + String url = o.getString("url"); + links.put(url, o.getString("path")); + LOG.info(" found link: " + o.getString("url") + " " + o.getString("path")); + } catch (JSONException e) { + fail("Failed to extract URL from link: " + e.getMessage()); + } + } + } + assertEquals(expectedLinks.length, links.size(), "Unexpected number of links"); + for (String[] l : expectedLinks) { + checkLink(links, l[0], l[1]); + } + } + + @Test + public void testLinkExtraction() throws ResourceParseException, IOException { + String testFileName = "link-extraction-test.warc"; + ResourceProducer producer = ProducerUtils.getProducer(getClass().getResource(testFileName).getPath()); + ResourceFactoryMapper mapper = new ExtractingResourceFactoryMapper(); + ExtractingResourceProducer extractor = + new ExtractingResourceProducer(producer, mapper); + extractor.getNext(); // skip warcinfo record + String[][] html4links = { + {"http://www.example.com/", "__base__"}, + {"http://www.example.com/redirected.html", "__meta_refresh__"}, + {"background.jpg", "BODY@/background"}, + {"http://www.example.com/a-href.html", "A@/href"}, + {"#anchor", "A@/href"}, + {"image.png", 
"IMG@/src"}, + {"image.gif", "IMG@/src"}, + {"http://example.com/image-description.html#image.gif", "IMG@/longdesc"}, + {"helloworld.swf", "OBJECT@/data"}, + {"http://www.example.com/shakespeare.html", "Q@/cite"}, + {"http://www.example.com/shakespeare-long.html", "BLOCKQUOTE@/cite"} + }; + checkLinks(extractor.getNext(), html4links); + String[][] html5links = { + {"http:///www.example.com/video.html", "LINK@/href", "canonical"}, + {"video.rss", "LINK@/href", "alternate"}, + {"https://archive.org/download/WebmVp8Vorbis/webmvp8.gif", "VIDEO@/poster"}, + {"https://archive.org/download/WebmVp8Vorbis/webmvp8.webm", "SOURCE@/src"}, + {"https://archive.org/download/WebmVp8Vorbis/webmvp8_512kb.mp4", "SOURCE@/src"}, + {"https://archive.org/download/WebmVp8Vorbis/webmvp8.ogv", "SOURCE@/src"} + }; + checkLinks(extractor.getNext(), html5links); + String[][] html5links2 = { + {"http://www.example.com/", "A@/href"}, + }; + checkLinks(extractor.getNext(), html5links2); + String[][] fbVideoLinks = { + {"https://www.facebook.com/facebook/videos/10153231379946729/", "BLOCKQUOTE@/cite"}, + {"https://www.facebook.com/facebook/videos/10153231379946729/", "A@/href"}, + {"https://www.facebook.com/facebook/", "A@/href"}, + {"https://www.facebook.com/facebook/videos/10153231379946729/", "DIV@/data-href"} + }; + checkLinks(extractor.getNext(), fbVideoLinks); + String[][] dataHrefLinks = { + {"standard.css", "LINK@/href", "stylesheet"}, + {"https://www.facebook.com/elegantthemes/videos/10153760379211923/", "DIV@/data-href"}, + {"https://www.facebook.com/facebook/videos/10153231379946729/", "DIV@/data-href"}, + {"https://www.facebook.com/facebook/videos/10153231379946729/", "BLOCKQUOTE@/cite"}, + {"https://www.facebook.com/facebook/videos/10153231379946729/", "A@/href"}, + {"https://www.facebook.com/facebook/", "A@/href"}, + {"//edge.flowplayer.org/bauhaus.webm", "SOURCE@/src"}, + {"//edge.flowplayer.org/bauhaus.mp4", "SOURCE@/src"}, + {"//edge.flowplayer.org/functional.webm", 
"BUTTON@/data-href"}, + {"/content-page", "ARTICLE@/data-href"}, + {"/content-page", "A@/href"}, + {"/tags/content","A@/href"}, + {"/tags/headlines", "A@/href"}, + {"http://grabaperch.com", "DIV@/data-href"}, + {"green.css", "LINK@/data-href"}, + {"blue.css", "LINK@/data-href"}, + {"http://codecanyon.net/user/CodingJack", "A@/data-href"}, + {"jackbox/img/thumbs/4.jpg", "IMG@/src"}, + {"//venobox-destination", "A@/data-href"}, + {"#", "A@/href"}, + {"http://www.youtube.com/v/itTskyFLSS8&rel=0&autohide=1&showinfo=0&autoplay=1", "DIV@/data-href"}, + {"#", "A@/href"}, + {"http://www.youtube.com/v/itTskyFLSS8&rel=0&autohide=1&showinfo=0", "IFRAME@/src"} + }; + checkLinks(extractor.getNext(), dataHrefLinks); + String[][] fbSocialLinks = { + {"http://www.your-domain.com/your-page.html", "DIV@/data-uri"}, + {"https://developers.facebook.com/docs/plugins/comments#configurator", "DIV@/data-href"}, + {"https://www.facebook.com/zuck/posts/10102735452532991?comment_id=1070233703036185", "DIV@/data-href"}, + {"https://www.facebook.com/zuck", "DIV@/data-href"}, + {"https://developers.facebook.com/docs/plugins/", "DIV@/data-href"}, + {"https://www.facebook.com/facebook", "DIV@/data-href"}, + {"https://www.facebook.com/facebook", "BLOCKQUOTE@/cite"}, + {"https://www.facebook.com/facebook", "A@/href"}, + {"http://www.your-domain.com/your-page.html", "DIV@/data-href"} + }; + checkLinks(extractor.getNext(), fbSocialLinks); + String[][] onClickLinks = { + {"webpage.html", "DIV@/onclick"}, + {"index.html", "INPUT@/onclick"}, + {"http://www.x.com/", "INPUT@/onclick"}, + {"button-child.php", "INPUT@/onclick"}, + {"http://example.com/", "INPUT@/onclick"}, + {"http://example.com/location/href/1.html", "INPUT@/onclick"}, + {"http://example.com/location/href/2.html", "INPUT@/onclick"} + }; + checkLinks(extractor.getNext(), onClickLinks); + } } diff --git a/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java b/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java index 
fb255d3c..a3c8c1c9 100644 --- a/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java +++ b/src/test/java/org/archive/resource/html/HTMLMetaDataTest.java @@ -1,16 +1,16 @@ package org.archive.resource.html; +import java.util.Locale; + import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class HTMLMetaDataTest extends TestCase { +public class HTMLMetaDataTest { - public void testHTMLParseData() { -// fail("Not yet implemented"); - } + @Test public void testJSON() throws JSONException { JSONObject data = new JSONObject(); JSONObject links = new JSONObject(); @@ -30,6 +30,8 @@ public void testJSON() throws JSONException { System.out.println(data.toString()); } + + @Test public void testJSON2() throws JSONException { String sa[][] = {{"one","1"},{"two","2"},{"three","3"}}; JSONObject jo = new JSONObject(); @@ -37,6 +39,8 @@ public void testJSON2() throws JSONException { appendStrArr(jo,sa); System.out.println(jo.toString(1)); } + + @Test public void testJSON3() throws JSONException { JSONObject jo = new JSONObject(); appendStrArr2(jo,"k",new String[] {"1","2","3","4"}); @@ -57,7 +61,7 @@ private void appendStrArr(JSONObject o, String a[][]) throws JSONException { } private void appendStrArr2(JSONObject o, String k, String... 
a) throws JSONException { - System.out.format("A length(%d)\n", a.length); + System.out.format(Locale.ROOT, "A length(%d)\n", a.length); JSONObject n = new JSONObject(); if((a.length & 1) == 1) { throw new IllegalArgumentException(); diff --git a/src/test/java/org/archive/resource/warc/WARCResourceTest.java b/src/test/java/org/archive/resource/warc/WARCResourceTest.java new file mode 100644 index 00000000..71c2a4ee --- /dev/null +++ b/src/test/java/org/archive/resource/warc/WARCResourceTest.java @@ -0,0 +1,49 @@ +package org.archive.resource.warc; + +import static org.archive.resource.ResourceConstants.PAYLOAD_LENGTH; +import static org.archive.resource.ResourceConstants.PAYLOAD_SLOP_BYTES; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; + +import org.archive.extract.ExtractingResourceFactoryMapper; +import org.archive.extract.ExtractingResourceProducer; +import org.archive.extract.ProducerUtils; +import org.archive.extract.ResourceFactoryMapper; +import org.archive.resource.Resource; +import org.archive.resource.ResourceParseException; +import org.archive.resource.ResourceProducer; +import org.archive.util.StreamCopy; + +import org.json.JSONObject; + +import org.junit.jupiter.api.Test; + +public class WARCResourceTest { + + @Test + public void testWARCResource() throws ResourceParseException, IOException { + String testFileName = "../../format/warc/IAH-urls-wget.warc"; + ResourceProducer producer = ProducerUtils.getProducer(getClass().getResource(testFileName).getPath()); + ResourceFactoryMapper mapper = new ExtractingResourceFactoryMapper(); + ExtractingResourceProducer extractor = new ExtractingResourceProducer(producer, mapper); + + Resource resource = extractor.getNext(); + + while (resource != null) { + JSONObject payloadMD = resource.getMetaData().getTopMetaData().getJSONObject("Envelope") + .getJSONObject("Payload-Metadata"); + + if 
(payloadMD.has(PAYLOAD_LENGTH)) { + assertTrue(payloadMD.getLong(PAYLOAD_LENGTH) != -1); + } + if (payloadMD.has(PAYLOAD_SLOP_BYTES)) { + assertEquals(4, payloadMD.getLong(PAYLOAD_SLOP_BYTES)); + } + + StreamCopy.readToEOF(resource.getInputStream()); + resource = extractor.getNext(); + } + } +} diff --git a/src/main/java/org/archive/io/arc/ARCConstants.java b/src/test/java/org/archive/uid/UUIDGeneratorTest.java similarity index 52% rename from src/main/java/org/archive/io/arc/ARCConstants.java rename to src/test/java/org/archive/uid/UUIDGeneratorTest.java index c44cfef7..66fbf7a8 100644 --- a/src/main/java/org/archive/io/arc/ARCConstants.java +++ b/src/test/java/org/archive/uid/UUIDGeneratorTest.java @@ -16,14 +16,32 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.archive.io.arc; +package org.archive.uid; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.HashMap; +import java.util.Map; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertNotSame; /** - * Constants used by ARC files and in ARC file processing. 
- * * @author stack - * @deprecated + * @version $Revision$ $Date$ */ -public interface ARCConstants extends org.archive.format.arc.ARCConstants { +public class UUIDGeneratorTest { + @Test + public void testQualifyRecordID() throws URISyntaxException { + RecordIDGenerator g = new UUIDGenerator(); + URI uri = g.getRecordID(); + Map qualifiers = new HashMap(); + qualifiers.put("a", "b"); + URI nuURI = g.qualifyRecordID(uri, qualifiers); + assertNotSame(uri, nuURI); + qualifiers.put("c", "d"); + nuURI = g.qualifyRecordID(nuURI, qualifiers); + assertNotSame(uri, nuURI); + } } diff --git a/src/test/java/org/archive/url/AggressiveIAURLCanonicalizerTest.java b/src/test/java/org/archive/url/AggressiveIAURLCanonicalizerTest.java index 711dbede..fff1ea1f 100644 --- a/src/test/java/org/archive/url/AggressiveIAURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/AggressiveIAURLCanonicalizerTest.java @@ -2,10 +2,13 @@ import java.net.URISyntaxException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class AggressiveIAURLCanonicalizerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class AggressiveIAURLCanonicalizerTest { static AggressiveIAURLCanonicalizer ia = new AggressiveIAURLCanonicalizer(); + @Test public void testCanonicalize() throws URISyntaxException { // FULL end-to-end tests: check("http://www.alexa.com/","http://alexa.com/"); @@ -26,6 +29,6 @@ private static void check(String orig, String want) throws URISyntaxException { HandyURL u2 = URLParser.parse(got); ia.canonicalize(u2); String got2 = u2.getURLString(); - assertEquals("Second passs changed!",got,got2); + assertEquals(got,got2,"Second passs changed!"); } } diff --git a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java index c21bcbe8..1b4d7814 100644 --- a/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java +++ 
b/src/test/java/org/archive/url/BasicURLCanonicalizerTest.java @@ -1,14 +1,17 @@ package org.archive.url; import java.net.URISyntaxException; +import java.util.Locale; -import org.apache.commons.httpclient.URIException; +import org.junit.jupiter.api.Test; -import junit.framework.TestCase; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; -public class BasicURLCanonicalizerTest extends TestCase { +public class BasicURLCanonicalizerTest { BasicURLCanonicalizer guc = new BasicURLCanonicalizer(); - + + @Test public void testGetHex() { assertEquals(0,guc.getHex('0')); assertEquals(1,guc.getHex('1')); @@ -37,7 +40,8 @@ public void testGetHex() { assertEquals(-1,guc.getHex('q')); assertEquals(-1,guc.getHex(' ')); } - + + @Test public void testDecode() { assertEquals("A",guc.decode("A")); assertEquals("AA",guc.decode("AA")); @@ -131,7 +135,7 @@ public void testDecode() { assertEquals("\u2691%E2%9A!\u2691%E2%9A", guc.decode("%E2%9A%91%E2%9A%21%E2%9A%91%E2%9A")); } - + @Test public void testUnescapeRepeatedly() { assertEquals("%!A!!%",guc.unescapeRepeatedly("%!A%21%21%25")); assertEquals("%",guc.unescapeRepeatedly("%")); @@ -143,11 +147,15 @@ public void testUnescapeRepeatedly() { assertEquals("%",guc.unescapeRepeatedly("%25%32%35")); assertEquals("168.188.99.26",guc.unescapeRepeatedly("%31%36%38%2e%31%38%38%2e%39%39%2e%32%36")); + + assertEquals("tag=%E4%EE%F8%EA%EE%EB%FC%ED%EE%E5", + guc.unescapeRepeatedly("tag=%E4%EE%F8%EA%EE%EB%FC%ED%EE%E5")); } - + + @Test public void testAttemptIPFormats() throws URIException { - assertEquals(null,guc.attemptIPFormats(null)); - assertEquals(null,guc.attemptIPFormats("www.foo.com")); + assertNull(guc.attemptIPFormats(null)); + assertNull(guc.attemptIPFormats("www.foo.com")); assertEquals("127.0.0.1",guc.attemptIPFormats("127.0.0.1")); assertEquals("15.0.0.1",guc.attemptIPFormats("017.0.0.1")); assertEquals("168.188.99.26",guc.attemptIPFormats("168.188.99.26")); @@ 
-187,21 +195,22 @@ In specifying the inet_addr() API, the POSIX standard [IEEE-1003.1] * For now, we'll enforce some strictness: */ - assertEquals(null,guc.attemptIPFormats("10.0.258")); - assertEquals(null,guc.attemptIPFormats("1.2.3.256")); + assertNull(guc.attemptIPFormats("10.0.258")); + assertNull(guc.attemptIPFormats("1.2.3.256")); } - + + @Test public void testFoo() { String path = "/a/b/c/"; String[] paths = path.split("/",-1); for(String p : paths) { - System.out.format("(%s)",p); + System.out.format(Locale.ROOT, "(%s)", p); } System.out.println(); paths = path.split("/"); for(String p : paths) { - System.out.format("(%s)",p); + System.out.format(Locale.ROOT, "(%s)", p); } System.out.println(); } @@ -209,6 +218,7 @@ public void testFoo() { /* * Tests copied from https://developers.google.com/safe-browsing/developers_guide_v2#Canonicalization */ + @Test public void testGoogleExamples() throws URISyntaxException { checkCanonicalization("http://host/%25%32%35", "http://host/%25"); checkCanonicalization("http://host/%25%32%35%25%32%35", "http://host/%25%25"); @@ -246,19 +256,22 @@ public void testGoogleExamples() throws URISyntaxException { checkCanonicalization("http://host.com/ab%23cd", "http://host.com/ab%23cd"); checkCanonicalization("http://host.com//twoslashes?more//slashes", "http://host.com/twoslashes?more//slashes"); } - + + @Test public void testStraySpacing() throws URISyntaxException { checkCanonicalization("http://example.org/\u2028", "http://example.org/"); checkCanonicalization("\nhttp://examp\rle.org/", "http://example.org/"); checkCanonicalization("\nhttp://examp\u2029\t\rle.org/ ", "http://example.org/"); } - + + @Test public void testSchemeCapitalsPreserved() throws URISyntaxException { checkCanonicalization("Http://example.com", "Http://example.com/"); checkCanonicalization("HTTP://example.com", "HTTP://example.com/"); checkCanonicalization("ftP://example.com", "ftP://example.com/"); } - + + @Test public void testUnicodeEscaping() throws 
URISyntaxException { checkCanonicalization("http://example.org/\u2691", "http://example.org/%E2%9A%91"); checkCanonicalization("http://example.org/%e2%9a%91", "http://example.org/%E2%9A%91"); @@ -274,6 +287,16 @@ public void testUnicodeEscaping() throws URISyntaxException { checkCanonicalization("http://example.org/%F0%9F%82%A1", "http://example.org/%F0%9F%82%A1"); } + @Test + public void testHostDots() throws URISyntaxException { + checkCanonicalization("https://foobar.org./", "https://foobar.org/"); + checkCanonicalization("https://.foobar.org/", "https://foobar.org/"); + checkCanonicalization("https://foo...bar.org/", "https://foo.bar.org/"); + checkCanonicalization("https://...foo...bar.org.../", "https://foo.bar.org/"); + checkCanonicalization("https://localhost/path/file.txt", "https://localhost/path/file.txt"); + checkCanonicalization("https://....../path/file.txt", "https:///path/file.txt"); + } + private void checkCanonicalization(String in, String want) throws URISyntaxException { HandyURL h = URLParser.parse(in); guc.canonicalize(h); diff --git a/src/test/java/org/archive/url/HandyURLTest.java b/src/test/java/org/archive/url/HandyURLTest.java index 28edff77..ad108db5 100644 --- a/src/test/java/org/archive/url/HandyURLTest.java +++ b/src/test/java/org/archive/url/HandyURLTest.java @@ -1,9 +1,12 @@ package org.archive.url; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class HandyURLTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class HandyURLTest { + + @Test public void testGetPublicSuffix() { HandyURL h = new HandyURL(); h.setHost("www.fool.com"); @@ -23,10 +26,4 @@ public void testGetPublicSuffix() { assertEquals("funky-images",h.getPublicPrefix()); } - - public void testGetPublicPrefix() { -// -// fail("Not yet implemented"); - } - } diff --git a/src/test/java/org/archive/url/IAURLCanonicalizerTest.java b/src/test/java/org/archive/url/IAURLCanonicalizerTest.java index 
3263edc7..aecddb3b 100644 --- a/src/test/java/org/archive/url/IAURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/IAURLCanonicalizerTest.java @@ -2,16 +2,20 @@ import java.net.URISyntaxException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class IAURLCanonicalizerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class IAURLCanonicalizerTest { + + @Test public void testFull() throws URISyntaxException { - IAURLCanonicalizer iaC = new IAURLCanonicalizer(new DefaultIACanonicalizerRules()); + IAURLCanonicalizer iaC = new IAURLCanonicalizer(new AggressiveIACanonicalizerRules()); compCan(iaC,"http://www.archive.org:80/","http://archive.org/"); compCan(iaC,"https://www.archive.org:80/","https://archive.org:80/"); compCan(iaC,"http://www.archive.org:443/","http://archive.org:443/"); compCan(iaC,"https://www.archive.org:443/","https://archive.org/"); + compCan(iaC,"http://www.archive.org:/","http://archive.org/"); compCan(iaC,"http://www.archive.org/big/","http://archive.org/big"); compCan(iaC,"dns:www.archive.org","dns:www.archive.org"); @@ -25,6 +29,7 @@ private void compCan(URLCanonicalizer c, String orig, String want) throws URISyn assertEquals(want,got); } + @Test public void testAlphaReorderQuery() { assertEquals(null,IAURLCanonicalizer.alphaReorderQuery(null)); assertEquals("",IAURLCanonicalizer.alphaReorderQuery("")); @@ -40,6 +45,7 @@ public void testAlphaReorderQuery() { assertEquals("a=a&a=b&b=a&b=b",IAURLCanonicalizer.alphaReorderQuery("b=b&a=b&b=a&a=a")); } + @Test public void testMassageHost() { assertEquals("foo.com",IAURLCanonicalizer.massageHost("foo.com")); assertEquals("foo.com",IAURLCanonicalizer.massageHost("www.foo.com")); @@ -48,10 +54,22 @@ public void testMassageHost() { assertEquals("www2foo.com",IAURLCanonicalizer.massageHost("www2.www2foo.com")); } + @Test public void testGetDefaultPort() { assertEquals(0,IAURLCanonicalizer.getDefaultPort("foo")); 
assertEquals(80,IAURLCanonicalizer.getDefaultPort("http")); assertEquals(443,IAURLCanonicalizer.getDefaultPort("https")); } + @Test + public void testStripSessionId() throws URISyntaxException { + IAURLCanonicalizer iaC = new IAURLCanonicalizer(new AggressiveIACanonicalizerRules()); + compCan(iaC, + "http://www.nsf.gov/statistics/sed/2009/SED_2009.zip?CFID=14387305&CFTOKEN=72942008&jsessionid=f030eacc7e49c4ca0b077922347418418766", + "http://nsf.gov/statistics/sed/2009/sed_2009.zip?jsessionid=f030eacc7e49c4ca0b077922347418418766"); + compCan(iaC, + "http://www.nsf.gov/statistics/sed/2009/SED_2009.zip?CFID=14387305&CFTOKEN=72942008", + "http://nsf.gov/statistics/sed/2009/sed_2009.zip"); + } + } diff --git a/src/test/java/org/archive/url/OrdinaryIAURLCanonicalizerTest.java b/src/test/java/org/archive/url/OrdinaryIAURLCanonicalizerTest.java index 3c131105..175491fd 100644 --- a/src/test/java/org/archive/url/OrdinaryIAURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/OrdinaryIAURLCanonicalizerTest.java @@ -2,11 +2,14 @@ import java.net.URISyntaxException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class OrdinaryIAURLCanonicalizerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class OrdinaryIAURLCanonicalizerTest { private OrdinaryIAURLCanonicalizer canon = new OrdinaryIAURLCanonicalizer(); - + + @Test public void testMisc() throws URISyntaxException { checkCanonicalization("http://...host..com..", "http://host.com/"); checkCanonicalization("http://example.org:80/", "http://example.org/"); @@ -17,6 +20,7 @@ public void testMisc() throws URISyntaxException { checkCanonicalization("http://example.org/foo/?", "http://example.org/foo/"); } + @Test public void testSchemeCapitals() throws URISyntaxException { checkCanonicalization("Http://example.com", "http://example.com/"); checkCanonicalization("HTTP://example.com", "http://example.com/"); diff --git 
a/src/test/java/org/archive/url/URLParserTest.java b/src/test/java/org/archive/url/URLParserTest.java index b060ffa7..c942a260 100644 --- a/src/test/java/org/archive/url/URLParserTest.java +++ b/src/test/java/org/archive/url/URLParserTest.java @@ -3,21 +3,26 @@ import java.io.UnsupportedEncodingException; import java.net.URISyntaxException; import java.net.URLDecoder; +import java.util.Locale; -import junit.framework.TestCase; +import com.google.common.net.InetAddresses; -import org.apache.commons.httpclient.URIException; +import org.junit.jupiter.api.Test; -import com.google.common.net.InetAddresses; +import static java.nio.charset.StandardCharsets.UTF_8; -public class URLParserTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class URLParserTest { + @Test public void testGuava() throws URIException, UnsupportedEncodingException { Long l = Long.parseLong("3279880203"); int i2 = l.intValue(); // int i = Integer.decode("3279880203"); - System.err.format("FromNum(%s)\n", InetAddresses.fromInteger(i2).getHostAddress()); + System.err.format(Locale.ROOT, "FromNum(%s)\n", InetAddresses.fromInteger(i2).getHostAddress()); } + @Test public void testAddDefaultSchemeIfNeeded() { assertEquals(null,URLParser.addDefaultSchemeIfNeeded(null)); assertEquals("http://",URLParser.addDefaultSchemeIfNeeded("")); @@ -27,9 +32,9 @@ public void testAddDefaultSchemeIfNeeded() { assertEquals("http://www.fool.com/",URLParser.addDefaultSchemeIfNeeded("www.fool.com/")); } - + @Test public void testParse() throws UnsupportedEncodingException, URISyntaxException { - System.out.format("O(%s) E(%s)\n","%66",URLDecoder.decode("%66","UTF-8")); + System.out.format(Locale.ROOT, "O(%s) E(%s)\n","%66", URLDecoder.decode("%66", UTF_8.name())); checkParse("http://www.archive.org/index.html#foo", null, "http", null, null, "www.archive.org", -1, "/index.html", null, "foo", "http://www.archive.org/index.html#foo", "/index.html"); @@ -86,13 +91,16 @@ public void 
testParse() throws UnsupportedEncodingException, URISyntaxException checkParse(" \n http://:****@www.archive.org:8080/inde\rx.html?query#foo \r\n \t ", null, "http", "", "****", "www.archive.org", 8080, "/index.html", "query", "foo", "http://:****@www.archive.org:8080/index.html?query#foo", "/index.html?query"); + checkParse("https://[2600:1f18:200d:fb00:2b74:867c:ab0c:150a]/robots.txt", null, "https", null, null, + "[2600:1f18:200d:fb00:2b74:867c:ab0c:150a]", -1, "/robots.txt", null, null, + "https://[2600:1f18:200d:fb00:2b74:867c:ab0c:150a]/robots.txt", "/robots.txt"); } private void checkParse(String s, String opaque, String scheme, String authUser, String authPass, String host, int port, String path, String query, String fragment, String urlString, String pathQuery) throws URISyntaxException { HandyURL h = URLParser.parse(s); - System.out.format("Input:(%s)\nHandyURL\t%s\n",s,h.toDebugString()); + System.out.format(Locale.ROOT, "Input:(%s)\nHandyURL\t%s\n", s, h.toDebugString()); assertEquals(scheme, h.getScheme()); assertEquals(authUser, h.getAuthUser()); assertEquals(authPass, h.getAuthPass()); diff --git a/src/test/java/org/archive/url/URLRegexTransformerTest.java b/src/test/java/org/archive/url/URLRegexTransformerTest.java index 71979d06..d5c98f6a 100644 --- a/src/test/java/org/archive/url/URLRegexTransformerTest.java +++ b/src/test/java/org/archive/url/URLRegexTransformerTest.java @@ -1,12 +1,15 @@ package org.archive.url; -import org.apache.commons.httpclient.URIException; +import org.junit.jupiter.api.Test; -import junit.framework.TestCase; +import static org.junit.jupiter.api.Assertions.assertEquals; -public class URLRegexTransformerTest extends TestCase { +import java.util.Locale; +public class URLRegexTransformerTest { + + @Test public void testStripPathSessionID() { // strip jsessionid // String sid1 = "jsessionid=0123456789abcdefghijklemopqrstuv"; @@ -48,115 +51,117 @@ public void testStripPathSessionID() { private static void 
checkStripPathSessionID(String orig, String want) { String got = URLRegexTransformer.stripPathSessionID(orig); - assertTrue(String.format("FAIL Orig(%s) Got(%s) Want(%s)",orig,got,want),want.equals(got)); + assertEquals(want, got, String.format(Locale.ROOT, "FAIL Orig(%s) Got(%s) Want(%s)", orig, got, want)); } // private static final String BASE = "http://www.archive.org/index.html"; private static final String BASE = ""; + @Test public void testStripQuerySessionID() throws URIException { String str32id = "0123456789abcdefghijklemopqrstuv"; String url = BASE + "?jsessionid=" + str32id; String expectedResult = BASE + "?"; String result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test that we don't strip if not 32 chars only. url = BASE + "?jsessionid=" + str32id + '0'; expectedResult = url; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test what happens when followed by another key/value pair. url = BASE + "?jsessionid=" + str32id + "&x=y"; expectedResult = BASE + "?x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed (" + result + ")", expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed (" + result + ")"); // Test what happens when followed by another key/value pair and // prefixed by a key/value pair. url = BASE + "?one=two&jsessionid=" + str32id + "&x=y"; expectedResult = BASE + "?one=two&x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test what happens when prefixed by a key/value pair. 
url = BASE + "?one=two&jsessionid=" + str32id; expectedResult = BASE + "?one=two&"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test aspsession. url = BASE + "?aspsessionidABCDEFGH=" + "ABCDEFGHIJKLMNOPQRSTUVWX" + "&x=y"; expectedResult = BASE + "?x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test archive phpsession. url = BASE + "?phpsessid=" + str32id + "&x=y"; expectedResult = BASE + "?x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // With prefix too. url = BASE + "?one=two&phpsessid=" + str32id + "&x=y"; expectedResult = BASE + "?one=two&x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // With only prefix url = BASE + "?one=two&phpsessid=" + str32id; expectedResult = BASE + "?one=two&"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Test sid. url = BASE + "?" + "sid=9682993c8daa2c5497996114facdc805" + "&x=y"; expectedResult = BASE + "?x=y"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); // Igor test. url = BASE + "?" 
+ "sid=9682993c8daa2c5497996114facdc805" + "&" + "jsessionid=" + str32id; expectedResult = BASE + "?"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); url = "?CFID=1169580&CFTOKEN=48630702&dtstamp=22%2F08%2F2006%7C06%3A58%3A11"; expectedResult = "?dtstamp=22%2F08%2F2006%7C06%3A58%3A11"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); url = "?CFID=12412453&CFTOKEN=15501799&dt=19_08_2006_22_39_28"; expectedResult = "?dt=19_08_2006_22_39_28"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); url = "?CFID=14475712&CFTOKEN=2D89F5AF-3048-2957-DA4EE4B6B13661AB&r=468710288378&m=forgotten"; expectedResult = "?r=468710288378&m=forgotten"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); url = "?CFID=16603925&CFTOKEN=2AE13EEE-3048-85B0-56CEDAAB0ACA44B8"; expectedResult = "?"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); url = "?CFID=4308017&CFTOKEN=63914124&requestID=200608200458360%2E39414378"; expectedResult = "?requestID=200608200458360%2E39414378"; result = URLRegexTransformer.stripQuerySessionID(url); - assertTrue("Failed " + result, expectedResult.equals(result)); + assertEquals(expectedResult, result, "Failed " + result); } - + + @Test public void testSURT() { assertEquals("org,archive,www",URLRegexTransformer.hostToSURT("www.archive.org")); } diff --git a/src/test/java/org/archive/url/UsableURIFactoryTest.java 
b/src/test/java/org/archive/url/UsableURIFactoryTest.java index af190957..8daebe12 100644 --- a/src/test/java/org/archive/url/UsableURIFactoryTest.java +++ b/src/test/java/org/archive/url/UsableURIFactoryTest.java @@ -19,15 +19,12 @@ package org.archive.url; -import java.util.Iterator; import java.util.TreeMap; -import junit.framework.TestCase; +import org.apache.commons.lang3.SerializationUtils; +import org.junit.jupiter.api.Test; -import org.apache.commons.httpclient.URIException; -import org.apache.commons.lang.SerializationUtils; -import org.archive.url.UsableURI; -import org.archive.url.UsableURIFactory; +import static org.junit.jupiter.api.Assertions.*; /** * Test UURIFactory for proper UURI creation across variety of @@ -37,8 +34,9 @@ * * @author igor stack gojomo */ -public class UsableURIFactoryTest extends TestCase { - +public class UsableURIFactoryTest { + + @Test public final void testEscaping() throws URIException { // Note: single quote is not being escaped by URI class. final String ESCAPED_URISTR = "http://archive.org/" + @@ -64,44 +62,46 @@ public final void testEscaping() throws URIException { UsableURI uuri = UsableURIFactory.getInstance(URISTR); final String uuriStr = uuri.toString(); - assertEquals("expected escaping", ESCAPED_URISTR, uuriStr); + assertEquals(ESCAPED_URISTR, uuriStr, "expected escaping"); } + @Test public final void testUnderscoreMakesPortParseFail() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://one-two_three:8080/index.html"); int port = uuri.getPort(); - assertTrue("Failed find of port " + uuri, port == 8080); + assertEquals(8080, port, "Failed find of port " + uuri); } - + + @Test public final void testRelativeURIWithTwoSlashes() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.archive.org"); UsableURI uuri = UsableURIFactory.getInstance(base, "one//index.html"); - assertTrue("Doesn't do right thing with two slashes " + uuri, - uuri.toString().equals( - 
"http://www.archive.org/one//index.html")); + assertEquals("http://www.archive.org/one//index.html", uuri.toString(), + "Doesn't do right thing with two slashes " + uuri); } - + + @Test public final void testSchemelessURI() throws URIException { UsableURI base = UsableURIFactory.getInstance("https://www.archive.org"); UsableURI uuri = UsableURIFactory.getInstance(base, "//example.com/monkey?this:uri:has:colons"); - assertTrue("Doesn't do right thing with a schemeless URI " + uuri, - uuri.toString().equals( - "https://example.com/monkey?this:uri:has:colons")); + assertEquals("https://example.com/monkey?this:uri:has:colons", uuri.toString(), + "Doesn't do right thing with a schemeless URI " + uuri); } - + + @Test public final void testTrailingEncodedSpace() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://www.nps-shoes.co.uk%20"); - assertTrue("Doesn't strip trailing encoded space 1 " + uuri, - uuri.toString().equals("http://www.nps-shoes.co.uk/")); + assertEquals("http://www.nps-shoes.co.uk/", uuri.toString(), + "Doesn't strip trailing encoded space 1 " + uuri); uuri = UsableURIFactory.getInstance("http://www.nps-shoes.co.uk%20%20%20"); - assertTrue("Doesn't strip trailing encoded space 2 " + uuri, - uuri.toString().equals("http://www.nps-shoes.co.uk/")); + assertEquals("http://www.nps-shoes.co.uk/", uuri.toString(), + "Doesn't strip trailing encoded space 2 " + uuri); } - + + @Test public final void testPort0080is80() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://archive.org:0080"); - assertTrue("Doesn't strip leading zeros " + uuri, - uuri.toString().equals("http://archive.org/")); + assertEquals("http://archive.org/", uuri.toString(), "Doesn't strip leading zeros " + uuri); } // DISABLING TEST AS PRECURSOR TO ELIMINATION @@ -127,13 +127,15 @@ public final void testPort0080is80() throws URIException { // } // assertNotNull("Didn't get expected exception.", message); // } - + + @Test public final void 
testEscapeEncoding() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://www.y1y1.com/" + "albums/userpics/11111/normal_%E3%E4%EC%EC%EC.jpg", "windows-1256"); uuri.getPath(); } - + + @Test public final void testTooLongAfterEscaping() { StringBuffer buffer = new StringBuffer("http://www.archive.org/a/"); // Append bunch of spaces. When escaped, they'll triple in size. @@ -147,36 +149,36 @@ public final void testTooLongAfterEscaping() { } catch (URIException e) { message = e.getMessage(); } - assertTrue("Wrong or no exception: " + message, (message != null) && - message.startsWith("Created (escaped) uuri >")); + assertTrue((message != null) && message.startsWith("Created (escaped) uuri >"), + "Wrong or no exception: " + message); } - + + @Test public final void testFtpUris() throws URIException { final String FTP = "ftp"; final String AUTHORITY = "pfbuser:pfbuser@mprsrv.agri.gov.cn"; final String PATH = "/clzreceive/"; final String uri = FTP + "://" + AUTHORITY + PATH; UsableURI uuri = UsableURIFactory.getInstance(uri); - assertTrue("Failed to get matching scheme: " + uuri.getScheme(), - (uuri.getScheme()).equals(FTP)); - assertTrue("Failed to get matching authority: " + - uuri.getAuthority(), (uuri.getAuthority()).equals(AUTHORITY)); - assertTrue("Failed to get matching path: " + - uuri.getPath(), (uuri.getPath()).equals(PATH)); + assertEquals(FTP, (uuri.getScheme()), "Failed to get matching scheme: " + uuri.getScheme()); + assertEquals(AUTHORITY, (uuri.getAuthority()), "Failed to get matching authority: " + + uuri.getAuthority()); + assertEquals(PATH, (uuri.getPath()), "Failed to get matching path: " + + uuri.getPath()); } - + + @Test public final void testWhitespaceEscaped() throws URIException { // Test that we get all whitespace even if the uri is // already escaped. 
String uri = "http://archive.org/index%25 .html"; String tgtUri = "http://archive.org/index%25%20.html"; UsableURI uuri = UsableURIFactory.getInstance(uri); - assertTrue("Not equal " + uuri.toString(), - uuri.toString().equals(tgtUri)); + assertEquals(tgtUri, uuri.toString(), "Not equal " + uuri); uri = "http://archive.org/index%25\u001D.html"; - tgtUri = "http://archive.org/index%25%1D.html".toLowerCase(); + tgtUri = "http://archive.org/index%25%1D.html"; uuri = UsableURIFactory.getInstance(uri); - assertEquals("whitespace escaping", tgtUri, uuri.toString()); + assertEquals(tgtUri, uuri.toString(), "whitespace escaping"); uri = "http://gemini.info.usaid.gov/directory/" + "pbResults.cfm?&urlNameLast=Rumplestiltskin"; tgtUri = "http://gemini.info.usaid.gov/directory/faxResults.cfm?" + @@ -184,7 +186,13 @@ public final void testWhitespaceEscaped() throws URIException { uuri = UsableURIFactory.getInstance(UsableURIFactory.getInstance(uri), "faxResults.cfm?name=Ebenezer +Rumplestiltskin,&location=" + "RRB%20%20%20%205%2E08%2D006"); - assertEquals("whitespace escaping", tgtUri, uuri.toString()); + assertEquals(tgtUri, uuri.toString(), "whitespace escaping"); + + // https://webarchive.jira.com/browse/HER-2089 + uri = "http://archive.org/index%25\u3000.html"; + tgtUri = "http://archive.org/index%25%E3%80%80.html"; + uuri = UsableURIFactory.getInstance(uri); + assertEquals(tgtUri, uuri.toString(), "U+3000 ideographic space escaping"); } // public final void testFailedGetPath() throws URIException { @@ -197,44 +205,48 @@ public final void testWhitespaceEscaped() throws URIException { // String foundPath = uuri.getPath(); // assertEquals("unexpected path", path, foundPath); // } - + + @Test public final void testDnsHost() throws URIException { String uri = "dns://ads.nandomedia.com:81/one.html"; UsableURI uuri = UsableURIFactory.getInstance(uri); String host = uuri.getReferencedHost(); - assertTrue("Host is wrong " + host, host.equals("ads.nandomedia.com")); + 
assertEquals("ads.nandomedia.com", host, "Host is wrong " + host); uri = "dns:ads.nandomedia.com"; uuri = UsableURIFactory.getInstance(uri); host = uuri.getReferencedHost(); - assertTrue("Host is wrong " + host, host.equals("ads.nandomedia.com")); + assertEquals("ads.nandomedia.com", host, "Host is wrong " + host); uri = "dns:ads.nandomedia.com?a=b"; uuri = UsableURIFactory.getInstance(uri); host = uuri.getReferencedHost(); - assertTrue("Host is wrong " + host, host.equals("ads.nandomedia.com")); + assertEquals("ads.nandomedia.com", host, "Host is wrong " + host); } - + + @Test public final void testPercentEscaping() throws URIException { final String uri = "http://archive.org/%a%%%%%.html"; // tests indicate firefox (1.0.6) does not encode '%' at all final String tgtUri = "http://archive.org/%a%%%%%.html"; UsableURI uuri = UsableURIFactory.getInstance(uri); - assertEquals("Not equal",tgtUri, uuri.toString()); + assertEquals(tgtUri,uuri.toString(), "Not equal"); } - + + @Test public final void testRelativeDblPathSlashes() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.archive.org/index.html"); UsableURI uuri = UsableURIFactory.getInstance(base, "JIGOU//KYC//INDEX.HTM"); - assertTrue("Double slash not working " + uuri.toString(), - uuri.getPath().equals("/JIGOU//KYC//INDEX.HTM")); + assertEquals("/JIGOU//KYC//INDEX.HTM", uuri.getPath(), "Double slash not working " + uuri); } - + + @Test public final void testRelativeWithScheme() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.example.com/some/page"); UsableURI uuri = UsableURIFactory.getInstance(base, "http:boo"); - assertTrue("Relative with scheme not working " + uuri.toString(), - uuri.toString().equals("http://www.example.com/some/boo")); + assertEquals("http://www.example.com/some/boo", uuri.toString(), + "Relative with scheme not working " + uuri); } - + + @Test public final void testBadBaseResolve() throws URIException { UsableURI base = 
UsableURIFactory.getInstance("http://license.joins.com/board/" + "etc_board_list.asp?board_name=new_main&b_type=&nPage=" + @@ -242,29 +254,29 @@ public final void testBadBaseResolve() throws URIException { "notice&gate=02"); UsableURIFactory.getInstance(base, "http://www.changeup.com/...[ 1010966 ] crawl.log has URIs with spaces in them. - * @throws URIException */ + @Test public final void testSpaceDoubleEncoding() throws URIException { final String uri = "http://www.brook.edu/i.html? %20taxonomy=Politics"; final String encodedUri = "http://www.brook.edu/i.html?%20%20taxonomy=Politics"; UsableURI uuri = UsableURIFactory.getInstance(uri, "ISO-8859-1"); - assertTrue("Not equal " + uuri.toString(), - uuri.toString().equals(encodedUri)); + assertEquals(encodedUri, uuri.toString(), "Not equal " + uuri.toString()); } /** * Test for doubly-encoded sequences. * See [ 966219 ] UURI doubly-encodes %XX sequences. - * @throws URIException */ + @Test public final void testDoubleEncoding() throws URIException { final char ae = '\u00E6'; final String uri = "http://archive.org/DIR WITH SPACES/home" + @@ -310,20 +321,20 @@ public final void testDoubleEncoding() throws URIException { final String encodedUri = "http://archive.org/DIR%20WITH%20SPACES/home%E6.html"; UsableURI uuri = UsableURIFactory.getInstance(uri, "ISO-8859-1"); - assertEquals("single encoding", encodedUri, uuri.toString()); + assertEquals(encodedUri, uuri.toString(), "single encoding"); // Dbl-encodes. uuri = UsableURIFactory.getInstance(uuri.toString(), "ISO-8859-1"); uuri = UsableURIFactory.getInstance(uuri.toString(), "ISO-8859-1"); - assertEquals("double encoding", encodedUri, uuri.toString()); + assertEquals(encodedUri, uuri.toString(), "double encoding"); // Do default utf-8 test. 
uuri = UsableURIFactory.getInstance(uri); final String encodedUtf8Uri = "http://archive.org/DIR%20WITH%20SPACES/home%C3%A6.html"; - assertEquals("Not equal utf8", encodedUtf8Uri, uuri.toString()); + assertEquals(encodedUtf8Uri, uuri.toString(), "Not equal utf8"); // Now dbl-encode. uuri = UsableURIFactory.getInstance(uuri.toString()); uuri = UsableURIFactory.getInstance(uuri.toString()); - assertEquals("Not equal (dbl-encoding) utf8", encodedUtf8Uri, uuri.toString()); + assertEquals(encodedUtf8Uri, uuri.toString(), "Not equal (dbl-encoding) utf8"); } /** @@ -331,26 +342,25 @@ public final void testDoubleEncoding() throws URIException { * @see [ 788219 ] URI Syntax Errors stop page parsing * @throws URIException */ + @Test public final void testThreeSlashes() throws URIException { UsableURI goodURI = UsableURIFactory. getInstance("http://lcweb.loc.gov/rr/goodtwo.html"); String uuri = "http:///lcweb.loc.gov/rr/goodtwo.html"; UsableURI rewrittenURI = UsableURIFactory.getInstance(uuri); - assertTrue("Not equal " + goodURI + ", " + uuri, - goodURI.toString().equals(rewrittenURI.toString())); + assertEquals(goodURI.toString(), rewrittenURI.toString(), "Not equal " + goodURI + ", " + uuri); uuri = "http:////lcweb.loc.gov/rr/goodtwo.html"; rewrittenURI = UsableURIFactory.getInstance(uuri); - assertTrue("Not equal " + goodURI + ", " + uuri, - goodURI.toString().equals(rewrittenURI.toString())); + assertEquals(goodURI.toString(), rewrittenURI.toString(), "Not equal " + goodURI + ", " + uuri); // Check https. goodURI = UsableURIFactory. 
getInstance("https://lcweb.loc.gov/rr/goodtwo.html"); uuri = "https:////lcweb.loc.gov/rr/goodtwo.html"; rewrittenURI = UsableURIFactory.getInstance(uuri); - assertTrue("Not equal " + goodURI + ", " + uuri, - goodURI.toString().equals(rewrittenURI.toString())); + assertEquals(goodURI.toString(), rewrittenURI.toString(), "Not equal " + goodURI + ", " + uuri); } - + + @Test public final void testNoScheme() { boolean expectedException = false; String uuri = "www.loc.gov/rr/european/egw/polishex.html"; @@ -360,10 +370,10 @@ public final void testNoScheme() { // Expected exception. expectedException = true; } - assertTrue("Didn't get expected exception: " + uuri, - expectedException); + assertTrue(expectedException, "Didn't get expected exception: " + uuri); } - + + @Test public final void testRelative() throws URIException { UsableURI uuriTgt = UsableURIFactory. getInstance("http://archive.org:83/home.html"); @@ -371,26 +381,25 @@ public final void testRelative() throws URIException { getInstance("http://archive.org:83/one/two/three.html"); UsableURI uuri = UsableURIFactory. 
getInstance(uri, "/home.html"); - assertTrue("Not equal", - uuriTgt.toString().equals(uuri.toString())); + assertEquals(uuriTgt.toString(), uuri.toString(), "Not equal"); } - + + @Test public void testSchemelessRelative() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.itsnicethat.com/articles/laura-hobson"); UsableURI test1 = UsableURIFactory.getInstance(base, "//www.facebook.com/plugins/like.php"); - assertEquals("schemaless relative 1", "http://www.facebook.com/plugins/like.php", test1.toString()); + assertEquals("http://www.facebook.com/plugins/like.php", test1.toString(), "schemaless relative 1"); // reported by Erin Staniland UsableURI test2 = UsableURIFactory.getInstance(base, "//www.facebook.com/plugins/like.php?href=http://www.itsnicethat.com/articles/laura-hobson"); - assertEquals("schemeless relative 2", "http://www.facebook.com/plugins/like.php?href=http://www.itsnicethat.com/articles/laura-hobson", - test2.toString()); + assertEquals("http://www.facebook.com/plugins/like.php?href=http://www.itsnicethat.com/articles/laura-hobson", test2.toString(), + "schemeless relative 2"); } /** * Test that an empty uuri does the right thing -- that we get back the * base. - * - * @throws URIException */ + @Test public final void testRelativeEmpty() throws URIException { UsableURI uuriTgt = UsableURIFactory. getInstance("http://archive.org:83/one/two/three.html"); @@ -398,10 +407,10 @@ public final void testRelativeEmpty() throws URIException { getInstance("http://archive.org:83/one/two/three.html"); UsableURI uuri = UsableURIFactory. getInstance(uri, ""); - assertTrue("Empty length don't work", - uuriTgt.toString().equals(uuri.toString())); + assertEquals(uuriTgt.toString(), uuri.toString(), "Empty length don't work"); } - + + @Test public final void testAbsolute() throws URIException { UsableURI uuriTgt = UsableURIFactory. 
getInstance("http://archive.org:83/home.html"); @@ -409,14 +418,14 @@ public final void testAbsolute() throws URIException { getInstance("http://archive.org:83/one/two/three.html"); UsableURI uuri = UsableURIFactory. getInstance(uri, "http://archive.org:83/home.html"); - assertTrue("Not equal", - uuriTgt.toString().equals(uuri.toString())); + assertEquals(uuriTgt.toString(), uuri.toString(), "Not equal"); } /** * Test for [ 962892 ] UURI accepting/creating unUsable URIs (bad hosts). * @see [ 962892 ] UURI accepting/creating unUsable URIs (bad hosts) */ + @Test public final void testHostWithLessThan() { checkExceptionOnIllegalDomainlabel("http://www.betamobile.com[ 1012520 ] UURI.length() > 2k */ + @Test public final void test2kURI() throws URIException { final StringBuffer buffer = new StringBuffer("http://a.b"); final String subPath = "/123456789"; @@ -445,8 +454,7 @@ public final void test2kURI() throws URIException { } catch (URIException e) { gotException = true; } - assertTrue("No expected exception complaining about long URI", - gotException); + assertTrue(gotException, "No expected exception complaining about long URI"); } private void checkExceptionOnIllegalDomainlabel(String uuri) { @@ -457,23 +465,21 @@ private void checkExceptionOnIllegalDomainlabel(String uuri) { // Expected exception. expectedException = true; } - assertTrue("Didn't get expected exception: " + uuri, - expectedException); + assertTrue(expectedException, "Didn't get expected exception: " + uuri); } /** * Test for doing separate DNS lookup for same host * * @see [ 788277 ] Doing separate DNS lookup for same host - * @throws URIException */ + @Test public final void testHostWithPeriod() throws URIException { UsableURI uuri1 = UsableURIFactory. getInstance("http://www.loc.gov./index.html"); UsableURI uuri2 = UsableURIFactory. 
getInstance("http://www.loc.gov/index.html"); - assertEquals("Failed equating hosts with dot", - uuri1.getHost(), uuri2.getHost()); + assertEquals(uuri1.getHost(), uuri2.getHost(), "Failed equating hosts with dot"); } /** @@ -482,12 +488,12 @@ public final void testHostWithPeriod() throws URIException { * @see [ 874220 ] NPE in java.net.URI.encode * @throws URIException */ + @Test public final void testHostEncodedChars() throws URIException { String s = "http://g.msn.co.kr/0nwkokr0/00/19??" + "PS=10274&NC=10009&CE=42&CP=949&HL=" + "���?��"; - assertNotNull("Encoded chars " + s, - UsableURIFactory.getInstance(s)); + assertNotNull(UsableURIFactory.getInstance(s), "Encoded chars " + s); } /** @@ -495,6 +501,7 @@ public final void testHostEncodedChars() throws URIException { * * See [ 927940 ] java.net.URI parses %20 but getHost null */ + @Test public final void testSpaceInHost() { boolean expectedException = false; try { @@ -504,7 +511,7 @@ public final void testSpaceInHost() { } catch (URIException e) { expectedException = true; } - assertTrue("Did not fail with escaped space.", expectedException); + assertTrue(expectedException, "Did not fail with escaped space."); expectedException = false; try { @@ -514,26 +521,27 @@ public final void testSpaceInHost() { } catch (URIException e) { expectedException = true; } - assertTrue("Did not fail with real space.", expectedException); + assertTrue(expectedException, "Did not fail with real space."); } /** * Test for java.net.URI chokes on hosts_with_underscores. 
* * @see [ 808270 ] java.net.URI chokes on hosts_with_underscores - * @throws URIException - */ + */ + @Test public final void testHostWithUnderscores() throws URIException { UsableURI uuri = UsableURIFactory.getInstance( "http://x_underscore_underscore.2u.com.tw/nonexistent_page.html"); - assertEquals("Failed get of host with underscore", - "x_underscore_underscore.2u.com.tw", uuri.getHost()); + assertEquals("x_underscore_underscore.2u.com.tw", + uuri.getHost(), "Failed get of host with underscore"); } /** * Two dots for igor. */ + @Test public final void testTwoDots() { boolean expectedException = false; try { @@ -542,20 +550,19 @@ public final void testTwoDots() { } catch (URIException e) { expectedException = true; } - assertTrue("Two dots did not throw exception", expectedException); + assertTrue(expectedException, "Two dots did not throw exception"); } /** * Test for java.net.URI#getHost fails when leading digit. * * @see [ 910120 ] java.net.URI#getHost fails when leading digit. - * @throws URIException */ + @Test public final void testHostWithDigit() throws URIException { UsableURI uuri = UsableURIFactory. getInstance("http://0204chat.2u.com.tw/nonexistent_page.html"); - assertEquals("Failed get of host with digit", - "0204chat.2u.com.tw", uuri.getHost()); + assertEquals("0204chat.2u.com.tw", uuri.getHost(), "Failed get of host with digit"); } /** @@ -563,6 +570,7 @@ public final void testHostWithDigit() throws URIException { * * @see [ 949548 ] Constraining java URI class */ + @Test public final void testPort() { checkBadPort("http://www.tyopaikat.com:a/robots.txt"); checkBadPort("http://158.144.21.3:80808/robots.txt"); @@ -585,19 +593,18 @@ private void checkBadPort(String uri) { catch (URIException e) { exception = true; } - assertTrue("Didn't throw exception: " + uri, exception); + assertTrue(exception, "Didn't throw exception: " + uri); } /** * Preserve userinfo capitalization. 
- * @throws URIException */ + @Test public final void testUserinfo() throws URIException { final String authority = "stack:StAcK@www.tyopaikat.com"; final String uri = "http://" + authority + "/robots.txt"; UsableURI uuri = UsableURIFactory.getInstance(uri); - assertEquals("Authority not equal", uuri.getAuthority(), - authority); + assertEquals(authority, uuri.getAuthority(), "Authority not equal"); /* String tmp = uuri.toString(); assertTrue("URI not equal", tmp.equals(uri)); @@ -606,8 +613,8 @@ public final void testUserinfo() throws URIException { /** * Test user info + port - * @throws URIException */ + @Test public final void testUserinfoPlusPort() throws URIException { final String userInfo = "stack:StAcK"; final String authority = "www.tyopaikat.com"; @@ -615,14 +622,13 @@ public final void testUserinfoPlusPort() throws URIException { final String uri = "http://" + userInfo + "@" + authority + ":" + port + "/robots.txt"; UsableURI uuri = UsableURIFactory.getInstance(uri); - assertEquals("Host not equal", authority,uuri.getHost()); - assertEquals("Userinfo Not equal",userInfo,uuri.getUserinfo()); - assertEquals("Port not equal",port,uuri.getPort()); - assertEquals("Authority wrong","stack:StAcK@www.tyopaikat.com:8080", - uuri.getAuthority()); - assertEquals("AuthorityMinusUserinfo wrong","www.tyopaikat.com:8080", - uuri.getAuthorityMinusUserinfo()); - + assertEquals(authority, uuri.getHost(),"Host not equal"); + assertEquals(userInfo,uuri.getUserinfo(),"Userinfo Not equal"); + assertEquals(port,uuri.getPort(),"Port not equal"); + assertEquals("stack:StAcK@www.tyopaikat.com:8080",uuri.getAuthority(), + "Authority wrong"); + assertEquals("www.tyopaikat.com:8080",uuri.getAuthorityMinusUserinfo(), + "AuthorityMinusUserinfo wrong"); } public final void testRFC3986RelativeChange() throws URIException { @@ -658,9 +664,8 @@ public final void testRFC3986RelativeChange() throws URIException { * "../../" = "http://a/" * "../../g" = "http://a/g" * - * - * @throws 
URIException */ + @Test public final void testRFC3986Relative() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://a/b/c/d;p?q"); tryRelative(base, "g:h", "g:h"); @@ -691,9 +696,8 @@ public final void testRFC3986Relative() throws URIException { protected void tryRelative(UsableURI base, String relative, String expected) throws URIException { UsableURI uuri = UsableURIFactory.getInstance(base, relative); - assertEquals("Derelativized " + relative + " gave " - + uuri + " not " + expected, - UsableURIFactory.getInstance(expected),uuri); + assertEquals(UsableURIFactory.getInstance(expected), uuri,"Derelativized " + relative + " gave " + + uuri + " not " + expected); } /** @@ -724,9 +728,8 @@ protected void tryRelative(UsableURI base, String relative, String expected) * ../../ = http://a/ * ../../g = http://a/g * - * - * @throws URIException */ + @Test public final void testRFC2396Relative() throws URIException { UsableURI base = UsableURIFactory. getInstance("http://a/b/c/d;p?q"); @@ -758,13 +761,11 @@ public final void testRFC2396Relative() throws URIException { m.put("/../../../../../../../../g", "http://a/g"); m.put("../../../../../../../../g", "http://a/g"); m.put("../G", "http://a/b/G"); - for (Iterator i = m.keySet().iterator(); i.hasNext();) { - String key = (String)i.next(); - String value = (String)m.get(key); - UsableURI uuri = UsableURIFactory.getInstance(base, key); - assertTrue("Unexpected " + key + " " + value + " " + uuri, - uuri.equals(UsableURIFactory.getInstance(value))); - } + for (String key : m.keySet()) { + String value = m.get(key); + UsableURI uuri = UsableURIFactory.getInstance(base, key); + assertEquals(uuri, UsableURIFactory.getInstance(value), "Unexpected " + key + " " + value + " " + uuri); + } } /** @@ -772,14 +773,13 @@ public final void testRFC2396Relative() throws URIException { * unused and irrelevant for network fetches. 
* * See [ 970666 ] #anchor links not trimmed, and thus recrawled - * - * @throws URIException */ + @Test public final void testAnchors() throws URIException { UsableURI uuri = UsableURIFactory. getInstance("http://www.example.com/path?query#anchor"); - assertEquals("Not equal", "http://www.example.com/path?query", - uuri.toString()); + assertEquals("http://www.example.com/path?query", uuri.toString(), + "Not equal"); } @@ -787,50 +787,47 @@ public final void testAnchors() throws URIException { * Ensure that URI strings beginning with a colon are treated * the same as browsers do (as relative, rather than as absolute * with zero-length scheme). - * - * @throws URIException */ + @Test public void testStartsWithColon() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.example.com/path/page"); UsableURI uuri = UsableURIFactory.getInstance(base,":foo"); - assertEquals("derelativize starsWithColon", + assertEquals("http://www.example.com/path/:foo", uuri.getURI(), - "http://www.example.com/path/:foo"); + "derelativize starsWithColon"); } /** * Ensure that relative URIs with colons in late positions * aren't mistakenly interpreted as absolute URIs with long, * illegal schemes. 
- * - * @throws URIException */ + @Test public void testLateColon() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.example.com/path/page"); UsableURI uuri1 = UsableURIFactory.getInstance(base,"example.html;jsessionid=deadbeef:deadbeed?parameter=this:value"); - assertEquals("derelativize lateColon", + assertEquals("http://www.example.com/path/example.html;jsessionid=deadbeef:deadbeed?parameter=this:value", uuri1.getURI(), - "http://www.example.com/path/example.html;jsessionid=deadbeef:deadbeed?parameter=this:value"); + "derelativize lateColon"); UsableURI uuri2 = UsableURIFactory.getInstance(base,"example.html?parameter=this:value"); - assertEquals("derelativize lateColon", + assertEquals("http://www.example.com/path/example.html?parameter=this:value", uuri2.getURI(), - "http://www.example.com/path/example.html?parameter=this:value"); + "derelativize lateColon"); } /** * Ensure that stray trailing '%' characters do not prevent * UURI instances from being created, and are reasonably * escaped when encountered. - * - * @throws URIException */ + @Test public void testTrailingPercents() throws URIException { String plainPath = "http://www.example.com/path%"; UsableURI plainPathUuri = UsableURIFactory.getInstance(plainPath); - assertEquals("plainPath getURI", plainPath, plainPathUuri.getURI()); - assertEquals("plainPath getEscapedURI", - "http://www.example.com/path%", // browsers don't escape '%' - plainPathUuri.getEscapedURI()); + assertEquals(plainPath, plainPathUuri.getURI(), "plainPath getURI"); + assertEquals("http://www.example.com/path%", + plainPathUuri.getEscapedURI(), // browsers don't escape '%' + "plainPath getEscapedURI"); String partiallyEscapedPath = "http://www.example.com/pa%20th%"; UsableURI partiallyEscapedPathUuri = UsableURIFactory.getInstance( @@ -839,9 +836,9 @@ public void testTrailingPercents() throws URIException { // "http://www.example.com/pa th%", // TODO: is this desirable? 
//// partiallyEscapedPath, // partiallyEscapedPathUuri.getURI()); - assertEquals("partiallyEscapedPath getEscapedURI", - "http://www.example.com/pa%20th%", - partiallyEscapedPathUuri.getEscapedURI()); + assertEquals("http://www.example.com/pa%20th%", + partiallyEscapedPathUuri.getEscapedURI(), + "partiallyEscapedPath getEscapedURI"); String plainQueryString = "http://www.example.com/path?q=foo%"; UsableURI plainQueryStringUuri = UsableURIFactory.getInstance( @@ -849,58 +846,58 @@ public void testTrailingPercents() throws URIException { // assertEquals("plainQueryString getURI", // plainQueryString, // plainQueryStringUuri.getURI()); - assertEquals("plainQueryString getEscapedURI", - "http://www.example.com/path?q=foo%", - plainQueryStringUuri.getEscapedURI()); + assertEquals("http://www.example.com/path?q=foo%", + plainQueryStringUuri.getEscapedURI(), + "plainQueryString getEscapedURI"); String partiallyEscapedQueryString = "http://www.example.com/pa%20th?q=foo%"; UsableURI partiallyEscapedQueryStringUuri = UsableURIFactory.getInstance( partiallyEscapedQueryString); - assertEquals("partiallyEscapedQueryString getURI", - "http://www.example.com/pa th?q=foo%", - partiallyEscapedQueryStringUuri.getURI()); - assertEquals("partiallyEscapedQueryString getEscapedURI", - "http://www.example.com/pa%20th?q=foo%", - partiallyEscapedQueryStringUuri.getEscapedURI()); + assertEquals("http://www.example.com/pa th?q=foo%", + partiallyEscapedQueryStringUuri.getURI(), + "partiallyEscapedQueryString getURI"); + assertEquals("http://www.example.com/pa%20th?q=foo%", + partiallyEscapedQueryStringUuri.getEscapedURI(), + "partiallyEscapedQueryString getEscapedURI"); } /** * Ensure that stray '%' characters do not prevent * UURI instances from being created, and are reasonably * escaped when encountered. 
- * - * @throws URIException */ + @Test public void testStrayPercents() throws URIException { String oneStray = "http://www.example.com/pa%th"; UsableURI oneStrayUuri = UsableURIFactory.getInstance(oneStray); - assertEquals("oneStray getURI", oneStray, oneStrayUuri.getURI()); - assertEquals("oneStray getEscapedURI", - "http://www.example.com/pa%th", // browsers don't escape '%' - oneStrayUuri.getEscapedURI()); + assertEquals(oneStray, oneStrayUuri.getURI(), "oneStray getURI"); + assertEquals("http://www.example.com/pa%th", + oneStrayUuri.getEscapedURI(), // browsers don't escape '%' + "oneStray getEscapedURI"); String precededByValidEscape = "http://www.example.com/pa%20th%way"; UsableURI precededByValidEscapeUuri = UsableURIFactory.getInstance( precededByValidEscape); - assertEquals("precededByValidEscape getURI", - "http://www.example.com/pa th%way", // getURI interprets escapes - precededByValidEscapeUuri.getURI()); - assertEquals("precededByValidEscape getEscapedURI", - "http://www.example.com/pa%20th%way", - precededByValidEscapeUuri.getEscapedURI()); + assertEquals("http://www.example.com/pa th%way", + precededByValidEscapeUuri.getURI(), // getURI interprets escapes + "precededByValidEscape getURI"); + assertEquals("http://www.example.com/pa%20th%way", + precededByValidEscapeUuri.getEscapedURI(), + "precededByValidEscape getEscapedURI"); String followedByValidEscape = "http://www.example.com/pa%th%20way"; UsableURI followedByValidEscapeUuri = UsableURIFactory.getInstance( followedByValidEscape); - assertEquals("followedByValidEscape getURI", - "http://www.example.com/pa%th way", // getURI interprets escapes - followedByValidEscapeUuri.getURI()); - assertEquals("followedByValidEscape getEscapedURI", - "http://www.example.com/pa%th%20way", - followedByValidEscapeUuri.getEscapedURI()); + assertEquals("http://www.example.com/pa%th way", + followedByValidEscapeUuri.getURI(), // getURI interprets escapes + "followedByValidEscape getURI"); + 
assertEquals("http://www.example.com/pa%th%20way", + followedByValidEscapeUuri.getEscapedURI(), + "followedByValidEscape getEscapedURI"); } - + + @Test public void testEscapingNotNecessary() throws URIException { String escapesUnnecessary = "http://www.example.com/misc;reserved:chars@that&don't=need" @@ -908,42 +905,46 @@ public void testEscapingNotNecessary() throws URIException { // expect everything but the #fragment String expected = escapesUnnecessary.substring(0, escapesUnnecessary .length() - 3); - assertEquals("escapes unnecessary", - expected, - UsableURIFactory.getInstance(escapesUnnecessary).toString()); + assertEquals(expected, + UsableURIFactory.getInstance(escapesUnnecessary).toString(), + "escapes unnecessary"); } - + + @Test public void testIdn() throws URIException { // See http://www.josefsson.org/idn.php. // http://räksmörgås.josefßon.org/ String idn1 = "http://r\u00e4ksm\u00f6rg\u00e5s.josef\u00dfon.org/"; String puny1 = "http://xn--rksmrgs-5wao1o.josefsson.org/"; - assertEquals("encoding of " + idn1, puny1, UsableURIFactory - .getInstance(idn1).toString()); + assertEquals(puny1, UsableURIFactory + .getInstance(idn1).toString(), "encoding of " + idn1); // http://www.pølse.dk/ String idn2 = "http://www.p\u00f8lse.dk/"; String puny2 = "http://www.xn--plse-gra.dk/"; - assertEquals("encoding of " + idn2, puny2, UsableURIFactory - .getInstance(idn2).toString()); + assertEquals(puny2, UsableURIFactory + .getInstance(idn2).toString(), "encoding of " + idn2); // http://例子.測試 String idn3 = "http://\u4F8B\u5B50.\u6E2C\u8A66"; String puny3 = "http://xn--fsqu00a.xn--g6w251d/"; - assertEquals("encoding of " + idn3, puny3, UsableURIFactory - .getInstance(idn3).toString()); + assertEquals(puny3, UsableURIFactory + .getInstance(idn3).toString(), "encoding of " + idn3); } - + + @Test public void testNewLineInURL() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://www.ar\rchive\n." 
+ "org/i\n\n\r\rndex.html"); assertEquals("http://www.archive.org/index.html", uuri.toString()); } - + + @Test public void testTabsInURL() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http://www.ar\tchive\t." + "org/i\t\r\n\tndex.html"); assertEquals("http://www.archive.org/index.html", uuri.toString()); } - + + @Test public void testQueryEscaping() throws URIException { UsableURI uuri = UsableURIFactory.getInstance( "http://www.yahoo.com/foo?somechars!@$%^&*()_-+={[}]|\'\";:/?.>,<"); @@ -957,50 +958,44 @@ public void testQueryEscaping() throws URIException { * Check that our 'normalization' does same as Nutch's * Below before-and-afters were taken from the nutch urlnormalizer-basic * TestBasicURLNormalizer class (December 2006, Nutch 0.9-dev). - * @throws URIException */ + @Test public void testSameAsNutchURLFilterBasic() throws URIException { - assertEquals(UsableURIFactory.getInstance(" http://foo.com/ ").toString(), - "http://foo.com/"); + assertEquals("http://foo.com/", + UsableURIFactory.getInstance(" http://foo.com/ ").toString()); // check that protocol is lower cased - assertEquals(UsableURIFactory.getInstance("HTTP://foo.com/").toString(), - "http://foo.com/"); + assertEquals("http://foo.com/", + UsableURIFactory.getInstance("HTTP://foo.com/").toString()); // check that host is lower cased - assertEquals(UsableURIFactory. - getInstance("http://Foo.Com/index.html").toString(), - "http://foo.com/index.html"); - assertEquals(UsableURIFactory. - getInstance("http://Foo.Com/index.html").toString(), - "http://foo.com/index.html"); + assertEquals("http://foo.com/index.html", + UsableURIFactory.getInstance("http://Foo.Com/index.html").toString()); + assertEquals("http://foo.com/index.html", + UsableURIFactory.getInstance("http://Foo.Com/index.html").toString()); // check that port number is normalized - assertEquals(UsableURIFactory. 
- getInstance("http://foo.com:80/index.html").toString(), - "http://foo.com/index.html"); - assertEquals(UsableURIFactory.getInstance("http://foo.com:81/").toString(), - "http://foo.com:81/"); + assertEquals("http://foo.com/index.html", + UsableURIFactory.getInstance("http://foo.com:80/index.html").toString()); + assertEquals("http://foo.com:81/", + UsableURIFactory.getInstance("http://foo.com:81/").toString()); // check that null path is normalized - assertEquals(UsableURIFactory.getInstance("http://foo.com").toString(), - "http://foo.com/"); + assertEquals("http://foo.com/", + UsableURIFactory.getInstance("http://foo.com").toString()); // check that references are removed - assertEquals(UsableURIFactory. - getInstance("http://foo.com/foo.html#ref").toString(), - "http://foo.com/foo.html"); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/foo.html#ref").toString()); // // check that encoding is normalized // normalizeTest("http://foo.com/%66oo.html", "http://foo.com/foo.html"); // check that unnecessary "../" are removed - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/../").toString(), - "http://foo.com/" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/bb/../").toString(), - "http://foo.com/aa/"); + assertEquals("http://foo.com/", + UsableURIFactory.getInstance("http://foo.com/aa/../").toString()); + assertEquals("http://foo.com/aa/", + UsableURIFactory.getInstance("http://foo.com/aa/bb/../").toString()); /* We fail this one. Here we produce: 'http://foo.com/'. assertEquals(UURIFactory. @@ -1008,45 +1003,33 @@ public void testSameAsNutchURLFilterBasic() throws URIException { "http://foo.com/aa/.."); */ - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/bb/cc/../../foo.html").toString(), - "http://foo.com/aa/foo.html"); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/bb/../cc/dd/../ee/foo.html"). 
- toString(), - "http://foo.com/aa/cc/ee/foo.html"); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/../../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/../aa/../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/../../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/aa/../bb/../foo.html/../../"). - toString(), - "http://foo.com/" ); - assertEquals(UsableURIFactory.getInstance("http://foo.com/../aa/foo.html"). - toString(), "http://foo.com/aa/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/../aa/../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/a..a/foo.html").toString(), - "http://foo.com/a..a/foo.html" ); - assertEquals(UsableURIFactory. - getInstance("http://foo.com/a..a/../foo.html").toString(), - "http://foo.com/foo.html" ); - assertEquals(UsableURIFactory. 
- getInstance("http://foo.com/foo.foo/../foo.html").toString(), - "http://foo.com/foo.html" ); + assertEquals("http://foo.com/aa/foo.html", + UsableURIFactory.getInstance("http://foo.com/aa/bb/cc/../../foo.html").toString()); + assertEquals("http://foo.com/aa/cc/ee/foo.html", + UsableURIFactory.getInstance("http://foo.com/aa/bb/../cc/dd/../ee/foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/../foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/../../foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/../aa/../foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/aa/../../foo.html").toString()); + assertEquals("http://foo.com/", + UsableURIFactory.getInstance("http://foo.com/aa/../bb/../foo.html/../../").toString()); + assertEquals("http://foo.com/aa/foo.html", + UsableURIFactory.getInstance("http://foo.com/../aa/foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/../aa/../foo.html").toString()); + assertEquals("http://foo.com/a..a/foo.html", + UsableURIFactory.getInstance("http://foo.com/a..a/foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/a..a/../foo.html").toString()); + assertEquals("http://foo.com/foo.html", + UsableURIFactory.getInstance("http://foo.com/foo.foo/../foo.html").toString()); } - + + @Test public void testHttpSchemeColonSlash() { boolean exception = false; try { @@ -1054,16 +1037,17 @@ public void testHttpSchemeColonSlash() { } catch (URIException e) { exception = true; } - assertTrue("Didn't throw exception when one expected", exception); + assertTrue(exception, "Didn't throw exception when one expected"); exception = false; try { UsableURIFactory.getInstance("http://"); } catch 
(URIException e) { exception = true; } - assertTrue("Didn't throw exception when one expected", exception); + assertTrue(exception, "Didn't throw exception when one expected"); } - + + @Test public void testNakedHttpsSchemeColon() { boolean exception = false; try { @@ -1071,7 +1055,7 @@ public void testNakedHttpsSchemeColon() { } catch (URIException e) { exception = true; } - assertTrue("Didn't throw exception when one expected", exception); + assertTrue(exception, "Didn't throw exception when one expected"); exception = false; try { UsableURI base = UsableURIFactory.getInstance("http://www.example.com"); @@ -1079,15 +1063,14 @@ public void testNakedHttpsSchemeColon() { } catch (URIException e) { exception = true; } - assertTrue("Didn't throw exception when one expected", exception); + assertTrue(exception, "Didn't throw exception when one expected"); } /** * Test motivated by [#HER-616] The UURI class may throw * NullPointerException in getReferencedHost() - * - * @throws URIException */ + @Test public void testMissingHttpColon() throws URIException { String suspectUri = "http//www.test.foo"; UsableURI base = UsableURIFactory.getInstance("http://www.example.com"); @@ -1099,7 +1082,7 @@ public void testMissingHttpColon() throws URIException { // should get relative-uri-no-base exception exceptionThrown = true; } finally { - assertTrue("expected exception not thrown",exceptionThrown); + assertTrue(exceptionThrown,"expected exception not thrown"); } UsableURI goodUuri = UsableURIFactory.getInstance(base,suspectUri); goodUuri.getReferencedHost(); @@ -1108,33 +1091,31 @@ public void testMissingHttpColon() throws URIException { /** * A UURI's string representation should be same after a * serialization roundtrip. - * - * @throws URIException */ + @Test public final void testSerializationRoundtrip() throws URIException { UsableURI uuri = UsableURIFactory. 
getInstance("http://www.example.com/path?query#anchor"); UsableURI uuri2 = (UsableURI) SerializationUtils.deserialize( SerializationUtils.serialize(uuri)); - assertEquals("Not equal", uuri.toString(), uuri2.toString()); + assertEquals(uuri.toString(), uuri2.toString(), "Not equal"); uuri = UsableURIFactory. getInstance("file://///boo_hoo/wwwroot/CMS/Images1/Banner.gif"); uuri2 = (UsableURI) SerializationUtils.deserialize( SerializationUtils.serialize(uuri)); - assertEquals("Not equal", uuri.toString(), uuri2.toString()); + assertEquals(uuri.toString(), uuri2.toString(), "Not equal"); } /** * A UURI's string representation should be same after a * toCustomString-getInstance roundtrip. - * - * @throws URIException */ + @Test public final void testToCustomStringRoundtrip() throws URIException { UsableURI uuri = UsableURIFactory. getInstance("http://www.example.com/path?query#anchor"); UsableURI uuri2 = UsableURIFactory.getInstance(uuri.toCustomString()); - assertEquals("Not equal", uuri.toString(), uuri2.toString()); + assertEquals(uuri.toString(), uuri2.toString(), "Not equal"); // TODO: fix // see [HER-1470] UURI String roundtrip (UURIFactory.getInstance(uuri.toString()) results in different URI for file: (and perhaps other) URIs // http://webteam.archive.org/jira/browse/HER-1470 @@ -1147,9 +1128,8 @@ public final void testToCustomStringRoundtrip() throws URIException { /** * A UURI's string representation should be same after a * toCustomString-getInstance roundtrip. - * - * @throws URIException */ + @Test public final void testHostnamePortRoundtrip() throws URIException { UsableURI base = UsableURIFactory. 
getInstance("http://www.example.com/path?query#anchor"); @@ -1157,13 +1137,14 @@ public final void testHostnamePortRoundtrip() throws URIException { System.out.println("scheme:"+test.getScheme()); System.out.println(test.toCustomString()); UsableURI roundtrip = UsableURIFactory.getInstance(test.toCustomString()); - assertEquals("Not equal", test.toString(), roundtrip.toString()); + assertEquals(test.toString(), roundtrip.toString(), "Not equal"); } /** * Test bad port throws URIException not NumberFormatException */ + @Test public void testExtremePort() { try { UsableURI uuri = UsableURIFactory.getInstance("http://Tel.:010101010101"); @@ -1177,9 +1158,8 @@ public void testExtremePort() { /** * Bars ('|') in path-segments aren't encoded by FF, preferred by some * RESTful-URI-ideas guides, so should work without error. - * - * @throws URIException */ + @Test public void testBarsInRelativePath() throws URIException { UsableURI base = UsableURIFactory.getInstance("http://www.example.com"); String relative = "foo/bar|baz|yorple"; @@ -1191,9 +1171,8 @@ public void testBarsInRelativePath() throws URIException { * To match IE behavior, backslashes in path-info (really, anywhere before * query string) assumed to be slashes, to match IE behavior. In * query-string, they are escaped to %5C. 
- * - * @throws URIException */ + @Test public void testBackslashes() throws URIException { UsableURI uuri = UsableURIFactory.getInstance("http:\\/www.example.com\\a/b\\c/d?q\\r\\|s/t\\v"); String expected = "http://www.example.com/a/b/c/d?q%5Cr%5C|s/t%5Cv"; diff --git a/src/test/java/org/archive/url/UsableURITest.java b/src/test/java/org/archive/url/UsableURITest.java index 2aec0e96..161e215a 100644 --- a/src/test/java/org/archive/url/UsableURITest.java +++ b/src/test/java/org/archive/url/UsableURITest.java @@ -20,19 +20,20 @@ import java.net.URISyntaxException; -import org.apache.commons.httpclient.URIException; -import org.archive.url.UsableURI; +import org.junit.jupiter.api.Test; -import junit.framework.TestCase; +import static org.junit.jupiter.api.Assertions.*; -public class UsableURITest extends TestCase { +public class UsableURITest { + @Test public void testHasScheme() { assertTrue(UsableURI.hasScheme("http://www.archive.org")); assertTrue(UsableURI.hasScheme("http:")); assertFalse(UsableURI.hasScheme("ht/tp://www.archive.org")); assertFalse(UsableURI.hasScheme("/tmp")); } - + + @Test public void testGetFileName() throws URISyntaxException { final String filename = "x.arc.gz"; assertEquals(filename, @@ -44,13 +45,42 @@ public void testGetFileName() throws URISyntaxException { UsableURI.parseFilename("rsync://archive.org/tmp/one.two/" + filename)); } - + + @Test public void testSchemalessRelative() throws URIException { UsableURI base = new UsableURI("http://www.archive.org/a", true, "UTF-8"); UsableURI relative = new UsableURI("//www.facebook.com/?href=http://www.archive.org/a", true, "UTF-8"); - assertEquals(null, relative.getScheme()); + assertNull(relative.getScheme()); assertEquals("www.facebook.com", relative.getAuthority()); UsableURI test = new UsableURI(base, relative); assertEquals("http://www.facebook.com/?href=http://www.archive.org/a", test.toString()); } + + /** + * Test of toUnicodeHostString method, of class UsableURI. 
+ */ + @Test + public void testToUnicodeHostString() throws URIException { + assertEquals("http://øx.dk", new UsableURI("http://xn--x-4ga.dk", true, "UTF-8").toUnicodeHostString()); + assertEquals("xn--x-4ga.dk", new UsableURI("xn--x-4ga.dk", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://user:pass@øx.dk:8080", new UsableURI("http://user:pass@xn--x-4ga.dk:8080", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://user@øx.dk:8080", new UsableURI("http://user@xn--x-4ga.dk:8080", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://øx.dk/foo/bar?query=q", new UsableURI("http://xn--x-4ga.dk/foo/bar?query=q", true, "UTF-8").toUnicodeHostString()); + assertEquals("http://127.0.0.1/foo/bar?query=q", new UsableURI("http://127.0.0.1/foo/bar?query=q", true, "UTF-8").toUnicodeHostString()); + + // test idn round trip + // XXX fails because idn is not handled here (it is converted to punycode in UsableURIFactory.fixupDomainlabel()) + // assertEquals("http://øx.dk", new UsableURI("http://øx.dk", false, "UTF-8").toUnicodeHostString()); + // To check the round trip it is then necessary to use the factory method in UsableURIFactory. 
+ assertEquals("http://øx.dk/", UsableURIFactory.getInstance("http://øx.dk/", "UTF-8").toUnicodeHostString()); + + // non-idn domain name + assertEquals("http://example.org", new UsableURI("http://example.org", true, "UTF-8").toUnicodeHostString()); + + // ensure a call to toUnicodeHostString() has no effect on toString() + UsableURI uri = new UsableURI("http://xn--x-4ga.dk", true, "UTF-8"); + assertEquals("http://øx.dk", uri.toUnicodeHostString()); + uri.setPath(uri.getPath()); // force toString() cached value to be recomputed + assertEquals("http://xn--x-4ga.dk", uri.toString()); + } } diff --git a/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java b/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java index 34bfe625..28a00422 100644 --- a/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java +++ b/src/test/java/org/archive/url/WaybackURLKeyMakerTest.java @@ -2,10 +2,13 @@ import java.net.URISyntaxException; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class WaybackURLKeyMakerTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class WaybackURLKeyMakerTest { + + @Test public void testMakeKey() throws URISyntaxException { WaybackURLKeyMaker km = new WaybackURLKeyMaker(); assertEquals("-", km.makeKey(null)); @@ -22,6 +25,16 @@ public void testMakeKey() throws URISyntaxException { assertEquals("org,archive)/goo", km.makeKey("http://archive.org/goo/?")); assertEquals("org,archive)/goo?a&b", km.makeKey("http://archive.org/goo/?b&a")); assertEquals("org,archive)/goo?a=1&a=2&b", km.makeKey("http://archive.org/goo/?a=2&b&a=1")); + assertEquals("org,archive)/", km.makeKey("http://archive.org:/")); + assertEquals("192,211,203,34)/robots.txt", km.makeKey("https://34.203.211.192/robots.txt")); + assertEquals("2600:1f18:200d:fb00:2b74:867c:ab0c:150a)/robots.txt", + km.makeKey("https://[2600:1f18:200d:fb00:2b74:867c:ab0c:150a]/robots.txt")); + 
assertEquals("ua,1kr)/newslist.html?tag=%e4%ee%f8%ea%ee%eb%fc%ed%ee%e5", + km.makeKey("http://1kr.ua/newslist.html?tag=%E4%EE%F8%EA%EE%EB%FC%ED%EE%E5")); + assertEquals("com,aluroba)/tags/%c3%ce%ca%c7%d1%e5%c7.htm", + km.makeKey("http://www.aluroba.com/tags/%C3%CE%CA%C7%D1%E5%C7.htm")); + assertEquals("ac,insbase)/xoops2/modules/xpwiki?%a4%d5%a4%af%a4%aa%a4%ab%b8%a9%a4%aa%a4%aa%a4%ce%a4%b8%a4%e7%a4%a6%bb%d4", + km.makeKey("https://www.insbase.ac/xoops2/modules/xpwiki/?%A4%D5%A4%AF%A4%AA%A4%AB%B8%A9%A4%AA%A4%AA%A4%CE%A4%B8%A4%E7%A4%A6%BB%D4")); } } diff --git a/src/test/java/org/archive/util/ArchiveUtilsTest.java b/src/test/java/org/archive/util/ArchiveUtilsTest.java index 8251615a..18337498 100644 --- a/src/test/java/org/archive/util/ArchiveUtilsTest.java +++ b/src/test/java/org/archive/util/ArchiveUtilsTest.java @@ -19,15 +19,16 @@ package org.archive.util; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfSystemProperty; + import java.text.ParseException; import java.util.Date; import java.util.HashSet; import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicInteger; -import junit.framework.Test; -import junit.framework.TestCase; -import junit.framework.TestSuite; +import static org.junit.jupiter.api.Assertions.*; /** * JUnit test suite for ArchiveUtils @@ -35,47 +36,21 @@ * @author James Casey * @version $Id$ */ -public class ArchiveUtilsTest extends TestCase { - - /** - * Create a new ArchiveUtilsTest object - * - * @param testName the name of the test - */ - public ArchiveUtilsTest(final String testName) { - super(testName); - } - - /** - * run all the tests for ArchiveUtilsTest - * - * @param argv the command line arguments - */ - public static void main(String argv[]) { - junit.textui.TestRunner.run(suite()); - } - - /** - * return the suite of tests for ArchiveUtilsTest - * - * @return the suite of test - */ - public static Test suite() { - return new TestSuite(ArchiveUtilsTest.class); - } +public 
class ArchiveUtilsTest { /** check the getXXDigitDate() methods produce valid dates*/ + @Test public void testGetXXDigitDate() { // TODO - we only really test the date lengths here. How to test // other stuff well ? final String date12 = ArchiveUtils.get12DigitDate(); - assertEquals("12 digits", 12, date12.length()); + assertEquals(12, (Object) date12.length(), "12 digits"); final String date14 = ArchiveUtils.get14DigitDate(); - assertEquals("14 digits", 14, date14.length()); + assertEquals(14, (Object) date14.length(), "14 digits"); final String date17 = ArchiveUtils.get17DigitDate(); - assertEquals("17 digits", 17, date17.length()); + assertEquals(17, (Object) date17.length(), "17 digits"); // now parse, and check they're all within 1 minute @@ -93,18 +68,19 @@ public void testGetXXDigitDate() { } /** check that getXXDigitDate(long) does the right thing */ + @Test public void testGetXXDigitDateLong() { final long now = System.currentTimeMillis(); final String date12 = ArchiveUtils.get12DigitDate(now); - assertEquals("12 digits", 12, date12.length()); + assertEquals(12, (Object) date12.length(), "12 digits"); final String date14 = ArchiveUtils.get14DigitDate(now); - assertEquals("14 digits", 14, date14.length()); - assertEquals("first twelve digits same as date12", date12, date14.substring(0, 12)); + assertEquals(14, (Object) date14.length(), "14 digits"); + assertEquals(date12, date14.substring(0, 12), "first twelve digits same as date12"); final String date17 = ArchiveUtils.get17DigitDate(now); - assertEquals("17 digits", 17, date17.length()); - assertEquals("first twelve digits same as date12", date12, date17.substring(0, 12)); - assertEquals("first fourteen digits same as date14", date14, date17.substring(0, 14)); + assertEquals(17, (Object) date17.length(), "17 digits"); + assertEquals(date12, date17.substring(0, 12), "first twelve digits same as date12"); + assertEquals(date14, date17.substring(0, 14), "first fourteen digits same as date14"); } /** @@ -112,6 
+88,7 @@ public void testGetXXDigitDateLong() { * * @throws ParseException */ + @Test public void testParseXXDigitDate() throws ParseException { // given a date, check it get resolved properly // It's 02 Jan 2004, 12:40:02.111 @@ -128,7 +105,8 @@ public void testParseXXDigitDate() throws ParseException { fail("Could not parse a date : " + e.getMessage()); } } - + + @Test public void testTooShortParseDigitDate() throws ParseException { String d = "X"; boolean b = false; @@ -157,6 +135,7 @@ public void testTooShortParseDigitDate() throws ParseException { } /** check that parse12DigitDate doesn't accept a bad date */ + @Test public void testBad12Date() { // now try a badly formed dates assertBad12DigitDate("a-stringy-digit-date"); @@ -166,6 +145,7 @@ public void testBad12Date() { /** * check that parse14DigitDate doesn't accept a bad date */ + @Test public void testBad14Date() { // now try a badly formed dates assertBad14DigitDate("a-stringy-digit-date"); @@ -175,6 +155,7 @@ public void testBad14Date() { /** * check that parse12DigitDate doesn't accept a bad date */ + @Test public void testBad17Date() { // now try a badly formed dates assertBad17DigitDate("a-stringy-digit-date"); @@ -184,27 +165,30 @@ public void testBad17Date() { } /** check that padTo(String) works */ + @Test public void testPadToString() { - assertEquals("pad to one (smaller)", "foo", ArchiveUtils.padTo("foo", 1)); - assertEquals("pad to 0 (no sense)", "foo", ArchiveUtils.padTo("foo", 0)); - assertEquals("pad to neg (nonsense)", "foo", ArchiveUtils.padTo("foo", 0)); - assertEquals("pad to 4", " foo", ArchiveUtils.padTo("foo", 4)); - assertEquals("pad to 10", " foo", ArchiveUtils.padTo("foo", 10)); + assertEquals("foo", ArchiveUtils.padTo("foo", 1), "pad to one (smaller)"); + assertEquals("foo", ArchiveUtils.padTo("foo", 0), "pad to 0 (no sense)"); + assertEquals("foo", ArchiveUtils.padTo("foo", 0), "pad to neg (nonsense)"); + assertEquals(" foo", ArchiveUtils.padTo("foo", 4), "pad to 4"); + 
assertEquals(" foo", ArchiveUtils.padTo("foo", 10), "pad to 10"); } /** * check that padTo(int) works */ + @Test public void testPadToInt() { - assertEquals("pad to one (smaller)", "123", ArchiveUtils.padTo(123, 1)); - assertEquals("pad to 0 (no sense)", "123", ArchiveUtils.padTo(123, 0)); - assertEquals("pad to neg (nonsense)", "123", ArchiveUtils.padTo(123, 0)); - assertEquals("pad to 4", " 123", ArchiveUtils.padTo(123, 4)); - assertEquals("pad to 10", " 123", ArchiveUtils.padTo(123, 10)); - assertEquals("pad -123 to 10", " -123", ArchiveUtils.padTo(-123, 10)); + assertEquals("123", ArchiveUtils.padTo(123, 1), "pad to one (smaller)"); + assertEquals("123", ArchiveUtils.padTo(123, 0), "pad to 0 (no sense)"); + assertEquals("123", ArchiveUtils.padTo(123, 0), "pad to neg (nonsense)"); + assertEquals(" 123", ArchiveUtils.padTo(123, 4), "pad to 4"); + assertEquals(" 123", ArchiveUtils.padTo(123, 10), "pad to 10"); + assertEquals(" -123", ArchiveUtils.padTo(-123, 10), "pad -123 to 10"); } /** check that byteArrayEquals() works */ + @Test public void testByteArrayEquals() { // foo == foo2, foo != bar, foo != bar2 byte[] foo = new byte[10], bar = new byte[20]; @@ -214,59 +198,52 @@ public void testByteArrayEquals() { foo[i] = foo2[i] = bar[i] = i; bar2[i] = (byte)(01 + i); } - assertTrue("two nulls", ArchiveUtils.byteArrayEquals(null, null)); - assertFalse("lhs null", ArchiveUtils.byteArrayEquals(null, foo)); - assertFalse("rhs null", ArchiveUtils.byteArrayEquals(foo, null)); + assertTrue(ArchiveUtils.byteArrayEquals(null, null), "two nulls"); + assertFalse(ArchiveUtils.byteArrayEquals(null, foo), "lhs null"); + assertFalse(ArchiveUtils.byteArrayEquals(foo, null), "rhs null"); // now check with same length, with same (foo2) and different (bar2) // contents - assertFalse("different lengths", ArchiveUtils.byteArrayEquals(foo, bar)); + assertFalse(ArchiveUtils.byteArrayEquals(foo, bar), "different lengths"); - assertTrue("same to itself", ArchiveUtils.byteArrayEquals(foo, 
foo)); - assertTrue("same contents", ArchiveUtils.byteArrayEquals(foo, foo2)); - assertFalse("different contents", ArchiveUtils.byteArrayEquals(foo, bar2)); + assertTrue(ArchiveUtils.byteArrayEquals(foo, foo), "same to itself"); + assertTrue(ArchiveUtils.byteArrayEquals(foo, foo2), "same contents"); + assertFalse(ArchiveUtils.byteArrayEquals(foo, bar2), "different contents"); } /** test doubleToString() */ + @Test public void testDoubleToString(){ - double test = 12.345; - assertTrue( - "cecking zero precision", - ArchiveUtils.doubleToString(test, 0).equals("12")); - assertTrue( - "cecking 2 character precision", - ArchiveUtils.doubleToString(test, 2).equals("12.34")); - assertTrue( - "cecking precision higher then the double has", - ArchiveUtils.doubleToString(test, 65).equals("12.345")); + double test = 12.121d; + assertEquals("12", ArchiveUtils.doubleToString(test, 0), "cecking zero precision"); + assertEquals("12.12", ArchiveUtils.doubleToString(test, 2), "cecking 2 character precision"); + assertEquals("12.121", ArchiveUtils.doubleToString(test, 65), "cecking precision higher then the double has"); } + @Test public void testFormatBytesForDisplayPrecise(){ - assertEquals("formating negative number", "0 B", ArchiveUtils - .formatBytesForDisplay(-1)); - assertEquals("0 bytes", "0 B", ArchiveUtils - .formatBytesForDisplay(0)); - assertEquals("1 B", ArchiveUtils.formatBytesForDisplay(1)); - assertEquals("9 B", ArchiveUtils.formatBytesForDisplay(9)); - assertEquals("512 B", ArchiveUtils.formatBytesForDisplay(512)); - assertEquals("1023 bytes", "1,023 B", ArchiveUtils - .formatBytesForDisplay(1023)); - assertEquals("1025 bytes", "1.0 KiB", ArchiveUtils - .formatBytesForDisplay(1025)); + assertEquals("0 B", ArchiveUtils + .formatBytesForDisplay(-1), "formating negative number"); + assertEquals("0 B", ArchiveUtils + .formatBytesForDisplay(0), "0 bytes"); + Object a2 = ArchiveUtils.formatBytesForDisplay(1); + assertEquals("1 B", a2); + Object a1 = 
ArchiveUtils.formatBytesForDisplay(9); + assertEquals("9 B", a1); + Object a = ArchiveUtils.formatBytesForDisplay(512); + assertEquals( "512 B", a); + assertEquals("1,023 B", ArchiveUtils + .formatBytesForDisplay(1023), "1023 bytes"); + assertEquals("1.0 KiB", ArchiveUtils + .formatBytesForDisplay(1025), "1025 bytes"); // expected display values taken from Google calculator - assertEquals("10,000 bytes", "9.8 KiB", - ArchiveUtils.formatBytesForDisplay(10000)); - assertEquals("1,000,000 bytes", "977 KiB", - ArchiveUtils.formatBytesForDisplay(1000000)); - assertEquals("100,000,000 bytes", "95 MiB", - ArchiveUtils.formatBytesForDisplay(100000000)); - assertEquals("100,000,000,000 bytes", "93 GiB", - ArchiveUtils.formatBytesForDisplay(100000000000L)); - assertEquals("100,000,000,000,000 bytes", "91 TiB", - ArchiveUtils.formatBytesForDisplay(100000000000000L)); - assertEquals("100,000,000,000,000,000 bytes", "90,949 TiB", - ArchiveUtils.formatBytesForDisplay(100000000000000000L)); + assertEquals("9.8 KiB", ArchiveUtils.formatBytesForDisplay(10000), "10,000 bytes"); + assertEquals("977 KiB", ArchiveUtils.formatBytesForDisplay(1000000), "1,000,000 bytes"); + assertEquals("95 MiB", ArchiveUtils.formatBytesForDisplay(100000000), "100,000,000 bytes"); + assertEquals("93 GiB", ArchiveUtils.formatBytesForDisplay(100000000000L), "100,000,000,000 bytes"); + assertEquals("91 TiB", ArchiveUtils.formatBytesForDisplay(100000000000000L), "100,000,000,000,000 bytes"); + assertEquals("90,949 TiB", ArchiveUtils.formatBytesForDisplay(100000000000000000L), "100,000,000,000,000,000 bytes"); } /* @@ -318,11 +295,12 @@ private void assertBad17DigitDate(final String date) { /** check that two longs are within a given delta */ private void assertClose(String desc, long date1, long date2, long delta) { - assertTrue(desc, date1 == date2 || + assertTrue(date1 == date2 || (date1 < date2 && date2 < (date1 + delta)) || - (date2 < date1 && date1 < (date2 + delta))); + (date2 < date1 && date1 < (date2 
+ delta)), desc); } - + + @Test public void testArrayToLong() { testOneArrayToLong(-1); testOneArrayToLong(1); @@ -336,19 +314,23 @@ private void testOneArrayToLong(final long testValue) { final long l = ArchiveUtils.byteArrayIntoLong(a, 0); assertEquals(testValue, l); } - + + @Test public void testSecondsSinceEpochCalculation() throws ParseException { - assertEquals(ArchiveUtils.secondsSinceEpoch("20010909014640"), - "1000000000"); - assertEquals(ArchiveUtils.secondsSinceEpoch("20010909014639"), - "0999999999"); - assertEquals(ArchiveUtils.secondsSinceEpoch("19700101"), - "0000000000"); - assertEquals(ArchiveUtils.secondsSinceEpoch("2005"), "1104537600"); - assertEquals(ArchiveUtils.secondsSinceEpoch("200501"), "1104537600"); - assertEquals(ArchiveUtils.secondsSinceEpoch("20050101"), "1104537600"); - assertEquals(ArchiveUtils.secondsSinceEpoch("2005010100"), - "1104537600"); + String m6 = ArchiveUtils.secondsSinceEpoch("20010909014640"); + assertEquals("1000000000", m6); + String m5 = ArchiveUtils.secondsSinceEpoch("20010909014639"); + assertEquals("0999999999", m5); + String m4 = ArchiveUtils.secondsSinceEpoch("19700101"); + assertEquals("0000000000", m4); + String m3 = ArchiveUtils.secondsSinceEpoch("2005"); + assertEquals("1104537600", m3); + String m2 = ArchiveUtils.secondsSinceEpoch("200501"); + assertEquals("1104537600", m2); + String m1 = ArchiveUtils.secondsSinceEpoch("20050101"); + assertEquals("1104537600", m1); + String m = ArchiveUtils.secondsSinceEpoch("2005010100"); + assertEquals("1104537600", m); boolean eThrown = false; try { ArchiveUtils.secondsSinceEpoch("20050"); @@ -357,10 +339,13 @@ public void testSecondsSinceEpochCalculation() throws ParseException { } assertTrue(eThrown); } - - public static void testZeroPadInteger() { - assertEquals(ArchiveUtils.zeroPadInteger(1), "0000000001"); - assertEquals(ArchiveUtils.zeroPadInteger(1000000000), "1000000000"); + + @Test + public void testZeroPadInteger() { + String m1 = 
ArchiveUtils.zeroPadInteger(1); + assertEquals("0000000001", m1); + String m = ArchiveUtils.zeroPadInteger(1000000000); + assertEquals("1000000000", m); } /** @@ -368,7 +353,9 @@ public static void testZeroPadInteger() { * * @throws InterruptedException */ - public static void testDateFormatConcurrency() throws InterruptedException { + @Test + @EnabledIfSystemProperty(named = "runSlowTests", matches = "true") + public void testDateFormatConcurrency() throws InterruptedException { final int COUNT = 1000; Thread [] ts = new Thread[COUNT]; final Semaphore allDone = new Semaphore(-COUNT+1); @@ -400,24 +387,29 @@ public void run() { while(!ts[i].isAlive()) /* Wait for thread to spin up*/; } allDone.acquire(); // wait for all threads to finish - assertEquals(failures.get()+" format mismatches",0,failures.get()); + String m = failures.get()+" format mismatches"; + assertEquals(0, (Object) failures.get(), m); } - + + @Test public void testIsTld() { - assertTrue("TLD test problem", ArchiveUtils.isTld("com")); - assertTrue("TLD test problem", ArchiveUtils.isTld("COM")); + assertTrue(ArchiveUtils.isTld("com"), "TLD test problem"); + assertTrue(ArchiveUtils.isTld("COM"), "TLD test problem"); } - + + @Test public void testUnique17() { HashSet uniqueTimestamps = new HashSet(); for(int i = 0; i<10; i++) { - assertTrue("timestamp17 repeated",uniqueTimestamps.add(ArchiveUtils.getUnique17DigitDate())); + assertTrue(uniqueTimestamps.add(ArchiveUtils.getUnique17DigitDate()),"timestamp17 repeated"); } } + + @Test public void testUnique14() { HashSet uniqueTimestamps = new HashSet(); for(int i = 0; i<10; i++) { - assertTrue("timestamp14 repeated",uniqueTimestamps.add(ArchiveUtils.getUnique14DigitDate())); + assertTrue(uniqueTimestamps.add(ArchiveUtils.getUnique14DigitDate()),"timestamp14 repeated"); } } } diff --git a/src/test/java/org/archive/util/ByteOpTest.java b/src/test/java/org/archive/util/ByteOpTest.java index de6a164f..eb89353e 100644 --- 
a/src/test/java/org/archive/util/ByteOpTest.java +++ b/src/test/java/org/archive/util/ByteOpTest.java @@ -4,23 +4,25 @@ import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.IOException; - -import org.archive.util.ByteOp; +import java.util.Locale; import com.google.common.io.LittleEndianDataOutputStream; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; -public class ByteOpTest extends TestCase { +public class ByteOpTest { + @Test public void testReadShort() throws IOException { byte a[] = new byte[]{0,1,2,3}; ByteArrayInputStream bais = new ByteArrayInputStream(a); int bos = ByteOp.readShort(bais); - System.out.format("BO.Read short(%d)\n", bos); + System.out.format(Locale.ROOT, "BO.Read short(%d)\n", bos); DataInputStream dis = new DataInputStream(new ByteArrayInputStream(a)); int disv = dis.readUnsignedShort(); - System.out.format("DI.Read short(%d)\n", disv); + System.out.format(Locale.ROOT, "DI.Read short(%d)\n", disv); for(int i = 0; i < 256 * 256; i++) { ByteArrayOutputStream baos = new ByteArrayOutputStream(2); LittleEndianDataOutputStream dos = new LittleEndianDataOutputStream(baos); @@ -31,6 +33,7 @@ public void testReadShort() throws IOException { } } + @Test public void testAppend() { byte a[] = new byte[]{1}; byte b[] = new byte[]{2}; @@ -48,8 +51,4 @@ public void testAppend() { assertEquals(5,n2[4]); } - - public void testReadInt() { - } - } diff --git a/src/test/java/org/archive/util/CrossProductTest.java b/src/test/java/org/archive/util/CrossProductTest.java index edadb859..a487ab15 100644 --- a/src/test/java/org/archive/util/CrossProductTest.java +++ b/src/test/java/org/archive/util/CrossProductTest.java @@ -2,12 +2,12 @@ import java.util.ArrayList; import java.util.List; +import java.util.Locale; -import org.archive.util.CrossProduct; +import org.junit.jupiter.api.Test; -import junit.framework.TestCase; +public class CrossProductTest 
{ -public class CrossProductTest extends TestCase { private void dumpC(List a) { StringBuilder sb = new StringBuilder(); boolean first = false; @@ -21,15 +21,20 @@ private void dumpC(List a) { } System.out.println("Dump:" + sb.toString()); } + private void dumpLOL(List> coc) { for(List co : coc) { dumpC(co); } } + + @Test public void testVersion() { String version = IAUtils.loadCommonsVersion(); - System.out.format("Loaded version(%s)\n", version); + System.out.format(Locale.ROOT, "Loaded version(%s)\n", version); } + + @Test public void testCrossProduct() { ArrayList> input = new ArrayList>(); CrossProduct xp = new CrossProduct(); @@ -40,6 +45,7 @@ public void testCrossProduct() { List> cross = xp.crossProduct(input); dumpLOL(cross); } + private List AtoL(Object... a) { ArrayList al = new ArrayList(a.length); for(Object s : a) { diff --git a/src/test/java/org/archive/util/FileUtilsTest.java b/src/test/java/org/archive/util/FileUtilsTest.java new file mode 100644 index 00000000..51c416f0 --- /dev/null +++ b/src/test/java/org/archive/util/FileUtilsTest.java @@ -0,0 +1,296 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.util; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.LongRange; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + + +/** + * FileUtils tests. + * + * @author stack + * @author gojomo + * @version $Date$, $Revision$ + */ +public class FileUtilsTest { + private String srcDirName = FileUtilsTest.class.getName() + ".srcdir"; + private File srcDirFile = null; + private String tgtDirName = FileUtilsTest.class.getName() + ".tgtdir"; + private File tgtDirFile = null; + + protected File zeroLengthLinesUnix; + protected File zeroLengthLinesWindows; + + protected File smallLinesUnix; + protected File smallLinesWindows; + protected File largeLinesUnix; + protected File largeLinesWindows; + protected File nakedLastLineUnix; + protected File nakedLastLineWindows; + + @TempDir + Path tempDir; + + @BeforeEach + protected void setUp() throws Exception { + this.srcDirFile = new File(tempDir.toFile(), srcDirName); + FileUtils.ensureWriteableDirectory(srcDirFile); + this.tgtDirFile = new File(tempDir.toFile(), tgtDirName); + FileUtils.ensureWriteableDirectory(tgtDirFile); + addFiles(); + + zeroLengthLinesUnix = setUpLinesFile("zeroLengthLinesUnix",0,0,400,IOUtils.LINE_SEPARATOR_UNIX); + zeroLengthLinesWindows = setUpLinesFile("zeroLengthLinesUnix",0,0,400,IOUtils.LINE_SEPARATOR_WINDOWS); + + smallLinesUnix = setUpLinesFile("smallLinesUnix", 0, 25, 400, IOUtils.LINE_SEPARATOR_UNIX); + smallLinesWindows = setUpLinesFile("smallLinesWindows", 0, 25, 400, IOUtils.LINE_SEPARATOR_WINDOWS); + largeLinesUnix = 
setUpLinesFile("largeLinesUnix", 128, 256, 5, IOUtils.LINE_SEPARATOR_UNIX); + largeLinesWindows = setUpLinesFile("largeLinesWindows", 128, 256, 4096, IOUtils.LINE_SEPARATOR_WINDOWS); + + nakedLastLineUnix = setUpLinesFile("nakedLastLineUnix", 0, 50, 401, IOUtils.LINE_SEPARATOR_UNIX); + org.apache.commons.io.FileUtils.writeStringToFile(nakedLastLineUnix,"a"); + nakedLastLineWindows = setUpLinesFile("nakedLastLineWindows", 0, 50, 401, IOUtils.LINE_SEPARATOR_WINDOWS); + org.apache.commons.io.FileUtils.writeStringToFile(nakedLastLineWindows,"a"); + } + + private void addFiles() throws IOException { + addFiles(3, FileUtilsTest.class.getName()); + } + + private void addFiles(final int howMany, final String baseName) + throws IOException { + for (int i = 0; i < howMany; i++) { + File.createTempFile(baseName, null, this.srcDirFile); + } + } + + private File setUpLinesFile(String name, int minLineSize, int maxLineSize, int lineCount, String lineEnding) throws IOException { + List lines = new LinkedList(); + StringBuilder sb = new StringBuilder(maxLineSize); + for(int i = 0; i< lineSize; j++) { + sb.append("-"); + } + lines.add(sb.toString()); + } + File file = File.createTempFile(name, null); + org.apache.commons.io.FileUtils.writeLines(file, lines, lineEnding); + return file; + + } + + @AfterEach + protected void tearDown() throws Exception { + org.apache.commons.io.FileUtils.deleteQuietly(this.srcDirFile); + org.apache.commons.io.FileUtils.deleteQuietly(this.tgtDirFile); + org.apache.commons.io.FileUtils.deleteQuietly(zeroLengthLinesUnix); + org.apache.commons.io.FileUtils.deleteQuietly(zeroLengthLinesWindows); + org.apache.commons.io.FileUtils.deleteQuietly(smallLinesUnix); + org.apache.commons.io.FileUtils.deleteQuietly(smallLinesWindows); + org.apache.commons.io.FileUtils.deleteQuietly(largeLinesUnix); + org.apache.commons.io.FileUtils.deleteQuietly(largeLinesWindows); + org.apache.commons.io.FileUtils.deleteQuietly(nakedLastLineUnix); + 
org.apache.commons.io.FileUtils.deleteQuietly(nakedLastLineWindows); + + } + + @Test + public void testCopyFile() { + // Test exception copying nonexistent file. + File [] srcFiles = this.srcDirFile.listFiles(); + srcFiles[0].delete(); + IOException e = null; + try { + FileUtils.copyFile(srcFiles[0], + new File(this.tgtDirFile, srcFiles[0].getName())); + } catch (IOException ioe) { + e = ioe; + } + assertNotNull(e, "Didn't get expected IOE"); + } + + @Test + public void testTailLinesZeroLengthUnix() throws IOException { + verifyTailLines(zeroLengthLinesUnix); + } + + @Test + public void testTailLinesZeroLengthWindows() throws IOException { + verifyTailLines(zeroLengthLinesWindows); + } + + @Test + public void testTailLinesSmallUnix() throws IOException { + verifyTailLines(smallLinesUnix); + } + + @Test + public void testTailLinesLargeUnix() throws IOException { + verifyTailLines(largeLinesUnix); + } + + @Test + public void testTailLinesSmallWindows() throws IOException { + verifyTailLines(smallLinesWindows); + } + + @Test + public void testTailLinesLargeWindows() throws IOException { + verifyTailLines(largeLinesWindows); + } + + @Test + public void testTailLinesNakedUnix() throws IOException { + verifyTailLines(nakedLastLineUnix); + } + + @Test + public void testTailLinesNakedWindows() throws IOException { + verifyTailLines(nakedLastLineWindows); + } + + private void verifyTailLines(File file) throws IOException { + List lines = org.apache.commons.io.FileUtils.readLines(file); + verifyTailLines(file, lines, 1, 80); + verifyTailLines(file, lines, 5, 80); + verifyTailLines(file, lines, 10, 80); + verifyTailLines(file, lines, 20, 80); + verifyTailLines(file, lines, 100, 80); + verifyTailLines(file, lines, 1, 1); + verifyTailLines(file, lines, 5, 1); + verifyTailLines(file, lines, 10, 1); + verifyTailLines(file, lines, 20, 1); + verifyTailLines(file, lines, 100, 1); + } + + + private void verifyTailLines(File file, List lines, int count, int estimate) throws 
IOException { + List testLines; + testLines = getTestTailLines(file,count,estimate); + assertEquals(lines.size(),testLines.size(),"line counts not equal:"+file.getName()+" "+count+" "+estimate); + assertEquals(lines,testLines,"lines not equal: "+file.getName()+" "+count+" "+estimate); + } + + private List getTestTailLines(File file, int count, int estimate) throws IOException { + long pos = -1; + List testLines = new LinkedList(); + do { + List returnedLines = new LinkedList(); + LongRange range = FileUtils.pagedLines(file,pos,-count,returnedLines,estimate); + Collections.reverse(returnedLines); + testLines.addAll(returnedLines); + pos = range.getMinimum()-1; + } while (pos>=0); + Collections.reverse(testLines); + return testLines; + } + + @Test + public void testHeadLinesZeroLengthUnix() throws IOException { + verifyHeadLines(zeroLengthLinesUnix); + } + + @Test + public void testHeadLinesZeroLengthWindows() throws IOException { + verifyHeadLines(zeroLengthLinesWindows); + } + + @Test + public void testHeadLinesSmallUnix() throws IOException { + verifyHeadLines(smallLinesUnix); + } + + @Test + public void testHeadLinesLargeUnix() throws IOException { + verifyHeadLines(largeLinesUnix); + } + + @Test + public void testHeadLinesSmallWindows() throws IOException { + verifyHeadLines(smallLinesWindows); + } + + @Test + public void testHeadLinesLargeWindows() throws IOException { + verifyHeadLines(largeLinesWindows); + } + + @Test + public void testHeadLinesNakedUnix() throws IOException { + verifyHeadLines(nakedLastLineUnix); + } + + @Test + public void testHeadLinesNakedWindows() throws IOException { + verifyHeadLines(nakedLastLineWindows); + } + + + private void verifyHeadLines(File file) throws IOException { + List lines = org.apache.commons.io.FileUtils.readLines(file); + verifyHeadLines(file, lines, 1, 80); + verifyHeadLines(file, lines, 5, 80); + verifyHeadLines(file, lines, 10, 80); + verifyHeadLines(file, lines, 20, 80); + verifyHeadLines(file, lines, 100, 80); + 
verifyHeadLines(file, lines, 1, 1); + verifyHeadLines(file, lines, 5, 1); + verifyHeadLines(file, lines, 10, 1); + verifyHeadLines(file, lines, 20, 1); + verifyHeadLines(file, lines, 100, 1); + } + + + private void verifyHeadLines(File file, List lines, int count, int estimate) throws IOException { + List testLines; + testLines = getTestHeadLines(file,count,estimate); + assertEquals(lines.size(),testLines.size(),"line counts not equal:"+file.getName()+" "+count+" "+estimate); + assertEquals(lines,testLines,"lines not equal: "+file.getName()+" "+count+" "+estimate); + } + + private List getTestHeadLines(File file, int count, int estimate) throws IOException { + long pos = 0; + List testLines = new LinkedList(); + do { + LongRange range = FileUtils.pagedLines(file,pos,count,testLines,estimate); + pos = range.getMaximum(); + } while (pos q = new LinkedBlockingQueue(); Thread t = tryMatchInThread(INPUT, BACKTRACKER, q); Thread.sleep(1000); t.interrupt(); - Object result = q.take(); - assertTrue("mismatch uncompleted",Boolean.FALSE.equals(result)); + Object result = q.take(); + assertEquals(Boolean.FALSE, result, "mismatch uncompleted"); } - + + @Test public void testInterruptibility() throws InterruptedException { - BlockingQueue q = new LinkedBlockingQueue(); - Thread t = tryMatchInThread(new InterruptibleCharSequence(INPUT), BACKTRACKER, q); - Thread.sleep(500); - t.interrupt(); - Object result = q.take(); - if(result instanceof Boolean) { - System.err.println(result+" match beat interrupt"); + long sleepMillis = 512; + while (sleepMillis > 0) { + BlockingQueue q = new LinkedBlockingQueue(); + Thread t = tryMatchInThread(new InterruptibleCharSequence(INPUT), BACKTRACKER, q); + Thread.sleep(sleepMillis); + if (t.getState() == Thread.State.TERMINATED) { + sleepMillis /= 2; + System.err.println("already done, retrying with shorter sleep time: " + sleepMillis + "ms"); + continue; + } + t.interrupt(); + Object result = q.take(); + if(result instanceof Boolean) { + 
System.err.println(result+" match beat interrupt"); + } + assertTrue(result instanceof RuntimeException,"exception not thrown"); + return; } - assertTrue("exception not thrown",result instanceof RuntimeException); + fail("failed to interrupt InterruptibleCharSequence with given sleeping intervals"); } } diff --git a/src/test/java/org/archive/util/MimetypeUtilsTest.java b/src/test/java/org/archive/util/MimetypeUtilsTest.java new file mode 100644 index 00000000..1ed19616 --- /dev/null +++ b/src/test/java/org/archive/util/MimetypeUtilsTest.java @@ -0,0 +1,68 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.archive.util; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * @author stack + * @version $Date$, $Revision$ + */ +public class MimetypeUtilsTest { + + @Test + public void testStraightTruncate() { + assertTrue(MimetypeUtils.truncate("text/html").equals("text/html"), + "Straight broken"); + } + + @Test + public void testWhitespaceTruncate() { + assertTrue(MimetypeUtils.truncate(null).equals("no-type"), + "Null broken"); + assertTrue(MimetypeUtils.truncate("").equals("no-type"), + "Empty broken"); + assertTrue(MimetypeUtils.truncate(" ").equals("no-type"), + "Tab broken"); + assertTrue(MimetypeUtils.truncate(" ").equals("no-type"), + "Multispace broken"); + assertTrue(MimetypeUtils.truncate("\n").equals("no-type"), + "NL broken"); + } + + @Test + public void testCommaTruncate() { + assertTrue(MimetypeUtils.truncate("text/html,text/html").equals("text/html"), + "Comma broken"); + assertTrue(MimetypeUtils.truncate("text/html, text/html"). + equals("text/html"), + "Comma space broken"); + assertTrue(MimetypeUtils.truncate("text/html;charset=iso9958-1"). + equals("text/html"), + "Charset broken"); + assertTrue(MimetypeUtils.truncate("text/html; charset=iso9958-1"). + equals("text/html"), + "Charset space broken"); + assertTrue(MimetypeUtils. + truncate("text/html, text/html; charset=iso9958-1"). 
+ equals("text/html"), "dbl text/html space charset broken"); + } +} diff --git a/src/main/java/org/archive/io/GZIPMembersInputStream.java b/src/test/java/org/archive/util/PropertyUtilsTest.java similarity index 53% rename from src/main/java/org/archive/io/GZIPMembersInputStream.java rename to src/test/java/org/archive/util/PropertyUtilsTest.java index 35fb9e90..7f703ee5 100644 --- a/src/main/java/org/archive/io/GZIPMembersInputStream.java +++ b/src/test/java/org/archive/util/PropertyUtilsTest.java @@ -16,23 +16,33 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.archive.io; + +package org.archive.util; + import java.io.IOException; -import java.io.InputStream; +import java.util.Properties; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + /** - * @deprecated use {@link org.archive.util.zip.GZIPMembersInputStream} + * PropertyUtils tests. + * + * @author gojomo + * @version $Date: 2009-11-19 14:39:53 -0800 (Thu, 19 Nov 2009) $, $Revision: 6674 $ */ -@Deprecated -public class GZIPMembersInputStream extends org.archive.util.zip.GZIPMembersInputStream { +public class PropertyUtilsTest { - public GZIPMembersInputStream(InputStream in) throws IOException { - super(in); - } - - public GZIPMembersInputStream(InputStream in, int size) throws IOException { - super(in, size); + @Test + public void testSimpleInterpolate() throws IOException { + Properties props = new Properties(); + props.put("foo", "OOF"); + props.put("bar","RAB"); + String original = "FOO|${foo} BAR|${bar}"; + String expected = "FOO|OOF BAR|RAB"; + assertEquals(expected,PropertyUtils.interpolateWithProperties(original,props),"interpalation problem"); } - -} \ No newline at end of file +} diff --git a/src/test/java/org/archive/util/StringFieldExtractorTest.java b/src/test/java/org/archive/util/StringFieldExtractorTest.java index 5f0b4464..7ecb4279 100644 --- 
a/src/test/java/org/archive/util/StringFieldExtractorTest.java +++ b/src/test/java/org/archive/util/StringFieldExtractorTest.java @@ -2,10 +2,13 @@ import org.archive.util.StringFieldExtractor.StringTuple; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class StringFieldExtractorTest extends TestCase { +import static org.junit.jupiter.api.Assertions.assertEquals; +public class StringFieldExtractorTest { + + @Test public void testExtract() { StringFieldExtractor ex1 = new StringFieldExtractor(' ', 0); StringFieldExtractor ex2 = new StringFieldExtractor(' ', 1); @@ -29,7 +32,8 @@ private void checkSplit(String f, String s,StringTuple t) { assertEquals(f,t.first); assertEquals(s,t.second); } - + + @Test public void testSplit() { StringFieldExtractor sfx = new StringFieldExtractor(' ',2); checkSplit("a b","x y",sfx.split("a b x y")); diff --git a/src/test/java/org/archive/util/TestUtils.java b/src/test/java/org/archive/util/TestUtils.java index 81fd6fd6..b8fee0f4 100644 --- a/src/test/java/org/archive/util/TestUtils.java +++ b/src/test/java/org/archive/util/TestUtils.java @@ -3,25 +3,23 @@ import java.io.IOException; import java.io.InputStream; import java.util.List; - -import junit.framework.TestCase; - +import java.util.Locale; import com.google.common.io.ByteStreams; -public class TestUtils extends TestCase { - public void testNothing() { - assertEquals(2,1+1); - } +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestUtils { public static void dumpMatch(String context, List> res) { - System.out.format("Context(%s) Found (%d) matches\n", context, res.size()); + System.out.format(Locale.ROOT, "Context(%s) Found (%d) matches\n", context, res.size()); for(List r : res) { - System.out.format("Match(%s)\n", StringParse.join(r)); + System.out.format(Locale.ROOT, "Match(%s)\n", StringParse.join(r)); } } - public static void assertLoLMatches(String want[][], 
List> got) { + public static void assertLoLMatches(String[][] want, List> got) { assertEquals(want.length,got.size()); for(int i = 0; i < want.length; i++) { String [] wantSub = want[i]; @@ -32,8 +30,8 @@ public static void assertLoLMatches(String want[][], List> got) { } } } - public static void assertStreamEquals(InputStream is,byte b[]) throws IOException { - byte got[] = ByteStreams.toByteArray(is); + public static void assertStreamEquals(InputStream is, byte[] b) throws IOException { + byte[] got = ByteStreams.toByteArray(is); assertEquals(got.length,b.length); assertTrue(ByteOp.cmp(got,b)); } diff --git a/src/test/java/org/archive/util/anvl/ANVLRecordTest.java b/src/test/java/org/archive/util/anvl/ANVLRecordTest.java new file mode 100644 index 00000000..1889a156 --- /dev/null +++ b/src/test/java/org/archive/util/anvl/ANVLRecordTest.java @@ -0,0 +1,138 @@ +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.archive.util.anvl; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.logging.Logger; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class ANVLRecordTest { + private final Logger logger = Logger.getLogger(this.getClass().getName()); + + @Test + public void testAdd() { + ANVLRecord am = new ANVLRecord(); + am.add(new Element(new Label("entry"))); + am.add(new Element(new Label("who"), + new Value("Gilbert, W.S. | Sullivan, Arthur"))); + am.add(new Element(new Label("what"), + new Value("\rThe Yeoman of \rthe guard"))); + am.add(new Element(new Label("what"), + new Value("The Yeoman of\r\n the guard"))); + am.add(new Element(new Label("what"), + new Value("The Yeoman of \n\tthe guard"))); + am.add(new Element(new Label("what"), + new Value("The Yeoman of \r the guard"))); + am.add(new Element(new Label("when/created"), + new Value("1888"))); + logger.fine(am.toString()); + Map m = am.asMap(); + logger.fine(m.toString()); + } + + @Test + public void testEmptyRecord() throws Exception { + byte [] b = ANVLRecord.EMPTY_ANVL_RECORD.getUTF8Bytes(); + assertEquals(2, b.length); + assertEquals('\r', b[0]); + assertEquals('\n', b[1]); + } + + @Test + public void testFolding() throws Exception { + ANVLRecord am = new ANVLRecord(); + Exception e = null; + try { + am.addLabel("Label with \n in it"); + } catch (IllegalArgumentException iae) { + e = iae; + } + assertInstanceOf(IllegalArgumentException.class, e); + am.addLabelValue("label", "value with \n in it"); + } + + @Test + public void testParse() throws UnsupportedEncodingException, IOException { + String record = " a: b\r\n#c#\r\nc:d\r\n \t\t\r\t\n\te" + + "\r\nx:\r\n # z\r\n\r\n"; + ANVLRecord r = ANVLRecord.load(new ByteArrayInputStream( + record.getBytes(StandardCharsets.ISO_8859_1))); + 
logger.fine(r.toString()); + assertEquals("a: b", r.get(0).toString()); + record = " a: b\r\n\r\nsdfsdsdfds"; + r = ANVLRecord.load(new ByteArrayInputStream( + record.getBytes(StandardCharsets.ISO_8859_1))); + logger.fine(r.toString()); + record = "x:\r\n # z\r\ny:\r\n\r\n"; + r = ANVLRecord.load(new ByteArrayInputStream( + record.getBytes(StandardCharsets.ISO_8859_1))); + logger.fine(r.toString()); + assertEquals("x:", r.get(0).toString()); + } + + @Test + public void testExampleParse() + throws IOException { + final String sample = "entry:\t\t\r\n# first ###draft\r\n" + + "who:\tGilbert, W.S. | Sullivan, Arthur\r\n" + + "what:\tThe Yeoman of\r\n" + + "\t\tthe Guard\r\n" + + "when/created:\t 1888\r\n\r\n"; + ANVLRecord r = ANVLRecord.load(new ByteArrayInputStream( + sample.getBytes(StandardCharsets.ISO_8859_1))); + logger.fine(r.toString()); + } + + @Test + public void testPoundLabel() + throws IOException { + final String sample = "ent#ry:\t\t\r\n# first ###draft\r\n" + + "who:\tGilbert, W.S. | Sullivan, Arthur\r\n" + + "what:\tThe Yeoman of\r\n" + + "\t\tthe Guard\r\n" + + "when/created:\t 1888\r\n\r\n"; + ANVLRecord r = ANVLRecord.load(sample); + logger.fine(r.toString()); + } + + @Test + public void testNewlineLabel() + throws IOException { + final String sample = "ent\nry:\t\t\r\n# first ###draft\r\n" + + "who:\tGilbert, W.S. 
| Sullivan, Arthur\r\n" + + "what:\tThe Yeoman of\r\n" + + "\t\tthe Guard\r\n" + + "when/created:\t 1888\r\n\r\n"; + IllegalArgumentException iae = null; + try { + ANVLRecord.load(sample); + } catch(IllegalArgumentException e) { + iae = e; + } + assertNotNull(iae); + } +} diff --git a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java index 2c9d19e8..26d7a16d 100644 --- a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java +++ b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java @@ -4,18 +4,26 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.util.Locale; import org.archive.util.binsearch.impl.RandomAccessFileSeekableLineReaderFactory; import org.archive.util.iterator.CloseableIterator; +import org.junit.jupiter.api.Test; -import junit.framework.TestCase; +import static java.nio.charset.StandardCharsets.UTF_8; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; + +public class SortedTextFileTest { -public class SortedTextFileTest extends TestCase { private static String formatS(int i) { - return String.format("%07d",i); + return String.format(Locale.ROOT, "%07d", i); } - private void createFile(File target, int max) throws FileNotFoundException { - PrintWriter pw = new PrintWriter(target); + + private void createFile(File target, int max) throws FileNotFoundException, UnsupportedEncodingException { + PrintWriter pw = new PrintWriter(target, UTF_8.name()); for(int i = 0; i < max; i++) { pw.println(formatS(i)); } @@ -23,9 +31,9 @@ private void createFile(File target, int max) throws FileNotFoundException { pw.close(); } - + @Test public void testGetRecordIteratorStringBoolean() throws IOException { - File test = new File("/tmp/test.tmp"); + File test = File.createTempFile("test", 
null); int max = 1000000; createFile(test,max); RandomAccessFileSeekableLineReaderFactory factory = diff --git a/src/test/java/org/archive/util/iterator/CachingStringFilterTest.java b/src/test/java/org/archive/util/iterator/CachingStringFilterTest.java index 5b5be272..d35413cd 100644 --- a/src/test/java/org/archive/util/iterator/CachingStringFilterTest.java +++ b/src/test/java/org/archive/util/iterator/CachingStringFilterTest.java @@ -1,8 +1,9 @@ package org.archive.util.iterator; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class CachingStringFilterTest extends TestCase { +public class CachingStringFilterTest { + @Test public void testCache() { StringFilter tf = new StringFilter() { public boolean isFiltered(String text) { diff --git a/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java b/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java index 0c0dce6d..6d5685ad 100644 --- a/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java +++ b/src/test/java/org/archive/util/iterator/FilterStringIteratorTest.java @@ -5,12 +5,15 @@ import java.util.List; import java.util.TreeSet; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class FilterStringIteratorTest extends TestCase { +import static org.junit.jupiter.api.Assertions.*; - public void t2estHasNext() { - String blocks[] = {"a","ab","ba","cc"}; +public class FilterStringIteratorTest { + + @Test + public void testHasNext() { + String[] blocks = {"a","ab","ba","cc"}; List bl = Arrays.asList(blocks); TransformingPrefixStringFilter f = new TransformingPrefixStringFilter(bl); @@ -26,23 +29,20 @@ public void t2estHasNext() { assertBlocked(true,"cc",f); assertBlocked(true,"cca",f); } - + + @Test public void testTreeSet() { - String blocks[] = {"a","ab","ba","cc"}; + String[] blocks = {"a","ab","ba","cc"}; TreeSet s = TransformingPrefixStringFilter.makeTreeSet(Arrays.asList(blocks),null); 
assertTrue(s.contains("a")); assertFalse(s.contains("ab")); - String blocks2[] = {"ab","a","ba","cc"}; + String[] blocks2 = {"ab","a","ba","cc"}; TreeSet s2 = TransformingPrefixStringFilter.makeTreeSet(Arrays.asList(blocks2),null); assertTrue(s2.contains("a")); assertFalse(s2.contains("ab")); - - - } - - + private void assertBlocked(boolean blocked, String s, StringFilter f) { ArrayList l = new ArrayList(); l.add(s); diff --git a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java index f1c2a0ec..fa1213f7 100644 --- a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java +++ b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java @@ -2,42 +2,41 @@ import java.io.BufferedReader; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileReader; +import java.io.IOException; +import java.io.InputStreamReader; import java.io.PrintWriter; import java.util.Comparator; -import junit.framework.TestCase; +import org.junit.jupiter.api.Test; -public class SortedCompositeIteratorTest extends TestCase { +import static java.nio.charset.StandardCharsets.UTF_8; - public void testHasNext() throws FileNotFoundException { +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class SortedCompositeIteratorTest { + + @Test + public void testHasNext() throws FileNotFoundException, IOException { - long t = 210000; - long c = 134; - float f = (float)c / (float)t; - System.err.format("F(%f)\n",f); + File a = File.createTempFile("filea", null); + File b = File.createTempFile("fileb", null); - File a = new File("/tmp/a"); - File b = new File("/tmp/b"); - if(a.isFile()) { - a.delete(); - } - if(b.isFile()) { - b.delete(); - } - PrintWriter apw = new PrintWriter(a); - PrintWriter bpw = new PrintWriter(b); + PrintWriter apw = new 
PrintWriter(a, UTF_8.name()); + PrintWriter bpw = new PrintWriter(b, UTF_8.name()); apw.println("1"); apw.println("3"); bpw.println("2"); bpw.println("4"); apw.close(); bpw.close(); - BufferedReader abr = new BufferedReader(new FileReader(a)); - BufferedReader bbr = new BufferedReader(new FileReader(b)); + BufferedReader abr = new BufferedReader(new InputStreamReader(new FileInputStream(a), UTF_8)); + BufferedReader bbr = new BufferedReader(new InputStreamReader(new FileInputStream(b), UTF_8)); SortedCompositeIterator sci = new SortedCompositeIterator(new Comparator() { + @Override public int compare(String o1, String o2) { return o1.compareTo(o2); } diff --git a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java index d3dc1ff6..f53befd3 100644 --- a/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java +++ b/src/test/java/org/archive/util/zip/GZIPMembersInputStreamTest.java @@ -21,24 +21,25 @@ import java.io.ByteArrayInputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.Random; -import junit.framework.TestCase; - import org.apache.commons.io.IOUtils; import org.archive.util.ArchiveUtils; -import org.archive.util.zip.GZIPMembersInputStream; -import com.google.common.io.NullOutputStream; +import com.google.common.io.ByteStreams; import com.google.common.primitives.Bytes; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; /** * Tests for GZIPMembersInputStream - * @contributor gojomo + * @author gojomo * @version $ $ */ -public class GZIPMembersInputStreamTest extends TestCase { +public class GZIPMembersInputStreamTest { byte[] noise1k_gz; byte[] noise32k_gz; byte[] a_gz; @@ -54,8 +55,8 @@ public class GZIPMembersInputStreamTest extends TestCase { buf = new byte[32*1024]; rand.nextBytes(buf); noise32k_gz = ArchiveUtils.gzip(buf); - a_gz = 
ArchiveUtils.gzip("a".getBytes("ASCII")); - hello_gz = ArchiveUtils.gzip("hello".getBytes("ASCII")); + a_gz = ArchiveUtils.gzip("a".getBytes(StandardCharsets.US_ASCII)); + hello_gz = ArchiveUtils.gzip("hello".getBytes(StandardCharsets.US_ASCII)); allfour_gz = Bytes.concat(noise1k_gz,noise32k_gz,a_gz,hello_gz); sixsmall_gz = Bytes.concat(a_gz,hello_gz,a_gz,hello_gz,a_gz,hello_gz); } catch (IOException e) { @@ -63,167 +64,169 @@ public class GZIPMembersInputStreamTest extends TestCase { } } - public static void main(String [] args) { - junit.textui.TestRunner.run(GZIPMembersInputStreamTest.class); - } - + @Test public void testFullReadAllFour() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); - int count = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong length uncompressed data", 1024+(32*1024)+1+5, count); + int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(1024+(32*1024)+1+5, count, "wrong length uncompressed data"); } - + + @Test public void testFullReadSixSmall() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz)); - int count = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong length uncompressed data", 1+5+1+5+1+5, count); + int count = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(1+5+1+5+1+5, count, "wrong length uncompressed data"); } - + + @Test public void testReadPerMemberAllFour() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); - int count0 = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong 1k member count", 1024, count0); - assertEquals("wrong member number", 0, gzin.getMemberNumber()); - assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); - assertEquals("wrong member0 end", noise1k_gz.length, 
gzin.getCurrentMemberEnd()); + int count0 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(1024, count0, "wrong 1k member count"); + assertEquals(0, gzin.getMemberNumber(), "wrong member number"); + assertEquals(0, gzin.getCurrentMemberStart(), "wrong member0 start"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberEnd(), "wrong member0 end"); gzin.nextMember(); - int count1 = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong 32k member count", (32*1024), count1); - assertEquals("wrong member number", 1, gzin.getMemberNumber()); - assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd()); + int count1 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals((32*1024), count1, "wrong 32k member count"); + assertEquals(1, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberStart(), "wrong member1 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd(), "wrong member1 end"); gzin.nextMember(); - int count2 = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong 1-byte member count", 1, count2); - assertEquals("wrong member number", 2, gzin.getMemberNumber()); - assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(1, count2, "wrong 1-byte member count"); + assertEquals(2, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart(), "wrong member2 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd(), "wrong member2 end"); 
gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong 5-byte member count", 5, count3); - assertEquals("wrong member number", 3, gzin.getMemberNumber()); - assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(5, count3, "wrong 5-byte member count"); + assertEquals(3, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart(), "wrong member3 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd(), "wrong member3 end"); gzin.nextMember(); - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong eof count", 0, countEnd); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(0, countEnd, "wrong eof count"); } - + + @Test public void testReadPerMemberSixSmall() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(sixsmall_gz)); gzin.setEofEachMember(true); for(int i = 0; i < 3; i++) { - int count2 = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong 1-byte member count", 1, count2); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(1, count2, "wrong 1-byte member count"); gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong 5-byte member count", 5, count3); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(5, count3, "wrong 5-byte member count"); gzin.nextMember(); } - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong eof count", 0, countEnd); + int 
countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(0, countEnd, "wrong eof count"); } - + @Test public void testByteReadPerMember() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); int count0 = 0; while(gzin.read()>-1) count0++; - assertEquals("wrong 1k member count", 1024, count0); - assertEquals("wrong member number", 0, gzin.getMemberNumber()); - assertEquals("wrong member0 start", 0, gzin.getCurrentMemberStart()); - assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(1024, count0, "wrong 1k member count"); + assertEquals(0, gzin.getMemberNumber(), "wrong member number"); + assertEquals(0, gzin.getCurrentMemberStart(), "wrong member0 start"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberEnd(), "wrong member0 end"); gzin.nextMember(); int count1 = 0; while(gzin.read()>-1) count1++; - assertEquals("wrong 32k member count", (32*1024), count1); - assertEquals("wrong member number", 1, gzin.getMemberNumber()); - assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd()); + assertEquals((32*1024), count1, "wrong 32k member count"); + assertEquals(1, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberStart(), "wrong member1 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd(), "wrong member1 end"); gzin.nextMember(); int count2 = 0; while(gzin.read()>-1) count2++; - assertEquals("wrong 1-byte member count", 1, count2); - assertEquals("wrong member number", 2, gzin.getMemberNumber()); - assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, 
gzin.getCurrentMemberEnd()); + assertEquals(1, count2, "wrong 1-byte member count"); + assertEquals(2, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart(), "wrong member2 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd(), "wrong member2 end"); gzin.nextMember(); int count3 = 0; while(gzin.read()>-1) count3++; - assertEquals("wrong 5-byte member count", 5, count3); - assertEquals("wrong member number", 3, gzin.getMemberNumber()); - assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(5, count3, "wrong 5-byte member count"); + assertEquals(3, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart(), "wrong member3 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd(), "wrong member3 end"); gzin.nextMember(); int countEnd = 0; while(gzin.read()>-1) countEnd++; - assertEquals("wrong eof count", 0, countEnd); + assertEquals(0, countEnd, "wrong eof count"); } - + + @Test public void testMemberSeek() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); gzin.setEofEachMember(true); gzin.compressedSeek(noise1k_gz.length+noise32k_gz.length); - int count2 = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong 1-byte member count", 1, count2); + int count2 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(1, count2, "wrong 1-byte member count"); // assertEquals("wrong Member number", 2, gzin.getMemberNumber()); - assertEquals("wrong Member2 start", noise1k_gz.length+noise32k_gz.length, 
gzin.getCurrentMemberStart()); - assertEquals("wrong Member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart(), "wrong Member2 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd(), "wrong Member2 end"); gzin.nextMember(); - int count3 = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong 5-byte member count", 5, count3); + int count3 = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(5, count3, "wrong 5-byte member count"); // assertEquals("wrong Member number", 3, gzin.getMemberNumber()); - assertEquals("wrong Member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong Member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart(), "wrong Member3 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd(), "wrong Member3 end"); gzin.nextMember(); - int countEnd = IOUtils.copy(gzin, new NullOutputStream()); - assertEquals("wrong eof count", 0, countEnd); + int countEnd = IOUtils.copy(gzin, ByteStreams.nullOutputStream()); + assertEquals(0, countEnd, "wrong eof count"); } @SuppressWarnings("deprecation") + @Test public void testMemberIterator() throws IOException { GZIPMembersInputStream gzin = new GZIPMembersInputStream(new ByteArrayInputStream(allfour_gz)); Iterator iter = gzin.memberIterator(); assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember0 = iter.next(); - int count0 = IOUtils.copy(gzMember0, new NullOutputStream()); - assertEquals("wrong 1k member count", 1024, count0); - assertEquals("wrong member number", 0, gzin.getMemberNumber()); - assertEquals("wrong member0 start", 0, 
gzin.getCurrentMemberStart()); - assertEquals("wrong member0 end", noise1k_gz.length, gzin.getCurrentMemberEnd()); + int count0 = IOUtils.copy(gzMember0, ByteStreams.nullOutputStream()); + assertEquals(1024, count0, "wrong 1k member count"); + assertEquals(0, gzin.getMemberNumber(), "wrong member number"); + assertEquals(0, gzin.getCurrentMemberStart(), "wrong member0 start"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberEnd(), "wrong member0 end"); assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember1 = iter.next(); - int count1 = IOUtils.copy(gzMember1, new NullOutputStream()); - assertEquals("wrong 32k member count", (32*1024), count1); - assertEquals("wrong member number", 1, gzin.getMemberNumber()); - assertEquals("wrong member1 start", noise1k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member1 end", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd()); + int count1 = IOUtils.copy(gzMember1, ByteStreams.nullOutputStream()); + assertEquals((32*1024), count1, "wrong 32k member count"); + assertEquals(1, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length, gzin.getCurrentMemberStart(), "wrong member1 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberEnd(), "wrong member1 end"); assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember2 = iter.next(); - int count2 = IOUtils.copy(gzMember2, new NullOutputStream()); - assertEquals("wrong 1-byte member count", 1, count2); - assertEquals("wrong member number", 2, gzin.getMemberNumber()); - assertEquals("wrong member2 start", noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member2 end", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd()); + int count2 = IOUtils.copy(gzMember2, ByteStreams.nullOutputStream()); + assertEquals(1, count2, "wrong 1-byte member count"); + assertEquals(2, gzin.getMemberNumber(), "wrong member number"); + 
assertEquals(noise1k_gz.length+noise32k_gz.length, gzin.getCurrentMemberStart(), "wrong member2 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberEnd(), "wrong member2 end"); assertTrue(iter.hasNext()); GZIPMembersInputStream gzMember3 = iter.next(); - int count3 = IOUtils.copy(gzMember3, new NullOutputStream()); - assertEquals("wrong 5-byte member count", 5, count3); - assertEquals("wrong member number", 3, gzin.getMemberNumber()); - assertEquals("wrong member3 start", noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart()); - assertEquals("wrong member3 end", noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd()); + int count3 = IOUtils.copy(gzMember3, ByteStreams.nullOutputStream()); + assertEquals(5, count3, "wrong 5-byte member count"); + assertEquals(3, gzin.getMemberNumber(), "wrong member number"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length, gzin.getCurrentMemberStart(), "wrong member3 start"); + assertEquals(noise1k_gz.length+noise32k_gz.length+a_gz.length+hello_gz.length, gzin.getCurrentMemberEnd(), "wrong member3 end"); assertFalse(iter.hasNext()); } diff --git a/src/test/resources/forbidden-apis-signatures.txt b/src/test/resources/forbidden-apis-signatures.txt new file mode 100644 index 00000000..1eda9eec --- /dev/null +++ b/src/test/resources/forbidden-apis-signatures.txt @@ -0,0 +1,2 @@ +java.net.URL#equals(java.lang.Object) @ may trigger a DNS lookup to resolve the host part +java.net.URL#hashCode() @ may trigger a DNS lookup to resolve the host part diff --git a/src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc b/src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc new file mode 100644 index 00000000..3cbffb81 --- /dev/null +++ b/src/test/resources/org/archive/format/arc/IAH-20080430204825-00000-blackbook-truncated.arc @@ -0,0 +1,1006 @@ 
+filedesc://IAH-20080430204825-00000-blackbook-truncated.arc 0.0.0.0 20080430204825 text/plain 1300 +1 1 InternetArchive +URL IP-address Archive-date Content-type Archive-length + + +Heritrix @VERSION@ http://crawler.archive.org +blackbook +192.168.1.13 +archive.org-shallow +archive.org shallow +Admin +2008-04-30T20:48:24+00:00 +Mozilla/5.0 (compatible; heritrix/1.14.0 +http://crawler.archive.org) +archive-crawler-agent@lists.sourceforge.net +classic +ARC file version 1.1 +http://www.archive.org/web/researcher/ArcFileFormat.php + +dns:www.archive.org 68.87.76.178 20080430204825 text/dns 56 +20080430204825 +www.archive.org. 589 IN A 207.241.229.39 +http://www.archive.org/robots.txt 207.241.229.39 20080430204825 text/plain 782 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:24 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Sat, 02 Feb 2008 19:40:44 GMT +ETag: "47c3-1d3-11134700" +Accept-Ranges: bytes +Content-Length: 467 +Connection: close +Content-Type: text/plain; charset=UTF-8 + +############################################## +# +# Welcome to the Archive! +# +############################################## +# Please crawl our files. +# We appreciate if you can crawl responsibly. +# Stay open! +############################################## +User-agent: * +Disallow: /nothing---please-crawl-us-- + +# slow down the ask jeeves crawler which was hitting our SE a little too fast +# via collection pages. --Feb2008 tracey-- +User-agent: Teoma +Crawl-Delay: 10 +http://www.archive.org/ 207.241.229.39 20080430204826 text/html 680 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:25 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Wed, 09 Jan 2008 23:18:29 GMT +ETag: "47ac-16e-4f9e5b40" +Accept-Ranges: bytes +Content-Length: 366 +Connection: close +Content-Type: text/html; charset=UTF-8 + + + + + + + +
    +Please visit our website at: +http://www.archive.org + + +http://www.archive.org/index.php 207.241.229.39 20080430204826 text/html 29000 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:25 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +X-Powered-By: PHP/5.0.5-2ubuntu1.4 +Set-Cookie: PHPSESSID=657fa9749e9426f2ffa75f14b54ed4ac; path=/; domain=.archive.org +Connection: close +Content-Type: text/html; charset=UTF-8 + + + + + + + Internet Archive + + + + + + + + + + + + +
    + (logo) + + + + + + +
    + +Web | +Moving Images | +Texts | +Audio | +Software | +Education | +Patron Info | +About IA
    Forums | FAQs | Contributions | Jobs | Donate
    + + + + + +
    +

    + Search: + + + + + + + +

    +
    +
    +
    + (navigation image)
    +
    + + + + + + + + + + + + + + + + +
    UploadAnonymous User (login or join us) 
       +
    Announcements (more)
    +
       +
    Web85 billion pages
    +
    +
    +
    + + + + + + + + +
    + (wayback logo) + + +
    + + + + Advanced Search + +
    +
    +
    +
       +
    Welcome to the ArchiveSee recent additions in RSS
    +
    +
    +The Internet Archive is building a digital library of Internet + sites and other cultural artifacts in digital form. Like a paper + library, we provide free access to researchers, historians, + scholars, and the general public.
    +
      
    + +

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
       +
    Moving Images
     115,646 movies
    movies icon
    +
    +
    +
    Browse   + + (by keyword)
    +
       +
    Live Music Archive
     48,893 concerts
    etree icon
    +
    +
    +
    Browse   + + (by band)
    +
       +
    Audio
     250,854 recordings
    audio icon
    +
    +
    +
    Browse   + + (by keyword)
    +
       +
    Texts
     395,004 texts
    texts icon
    +
    +
    +
    Browse   + + (by keyword)
    +
      
       +
    Curator's Choice (more)
    +
    (movies pick)
    A Few Good G-Men
    Randall Glass, the maker of "Warthog Jump," re-creates in "A Few Good G-Men" an entire scene from...
    +
       +
    Curator's Choice (more)
    +
    (etree pick)
    Grateful Dead Live at Nashville Municipal...
    Set 1 Sugaree Beat It On Down The Line Candyman Me And My Uncle -> Big River Stagger Lee Looks Like...
    +
       +
    Curator's Choice (more)
    +
    (audio pick)
    Zanstones - Slaakhuis: Live in Rotterdam, Holland
    Zanstones confuses the dutch masses with this live display of wacked rhythms, whacked vocals, and...
    +
       +
    Curator's Choice (more)
    +
      
       +
    Recent Reviews
    +
       +
    Recent Reviews
    +
       +
    Recent Reviews
    +
       +
    Recent Reviews
    +
      
    + + +
    + + + +
       +
    + + + + + + + + + + + + + +
    Most recent posts (write a post by going to a forum) more...
    Subject Poster Forum RepliesViewsDate
    Re: Making a mix for a chick I know... William Tell GratefulDead 0 6 20 minutes ago
    Re: Bob's shorts not going into archives BobsShortShorts GratefulDead 0 9 26 minutes ago
    Re: Thanks to All airgarcia416 GratefulDead 0 5 26 minutes ago
    Re: Bob's shorts not going into archives sydthecat2 GratefulDead 0 8 36 minutes ago
    Re: What is the worst-reviewed feature film on IA? RipJarvis feature_films 0 9 50 minutes ago
    Re: Playin' In The Band...all day and all night sydthecat2 GratefulDead 0 11 58 minutes ago
    Re: Playin' In The Band...all day and all night rastamon GratefulDead 0 16 1 hour ago
    Re: Making a mix for a chick I know... caspersvapors GratefulDead 1 11 1 hour ago
    Re: Bob's shorts not going into archives rastamon GratefulDead 0 11 1 hour ago
    Re: Bob's shorts not going into archives bluedevil GratefulDead 1 13 1 hour ago
    +
      
    + + +
      + 
    + + +
    +

    Skin: classic | columns | custom!
    + + Terms of Use (10 Mar 2001) +

    + +http://www.archive.org/images/logoc.jpg 207.241.229.39 20080430204829 image/jpeg 1963 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:28 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Mon, 16 Jun 2003 22:28:51 GMT +ETag: "34dc-67e-2ed02ec0" +Accept-Ranges: bytes +Content-Length: 1662 +Connection: close +Content-Type: image/jpeg + +JFIFddAdobe ImageReadyDucky<Adobed   + + + +     8F !1AQqa"B2R#Sc$T%'!1qAa2"B3 ?P@a@€ Pj,($@ %i Q6;eH0Yz[,3TRhL0AR:(cq ?0SBrJҋ$3&9BAPH +ƱUOAv_O77\Q]Ɣ,) +R7ŠU4ٗшeB:%n'Eq y- )H[%TR{;4*26n.IQp7;|-F8N}|tInތ}RDwPΡ1&L`{Ԋި'w Jb$ I>b] +-z;%ԭKY<*sjJ=}.?]Qn*bg?Fǟ/fi__:V۪?'\xdP5GKu:㶱罠~jcas&AsZdX +Pؑ̚G-,VoC/#%>TwIDmr9%'F $O?w}OFӋ*#{%Zy +W rs]2Ƅ&.5)ISd)7J[G}IYGMͪި,*4oP ʱjzJP17 p%]ɁiK31kAiP$90kVD1wmĞ"F2rXmˤFxp_ƩO.=ίsm|j}K~`e)Ru ^ַVPC%J> ʽ4}<6K +xfv"_2[?ۺ@ *4p3Q{rJ5yk'>c 艂]pCSjyH*O:`<)w@ݖnߟzᦃ寑Ĭf3d2c1c1b0a0^/\.Z-W+e3c2a1_0[.Y-W,V+h5h7l9l;n=p?pArCtFvIyLzL{N}R}R~SԀUՁW׆]׈`َiےn۔pܖsޚxޜ{ߝ|ࢃ⦈⧉岘洛縟躣ȵf5h7n?䮔!,@Coz0>Wq& y'HAFp@Ǔ(=.YB$ q'M/0q@A¨&QR48In``iC +1܄礞=V04Sn\"mxTQPA a$ +pI$[$R C㕐IUHYc̖-_Ȍ):l(q>)t' ׉ Dh3{% ,&d6D3H`(GC4JP@C;http://www.archive.org/images/star.png 207.241.229.39 20080430204830 image/png 564 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:29 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +Last-Modified: Sun, 27 Feb 2005 21:35:31 GMT +ETag: "358d-109-f15f4ec0" +Accept-Ranges: bytes +Content-Length: 265 +Connection: close +Content-Type: image/png + +PNG + + IHDR e^|3PLTEXtRNS@fbKGDH pHYs  tIME; BFoOIDATUQ0P霓?P1~@a 9?C31aѻ__ucTCԑJٕ4xW{2~aIENDB`http://www.archive.org/services/collection-rss.php 207.241.229.39 20080430204830 text/xml 50832 +HTTP/1.1 200 OK +Date: Wed, 30 Apr 2008 20:48:29 GMT +Server: Apache/2.0.54 (Ubuntu) PHP/5.0.5-2ubuntu1.4 mod_ssl/2.0.54 OpenSSL/0.9.7g +X-Powered-By: PHP/5.0.5-2ubuntu1.4 +Connection: close +Content-Type: text/xml + + + + + http://www.archive.org + Internet Archive + The most recent additions to the Internet 
Archive collections. This RSS feed is generated dynamically + tracey@archive.org + Wed, 30 Apr 2008 20:48:29 GMT + + http://www.archive.org/images/logo.jpg + Internet Archive + http://www.archive.org + + + ArtTECHtonic 5 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Arttechtonic5&mediatype=audio&collection=opensource_audio"/><p>An interview with Gretchen Wagner, General Counsel and Secretary of ARTstor on Fair Use.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/Arttechtonic5 + http://www.archive.org/details/Arttechtonic5 + Wed, 30 Apr 2008 20:44:20 GMT + http://creativecommons.org/licenses/publicdomain/ + audio/opensource_audio + + fair_use, ARTstor, libraries + + + ۩۞۩ جبريل يسأل والنبى يجيب - ترجمه الأمام مسلم (30-4-2008)۩۞۩ للشيخ محمد حسان ۩۞۩ + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=55322&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: 24Kbps MP3, Cinepack, Metadata</p> + http://www.archive.org/details/55322 + http://www.archive.org/details/55322 + Wed, 30 Apr 2008 20:43:16 GMT + movies/opensource_movies + + + + alsrdaab + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=alsrdaab_125&mediatype=Other&collection=ourmedia"/><p>alsrdaab.</p><p>This item belongs to: Other/ourmedia.</p><p>This item has files of the following types: Metadata, ZIP</p> + http://www.archive.org/details/alsrdaab_125 + http://www.archive.org/details/alsrdaab_125 + Wed, 30 Apr 2008 20:43:01 GMT + Other/ourmedia + + alsrdaab + + + DOC-DEBUT: Super Amigos 
+ <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=linktv_superamigos20080430&mediatype=movies&collection=opensource_movies"/><p>This action filled documentary follows five real-life "social wrestlers" in Mexico City who have capitalized on the popularity of Mexico's larger than life Lucha Libre wrestlers to fight for social justice rather than trophies..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: FLV 400k, MPEG4 350Kb, MPEG4 60Kb, Metadata</p> + http://www.archive.org/details/linktv_superamigos20080430 + http://www.archive.org/details/linktv_superamigos20080430 + Wed, 30 Apr 2008 20:41:22 GMT + movies/opensource_movies + + + + erwews + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=reit987erfed&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Windows Media</p> + http://www.archive.org/details/reit987erfed + http://www.archive.org/details/reit987erfed + Wed, 30 Apr 2008 20:40:57 GMT + movies/opensource_movies + + + + quran + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=mohadart&mediatype=movies&collection=opensource_movies"/><p>walo had sa3a.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Unknown</p> + http://www.archive.org/details/mohadart + http://www.archive.org/details/mohadart + Wed, 30 Apr 2008 20:38:31 GMT + movies/opensource_movies + + + + asdas + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=asdas_725&mediatype=Image&collection=ourmedia"/><p>saa.</p><p>This item belongs to: 
Image/ourmedia.</p><p>This item has files of the following types: Metadata</p> + http://www.archive.org/details/asdas_725 + http://www.archive.org/details/asdas_725 + Wed, 30 Apr 2008 20:37:02 GMT + Image/ourmedia + + saas + + + nibrasukul + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=nibrasukul&mediatype=texts&collection=opensource"/><p>nibrasukul.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Metadata, PDF</p> + http://www.archive.org/details/nibrasukul + http://www.archive.org/details/nibrasukul + Wed, 30 Apr 2008 20:36:56 GMT + texts/opensource + + nibrasukul + + + rtyed + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=hyu121&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Windows Media Audio</p> + http://www.archive.org/details/hyu121 + http://www.archive.org/details/hyu121 + Wed, 30 Apr 2008 20:36:53 GMT + movies/opensource_movies + + + + remomberfiler58.info + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=uictfwt&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Unknown</p> + http://www.archive.org/details/uictfwt + http://www.archive.org/details/uictfwt + Wed, 30 Apr 2008 20:21:58 GMT + movies/opensource_movies + + + + The committing magistrate, a treatise on the arrest, examination, bailing, and commitment of offenders, including fugitives from justice, with the remedial features of the writs of habeas corpus, certiorari, mandamus, and prohibition + <img width="160" style="padding-right:3px;float:left;" 
src="http://www.archive.org/services/get-item-image.php?identifier=committingmagist00flam&mediatype=texts&collection=americana"/><p>No description available.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, Grayscale LuraTech PDF, Metadata, Single Page Library JP2 ZIP, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Single Page Watermark JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/committingmagist00flam + http://www.archive.org/details/committingmagist00flam + Wed, 30 Apr 2008 03:14:26 GMT + texts/americana + + Police magistrates -- New York (State), Habeas corpus, Mandamus, Prohibition (Law), Appellate procedure -- New York (State) + + + Papers and addresses + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=papersaddresses00bras&mediatype=texts&collection=americana"/><p>No description available.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, Grayscale LuraTech PDF, Metadata, Single Page Library JP2 ZIP, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Single Page Watermark JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/papersaddresses00bras + http://www.archive.org/details/papersaddresses00bras + Wed, 30 Apr 2008 01:11:05 GMT + texts/americana + + Imperial federation, Great Britain -- Colonies + + + European years; the letters of an idle man + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=europeanyearslet00warn&mediatype=texts&collection=americana"/><p>No description available.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, 
Grayscale LuraTech PDF, Metadata, Single Page Library JP2 ZIP, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Single Page Watermark JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/europeanyearslet00warn + http://www.archive.org/details/europeanyearslet00warn + Tue, 29 Apr 2008 16:43:31 GMT + texts/americana + + Europe -- Description and travel, United States -- Description and travel + + + Cable and satellite carrier compulsory licenses : hearing before the Subcommittee on Intellectual Property and Judicial Administration of the Committee on the Judiciary, House of Representatives, One Hundred Third Congress, first session, on H.R. 759 and H.R. 1103 ... March 17, 1993 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=cablesatelliteca00unit&mediatype=texts&collection=americana"/><p>Includes bibliographical references.</p><p>This item belongs to: texts/americana.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, DjVu, DjVuTXT, Djvu XML, Flippy ZIP, Grayscale LuraTech PDF, Metadata, Single Page Original JP2 Tar, Single Page Processed JP2 ZIP, Standard LuraTech PDF</p> + http://www.archive.org/details/cablesatelliteca00unit + http://www.archive.org/details/cablesatelliteca00unit + Tue, 29 Apr 2008 16:28:47 GMT + texts/americana + + Cable television -- Licenses United States, Direct broadcast satellite television -- Licenses United States + + + Leinender + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Leinender&mediatype=Other&collection=ourmedia"/><p>TSOP Leinender.</p><p>This item belongs to: Other/ourmedia.</p><p>This item has files of the following types: Metadata, ZIP</p> + http://www.archive.org/details/Leinender + http://www.archive.org/details/Leinender + Tue, 29 Apr 2008 06:59:06 GMT + Other/ourmedia + + TSOP + + + tribute + <img width="160" 
style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=tribute_488&mediatype=audio&collection=opensource_audio"/><p>Tribute Yanni music Vocal: Nathan-Pacheco.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/tribute_488 + http://www.archive.org/details/tribute_488 + Tue, 29 Apr 2008 06:53:43 GMT + audio/opensource_audio + + Yanni, Tribute + + + La Voz de Brasil #012: Efemerides 2008 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=RodrigoDubLaVozdeBrasil_012_Efemerides2008&mediatype=Audio&collection=ourmedia"/><p>¡Ogum yê! En el año en que el Sobrevivendo no Inferno de los Racionais MC's completa su decimo cumpleaño, la Voz de Brasil presenta algunas otras efemérides - empezando por João Gilberto y la primera invención de Brasil (la segunda fué del maestro Jorge Ben, pero eso queda para otro programa....</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 160Kbps MP3, 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/RodrigoDubLaVozdeBrasil_012_Efemerides2008 + http://www.archive.org/details/RodrigoDubLaVozdeBrasil_012_Efemerides2008 + Tue, 29 Apr 2008 06:53:09 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + musica, brasil, brasileña, brazilian, music, psicodelia, psychodelic + + + Doubleknit Podcast #1 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=DoubleknitPodcast1&mediatype=audio&collection=opensource_audio"/><p>Debut podcast of the Doubleknit Twins..</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 
64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/DoubleknitPodcast1 + http://www.archive.org/details/DoubleknitPodcast1 + Tue, 29 Apr 2008 06:51:57 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/ + audio/opensource_audio + + knit, knitting, yarn, seattle + + + etceterapodcast37 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=etceterapodcast37_831&mediatype=audio&collection=opensource_audio"/><p>Restaurantes repetidos, los lenguages que se pierden, trencito de gendarmería, trailer de la hamburgesa perfecta, la película fué Shaun of The Dead, guerra en la convención de Taekwondo, pez globo, hara-kiris, océanos del mundo, el efecto Rocky y un Adam Sandler, el punto en que las a....</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/etceterapodcast37_831 + http://www.archive.org/details/etceterapodcast37_831 + Tue, 29 Apr 2008 06:49:31 GMT + http://creativecommons.org/licenses/by-nc-nd/2.5/ar/ + audio/opensource_audio + + Anhdres, Andres, Nahuel, Etcetera, Etc, Podcast, Español, Spanish, Buenos Aires, Argentina + + + midoz + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=22008-04-02.wwe0002&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, Real Media</p> + http://www.archive.org/details/22008-04-02.wwe0002 + http://www.archive.org/details/22008-04-02.wwe0002 + Tue, 29 Apr 2008 06:47:41 GMT + movies/opensource_movies + + + + music9 + <img width="160" style="padding-right:3px;float:left;" 
src="http://www.archive.org/services/get-item-image.php?identifier=music9&mediatype=audio&collection=opensource_audio"/><p>music9.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/music9 + http://www.archive.org/details/music9 + Tue, 29 Apr 2008 06:45:18 GMT + audio/opensource_audio + + music9 + + + desire + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=desire_741&mediatype=audio&collection=opensource_audio"/><p>Desire YANNI music Vocal: Ender-Thomas.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/desire_741 + http://www.archive.org/details/desire_741 + Tue, 29 Apr 2008 06:44:53 GMT + audio/opensource_audio + + Desire * Yanni * Ender-Thomas + + + Live at Nelson Ledges Quarry Park on 2008-04-25 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=eh2008-04-25.dubsbd.16441&mediatype=etree&collection=EkoostikHookah"/><p>Set 1 (Disc 1): 01. Chicago-> 02. Hookahville-> 03. Chicago 04. Mississippi Steamboat 05. Sure Cure For the Blues* 06. Mexican Opera* 07. Sail Away 08. Washboard Annie 09. Serpentine 10. 
Tumblin' Set 2 (Disc 2): 01....</p><p>This item belongs to: etree/EkoostikHookah.</p><p>This item has files of the following types: Flac, Flac FingerPrint, Metadata, Text</p> + http://www.archive.org/details/eh2008-04-25.dubsbd.16441 + http://www.archive.org/details/eh2008-04-25.dubsbd.16441 + Tue, 29 Apr 2008 06:44:36 GMT + etree/EkoostikHookah + + + + Live at Nelson Ledges Quarry Park on 2008-04-26 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=eh2008-04-26.dubsbd.16441&mediatype=etree&collection=EkoostikHookah"/><p>Set 1 (Disc 1): 01. Right Back Out in the Streets 02. Utopia 03. The Devil & Me 04. When the Sun Goes Down 05. Rocketman 06. Stuck In the Snow 07. John Henry 08. Green 09. Shadane Set 2 (Disc 2): 01. Ecstasy 02....</p><p>This item belongs to: etree/EkoostikHookah.</p><p>This item has files of the following types: Flac, Flac FingerPrint, Metadata, Text</p> + http://www.archive.org/details/eh2008-04-26.dubsbd.16441 + http://www.archive.org/details/eh2008-04-26.dubsbd.16441 + Tue, 29 Apr 2008 06:44:23 GMT + etree/EkoostikHookah + + + + Burn C.C. + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=BurnC.c&mediatype=movies&collection=opensource_movies"/><p>Burn C.C..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash Video, Metadata, QuickTime, Thumbnail</p> + http://www.archive.org/details/BurnC.c + http://www.archive.org/details/BurnC.c + Tue, 29 Apr 2008 06:43:49 GMT + movies/opensource_movies + + + + + + + Burn C.C. 
+ + + TOTD 29 APRIL JAM 17 - DEWI SHINTAW ATY - ICHSANUDIN NOORSY - MANTAN ANGGOTA DPR - PEMERASAN OLEH JAKSA + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=visioning_641&mediatype=audio&collection=opensource_audio"/><p>visioning indonesia.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/visioning_641 + http://www.archive.org/details/visioning_641 + Tue, 29 Apr 2008 06:43:36 GMT + audio/opensource_audio + + pasfm + + + IrationVibrationShow-4-27-08-p3 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=IrationVibrationShow-4-27-08-p3&mediatype=audio&collection=opensource_audio"/><p>pt3 of.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/IrationVibrationShow-4-27-08-p3 + http://www.archive.org/details/IrationVibrationShow-4-27-08-p3 + Tue, 29 Apr 2008 06:43:04 GMT + http://creativecommons.org/licenses/publicdomain/ + audio/opensource_audio + + iration, vibration, luciano, toots, promo + + + Birthday Party + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=BirthdayParty&mediatype=movies&collection=opensource_movies"/><p>Home movie of a birthday party and travels through India from the late 1960s. 
From Lostinlight.org.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Video, 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash Video, MPEG1, MPEG2, Metadata, Thumbnail</p> + http://www.archive.org/details/BirthdayParty + http://www.archive.org/details/BirthdayParty + Tue, 29 Apr 2008 06:41:45 GMT + http://creativecommons.org/licenses/by-nc/3.0/ + movies/opensource_movies + + + + + + + + + + + rADIO + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=rADIO_538&mediatype=audio&collection=opensource_audio"/><p>rADIO rADIO rADIO rADIO rADIO.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/rADIO_538 + http://www.archive.org/details/rADIO_538 + Tue, 29 Apr 2008 06:40:23 GMT + audio/opensource_audio + + rADIO rADIO V + + + desire + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=desire_583&mediatype=audio&collection=opensource_audio"/><p>Desire Yanni music Vocal: Ender-Thomas.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/desire_583 + http://www.archive.org/details/desire_583 + Tue, 29 Apr 2008 06:39:11 GMT + audio/opensource_audio + + Yanni * Desire + + + TOTD 29 APRIL JAM 12 - DEWI SHINTAW ATY - HAMDAN ZULVA - POLITISI - PEMERASAN OLEH JAKSA + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=topic_253&mediatype=audio&collection=opensource_audio"/><p>visioning indonesia.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of 
the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/topic_253 + http://www.archive.org/details/topic_253 + Tue, 29 Apr 2008 06:39:11 GMT + audio/opensource_audio + + pasfm + + + The Not Doctor Laura Show_Mon Apr 28 2008 - how to handle stressful events + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=TheNotDoctorLauraShow_monApr282008-HowToHandleStressfulEvents&mediatype=audio&collection=opensource_audio"/><p>The Not Doctor Laura Show_Mon Apr 28 2008 - how to handle stressful events.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/TheNotDoctorLauraShow_monApr282008-HowToHandleStressfulEvents + http://www.archive.org/details/TheNotDoctorLauraShow_monApr282008-HowToHandleStressfulEvents + Tue, 29 Apr 2008 06:38:38 GMT + audio/opensource_audio + + a + + + beethoven 9th + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Beethovenbeethoven9th&mediatype=Audio&collection=ourmedia"/><p>sample symphony music, test file.</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata</p> + http://www.archive.org/details/Beethovenbeethoven9th + http://www.archive.org/details/Beethovenbeethoven9th + Tue, 29 Apr 2008 06:38:01 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + beethoven, 9th, symphony + + + KASDAMAM + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=KASDAMAM_444&mediatype=texts&collection=opensource"/><p>KASDAMAM.</p><p>This item belongs to: texts/opensource.</p><p>This item has 
files of the following types: Metadata, PDF</p> + http://www.archive.org/details/KASDAMAM_444 + http://www.archive.org/details/KASDAMAM_444 + Tue, 29 Apr 2008 06:37:53 GMT + texts/opensource + + KASDAMAM + + + James + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=James_278&mediatype=Other&collection=ourmedia"/><p>d.</p><p>This item belongs to: Other/ourmedia.</p><p>This item has files of the following types: Metadata, ZIP</p> + http://www.archive.org/details/James_278 + http://www.archive.org/details/James_278 + Tue, 29 Apr 2008 06:37:47 GMT + Other/ourmedia + + s + + + Mosaic News - 04/28/08: World News From The Middle East + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=linktv_mosaic20080428&mediatype=movies&collection=opensource_movies"/><p>The Peabody Award-winning daily compilation of television news reports from the Middle East, including Egypt, Lebanon, Israel, Syria, the Palestinian Authority, Iraq and Iran..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: FLV 400k, MPEG4 1.5Mbps, MPEG4 350Kb, MPEG4 60Kb, Metadata, iPod Video (MP4)</p> + http://www.archive.org/details/linktv_mosaic20080428 + http://www.archive.org/details/linktv_mosaic20080428 + Tue, 29 Apr 2008 06:37:38 GMT + movies/opensource_movies + + + + Fouth Wall Weekly #2 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Jesster_StateoftheArtandFredSoloFouthWallWeekly_2&mediatype=Audio&collection=ourmedia"/><p>In this weeks edition we go over : The end of countdown and Batman R.I.P preview Cloverfiled DVD Mortal Kombat aka the death of a franchise and GTA preview And other film news.</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 128Kbps MP3, 128kbps M3U, 64Kbps M3U, 64Kbps 
MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis</p> + http://www.archive.org/details/Jesster_StateoftheArtandFredSoloFouthWallWeekly_2 + http://www.archive.org/details/Jesster_StateoftheArtandFredSoloFouthWallWeekly_2 + Tue, 29 Apr 2008 06:36:06 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + Comics, film, videogames + + + John 11:20-44 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=AlanDisbrowJohn11_20-44_0&mediatype=Audio&collection=ourmedia"/><p>Bible Study of John 11:20-44, Arise to a New Life, by Alan Disbrow..</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 128Kbps MP3, 128kbps M3U, 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis</p> + http://www.archive.org/details/AlanDisbrowJohn11_20-44_0 + http://www.archive.org/details/AlanDisbrowJohn11_20-44_0 + Tue, 29 Apr 2008 06:34:13 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + Christianity, Jesus, Bible Study, Calvary Chapel, John + + + day 6 potok + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Joshleo-day6Potok652-3&mediatype=movies&collection=bliptv"/><p>me and this cat don't get along.</p><p>This item belongs to: movies/bliptv.</p><p>This item has files of the following types: 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash, Metadata, Quicktime, Thumbnail</p> + http://www.archive.org/details/Joshleo-day6Potok652-3 + http://www.archive.org/details/Joshleo-day6Potok652-3 + Tue, 29 Apr 2008 06:33:24 GMT + http://creativecommons.org/licenses/by-nc-sa/2.0/ + movies/bliptv + + + + + + + + + 4jkfhwjkl + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=4jkfhwjkl&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: 
movies/opensource_movies.</p><p>This item has files of the following types: Metadata, RAR</p> + http://www.archive.org/details/4jkfhwjkl + http://www.archive.org/details/4jkfhwjkl + Tue, 29 Apr 2008 06:32:22 GMT + movies/opensource_movies + + + + 5knfwk + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=5knfwk&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Metadata, RAR</p> + http://www.archive.org/details/5knfwk + http://www.archive.org/details/5knfwk + Tue, 29 Apr 2008 06:31:57 GMT + movies/opensource_movies + + + + SA11 - Upheaval - Descending in Motion + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Sa11-Upheaval-DescendingInMotion&mediatype=audio&collection=opensource_audio"/><p>SA11 - Upheaval - Descending in Motion -------------------------------------- Descending in Motion -------------------------------------- Tom Maggio (also of Turmoil and Domestic Turmoil) brings us an EP of subtle sinking sounds and vaguely disturbing industrial echos....</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, JPEG, Metadata, Ogg Vorbis, Text, VBR M3U, VBR MP3, VBR ZIP, ZIP</p> + http://www.archive.org/details/Sa11-Upheaval-DescendingInMotion + http://www.archive.org/details/Sa11-Upheaval-DescendingInMotion + Tue, 29 Apr 2008 06:31:51 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/us/ + audio/opensource_audio + + dark, ambient, industrial + + + Vespa ride to Ufomammut - Smoke (3) + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=VespaRideToUfomammut-Smoke3&mediatype=movies&collection=opensource_movies"/><p>riding through 
London on my vespa, listening to Ufomammut, Smoke..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Flash Video, Metadata, QuickTime, Thumbnail</p> + http://www.archive.org/details/VespaRideToUfomammut-Smoke3 + http://www.archive.org/details/VespaRideToUfomammut-Smoke3 + Tue, 29 Apr 2008 06:31:05 GMT + http://creativecommons.org/licenses/by-nc-nd/2.0/uk/ + movies/opensource_movies + + + + vespa, london, ufomammut + + + NaturesLead_OV_04__LockedInAGraveyard + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=NaturesLead_OV_04__LockedInAGraveyard&mediatype=audio&collection=opensource_audio"/><p>In this Open Valley, I share my experience of getting locked in Rome's Protestant Cemetery where Keats and Shelley are buried..</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/NaturesLead_OV_04__LockedInAGraveyard + http://www.archive.org/details/NaturesLead_OV_04__LockedInAGraveyard + Tue, 29 Apr 2008 06:31:03 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/us/ + audio/opensource_audio + + Keats, Shelley, cemetery + + + 20/20 Podcast #12A + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=2020Podcast12a&mediatype=audio&collection=opensource_audio"/><p>Discussion between Jamie and Scott about Google Android and the Open Handset Alliance. 
All things cellular are fair game..</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/2020Podcast12a + http://www.archive.org/details/2020Podcast12a + Tue, 29 Apr 2008 06:30:26 GMT + audio/opensource_audio + + google, android, cell phone, gadget, samsung, htc, t-mobile + + + Urdu Poetry podcast + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=ZAhmedUrduPoetrypodcast_2&mediatype=Audio&collection=ourmedia"/><p>An Urdu poem by N M Rashid with english translation.</p><p>This item belongs to: Audio/ourmedia.</p><p>This item has files of the following types: 256Kbps MP3, 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/ZAhmedUrduPoetrypodcast_2 + http://www.archive.org/details/ZAhmedUrduPoetrypodcast_2 + Tue, 29 Apr 2008 06:30:23 GMT + http://creativecommons.org/licenses/by/2.5/ + Audio/ourmedia + + Urdu, Rashid, poetry + + + Beth Ann Turkey 2008 + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=SteveEasomBethAnnTurkey2008&mediatype=MovingImage&collection=ourmedia"/><p>2008 Rio Turkey taken By Beth Ann in Oklahoma.</p><p>This item belongs to: MovingImage/ourmedia.</p><p>This item has files of the following types: Metadata, QuickTime</p> + http://www.archive.org/details/SteveEasomBethAnnTurkey2008 + http://www.archive.org/details/SteveEasomBethAnnTurkey2008 + Tue, 29 Apr 2008 06:30:21 GMT + http://creativecommons.org/licenses/by/2.5/ + MovingImage/ourmedia + + Beth Ann, Turkey + + + TOTD 29 APRIL JAM 06 - DEWI SHINTAW ATY - AS HIKAM - POLITISI - PEMERASAN OLEH JAKSA + <img width="160" style="padding-right:3px;float:left;" 
src="http://www.archive.org/services/get-item-image.php?identifier=topic_462&mediatype=audio&collection=opensource_audio"/><p>visioning indonesia.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/topic_462 + http://www.archive.org/details/topic_462 + Tue, 29 Apr 2008 06:30:12 GMT + audio/opensource_audio + + pasfm + + + hosam + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=hosam_244&mediatype=movies&collection=opensource_movies"/><p>6w7u45r7.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: 256Kb MPEG4, 64Kb MPEG4, Animated GIF, Flash Video, Metadata, Thumbnail, Windows Media</p> + http://www.archive.org/details/hosam_244 + http://www.archive.org/details/hosam_244 + Tue, 29 Apr 2008 06:30:05 GMT + movies/opensource_movies + + + + + + + 57uy436 + + + free - destiny's child + <img width="160" style="padding-right:3px;float:left;" src="http://www.archive.org/services/get-item-image.php?identifier=Free-DestinysChild&mediatype=audio&collection=opensource_audio"/><p>from destiny fufilled.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: 64Kbps M3U, 64Kbps MP3, 64Kbps MP3 ZIP, Metadata, Ogg Vorbis, VBR M3U, VBR MP3, VBR ZIP</p> + http://www.archive.org/details/Free-DestinysChild + http://www.archive.org/details/Free-DestinysChild + Tue, 29 Apr 2008 06:30:01 GMT + audio/opensource_audio + + free, destiny's child + + + diff --git a/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz b/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz new file mode 100644 index 00000000..fa248f8d Binary files /dev/null and b/src/test/resources/org/archive/format/gzip/IAH-urls-wget.warc.gz differ diff --git 
a/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc b/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc new file mode 100644 index 00000000..1125fe98 --- /dev/null +++ b/src/test/resources/org/archive/format/warc/IAH-urls-wget.warc @@ -0,0 +1,3156 @@ +WARC/1.0 +WARC-Type: warcinfo +Content-Type: application/warc-fields +WARC-Date: 2013-10-21T21:53:06Z +WARC-Record-ID: +WARC-Filename: IAH-urls-wget.warc.gz +WARC-Block-Digest: sha1:I7UCIFZZDYO4O55ZOG6X5PRMVWMPZWMJ +Content-Length: 235 + +software: Wget/1.14 (darwin11.4.0) +format: WARC File Format 1.0 +conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf +robots: classic +wget-arguments: "-i" "urls.txt" "-O" "-" "--warc-file=IAH-urls-wget" + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/robots.txt +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:06Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:CPCUG5OU46Y5YHPTFCZLZV465AFPFJYY +Content-Length: 126 + +GET /robots.txt HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/robots.txt +WARC-Date: 2013-10-21T21:53:06Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:3L4DY55OVKT2IEHZEKOSIXRCQKJ7MNIE +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 435 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:06 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/robots.txt +Expires: Tue, 22 Oct 2013 03:53:06 GMT +Cache-Control: max-age=21600 + + +302 Found + +

    302 Found

    +
    nginx/1.1.19
    + + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/robots.txt +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:07Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:RQBBTMHS45XDYLYGRCT7YQ7P3UORCEQU +Content-Length: 122 + +GET /robots.txt HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/robots.txt +WARC-Date: 2013-10-21T21:53:07Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:ORAXOWRNZAEDKBOJUW2PYNLDX2LRDCBK +WARC-Payload-Digest: sha1:ARS5OJBVROJW62M7JMB3BCHEUUEBVMJK +Content-Type: application/http;msgtype=response +Content-Length: 1014 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:07 GMT +Content-Type: text/plain +Content-Length: 727 +Last-Modified: Mon, 21 Oct 2013 18:55:18 GMT +Connection: keep-alive +Expires: Tue, 22 Oct 2013 03:53:07 GMT +Cache-Control: max-age=21600 +Accept-Ranges: bytes + + +Sitemap: http://archive.org/sitemap/sitemap.xml + +############################################## +# +# Welcome to the Archive! +# +############################################## +# Please crawl our files. +# We appreciate if you can crawl responsibly. +# Stay open! +############################################## + + +# slow down the ask jeeves crawler which was hitting our SE a little too fast +# via collection pages. 
--Feb2008 tracey-- +User-agent: Teoma +Disallow: /control/ +Disallow: /report/ + + +User-agent: * +Disallow: /control/ +Disallow: /report/ +Disallow: /details/goldenbull2007john/ +Disallow: /stream/goldenbull2007john/ +Disallow: /download/goldenbull2007john/ +Disallow: /14/items/goldenbull2007john/goldenbull2007john_djvu.txt + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/ +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:07Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:GCYSQOYQGB7JDB57XMUYWFQERAKMNEQQ +Content-Length: 116 + +GET / HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/ +WARC-Date: 2013-10-21T21:53:07Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:WDSM4DEMHGZEOPEG2HMQAIUBQJ6WRRN5 +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 434 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:07 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/index.php +Expires: Tue, 22 Oct 2013 03:53:07 GMT +Cache-Control: max-age=21600 + + +302 Found + +

    302 Found

    +
    nginx/1.1.19
    + + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/index.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:07Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:CPMG7AGNNEDLYK5UOOZLLRHPI4JLEC3U +Content-Length: 121 + +GET /index.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/index.php +WARC-Date: 2013-10-21T21:53:07Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:RYQILVXCYAVUO7TRRO7CQ7VYKSD4COHM +WARC-Payload-Digest: sha1:63IMMQZVCWADA6ZOVJVHKYHHNFSUS26H +Content-Type: application/http;msgtype=response +Content-Length: 258 + +HTTP/1.1 301 Moved Permanently +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:08 GMT +Content-Type: text/html; charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 +Location: https://archive.org + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: https://archive.org/ +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:09Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:G6KJJNG7G7HVRFGJJZ7ELDMO2ZZEX4WR +Content-Length: 112 + +GET / HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: https://archive.org/ +WARC-Date: 2013-10-21T21:53:09Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:VRAITOLIHCUNC5A7LDUBFHDSYQCUO7JM +WARC-Payload-Digest: sha1:WDT537KNDSUIRPB7R56KBDX3K77IR7W3 +Content-Type: application/http;msgtype=response +Content-Length: 30849 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:09 GMT +Content-Type: text/html; 
charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 +Set-Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87; path=/; domain=.archive.org + +7756 + + + + + Internet Archive: Digital Library of Free Books, Movies, Music & Wayback Machine + + + + + + + + + + + + + + + + + + +
    + Universal Access To All Knowledge
    + + + + + + + + + + + + + +
    + Home + + Forums | +FAQs | +Contributions | +Volunteer Positions | +Jobs | +donate +
    + + + +
    + + + + + + + + + + + + + + + + + +
    +
    + Search: + + + + + + + + Advanced Search +
    +
    + + Anonymous User + + (login + or + + join us) + + +
    Upload
    +
    + +
    + + + + +
    + + + +
    +
    +

    +
    + 361 billion pages +
    + Web +

    +
    + + + + + + + + +
    + (wayback logo) + + +
    + + + more info +
    +
    +
    +
    + + +
    +

    +
    + See recent additions in RSS +
    + Welcome to the Archive +

    +
    + The Internet Archive, a 501(c)(3) non-profit, is building a digital library of Internet sites and other cultural artifacts in digital form. Like a paper library, we provide free access to researchers, historians, scholars, the print disabled, and the general public.
    +
    +
    + + + +
    +
    +
    +

    +
    + Browse +
    + (by keyword) +
    +
    + Video +
    + + + 1,411,240 movies + + +

    + +
    +

    + + Curator's Choice + + (more) + + +

    +
    + (movies pick) +
    +
    + filmcollectief-00-060a
    + + Unknown movie, found in a cannister which should contain something elso. So if someone can help me... +
    +
    + +

    Recent Review

    +
    +
    + The Stars Look Down (1940)
    Average rating: 4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars

    +
    +
    + +
    +
    +
    +
    +

    +
    + Browse +
    + (by band) +
    +
    + Live Music +
    + + + 121,538 concerts + + +

    + +
    +

    + + Curator's Choice + + (more) + + +

    +
    + (etree pick) +
    +
    + Grateful Dead Live at Jai-Alai Fronton on...
    + + Set 1 Ramble On Rose Black Throated Wind Mississippi Half-Step Uptown Toodeloo Beat It On Down The... +
    +
    + +

    Recent Review

    + + +
    +
    +
    +
    +

    +
    + Browse +
    + (by keyword) +
    +
    + Audio +
    + + + 1,744,979 recordings + + +

    + +
    +

    + + Curator's Choice + + (more) + + +

    +
    + (audio pick) +
    +
    + Various Artists - phase01 [hi001]
    + + Our first release! This compilation includes all the original Heavy Industries collaborators... +
    +
    + +

    Recent Review

    +
    +
    + IAA Top 40 Countdown
    Average rating: 5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars

    +
    +
    + +
    +
    +
    +
    +

    +
    + Browse +
    + (by keyword) +
    +
    + Texts +
    + + + 5,325,972 texts + + +

    + +
    +

    + + Curator's Choice + + (more) + + +

    +
    + (texts pick) +
    +
    + Sommaire du plaidoyé. Pour les abbé, prieur,...
    + + 7 p. ; in-2 Cote du document : FOL Z 588 INV 518 FA (P.29) +
    +
    + +

    Recent Review

    + + +
    +
    +
    + + + + + + + +
    +
    +

    Most recent posts (write a post by going to a forum) more...

    + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    SubjectPosterForumRepliesDate
    + Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 0 + 19 minutes ago +
    + Re: What's in a name? 'The Grateful Dead' + + Diamondhead + + GratefulDead + + 0 + 23 minutes ago +
    + Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 2 + 23 minutes ago +
    + band called Last to Know from Taos, NM + + menudo505 + + etree + + 0 + 27 minutes ago +
    + Re: Woulda Coulda Shoulda + + micah6vs8 + + GratefulDead + + 0 + 34 minutes ago +
    + Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 1 + 41 minutes ago +
    + Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 0 + 59 minutes ago +
    + Re: Woulda Coulda Shoulda + + Diamondhead + + GratefulDead + + 1 + 1 hour ago +
    + Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 1 + 1 hour ago +
    + Re: What's in a name? 'The Grateful Dead' + + unclejohn52 + + GratefulDead + + 0 + 1 hour ago +
    +
    +
    +
    + + + + + + + +

    +

    + Terms of Use (10 Mar 2001) +

    + + + + + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/index.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:10Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:DRAV5TKA4765LYFANCFHVNKEWGLRKUMM +Content-Length: 171 + +GET /index.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/index.php +WARC-Date: 2013-10-21T21:53:10Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:YXATLZCFORQS33ZVB3M3SMJY3S2Z6QUD +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 434 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:11 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/index.php +Expires: Tue, 22 Oct 2013 03:53:11 GMT +Cache-Control: max-age=21600 + + +302 Found + +

    302 Found

    +
    nginx/1.1.19
    + + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/index.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:11Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:D53DT5RU7NGDFBHOJOKLF56UG32P7AYF +Content-Length: 167 + +GET /index.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/index.php +WARC-Date: 2013-10-21T21:53:11Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:RS3Z4Z3NZ6BS6ANPCRKWA43E5O5YPVG6 +WARC-Payload-Digest: sha1:63IMMQZVCWADA6ZOVJVHKYHHNFSUS26H +Content-Type: application/http;msgtype=response +Content-Length: 258 + +HTTP/1.1 301 Moved Permanently +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:11 GMT +Content-Type: text/html; charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 +Location: https://archive.org + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: https://archive.org/ +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:12Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:HRBVH5XQCN2OWGMQ7THZ675AZ4L4SEWV +Content-Length: 158 + +GET / HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: https://archive.org/ +WARC-Date: 2013-10-21T21:53:12Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:24OHCKJGVHH4GDPS65MSGZAS2FWN6U44 +WARC-Payload-Digest: sha1:7DW5UIXJ5NGLWNQ5WYE7AB4E5L74X275 +Content-Type: application/http;msgtype=response +Content-Length: 30679 + +HTTP/1.1 
200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:13 GMT +Content-Type: text/html; charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 + +76fb + + + + + Internet Archive: Digital Library of Free Books, Movies, Music & Wayback Machine + + + + + + + + + + + + + + + + + + +
    + Universal Access To All Knowledge
    + + + + + + + + + + + + + +
    + Home + + Forums | +FAQs | +Contributions | +Volunteer Positions | +Jobs | +donate +
    + + + +
    + + + + + + + + + + + + + + + + + +
    +
    + Search: + + + + + + + + Advanced Search +
    +
    + + Anonymous User + + (login + or + + join us) + + +
    Upload
    +
    + +
    + + + + +
    + + + +
    +
    +

    +
    + 361 billion pages +
    + Web +

    +
    + + + + + + + + +
    + (wayback logo) + + +
    + + + more info +
    +
    +
    +
    + + +
    +

    +
    + See recent additions in RSS +
    + Welcome to the Archive +

    +
    + The Internet Archive, a 501(c)(3) non-profit, is building a digital library of Internet sites and other cultural artifacts in digital form. Like a paper library, we provide free access to researchers, historians, scholars, the print disabled, and the general public.
    +
    +
    + + + +
    +
    +
    +

    +
    + Browse +
    + (by keyword) +
    +
    + Video +
    + + + 1,411,240 movies + + +

    + +
    +

    + + Curator's Choice + + (more) + + +

    +
    + (movies pick) +
    +
    + Baby nursery (reel 5)
    + + Description: Amateur movie of the baby nursery at the Peoples Temple Agricultural Mission in... +
    +
    + +

    Recent Review

    +
    +
    + The Stars Look Down (1940)
    Average rating: 4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars4.83 out of 5 stars

    +
    +
    + +
    +
    +
    +
    +

    +
    + Browse +
    + (by band) +
    +
    + Live Music +
    + + + 121,538 concerts + + +

    + +
    +

    + + Curator's Choice + + (more) + + +

    +
    + (etree pick) +
    +
    + Grateful Dead Live at Uptown Theater on 1979-12-05
    + + Alabama Getaway-> Greatest Story Ever Told, Dire Wolf, Me & My Uncle-> Big River, Cold Rain & Snow,... +
    +
    + +

    Recent Review

    + + +
    +
    +
    +
    +

    +
    + Browse +
    + (by keyword) +
    +
    + Audio +
    + + + 1,744,979 recordings + + +

    + +
    +

    + + Curator's Choice + + (more) + + +

    +
    + (audio pick) +
    +
    + [Miga_v16] "Nice summer"
    + + extra video for audio-release [Miga32] Rominger "Music for camping" [Miga_v16] "Nice summer" video:... +
    +
    + +

    Recent Review

    +
    +
    + IAA Top 40 Countdown
    Average rating: 5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars5.00 out of 5 stars

    +
    +
    + +
    +
    +
    +
    +

    +
    + Browse +
    + (by keyword) +
    +
    + Texts +
    + + + 5,325,972 texts + + +

    + +
    +

    + + Curator's Choice + + (more) + + +

    +
    + (texts pick) +
    +
    + Outlines of European history
    + + pt. 1. Earliest man...the Orient, Greece, and Rome; Europe from the break-up of the Roman Empire to... +
    +
    + +

    Recent Review

    + + +
    +
    +
    + + + + + + + +
    +
    +

    Most recent posts (write a post by going to a forum) more...

    + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    SubjectPosterForumRepliesDate
    + Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 0 + 19 minutes ago +
    + Re: What's in a name? 'The Grateful Dead' + + Diamondhead + + GratefulDead + + 0 + 23 minutes ago +
    + Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 2 + 23 minutes ago +
    + band called Last to Know from Taos, NM + + menudo505 + + etree + + 0 + 27 minutes ago +
    + Re: Woulda Coulda Shoulda + + micah6vs8 + + GratefulDead + + 0 + 34 minutes ago +
    + Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 1 + 41 minutes ago +
    + Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + Dudley Dead + + GratefulDead + + 0 + 59 minutes ago +
    + Re: Woulda Coulda Shoulda + + Diamondhead + + GratefulDead + + 1 + 1 hour ago +
    + Re: Non Dead related :Lit. advice . Neil Gaiman etc. + + micah6vs8 + + GratefulDead + + 1 + 1 hour ago +
    + Re: What's in a name? 'The Grateful Dead' + + unclejohn52 + + GratefulDead + + 0 + 1 hour ago +
    +
    +
    +
    + + + + + + + +

    +

    + Terms of Use (10 Mar 2001) +

    + + + + + +0 + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/images/logoc.jpg +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:14Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:6PZOFZFFZRY7XJOJ2325DNXHG7LEP3G6 +Content-Length: 178 + +GET /images/logoc.jpg HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/images/logoc.jpg +WARC-Date: 2013-10-21T21:53:14Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:JN3EE5W7CY5PSNTEJ7A6ORMLNNMNWS3J +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 441 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:14 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/images/logoc.jpg +Expires: Tue, 22 Oct 2013 03:53:14 GMT +Cache-Control: max-age=21600 + + +302 Found + +

    302 Found

    +
    nginx/1.1.19
    + + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/images/logoc.jpg +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:14Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:Q6EXPKA6ECDPIEX3MXCWAH2S4JEO4ZHI +Content-Length: 174 + +GET /images/logoc.jpg HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/images/logoc.jpg +WARC-Date: 2013-10-21T21:53:14Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:6ESWUQAIQPTXYPDSKA2NGLDTHEFS6FLK +WARC-Payload-Digest: sha1:UZY6ND6CCHXETFVJD2MSS7ZENMWF7KQ2 +Content-Type: application/http;msgtype=response +Content-Length: 1951 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:14 GMT +Content-Type: image/jpeg +Content-Length: 1662 +Last-Modified: Wed, 13 Feb 2013 16:33:25 GMT +Connection: keep-alive +Expires: Mon, 28 Oct 2013 21:53:14 GMT +Cache-Control: max-age=604800 +Accept-Ranges: bytes + +JFIFddAdobe ImageReadyDucky<Adobed   + + + +     8F !1AQqa"B2R#Sc$T%'!1qAa2"B3 ?P@a@€ Pj,($@ %i Q6;eH0Yz[,3TRhL0AR:(cq ?0SBrJҋ$3&9BAPH +ƱUOAv_O77\Q]Ɣ,) +R7ŠU4ٗшeB:%n'Eq y- )H[%TR{;4*26n.IQp7;|-F8N}|tInތ}RDwPΡ1&L`{Ԋި'w Jb$ I>b] +-z;%ԭKY<*sjJ=}.?]Qn*bg?Fǟ/fi__:V۪?'\xdP5GKu:㶱罠~jcas&AsZdX +Pؑ̚G-,VoC/#%>TwIDmr9%'F $O?w}OFӋ*#{%Zy +W rs]2Ƅ&.5)ISd)7J[G}IYGMͪި,*4oP ʱjzJP17 p%]ɁiK31kAiP$90kVD1wmĞ"F2rXmˤFxp_ƩO.=ίsm|j}K~`e)Ru ^ַVPC%J> ʽ4}<6K +xfv"_2[?ۺ@ *4p3Q{rJ5yk'>c 艂]pCSjyH*O:`<) +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:I7TKK5MVPSOGRVZYP6L37NTE35F25HJQ +Content-Length: 190 + +GET /images/go-button-gateway.gif HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + 
+WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/images/go-button-gateway.gif +WARC-Date: 2013-10-21T21:53:14Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:H5UU46OLZY33AQRBCM7R4BKJBMMAPKHB +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 453 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:15 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/images/go-button-gateway.gif +Expires: Tue, 22 Oct 2013 03:53:15 GMT +Cache-Control: max-age=21600 + + +302 Found + +

    302 Found

    +
    nginx/1.1.19
    + + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/images/go-button-gateway.gif +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:15Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:RDRO3REQIV4EDZDNVASSXC6W72SXUQSP +Content-Length: 186 + +GET /images/go-button-gateway.gif HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/images/go-button-gateway.gif +WARC-Date: 2013-10-21T21:53:15Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:OV6P7Y4LCKQ6R7B5EWRGUHFYUGNN2NV7 +WARC-Payload-Digest: sha1:72MRTMYOLSPFXGOTSETEJKAANDRDIE5O +Content-Type: application/http;msgtype=response +Content-Length: 1412 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:15 GMT +Content-Type: image/gif +Content-Length: 1124 +Last-Modified: Wed, 13 Feb 2013 16:33:26 GMT +Connection: keep-alive +Expires: Mon, 28 Oct 2013 21:53:15 GMT +Cache-Control: max-age=604800 +Accept-Ranges: bytes + +GIF89aXWSݡQ͗KʔJYWמOZ_ozăƄŊERNjFŒďӋE~?WvݑH͆Cj׆CۉEu;r9L컓Xߔ]}yw@ݖnߟzᦃ寑Ĭf3d2c1c1b0a0^/\.Z-W+e3c2a1_0[.Y-W,V+h5h7l9l;n=p?pArCtFvIyLzL{N}R}R~SԀUՁW׆]׈`َiےn۔pܖsޚxޜ{ߝ|ࢃ⦈⧉岘洛縟躣ȵf5h7n?䮔!,@Coz0>Wq& y'HAFp@Ǔ(=.YB$ q'M/0q@A¨&QR48In``iC +1܄礞=V04Sn\"mxTQPA a$ +pI$[$R C㕐IUHYc̖-_Ȍ):l(q>)t' ׉ Dh3{% ,&d6D3H`(GC4JP@C; + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/images/star.png +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:15Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:I5U6LAWZPGDDZOSTJEHZT2BWCOPFKDLV +Content-Length: 177 + +GET /images/star.png HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: 
PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/images/star.png +WARC-Date: 2013-10-21T21:53:15Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:5CUEYG4YEO3H5SKHN4UGZDLKCDXJTP2W +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 440 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:15 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/images/star.png +Expires: Tue, 22 Oct 2013 03:53:15 GMT +Cache-Control: max-age=21600 + + +302 Found + +

    302 Found

    +
    nginx/1.1.19
    + + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/images/star.png +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:15Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:5OLRWKI5GCDS6JF4CCKLWJM23GJZBQOQ +Content-Length: 173 + +GET /images/star.png HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/images/star.png +WARC-Date: 2013-10-21T21:53:15Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:LUMN34VHUXETNH36JOWNHIBNR4DOO2I5 +WARC-Payload-Digest: sha1:CECJCMQ6SXDRBZX5COV7RTTQTHTY653H +Content-Type: application/http;msgtype=response +Content-Length: 1304 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:16 GMT +Content-Type: image/png +Content-Length: 1016 +Last-Modified: Wed, 13 Feb 2013 16:33:26 GMT +Connection: keep-alive +Expires: Mon, 28 Oct 2013 21:53:16 GMT +Cache-Control: max-age=604800 +Accept-Ranges: bytes + +PNG + + IHDRagAMA asRGB cHRMz&u0`:pQ<bKGDC pHYs   vpAg\ƭIDAT8˕yHAzo]4BJۥ\)[eJ̲RH5,(%K4R+s. 
="йNMWZ#3ٌQ}s"~GS.DN x.5B v~cQU2\(~_5s8jW)-a]|@['Ի[{^jOO%fCy $8f.?Z(&%мǮLT3Rv ;nlCy)E/ Һ`PTeru8|$>_@?J'"ڱQm~@&łǠOR 4Ųn]dBŶORPb%tEXtdate:create2012-03-28T02:07:14+00:005%tEXtdate:modify2012-03-25T15:54:33+00:00oIENDB` + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://www.archive.org/services/collection-rss.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:16Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:CTALP42WLFIFYU44MXGJNNLYA45BUQVG +Content-Length: 189 + +GET /services/collection-rss.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: www.archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://www.archive.org/services/collection-rss.php +WARC-Date: 2013-10-21T21:53:16Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:A24EOC2MZA4SHKQTCFE5RWLN3EG3WSO2 +WARC-Payload-Digest: sha1:U32DBUPBIGUHJ4QE32J6G7BWBRHTBNE4 +Content-Type: application/http;msgtype=response +Content-Length: 452 + +HTTP/1.1 302 Moved Temporarily +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:16 GMT +Content-Type: text/html +Content-Length: 161 +Connection: keep-alive +Location: http://archive.org/services/collection-rss.php +Expires: Tue, 22 Oct 2013 03:53:16 GMT +Cache-Control: max-age=21600 + + +302 Found + +

    302 Found

    +
    nginx/1.1.19
    + + + + +WARC/1.0 +WARC-Type: request +WARC-Target-URI: http://archive.org/services/collection-rss.php +Content-Type: application/http;msgtype=request +WARC-Date: 2013-10-21T21:53:16Z +WARC-Record-ID: +WARC-IP-Address: 207.241.224.2 +WARC-Warcinfo-ID: +WARC-Block-Digest: sha1:VD75DH7UF5EHTROCENIJAVOE6HWRMYAL +Content-Length: 185 + +GET /services/collection-rss.php HTTP/1.1 +User-Agent: Wget/1.14 (darwin11.4.0) +Accept: */* +Host: archive.org +Connection: Keep-Alive +Cookie: PHPSESSID=b55lt1a1d8g9fkeokku32loo87 + + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: http://archive.org/services/collection-rss.php +WARC-Date: 2013-10-21T21:53:16Z +WARC-IP-Address: 207.241.224.2 +WARC-Block-Digest: sha1:GVOLA26JHJVANCRZ545PTRNG6HMT7PWW +WARC-Payload-Digest: sha1:CBRMZGMT7IQRUCDW23ABAL6RN7H6MGIE +Content-Type: application/http;msgtype=response +Content-Length: 78007 + +HTTP/1.1 200 OK +Server: nginx/1.1.19 +Date: Mon, 21 Oct 2013 21:53:16 GMT +Content-Type: text/xml;charset=UTF-8 +Transfer-Encoding: chunked +Connection: keep-alive +X-Powered-By: PHP/5.3.10-1ubuntu3.2 + +7fa0 + + + + https://archive.org + Internet Archive + The most recent additions to the Internet Archive collections. This RSS feed is generated dynamically + info@archive.org (Info Box) + Mon, 21 Oct 2013 21:48:07 GMT + + https://archive.org/images/glogo.png + Internet Archive + https://archive.org + + + gov.uscourts.mnd.126519 + gov.uscourts.mnd.126519 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.mnd.126519&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. 
If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.mnd.126519 + https://archive.org/details/gov.uscourts.mnd.126519 + Mon, 21 Oct 2013 19:50:15 GMT + texts/usfederalcourts + + 03337F0F2C418DC4A098F37A8F17A528536B75A2 + + + gov.uscourts.ded.45655 + gov.uscourts.ded.45655 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.ded.45655&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Image Container PDF, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.ded.45655 + https://archive.org/details/gov.uscourts.ded.45655 + Mon, 21 Oct 2013 18:08:34 GMT + texts/usfederalcourts + + 3E074A11E37C54C6725043593A22AAFC4A71EAB8 + + + PC Longplay 319 Spec Ops The Line + PC Longplay 319 Spec Ops The Line + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=PC_Longplay_319_Spec_Ops_The_Line&mediatype=movies&collection=opensource_movies"/><p>This game was a fairly decent shooter. 
Can't say I would play it again but it had a worth while story to play through with the ability to choose your own destiny..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Matroska, Metadata, Ogg Video, Thumbnail, h.264</p> + https://archive.org/details/PC_Longplay_319_Spec_Ops_The_Line + https://archive.org/details/PC_Longplay_319_Spec_Ops_The_Line + Mon, 21 Oct 2013 15:19:16 GMT + movies/opensource_movies + + + + + 38F8748912CF1483DA9505B3D41C65D76990B4A4 + + + gov.uscourts.ohsd.166725 + gov.uscourts.ohsd.166725 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.ohsd.166725&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata</p> + https://archive.org/details/gov.uscourts.ohsd.166725 + https://archive.org/details/gov.uscourts.ohsd.166725 + Mon, 21 Oct 2013 14:52:49 GMT + texts/usfederalcourts + + 1CFB2CEA490A23F960F63FE4B2996FC5073A752B + + + George Griffin Pt 5 + George Griffin Pt 5 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=scm-315234-georgegriffinpt5&mediatype=movies&collection=SeattleCommunityMedia"/><p>More information about this show available at: Seattle Community Media.</p><p>This item belongs to: movies/SeattleCommunityMedia.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/scm-315234-georgegriffinpt5 + https://archive.org/details/scm-315234-georgegriffinpt5 + Mon, 21 Oct 2013 08:32:22 GMT + 
http://creativecommons.org/licenses/by-nc-nd/3.0/ + movies/SeattleCommunityMedia + + + + + + + History + + A4CAB132D3017E520D5D2BCFC40AE162C8FFEA5A + + + عذب النسيل في تفسير كلام الوكيل / تفسير سورة العصر 6/6 + عذب النسيل في تفسير كلام الوكيل / تفسير سورة العصر 6/6 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=3dbo_nnassil_el3asr&mediatype=audio&collection=opensource_audio"/><p>No description available.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Ogg Vorbis, VBR MP3</p> + https://archive.org/details/3dbo_nnassil_el3asr + https://archive.org/details/3dbo_nnassil_el3asr + Mon, 21 Oct 2013 07:55:35 GMT + audio/opensource_audio + + + + 245CEC3E026E543DB855113D5DA639411258594F + + + gov.uscourts.dcd.153973 + gov.uscourts.dcd.153973 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.dcd.153973&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. 
If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.dcd.153973 + https://archive.org/details/gov.uscourts.dcd.153973 + Mon, 21 Oct 2013 06:45:41 GMT + texts/usfederalcourts + + 5C897BB4B02ADE078AE7C399EA38897EEC76C265 + + + فلم 19 + فلم 19 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=MezaaGe_234F067D-&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG4, Metadata, Ogg Video, Thumbnail</p> + https://archive.org/details/MezaaGe_234F067D- + https://archive.org/details/MezaaGe_234F067D- + Mon, 21 Oct 2013 05:36:21 GMT + movies/opensource_movies + + + + + EE2E9BF9883DC1DECD99A039F9B2CCB2EB87C56F + + + Katsaus Journal + Katsaus Journal + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=Katsaus_Journal&mediatype=movies&collection=opensource_movies"/><p>Finnish continuation war newsreel.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/Katsaus_Journal + https://archive.org/details/Katsaus_Journal + Mon, 21 Oct 2013 05:19:10 GMT + http://creativecommons.org/publicdomain/zero/1.0/ + movies/opensource_movies + + + + + + + 1941-1945 + + 71801FCE2832C222C013CF73A06054D012F17025 + + + AwPT - SHADE + AwPT - SHADE + <img width="160" style="padding-right:3px;float:left;" 
src="https://archive.org/services/get-item-image.php?identifier=scm-368707-awpt-shade&mediatype=movies&collection=SeattleCommunityMedia"/><p>  The first 57 minutes of a great  new 1:33:30 minute film  - available here: http://12160.info/video/shade-the-motion-picture-full-video-documentary More information about this show available at: Seattle Community Media.</p><p>This item belongs to: movies/SeattleCommunityMedia.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/scm-368707-awpt-shade + https://archive.org/details/scm-368707-awpt-shade + Mon, 21 Oct 2013 04:46:41 GMT + http://creativecommons.org/licenses/by-sa/3.0/ + movies/SeattleCommunityMedia + + + + + + + Documentary + + 96BAB900271137D5B85E4CD48744C6BF43F2FAC5 + + + When Trouble Comes My Way - Part 1 + When Trouble Comes My Way - Part 1 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=WhenTroubleComesMyWay-Part1_289&mediatype=movies&collection=opensource_movies"/><p>Sermon delivered by Pastor David Vos at Lake Palms Community Church, 380 Fulton Drive SE, Largo, FL 33771 on October 20, 2013..</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Cinepack, Metadata, Ogg Video, Thumbnail, h.264</p> + https://archive.org/details/WhenTroubleComesMyWay-Part1_289 + https://archive.org/details/WhenTroubleComesMyWay-Part1_289 + Mon, 21 Oct 2013 04:34:48 GMT + http://creativecommons.org/licenses/by-nc-nd/3.0/ + movies/opensource_movies + + + + Pastor David Vos, Sermon, Lake Palms Community Church + + 02D291F021ED97C7B8AAC171695C5488C6E8B740 + + + Appreciation + Appreciation + <img width="160" style="padding-right:3px;float:left;" 
src="https://archive.org/services/get-item-image.php?identifier=scm-368703-appreciation&mediatype=movies&collection=SeattleCommunityMedia"/><p>More information about this show available at: Seattle Community Media.</p><p>This item belongs to: movies/SeattleCommunityMedia.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, MPEG2, Metadata, Ogg Video, Thumbnail, Video Index, h.264</p> + https://archive.org/details/scm-368703-appreciation + https://archive.org/details/scm-368703-appreciation + Mon, 21 Oct 2013 04:17:17 GMT + http://creativecommons.org/licenses/by-sa/3.0/ + movies/SeattleCommunityMedia + + + + + + + Self improvement + + F9CA049C2858C148EE3A81011A48AC5248708D4B + + + دروس عامة للشيخ مصطفى العدوي + دروس عامة للشيخ مصطفى العدوي + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=4-islamic-1151&mediatype=movies&collection=opensource_movies"/><p>No description available.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Cinepack, Metadata, Ogg Video, Ogg Vorbis, Thumbnail, VBR MP3, Windows Media, h.264</p> + https://archive.org/details/4-islamic-1151 + https://archive.org/details/4-islamic-1151 + Mon, 21 Oct 2013 04:15:51 GMT + movies/opensource_movies + + + + + + + + C9F0489AC735484AE388B39D1E509B5FC0AE2E9F + + + Chris Whitley Live at Hanbury Ballroom on 2003-09-15 + Chris Whitley Live at Hanbury Ballroom on 2003-09-15 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=cw2003-09-15.flac16&mediatype=etree&collection=ChrisWhitley"/><p>Chris Whitley Hanbury Ballroom, Brighton 15 September 2003 Bandridge BMC530 stereo condenser mic > Sony MZ-R91 MD > Philips CDR-760 > EAC >FLAC disc one: 01. new lost world 02. to joy 03. 
crystal ship 04....</p><p>This item belongs to: etree/ChrisWhitley.</p><p>This item has files of the following types: Archive BitTorrent, Checksums, Flac, Flac FingerPrint, Metadata, Ogg Vorbis, Text, VBR MP3</p> + https://archive.org/details/cw2003-09-15.flac16 + https://archive.org/details/cw2003-09-15.flac16 + Mon, 21 Oct 2013 04:12:23 GMT + etree/ChrisWhitley + + + + 0232B264B2349F7D97B6FD4656D8A42342918DC3 + + + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE CONTRACT (13452213099445)_.pdf + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE CONTRACT (13452213099445)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418097-collect-files-25456-political-file-2012-non&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418097-collect-files-25456-political-file-2012-non.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418097-collect-files-25456-political-file-2012-non + https://archive.org/details/418097-collect-files-25456-political-file-2012-non + Mon, 21 Oct 2013 04:02:20 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 67A8F0DB3FDC8602264446066F98925EBB5DDC46 + + + WBZ RNC R PRESIDENT ORD58090 FEDNATL INVOICE (13461642108071)_.pdf + WBZ RNC R PRESIDENT ORD58090 FEDNATL INVOICE (13461642108071)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418098-collect-files-25456-political-file-2012-non&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418098-collect-files-25456-political-file-2012-non.html.</p><p>This item belongs to: 
texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418098-collect-files-25456-political-file-2012-non + https://archive.org/details/418098-collect-files-25456-political-file-2012-non + Mon, 21 Oct 2013 04:02:08 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 344D9552AE1A4B7655275C3556123B8471722045 + + + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE ORDER (13452213088682)_.pdf + WBZ REPUBLICAN NATIONAL COMMITEE R MULTI ORD58090 ISSUE ORDER (13452213088682)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418099-collect-files-25456-political-file-2012-non&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418099-collect-files-25456-political-file-2012-non.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418099-collect-files-25456-political-file-2012-non + https://archive.org/details/418099-collect-files-25456-political-file-2012-non + Mon, 21 Oct 2013 04:01:56 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 68CE81DE137E079A4566C4FCF3ED4C5A8EC82082 + + + Wikimedia incremental dump files for the Swedish Wikisource on October 19, 2013 + Wikimedia incremental dump files for the Swedish Wikisource on October 19, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-svwikisource-20131019&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Swedish 
Wikisource that is generated by Wikimedia on October 19, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, BZIP2, GZIP, Metadata, Text</p> + https://archive.org/details/incr-svwikisource-20131019 + https://archive.org/details/incr-svwikisource-20131019 + Mon, 21 Oct 2013 04:01:54 GMT + web/wikimediadownloads + wiki, incremental, dumps, svwikisource, Swedish, Wikisource + + D5AAE1198D3729DF6C3D4FC61130F966863BA389 + + + Wikimedia incremental dump files for the Spanish Wiktionary on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wiktionary on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswiktionary-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wiktionary that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> + https://archive.org/details/incr-eswiktionary-20131020 + https://archive.org/details/incr-eswiktionary-20131020 + Mon, 21 Oct 2013 04:01:49 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswiktionary, Spanish, Wiktionary + + 333665884E6DA297C78B0A10A7B9A729210D8DF4 + + + WBZ JOE KENNEDY III D HOUSEMACD4 FED PIQ (13448697161381)_.pdf + WBZ JOE KENNEDY III D HOUSEMACD4 FED PIQ (13448697161381)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418100-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418100-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, 
Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418100-collect-files-25456-political-file-2012-federal + https://archive.org/details/418100-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:45 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + D6D73BBC1BFD646AF32AF7EC187565304C7FCA89 + + + mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + <img width="160" style="padding-rig +8000 +ht:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516&mediatype=image&collection=coverartarchive"/><p>No description available.</p><p>This item belongs to: image/coverartarchive.</p><p>This item has files of the following types: Archive BitTorrent, JPEG, JPEG 250px Thumb, JPEG 500px Thumb, JPEG Thumb, JSON, Metadata, Metadata Log, MusicBrainz Metadata</p> + https://archive.org/details/mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + https://archive.org/details/mbid-f1219b8d-4113-4a18-9b44-fe3125ffa516 + Mon, 21 Oct 2013 04:01:38 GMT + image/coverartarchive + + 37636FB9D3094A1F605BB78DB6B786F8ABB29BCC + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED CONTRACT (13449762506021)_.pdf + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED CONTRACT (13449762506021)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418101-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418101-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, 
Single Page Processed JP2 ZIP</p> + https://archive.org/details/418101-collect-files-25456-political-file-2012-federal + https://archive.org/details/418101-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:34 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 258C467633F6C486084321E1E9542D3D1478A539 + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED ORDER (13448697139947)_.pdf + WBZ JOE KENNEDY D HOUSEMACD4 ORD58040 FED ORDER (13448697139947)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418102-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418102-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418102-collect-files-25456-political-file-2012-federal + https://archive.org/details/418102-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:20 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 8D8869D528E2E6622287400A580DC388A58CAEF4 + + + alexa20131017-24 + alexa20131017-24 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=alexa20131017-24&mediatype=web&collection=alexacrawls"/><p>Alexa crawl.</p><p>This item belongs to: web/alexacrawls.</p><p>This item has files of the following types: Metadata</p> + https://archive.org/details/alexa20131017-24 + https://archive.org/details/alexa20131017-24 + Mon, 21 Oct 2013 04:01:09 GMT + web/alexacrawls + crawldata + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED CONTRACT (13449762527174)_.pdf + 
WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED CONTRACT (13449762527174)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418103-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418103-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418103-collect-files-25456-political-file-2012-federal + https://archive.org/details/418103-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:01:06 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + F9687CF40B8065FB8AF30042BFBB2ED3F62ED6F4 + + + WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED ORDER (13448697150704)_.pdf + WBZ JOE KENNEDY D HOUSEMACD4 ORD58044 FED ORDER (13448697150704)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418104-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418104-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418104-collect-files-25456-political-file-2012-federal + https://archive.org/details/418104-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:53 GMT + texts/opensource + documentcloud, propublica, 
5290-jeremy-merrill + + CDCB99998DB6C9E281F90B69C7315F3192C9D8D9 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED CONTRACT (13449771554387)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED CONTRACT (13449771554387)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418105-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418105-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418105-collect-files-25456-political-file-2012-federal + https://archive.org/details/418105-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:39 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + F85C5F1543BCB96FF4CE64F33C704095934452AC + + + Wikimedia incremental dump files for the Spanish Wikivoyage on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wikivoyage on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswikivoyage-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wikivoyage that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> + https://archive.org/details/incr-eswikivoyage-20131020 + https://archive.org/details/incr-eswikivoyage-20131020 + Mon, 21 Oct 2013 04:00:33 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswikivoyage, Spanish, 
Wikivoyage + + F2E6A9CCC64109E4A1F1548F23A5AC55AE317E84 + + + Eso No 16 10 13 Nota Gari + Eso No 16 10 13 Nota Gari + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=EsoNo161013NotaGari&mediatype=audio&collection=opensource_audio"/><p>Programa Eso no!! nota con Gary de Mr. White!!!.</p><p>This item belongs to: audio/opensource_audio.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Ogg Vorbis, VBR MP3</p> + https://archive.org/details/EsoNo161013NotaGari + https://archive.org/details/EsoNo161013NotaGari + Mon, 21 Oct 2013 04:00:32 GMT + audio/opensource_audio + + + "audios eso no" + + 97ACF3820CD8DEE4DBE7AD0F46AA00EAE8F735BB + + + Wikimedia incremental dump files for the Swedish Wikiquote on October 19, 2013 + Wikimedia incremental dump files for the Swedish Wikiquote on October 19, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-svwikiquote-20131019&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Swedish Wikiquote that is generated by Wikimedia on October 19, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, BZIP2, GZIP, Metadata, Text</p> + https://archive.org/details/incr-svwikiquote-20131019 + https://archive.org/details/incr-svwikiquote-20131019 + Mon, 21 Oct 2013 04:00:29 GMT + web/wikimediadownloads + wiki, incremental, dumps, svwikiquote, Swedish, Wikiquote + + 43439D621A3C1BB74D39948590A9193B2D165D4B + + + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED INVOICE (13455816090527)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED INVOICE (13455816090527)_.pdf + <img width="160" style="padding-right:3px;float:left;" 
src="https://archive.org/services/get-item-image.php?identifier=418106-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418106-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418106-collect-files-25456-political-file-2012-federal + https://archive.org/details/418106-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:25 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 4A0A6C8BAECB9A545B3BDC40465ABB74442319A0 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED ORDER (13449696816301)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58025 FED ORDER (13449696816301)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418107-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418107-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418107-collect-files-25456-political-file-2012-federal + https://archive.org/details/418107-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:13 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + F76FD396A322CC62A4197DE088CA6DCF833558A7 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FED CONTRACT (13449777167988)_.pdf + WBZ 
ELIZABETH WARREN D SENATEMA ORD58029 FED CONTRACT (13449777167988)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418108-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418108-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418108-collect-files-25456-political-file-2012-federal + https://archive.org/details/418108-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 04:00:00 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + E2E238B9DF57CC47B4982E375A0F89E75CA9EAC2 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FEDNATL INVOICE (13461639080781)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FEDNATL INVOICE (13461639080781)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418109-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418109-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418109-collect-files-25456-political-file-2012-federal + https://archive.org/details/418109-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:47 GMT + texts/opensource + documentcloud, propublica, 
5290-jeremy-merrill + + 78B49781C5AADB75726B576A00313D2C145074AE + + + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FED ORDER (13449696816990)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58029 FED ORDER (13449696816990)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418110-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418110-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418110-collect-files-25456-political-file-2012-federal + https://archive.org/details/418110-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:35 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 221C3ED39EA1046DA1442C242B2EDF35114991FE + + + Silo 2.2 + Silo 2.2 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=Silo2.2_201310&mediatype=texts&collection=opensource_media"/><p>Unbiased reviews of the Arc'teryx Silo 50 winter pack by real people. 
Silo 2.2 is now available If you have not already updated, you can follow the link below to download the latest version, which is a free upgrade for all Silo 2 owners....</p><p>This item belongs to: texts/opensource_media.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Windows Executable</p> + https://archive.org/details/Silo2.2_201310 + https://archive.org/details/Silo2.2_201310 + Mon, 21 Oct 2013 03:59:34 GMT + texts/opensource_media + Silo 2.2 + + 2E63B46A804C6EDF3A3200D0202E1DE357B2FF12 + + + Rocket Power 3x 17 Losers Weepers ~ Reggie The Movie [ Unknown Encoder] + Rocket Power 3x 17 Losers Weepers ~ Reggie The Movie [ Unknown Encoder] + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=RocketPower3x17LosersWeepersReggieTheMovieUnknownEncoder&mediatype=movies&collection=opensource_movies"/><p>Season 3 Episode 17.</p><p>This item belongs to: movies/opensource_movies.</p><p>This item has files of the following types: Animated GIF, Archive BitTorrent, Cinepack, Metadata, Ogg Video, Thumbnail, h.264</p> + https://archive.org/details/RocketPower3x17LosersWeepersReggieTheMovieUnknownEncoder + https://archive.org/details/RocketPower3x17LosersWeepersReggieTheMovieUnknownEncoder + Mon, 21 Oct 2013 03:59:26 GMT + movies/opensource_movies + + + + animation + + 7C9CAF48A8554A9E68AB6F7B25FD2EC2A93C8632 + + + WBZ ELIZABETH WARREN D SENATEMA ORD58120 FED CONTRACT (13460805069185)_.pdf + WBZ ELIZABETH WARREN D SENATEMA ORD58120 FED CONTRACT (13460805069185)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418111-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418111-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the 
following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418111-collect-files-25456-political-file-2012-federal + https://archive.org/details/418111-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:21 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 0E9E3AAD85082031C2525F7A9568048F8B1E8E7E + + + Wikimedia incremental dump files for the Spanish Wikiversity on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wikiversity on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswikiversity-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wikiversity that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> + https://archive.org/details/incr-eswikiversity-20131020 + https://archive.org/details/incr-eswikiversity-20131020 + Mon, 21 Oct 2013 03:59:17 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswikiversity, Spanish, Wikiversity + + AD372EA58CF3006D8471BA41026C62BD1861078A + + + WBZ WARREN FOR SENATE D SENATEMA ORD58120 FED ORDER (13457355056808)_.pdf + WBZ WARREN FOR SENATE D SENATEMA ORD58120 FED ORDER (13457355056808)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418112-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418112-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following 
types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418112-collect-files-25456-political-file-2012-federal + https://archive.org/details/418112-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:59:08 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 012A94D6BD2802A88F233C09E8C70437682D2131 + + + Wikimedia incremental dump files for the Swedish Wikinews on October 19, 2013 + Wikimedia incremental dump files for the Swedish Wikinews on October 19, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-svwikinews-20131019&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Swedish Wikinews that is generated by Wikimedia on October 19, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, BZIP2, GZIP, Metadata, Text</p> + https://archive.org/details/incr-svwikinews-20131019 + https://archive.org/details/incr-svwikinews-20131019 + Mon, 21 Oct 2013 03:59:05 GMT + web/wikimediadownloads + wiki, incremental, dumps, svwikinews, Swedish, Wikinews + + E501281C8B708DE164F23964725A87773F582478 + + + Webwide Crawldata 2013-10-20T22:03:54PDT to 2013-10-20T16:43:21PDT + Webwide Crawldata 2013-10-20T22:03:54PDT to 2013-10-20T16:43:21PDT + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=WIDE-20131020220354-crawl422&mediatype=web&collection=wide00009"/><p>Internet Archive crawldata from Webwide Crawl, captured by crawl422.us.archive.org:wide from Sun Oct 20 22:03:54 PDT 2013 to Sun Oct 20 16:43:21 PDT 2013..</p><p>This item belongs to: web/wide00009.</p><p>This item has files of the following types: Item CDX Index, Item CDX 
Meta-Index, Metadata, Text, WARC CDX Index, Web ARChive GZ</p> + https://archive.org/details/WIDE-20131020220354-crawl422 + https://archive.org/details/WIDE-20131020220354-crawl422 + Mon, 21 Oct 2013 03:59:01 GMT + web/wide00009 + crawldata + + + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED CONTRACT (13460805058183)_.pdf + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED CONTRACT (13460805058183)_.pdf + <img width="160" style="padding-right:3px;floa +302d +t:left;" src="https://archive.org/services/get-item-image.php?identifier=418113-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418113-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418113-collect-files-25456-political-file-2012-federal + https://archive.org/details/418113-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:54 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 96238A946E099625F0281BEEF4A336EF1A1C447F + + + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED ORDER (13457355068812)_.pdf + WBZ WARREN FOR SENATE D SENATEMA ORD58126 FED ORDER (13457355068812)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418114-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418114-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, 
DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418114-collect-files-25456-political-file-2012-federal + https://archive.org/details/418114-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:41 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 67E3C7FAF4AC13E3E8AD70AEBFDBDED581CE4BA0 + + + gov.uscourts.flmd.283032 + gov.uscourts.flmd.283032 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=gov.uscourts.flmd.283032&mediatype=texts&collection=usfederalcourts"/><p>Click here to see available docket information and document downloads for this case. If you need the complete docket, you should consult PACER directly..</p><p>This item belongs to: texts/usfederalcourts.</p><p>This item has files of the following types: Archive BitTorrent, HTML, Metadata, Text PDF</p> + https://archive.org/details/gov.uscourts.flmd.283032 + https://archive.org/details/gov.uscourts.flmd.283032 + Mon, 21 Oct 2013 03:58:35 GMT + texts/usfederalcourts + + 6252A903C01875100635D5011543196896010583 + + + WBZ SCOTT BROWN R SENATEMA ORD57975 FED CONTRACT (13448868050432)_.pdf + WBZ SCOTT BROWN R SENATEMA ORD57975 FED CONTRACT (13448868050432)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418115-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418115-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + 
https://archive.org/details/418115-collect-files-25456-political-file-2012-federal + https://archive.org/details/418115-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:26 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 98F1A74C04D215AC2175D27B24C73C24E1F77A43 + + + WBZ SCOTT BROWN FOR US SENATE R SENATEMA ORD57975 FED INVOICE (13457541352753)_.pdf + WBZ SCOTT BROWN FOR US SENATE R SENATEMA ORD57975 FED INVOICE (13457541352753)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418116-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418116-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF</p> + https://archive.org/details/418116-collect-files-25456-political-file-2012-federal + https://archive.org/details/418116-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:12 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 1A93DFBE2B84324B987F7C3EF91FDC3299E5BB2A + + + Wikimedia incremental dump files for the Spanish Wikisource on October 20, 2013 + Wikimedia incremental dump files for the Spanish Wikisource on October 20, 2013 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=incr-eswikisource-20131020&mediatype=web&collection=wikimediadownloads"/><p>This is the incremental dump files for the Spanish Wikisource that is generated by Wikimedia on October 20, 2013..</p><p>This item belongs to: web/wikimediadownloads.</p><p>This item has files of the following types: Archive BitTorrent, Metadata, Text</p> 
+ https://archive.org/details/incr-eswikisource-20131020 + https://archive.org/details/incr-eswikisource-20131020 + Mon, 21 Oct 2013 03:58:01 GMT + web/wikimediadownloads + wiki, incremental, dumps, eswikisource, Spanish, Wikisource + + BF69AA565071874D71ABB5D3D4FECFA85448C261 + + + WBZ SCOTT BROWN R SENATEMA ORD58022 FED CONTRACT_.pdf (13449774486241)_.pdf + WBZ SCOTT BROWN R SENATEMA ORD58022 FED CONTRACT_.pdf (13449774486241)_.pdf + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=418117-collect-files-25456-political-file-2012-federal&mediatype=texts&collection=opensource"/><p>Unofficial mirror of http://www.documentcloud.org/documents/418117-collect-files-25456-political-file-2012-federal.html.</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Additional Text PDF, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Image Container PDF, Metadata, Scandata, Single Page Processed JP2 ZIP</p> + https://archive.org/details/418117-collect-files-25456-political-file-2012-federal + https://archive.org/details/418117-collect-files-25456-political-file-2012-federal + Mon, 21 Oct 2013 03:58:00 GMT + texts/opensource + documentcloud, propublica, 5290-jeremy-merrill + + 6B645B4FBAD190864C10A471F96FA25AA67B88EE + + + Alain Le Bussy - Fata Care Se Temea De Apa 0.9 07 + Alain Le Bussy - Fata Care Se Temea De Apa 0.9 07 + <img width="160" style="padding-right:3px;float:left;" src="https://archive.org/services/get-item-image.php?identifier=Alain_Le_Bussy-Fata_Care_Se_Temea_De_Apa_0_9_07__&mediatype=texts&collection=opensource"/><p>Alain Le Bussy - Fata Care Se Temea De Apa 0.9 07 Romanian Book. Aceasta carte face parte din Colectia 10.000 de carti. 
Puteti downloada aici un fisier zip cu intreaga colectie 10.000 de carti (2 GB) sau puteti alege doar cartea dorita....</p><p>This item belongs to: texts/opensource.</p><p>This item has files of the following types: Abbyy GZ, Animated GIF, Archive BitTorrent, DjVu, DjVuTXT, Djvu XML, EPUB, Metadata, Scandata, Single Page Processed JP2 ZIP, Text PDF, Word Document</p> + https://archive.org/details/Alain_Le_Bussy-Fata_Care_Se_Temea_De_Apa_0_9_07__ + https://archive.org/details/Alain_Le_Bussy-Fata_Care_Se_Temea_De_Apa_0_9_07__ + Mon, 21 Oct 2013 03:57:59 GMT + http://creativecommons.org/publicdomain/zero/1.0/ + texts/opensource + 10000 carti, Alain Le Bussy, Fata Care Se Temea De Apa 0.9 07, carti, carte, online, pdf, download, romana, carti in limba romana, romania, romanian, carti pdf, Books in Romanian language, ro-books, kjb, ftp.kjb.ro, 10000, 10000 carti + + 6F89BFC6BE691DC13FE417E2B09D81BE90DA24EA + + + + +0 + + + +WARC/1.0 +WARC-Type: resource +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Target-URI: metadata://gnu.org/software/wget/warc/MANIFEST.txt +WARC-Date: 2013-10-21T21:53:18Z +WARC-Block-Digest: sha1:MDYPVAS3DVOGNNDRGXW6RHSIQJHDUL6H +Content-Type: text/plain +Content-Length: 48 + + + + +WARC/1.0 +WARC-Type: resource +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Target-URI: metadata://gnu.org/software/wget/warc/wget_arguments.txt +WARC-Date: 2013-10-21T21:53:18Z +WARC-Block-Digest: sha1:4RRGL67PFUKCW27T2JYSPL7U2WU3UVAP +Content-Type: text/plain +Content-Length: 54 + +"-i" "urls.txt" "-O" "-" "--warc-file=IAH-urls-wget" + + +WARC/1.0 +WARC-Type: resource +WARC-Record-ID: +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-Target-URI: metadata://gnu.org/software/wget/warc/wget.log +WARC-Date: 2013-10-21T21:53:18Z +WARC-Block-Digest: sha1:WTDCWU2CDUXK2GWFEVX7NWJNCTC25MVC +Content-Type: text/plain +Content-Length: 5647 + +Opening WARC file ‘IAH-urls-wget.warc.gz’. 
+ +urls.txt: Invalid URL dns:www.archive.org: Unsupported scheme ‘dns’ +--2013-10-21 22:53:06-- http://www.archive.org/robots.txt +Resolving www.archive.org... 207.241.224.2 +Connecting to www.archive.org|207.241.224.2|:80... connected. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/robots.txt [following] + + 0K 100% 9.03M=0s + +--2013-10-21 22:53:06-- http://archive.org/robots.txt +Resolving archive.org... 207.241.224.2 +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: 727 [text/plain] +Saving to: ‘STDOUT’ + + 0K 100% 826K=0.001s + +2013-10-21 22:53:07 (826 KB/s) - written to stdout [727/727] + +--2013-10-21 22:53:07-- http://www.archive.org/ +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/index.php [following] + + 0K 100% 19.2M=0s + +--2013-10-21 22:53:07-- http://archive.org/index.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 301 Moved Permanently +Location: https://archive.org [following] + + 0K 0.00 =0s + +--2013-10-21 22:53:08-- https://archive.org/ +Connecting to archive.org|207.241.224.2|:443... connected. +HTTP request sent, awaiting response... 200 OK +Length: unspecified [text/html] +Saving to: ‘STDOUT’ + + 0K .......... .......... ......... 9.59M=0.003s + +2013-10-21 22:53:10 (9.59 MB/s) - written to stdout [30550] + +--2013-10-21 22:53:10-- http://www.archive.org/index.php +Connecting to www.archive.org|207.241.224.2|:80... connected. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/index.php [following] + + 0K 100% 10.2M=0s + +--2013-10-21 22:53:11-- http://archive.org/index.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 
301 Moved Permanently +Location: https://archive.org [following] + + 0K 0.00 =0s + +--2013-10-21 22:53:11-- https://archive.org/ +Connecting to archive.org|207.241.224.2|:443... connected. +HTTP request sent, awaiting response... 200 OK +Length: unspecified [text/html] +Saving to: ‘STDOUT’ + + 0K .......... .......... ......... 96.9K=0.3s + +2013-10-21 22:53:14 (96.9 KB/s) - written to stdout [30459] + +--2013-10-21 22:53:14-- http://www.archive.org/images/logoc.jpg +Connecting to www.archive.org|207.241.224.2|:80... connected. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/images/logoc.jpg [following] + + 0K 100% 15.4M=0s + +--2013-10-21 22:53:14-- http://archive.org/images/logoc.jpg +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: 1662 (1.6K) [image/jpeg] +Saving to: ‘STDOUT’ + + 0K . 100% 122M=0s + +2013-10-21 22:53:14 (122 MB/s) - written to stdout [1662/1662] + +--2013-10-21 22:53:14-- http://www.archive.org/images/go-button-gateway.gif +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/images/go-button-gateway.gif [following] + + 0K 100% 11.0M=0s + +--2013-10-21 22:53:15-- http://archive.org/images/go-button-gateway.gif +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: 1124 (1.1K) [image/gif] +Saving to: ‘STDOUT’ + + 0K . 100% 97.4M=0s + +2013-10-21 22:53:15 (97.4 MB/s) - written to stdout [1124/1124] + +--2013-10-21 22:53:15-- http://www.archive.org/images/star.png +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/images/star.png [following] + + 0K 100% 17.1M=0s + +--2013-10-21 22:53:15-- http://archive.org/images/star.png +Reusing existing connection to www.archive.org:80. 
+HTTP request sent, awaiting response... 200 OK +Length: 1016 [image/png] +Saving to: ‘STDOUT’ + + 0K 100% 74.5M=0s + +2013-10-21 22:53:16 (74.5 MB/s) - written to stdout [1016/1016] + +--2013-10-21 22:53:16-- http://www.archive.org/services/collection-rss.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 302 Moved Temporarily +Location: http://archive.org/services/collection-rss.php [following] + + 0K 100% 17.1M=0s + +--2013-10-21 22:53:16-- http://archive.org/services/collection-rss.php +Reusing existing connection to www.archive.org:80. +HTTP request sent, awaiting response... 200 OK +Length: unspecified [text/xml] +Saving to: ‘STDOUT’ + + 0K .......... .......... .......... .......... .......... 54.2K + 50K .......... .......... ..... 92.7K=1.2s + +2013-10-21 22:53:18 (63.2 KB/s) - written to stdout [77773] + +FINISHED --2013-10-21 22:53:18-- +Total wall clock time: 12s +Downloaded: 7 files, 140K in 1.5s (92.5 KB/s) + + diff --git a/src/test/resources/org/archive/format/warc/mutliple-headers.warc b/src/test/resources/org/archive/format/warc/mutliple-headers.warc new file mode 100644 index 00000000..861f67f1 --- /dev/null +++ b/src/test/resources/org/archive/format/warc/mutliple-headers.warc @@ -0,0 +1,47 @@ +WARC/1.0 +WARC-Type: response +WARC-Date: 2024-09-27T10:47:02Z +WARC-Record-ID: +Content-Length: 971 +Content-Type: application/http; msgtype=response +WARC-Warcinfo-ID: +WARC-Concurrent-To: +WARC-IP-Address: 172.67.184.105 +WARC-Target-URI: https://www.example.com/index.html/ +WARC-Protocol: h2 +WARC-Protocol: tls/1.3 +WARC-Cipher-Suite: TLS_AES_256_GCM_SHA384 +WARC-Payload-Digest: sha1:70FB81039DCE25916E0E0CB48CF6662E3F27FFFC +WARC-Block-Digest: sha1:80573371A8271BE6B3AA26FD9DB72E9AD9F316D9 +WARC-Identified-Payload-Type: text/html + +HTTP/1.1 200 +date: Fri, 27 Sep 2024 10:47:02 GMT +content-type: text/html; charset=UTF-8 +x-powered-by: PHP/8.3.11 +x-powered-by: PleskLin +x-pingback: 
https://www.example.com/xmlrpc.php +link: ; rel="https://api.w.org/" +link: ; rel="alternate"; title="JSON"; type="application/json" +link: ; rel=shortlink +x-litespeed-cache: miss +vary: Accept-Encoding +x-turbo-charged-by: LiteSpeed +cf-cache-status: DYNAMIC +report-to: {"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v4?s=XXtestYY"}],"group":"cf-nel","max_age":604800} +nel: {"success_fraction":0,"report_to":"cf-nel","max_age":604800} +server: cloudflare +cf-ray: 8bf61e4afb9e7f9e-IAD +X-Crawler-content-encoding: br +alt-svc: h3=":443"; ma=86400 +Content-Length: 108 + + + + + + Test + + + + diff --git a/src/test/resources/org/archive/resource/html/link-extraction-test.warc b/src/test/resources/org/archive/resource/html/link-extraction-test.warc new file mode 100644 index 00000000..1a30598e --- /dev/null +++ b/src/test/resources/org/archive/resource/html/link-extraction-test.warc @@ -0,0 +1,362 @@ +WARC/1.0 +WARC-Type: warcinfo +Content-Type: application/warc-fields +WARC-Date: 2017-02-20T14:00:56Z +Content-Length: 128 + +format: WARC File Format 1.0 +conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf +robots: classic + + + +WARC/1.0 +WARC-Type: response +WARC-Date: 2017-02-20T14:00:56Z +WARC-Target-URI: http://www.example.com/html4.html +Content-Type: application/http; msgtype=response +Content-Length: 1243 + +HTTP/1.1 200 OK +Date: Mon, 20 Feb 2017 14:00:56 GMT +Content-Length: 1125 +Content-Type: application/xhtml+xml + + + + + + + +Test XHTML Link Extraction + + +A@/href +

    + anchor only + IMG@/src + IMG@/longdesc + +

    +

    + To be or not to be. +

    +
    +To be, or not to be, that is the question:
    +Whether 'tis nobler in the mind to suffer
    +The slings and arrows of outrageous fortune, … +
    + + + + +WARC/1.0 +WARC-Type: response +WARC-Target-URI: http://www.example.com/link-extraction-test-html5-video.html +WARC-Date: 2017-02-20T21:35:03Z +Content-Type: application/http; msgtype=response +Content-Length: 890 + +HTTP/1.1 200 OK +Date: Mon, 20 Feb 2017 21:35:03 GMT +Content-Length: 789 +Content-Type: text/html + + + + +Test HTML5 Video Tag + + + + + + + + + + + +WARC/1.0 +WARC-Type: response +WARC-Target-URI: http://www.example.com/poor_html5.html +WARC-Date: 2017-02-21T15:50:40Z +Content-Type: application/http; msgtype=response +Content-Length: 594 + +HTTP/1.1 200 OK +Date: Tue, 21 Feb 2017 15:50:40 GMT +Content-Length: 486 +Content-Type: text/html + + +Testing poor HTML5 + + + + + +This is valid HTML5! + + + +
    header
    + +

    headline

    + +

    paragraph one with link. + + +WARC/1.0 +WARC-Type: response +WARC-Target-URI: http://www.example.com/fb-video.html +WARC-Date: 2017-02-20T16:58:50Z +Content-Type: application/http; msgtype=response +Content-Length: 1330 + +HTTP/1.1 200 OK +Date: Mon, 20 Feb 2017 16:58:50 GMT +Content-Length: 1194 +Content-Type: text/html + + + + + fb-video - Embedded Videos - Social Plugins + + + + +

    + + + +
    +
    +
    + How to Share With Just Friends +

    How to share with just friends.

    + Posted by Facebook on Friday, December 5, 2014 +
    +
    +
    + + + + + +WARC/1.0 +WARC-Type: response +WARC-Target-URI: http://www.example.com/data-href.examples.html +WARC-Date: 2017-02-21T21:05:10Z +Content-Type: application/http; msgtype=response +Content-Length: 3160 + +HTTP/1.1 200 OK +Date: Tue, 21 Feb 2017 21:05:10 GMT +Content-Length: 3057 +Content-Type: text/html + + + + + + + + + + + + +
    + + +
    +
    +
    + How to Share With Just Friends +

    How to share with just friends.

    + Posted by Facebook on Friday, December 5, 2014 +
    +
    +
    + + +
    + +
    +

    + +

    + + + + + +
    + + + + responsive lightbox + + + +venobox + + + + + +WARC/1.0 +WARC-Type: response +WARC-Target-URI: http://www.example.com/fb-social-plugins.html +WARC-Date: 2017-02-22T09:33:02Z +Content-Type: application/http; msgtype=response +Content-Length: 1870 + +HTTP/1.1 200 OK +Date: Wed, 22 Feb 2017 09:33:02 GMT +Content-Length: 1767 +Content-Type: text/html + + +
    +
    + + +
    + + +
    + + +
    + + +
    + + + + + +
    +
    + + +WARC/1.0 +WARC-Type: response +WARC-Date: 2017-08-23T13:54:59Z +Content-Type: application/http;msgtype=response +Content-Length: 1279 + +HTTP/1.1 200 OK +Date: Wed, 23 Aug 2017 13:54:59 GMT +Server: Apache/2.4.18 (Ubuntu) +Last-Modified: Wed, 23 Aug 2017 13:54:03 GMT +ETag: "3ca-5576c0b718ab3" +Accept-Ranges: bytes +Content-Length: 971 +Vary: Accept-Encoding +Keep-Alive: timeout=5, max=100 +Connection: Keep-Alive +Content-Type: text/html + + + +Test Extraction of URLs from INPUT onClick Attributes + + + + +
    Click to load webpage
    + +
    + + + + + + +
    + + + + + +