From f7cd67d9f4ac252385517770fce63dea767f9203 Mon Sep 17 00:00:00 2001 From: RogerMathisen Date: Tue, 23 Sep 2014 13:06:44 +0200 Subject: [PATCH 1/5] - Replaced direct references to "/tmp" with generic temporary directory reference using File.createTempFile(). Fixes bug reported in iipc/webarchive-commons Issue #2. --- .../archive/format/gzip/GZIPMemberWriterTest.java | 4 ++-- .../util/binsearch/SortedTextFileTest.java | 2 +- .../iterator/SortedCompositeIteratorTest.java | 15 ++++++--------- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java b/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java index 5cd75ccf..483d2baf 100644 --- a/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java +++ b/src/test/java/org/archive/format/gzip/GZIPMemberWriterTest.java @@ -12,8 +12,8 @@ public class GZIPMemberWriterTest extends TestCase { public void testWrite() throws IOException { - String outPath = "/tmp/tmp.gz"; - GZIPMemberWriter gzw = new GZIPMemberWriter(new FileOutputStream(new File(outPath))); + File outFile = File.createTempFile("tmp", ".gz"); + GZIPMemberWriter gzw = new GZIPMemberWriter(new FileOutputStream(outFile)); gzw.write(new ByteArrayInputStream("Here is record 1".getBytes(IAUtils.UTF8))); gzw.write(new ByteArrayInputStream("Here is record 2".getBytes(IAUtils.UTF8))); } diff --git a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java index 2c9d19e8..8f812b75 100644 --- a/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java +++ b/src/test/java/org/archive/util/binsearch/SortedTextFileTest.java @@ -25,7 +25,7 @@ private void createFile(File target, int max) throws FileNotFoundException { public void testGetRecordIteratorStringBoolean() throws IOException { - File test = new File("/tmp/test.tmp"); + File test = File.createTempFile("test", null); int max = 1000000; 
createFile(test,max); RandomAccessFileSeekableLineReaderFactory factory = diff --git a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java index f1c2a0ec..0f4dc68a 100644 --- a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java +++ b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java @@ -4,6 +4,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; +import java.io.IOException; import java.io.PrintWriter; import java.util.Comparator; @@ -11,21 +12,16 @@ public class SortedCompositeIteratorTest extends TestCase { - public void testHasNext() throws FileNotFoundException { + public void testHasNext() throws FileNotFoundException, IOException { long t = 210000; long c = 134; float f = (float)c / (float)t; System.err.format("F(%f)\n",f); - File a = new File("/tmp/a"); - File b = new File("/tmp/b"); - if(a.isFile()) { - a.delete(); - } - if(b.isFile()) { - b.delete(); - } + File a = File.createTempFile("filea", null); + File b = File.createTempFile("fileb", null); + PrintWriter apw = new PrintWriter(a); PrintWriter bpw = new PrintWriter(b); apw.println("1"); @@ -38,6 +34,7 @@ public void testHasNext() throws FileNotFoundException { BufferedReader bbr = new BufferedReader(new FileReader(b)); SortedCompositeIterator sci = new SortedCompositeIterator(new Comparator() { + @Override public int compare(String o1, String o2) { return o1.compareTo(o2); } From 077abb783d77b8a556112a6617911d0ee7006595 Mon Sep 17 00:00:00 2001 From: thomase Date: Tue, 23 Sep 2014 14:48:48 +0200 Subject: [PATCH 2/5] * changed newline to System.lineSeparator --- .../org/archive/net/PublicSuffixesTest.java | 386 +++++++++--------- 1 file changed, 193 insertions(+), 193 deletions(-) diff --git a/src/test/java/org/archive/net/PublicSuffixesTest.java b/src/test/java/org/archive/net/PublicSuffixesTest.java index b88acb6d..a82bab22 
100644 --- a/src/test/java/org/archive/net/PublicSuffixesTest.java +++ b/src/test/java/org/archive/net/PublicSuffixesTest.java @@ -1,193 +1,193 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.net; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.ArrayList; -import java.util.regex.Matcher; - -import junit.framework.TestCase; - -import org.archive.net.PublicSuffixes.Node; - -/** - * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches - * from constructed regex. 
- * - * @author gojomo - */ -public class PublicSuffixesTest extends TestCase { - // test of low level implementation - - public void testCompare() { - Node n = new Node("hoge"); - assertTrue(n.compareTo('a') > 0); - assertEquals(-1, n.compareTo('*')); - assertEquals(-1, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(-1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("*,"); - assertEquals(1, n.compareTo('a')); - assertEquals(0, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node("*,"))); - assertEquals(1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("!hoge"); - assertEquals(1, n.compareTo('a')); - assertEquals(-1, n.compareTo('*')); - assertEquals(0, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(0, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node(""); - assertEquals(1, n.compareTo('a')); - assertEquals(1, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node(""))); - } - - protected String dump(Node alt) { - StringWriter w = new StringWriter(); - PublicSuffixes.dump(alt, 0, new PrintWriter(w)); - return w.toString(); - } - public void testTrie1() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - // specifically, should not have empty string as match. 
- assertEquals("(null)\n" + - " \"ac,\"\n", dump(alt)); - alt.addBranch("ac,com,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"\"\n", dump(alt)); - alt.addBranch("ac,edu,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"com,\"\n" + - " \"edu,\"\n" + - " \"\"\n", dump(alt)); - } - public void testTrie2() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("*,"); - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"*,\"\n", dump(alt)); - } - - public void testTrie3() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("ac,!hoge,"); - alt.addBranch("ac,*,"); - // exception goes first. - assertEquals("(null)\n" + - " \"ac,\"\n" + - " \"!hoge,\"\n" + - " \"*,\"\n" + - " \"\"\n", dump(alt)); - } - - // test of higher-level functionality - - Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() - .matcher(""); - - public void testBasics() { - matchPrefix("com,example,www,", "com,example,"); - matchPrefix("com,example,", "com,example,"); - matchPrefix("org,archive,www,", "org,archive,"); - matchPrefix("org,archive,", "org,archive,"); - matchPrefix("fr,yahoo,www,", "fr,yahoo,"); - matchPrefix("fr,yahoo,", "fr,yahoo,"); - matchPrefix("au,com,foobar,www,", "au,com,foobar,"); - matchPrefix("au,com,foobar,", "au,com,foobar,"); - matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); - matchPrefix("uk,co,virgin,", "uk,co,virgin,"); - matchPrefix("au,com,example,www,", "au,com,example,"); - matchPrefix("au,com,example,", "au,com,example,"); - matchPrefix("jp,yokohama,public,assigned,www,", - "jp,yokohama,public,assigned,"); - matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); - } - - public void testDomainWithDash() { - matchPrefix("de,bad-site,www", "de,bad-site,"); - } - - public void testDomainWithNumbers() { - matchPrefix("de,archive4u,www", "de,archive4u,"); - } - - public void testIPV4() { - assertEquals("unexpected reduction", - "1.2.3.4", - 
PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); - } - - public void testIPV6() { - assertEquals("unexpected reduction", - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", - PublicSuffixes.reduceSurtToAssignmentLevel( - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); - } - - public void testExceptions() { - matchPrefix("uk,bl,www,", "uk,bl,"); - matchPrefix("uk,bl,", "uk,bl,"); - matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); - matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); - } - - public void testFakeTLD() { - // we assume any new/unknonwn TLD should be assumed as 2-level; - // this is preferable for our grouping purpose but might not be - // for a cookie-assigning browser (original purpose of publicsuffixlist) - matchPrefix("zzz,example,www,", "zzz,example,"); - } - - public void testUnsegmentedHostname() { - m.reset("example"); - assertFalse("unexpected match found in 'example'", m.find()); - } - - public void testTopmostAssignedCaching() { - assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); - assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); - } - - // TODO: test UTF domains? - - protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { - m.reset(surtDomain); - assertTrue("expected match not found in '" + surtDomain, m.find()); - assertEquals("expected match not found", expectedAssignedPrefix, m - .group()); - } -} +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.net; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.regex.Matcher; + +import junit.framework.TestCase; + +import org.archive.net.PublicSuffixes.Node; + +/** + * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches + * from constructed regex. + * + * @author gojomo + */ +public class PublicSuffixesTest extends TestCase { + // test of low level implementation + private final String NL = System.lineSeparator(); + + public void testCompare() { + Node n = new Node("hoge"); + assertTrue(n.compareTo('a') > 0); + assertEquals(-1, n.compareTo('*')); + assertEquals(-1, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(-1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("*,"); + assertEquals(1, n.compareTo('a')); + assertEquals(0, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, n.compareTo(new Node("*,"))); + assertEquals(1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("!hoge"); + assertEquals(1, n.compareTo('a')); + assertEquals(-1, n.compareTo('*')); + assertEquals(0, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(0, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node(""); + assertEquals(1, n.compareTo('a')); + assertEquals(1, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, 
n.compareTo(new Node(""))); + } + + protected String dump(Node alt) { + StringWriter w = new StringWriter(); + PublicSuffixes.dump(alt, 0, new PrintWriter(w)); + return w.toString(); + } + public void testTrie1() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + // specifically, should not have empty string as match. + assertEquals("(null)" + NL + " \"ac,\"" + NL, dump(alt)); + alt.addBranch("ac,com,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"\"" + NL, dump(alt)); + alt.addBranch("ac,edu,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"edu,\"" + NL + + " \"\"" + NL, dump(alt)); + } + public void testTrie2() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("*,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"*,\"" + NL, dump(alt)); + } + + public void testTrie3() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("ac,!hoge,"); + alt.addBranch("ac,*,"); + // exception goes first. 
+ assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"!hoge,\"" + NL + + " \"*,\"" + NL + + " \"\"" + NL, dump(alt)); + } + + // test of higher-level functionality + + Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() + .matcher(""); + + public void testBasics() { + matchPrefix("com,example,www,", "com,example,"); + matchPrefix("com,example,", "com,example,"); + matchPrefix("org,archive,www,", "org,archive,"); + matchPrefix("org,archive,", "org,archive,"); + matchPrefix("fr,yahoo,www,", "fr,yahoo,"); + matchPrefix("fr,yahoo,", "fr,yahoo,"); + matchPrefix("au,com,foobar,www,", "au,com,foobar,"); + matchPrefix("au,com,foobar,", "au,com,foobar,"); + matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); + matchPrefix("uk,co,virgin,", "uk,co,virgin,"); + matchPrefix("au,com,example,www,", "au,com,example,"); + matchPrefix("au,com,example,", "au,com,example,"); + matchPrefix("jp,yokohama,public,assigned,www,", + "jp,yokohama,public,assigned,"); + matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); + } + + public void testDomainWithDash() { + matchPrefix("de,bad-site,www", "de,bad-site,"); + } + + public void testDomainWithNumbers() { + matchPrefix("de,archive4u,www", "de,archive4u,"); + } + + public void testIPV4() { + assertEquals("unexpected reduction", + "1.2.3.4", + PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); + } + + public void testIPV6() { + assertEquals("unexpected reduction", + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + PublicSuffixes.reduceSurtToAssignmentLevel( + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); + } + + public void testExceptions() { + matchPrefix("uk,bl,www,", "uk,bl,"); + matchPrefix("uk,bl,", "uk,bl,"); + matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); + matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); + } + + public void testFakeTLD() { + // we assume any new/unknonwn TLD should be assumed as 2-level; + // this is preferable for our grouping purpose but might not be + // for a 
cookie-assigning browser (original purpose of publicsuffixlist) + matchPrefix("zzz,example,www,", "zzz,example,"); + } + + public void testUnsegmentedHostname() { + m.reset("example"); + assertFalse("unexpected match found in 'example'", m.find()); + } + + public void testTopmostAssignedCaching() { + assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); + assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); + } + + // TODO: test UTF domains? + + protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { + m.reset(surtDomain); + assertTrue("expected match not found in '" + surtDomain, m.find()); + assertEquals("expected match not found", expectedAssignedPrefix, m + .group()); + } +} From 5054060e27da6fef0816efc8b90af06e4e998d9a Mon Sep 17 00:00:00 2001 From: RogerMathisen Date: Wed, 24 Sep 2014 10:04:32 +0200 Subject: [PATCH 3/5] Updated release notes. 
--- CHANGES.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 65d24814..db09a463 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +1.1.5 +----- +* [Removed direct reference to Unix TMP-path](https://github.com/iipc/webarchive-commons/issues/2) + 1.1.4 ----- * [All dates should be independent of locale settings](https://github.com/iipc/webarchive-commons/pull/22) From f3e12da0bb53cb4ffb0d21b2d13cda1b6918b1d1 Mon Sep 17 00:00:00 2001 From: Thomas Edvardsen Date: Wed, 24 Sep 2014 10:26:38 +0200 Subject: [PATCH 4/5] * changed newline from 0d0a to 0a in source file --- .../org/archive/net/PublicSuffixesTest.java | 386 +++++++++--------- 1 file changed, 193 insertions(+), 193 deletions(-) diff --git a/src/test/java/org/archive/net/PublicSuffixesTest.java b/src/test/java/org/archive/net/PublicSuffixesTest.java index a82bab22..7528bbe1 100644 --- a/src/test/java/org/archive/net/PublicSuffixesTest.java +++ b/src/test/java/org/archive/net/PublicSuffixesTest.java @@ -1,193 +1,193 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.archive.net; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.ArrayList; -import java.util.regex.Matcher; - -import junit.framework.TestCase; - -import org.archive.net.PublicSuffixes.Node; - -/** - * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches - * from constructed regex. - * - * @author gojomo - */ -public class PublicSuffixesTest extends TestCase { - // test of low level implementation - private final String NL = System.lineSeparator(); - - public void testCompare() { - Node n = new Node("hoge"); - assertTrue(n.compareTo('a') > 0); - assertEquals(-1, n.compareTo('*')); - assertEquals(-1, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(-1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("*,"); - assertEquals(1, n.compareTo('a')); - assertEquals(0, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node("*,"))); - assertEquals(1, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node("!hoge"); - assertEquals(1, n.compareTo('a')); - assertEquals(-1, n.compareTo('*')); - assertEquals(0, n.compareTo('!')); - assertEquals(-1, n.compareTo(new Node("*,"))); - assertEquals(0, n.compareTo(new Node("!muga,"))); - assertEquals(-1, n.compareTo(new Node(""))); - - n = new Node(""); - assertEquals(1, n.compareTo('a')); - assertEquals(1, n.compareTo('*')); - assertEquals(1, n.compareTo('!')); - assertEquals(0, n.compareTo(new Node(""))); - } - - protected String dump(Node alt) { - StringWriter w = new StringWriter(); - PublicSuffixes.dump(alt, 0, new PrintWriter(w)); - return w.toString(); - } - public void testTrie1() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - // specifically, should not have empty string as match. 
- assertEquals("(null)" + NL + " \"ac,\"" + NL, dump(alt)); - alt.addBranch("ac,com,"); - assertEquals("(null)" + NL + - " \"ac,\"" + NL + - " \"com,\"" + NL + - " \"\"" + NL, dump(alt)); - alt.addBranch("ac,edu,"); - assertEquals("(null)" + NL + - " \"ac,\"" + NL + - " \"com,\"" + NL + - " \"edu,\"" + NL + - " \"\"" + NL, dump(alt)); - } - public void testTrie2() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("*,"); - assertEquals("(null)" + NL + - " \"ac,\"" + NL + - " \"*,\"" + NL, dump(alt)); - } - - public void testTrie3() { - Node alt = new Node(null, new ArrayList()); - alt.addBranch("ac,"); - alt.addBranch("ac,!hoge,"); - alt.addBranch("ac,*,"); - // exception goes first. - assertEquals("(null)" + NL + - " \"ac,\"" + NL + - " \"!hoge,\"" + NL + - " \"*,\"" + NL + - " \"\"" + NL, dump(alt)); - } - - // test of higher-level functionality - - Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() - .matcher(""); - - public void testBasics() { - matchPrefix("com,example,www,", "com,example,"); - matchPrefix("com,example,", "com,example,"); - matchPrefix("org,archive,www,", "org,archive,"); - matchPrefix("org,archive,", "org,archive,"); - matchPrefix("fr,yahoo,www,", "fr,yahoo,"); - matchPrefix("fr,yahoo,", "fr,yahoo,"); - matchPrefix("au,com,foobar,www,", "au,com,foobar,"); - matchPrefix("au,com,foobar,", "au,com,foobar,"); - matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); - matchPrefix("uk,co,virgin,", "uk,co,virgin,"); - matchPrefix("au,com,example,www,", "au,com,example,"); - matchPrefix("au,com,example,", "au,com,example,"); - matchPrefix("jp,yokohama,public,assigned,www,", - "jp,yokohama,public,assigned,"); - matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); - } - - public void testDomainWithDash() { - matchPrefix("de,bad-site,www", "de,bad-site,"); - } - - public void testDomainWithNumbers() { - matchPrefix("de,archive4u,www", "de,archive4u,"); - } - - public void testIPV4() 
{ - assertEquals("unexpected reduction", - "1.2.3.4", - PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); - } - - public void testIPV6() { - assertEquals("unexpected reduction", - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", - PublicSuffixes.reduceSurtToAssignmentLevel( - "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); - } - - public void testExceptions() { - matchPrefix("uk,bl,www,", "uk,bl,"); - matchPrefix("uk,bl,", "uk,bl,"); - matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); - matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); - } - - public void testFakeTLD() { - // we assume any new/unknonwn TLD should be assumed as 2-level; - // this is preferable for our grouping purpose but might not be - // for a cookie-assigning browser (original purpose of publicsuffixlist) - matchPrefix("zzz,example,www,", "zzz,example,"); - } - - public void testUnsegmentedHostname() { - m.reset("example"); - assertFalse("unexpected match found in 'example'", m.find()); - } - - public void testTopmostAssignedCaching() { - assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); - assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); - } - - // TODO: test UTF domains? - - protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { - m.reset(surtDomain); - assertTrue("expected match not found in '" + surtDomain, m.find()); - assertEquals("expected match not found", expectedAssignedPrefix, m - .group()); - } -} +/* + * This file is part of the Heritrix web crawler (crawler.archive.org). + * + * Licensed to the Internet Archive (IA) by one or more individual + * contributors. + * + * The IA licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.archive.net; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.regex.Matcher; + +import junit.framework.TestCase; + +import org.archive.net.PublicSuffixes.Node; + +/** + * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches + * from constructed regex. + * + * @author gojomo + */ +public class PublicSuffixesTest extends TestCase { + // test of low level implementation + private final String NL = System.lineSeparator(); + + public void testCompare() { + Node n = new Node("hoge"); + assertTrue(n.compareTo('a') > 0); + assertEquals(-1, n.compareTo('*')); + assertEquals(-1, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(-1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("*,"); + assertEquals(1, n.compareTo('a')); + assertEquals(0, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, n.compareTo(new Node("*,"))); + assertEquals(1, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node("!hoge"); + assertEquals(1, n.compareTo('a')); + assertEquals(-1, n.compareTo('*')); + assertEquals(0, n.compareTo('!')); + assertEquals(-1, n.compareTo(new Node("*,"))); + assertEquals(0, n.compareTo(new Node("!muga,"))); + assertEquals(-1, n.compareTo(new Node(""))); + + n = new Node(""); + assertEquals(1, n.compareTo('a')); + assertEquals(1, n.compareTo('*')); + assertEquals(1, n.compareTo('!')); + assertEquals(0, 
n.compareTo(new Node(""))); + } + + protected String dump(Node alt) { + StringWriter w = new StringWriter(); + PublicSuffixes.dump(alt, 0, new PrintWriter(w)); + return w.toString(); + } + public void testTrie1() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + // specifically, should not have empty string as match. + assertEquals("(null)" + NL + " \"ac,\"" + NL, dump(alt)); + alt.addBranch("ac,com,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"\"" + NL, dump(alt)); + alt.addBranch("ac,edu,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"com,\"" + NL + + " \"edu,\"" + NL + + " \"\"" + NL, dump(alt)); + } + public void testTrie2() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("*,"); + assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"*,\"" + NL, dump(alt)); + } + + public void testTrie3() { + Node alt = new Node(null, new ArrayList()); + alt.addBranch("ac,"); + alt.addBranch("ac,!hoge,"); + alt.addBranch("ac,*,"); + // exception goes first. 
+ assertEquals("(null)" + NL + + " \"ac,\"" + NL + + " \"!hoge,\"" + NL + + " \"*,\"" + NL + + " \"\"" + NL, dump(alt)); + } + + // test of higher-level functionality + + Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() + .matcher(""); + + public void testBasics() { + matchPrefix("com,example,www,", "com,example,"); + matchPrefix("com,example,", "com,example,"); + matchPrefix("org,archive,www,", "org,archive,"); + matchPrefix("org,archive,", "org,archive,"); + matchPrefix("fr,yahoo,www,", "fr,yahoo,"); + matchPrefix("fr,yahoo,", "fr,yahoo,"); + matchPrefix("au,com,foobar,www,", "au,com,foobar,"); + matchPrefix("au,com,foobar,", "au,com,foobar,"); + matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); + matchPrefix("uk,co,virgin,", "uk,co,virgin,"); + matchPrefix("au,com,example,www,", "au,com,example,"); + matchPrefix("au,com,example,", "au,com,example,"); + matchPrefix("jp,yokohama,public,assigned,www,", + "jp,yokohama,public,assigned,"); + matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); + } + + public void testDomainWithDash() { + matchPrefix("de,bad-site,www", "de,bad-site,"); + } + + public void testDomainWithNumbers() { + matchPrefix("de,archive4u,www", "de,archive4u,"); + } + + public void testIPV4() { + assertEquals("unexpected reduction", + "1.2.3.4", + PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); + } + + public void testIPV6() { + assertEquals("unexpected reduction", + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", + PublicSuffixes.reduceSurtToAssignmentLevel( + "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); + } + + public void testExceptions() { + matchPrefix("uk,bl,www,", "uk,bl,"); + matchPrefix("uk,bl,", "uk,bl,"); + matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); + matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); + } + + public void testFakeTLD() { + // we assume any new/unknonwn TLD should be assumed as 2-level; + // this is preferable for our grouping purpose but might not be + // for a 
cookie-assigning browser (original purpose of publicsuffixlist) + matchPrefix("zzz,example,www,", "zzz,example,"); + } + + public void testUnsegmentedHostname() { + m.reset("example"); + assertFalse("unexpected match found in 'example'", m.find()); + } + + public void testTopmostAssignedCaching() { + assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); + assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); + } + + // TODO: test UTF domains? + + protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { + m.reset(surtDomain); + assertTrue("expected match not found in '" + surtDomain, m.find()); + assertEquals("expected match not found", expectedAssignedPrefix, m + .group()); + } +} From faec599fc4a1cc8f09523e78cab073ed570b8adc Mon Sep 17 00:00:00 2001 From: RogerMathisen Date: Wed, 24 Sep 2014 11:03:55 +0200 Subject: [PATCH 5/5] - Removed pointless code. --- .../archive/util/iterator/SortedCompositeIteratorTest.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java index 0f4dc68a..11ea1229 100644 --- a/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java +++ b/src/test/java/org/archive/util/iterator/SortedCompositeIteratorTest.java @@ -14,11 +14,6 @@ public class SortedCompositeIteratorTest extends TestCase { public void testHasNext() throws FileNotFoundException, IOException { - long t = 210000; - long c = 134; - float f = (float)c / (float)t; - System.err.format("F(%f)\n",f); - File a = File.createTempFile("filea", null); File b = File.createTempFile("fileb", null);