diff --git a/CHANGES.md b/CHANGES.md
index 478238bf..19c26b2f 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,8 +1,73 @@
-Unreleased
-----------
-
-#### Dependency upgrades
+2.0.0
+-----
+### Removals
+
+#### Removed Apache HttpClient 3.1
+
+`HTTPSeekableLineReaderFactory` and `ZipNumBlockLoader` now default to HttpClient 4.3.
+
+| Removed | Replacement |
+|-----------------------------------------------------------|--------------------------------------|
+| `org.apache.commons.httpclient.URIException` | `org.archive.url.URIException` |
+| `org.apache.commons.httpclient.Header` | `org.archive.format.http.HttpHeader` |
+| `org.archive.httpclient.HttpRecorderGetMethod` | |
+| `org.archive.httpclient.HttpRecorderMethod` | |
+| `org.archive.httpclient.HttpRecorderPostMethod` | |
+| `org.archive.httpclient.SingleHttpConnectionManager` | |
+| `org.archive.httpclient.ThreadLocalHttpConnectionManager` | |
+
+#### Removed deprecated versions of renamed classes
+
+| Removed | Replacement |
+|-----------------------------------------------|--------------------------------------------------|
+| `org.archive.io.ArchiveFileConstants` | `org.archive.format.ArchiveFileConstants` |
+| `org.archive.io.GzipHeader` | `org.archive.util.zip.GzipHeader` |
+| `org.archive.io.GZIPMembersInputStream` | `org.archive.util.zip.GZIPMembersInputStream` |
+| `org.archive.io.NoGzipMagicException` | `org.archive.util.zip.NoGzipMagicException` |
+| `org.archive.io.arc.ARCConstants` | `org.archive.format.arc.ARCConstants` |
+| `org.archive.io.warc.WARCConstants` | `org.archive.format.warc.WARCConstants` |
+| `org.archive.url.DefaultIACanonicalizerRules` | `org.archive.url.AggressiveIACanonicalizerRules` |
+| `org.archive.url.DefaultIAURLCanonicalizer` | `org.archive.url.AggressiveIAURLCanonicalizer` |
+| `org.archive.url.GoogleURLCanonicalizer` | `org.archive.url.BasicURLCanonicalizer` |
+
+#### Removed deprecated methods
+
+| Removed | Replacement |
+|-----------------------------------------------|-------------------------------------------|
+| `ANVLRecord(int)` | `ANVLRecord()` |
+| `DevUtils.betterPrintStack(RuntimeException)` | `Throwable.printStackTrace()` |
+| `Recorder.getReplayCharSequence()` | `Recorder.getContentReplayCharSequence()` |
+| `Reporter.shortReportLineTo(PrintWriter)` | `Reporter.reportTo(PrintWriter)` |
+
+#### Removed usages of constant interfaces
+
+Static imports should be used instead.
+
+* `ArchiveFileConstants` is no longer implemented by:
+ * `ArchiveReader`
+ * `ArchiveReaderFactory`
+ * `WARCWriter`
+ * `WriterPool`
+ * `WriterPoolMember`
+* `ARCConstants` is no longer implemented by:
+ * `ARCReader`
+ * `ARCReaderFactory`
+ * `ARCRecord`
+ * `ARCRecordMetaData`
+ * `ARCUtils`
+ * `ARCWriter`
+* `WARCConstants` is no longer implemented by:
+ * `WARCReader`
+ * `WARCReaderFactory`
+ * `WARCRecord`
+ * `WARCWriter`
+
+### Dependency upgrades
+
+- **commons-io**: 2.18.0 → 2.19.0
+- **guava**: 33.3.1-jre → 33.4.8-jre
+- **json**: 20240303 → 20250517
- **junit**: 4.13.2 → 5.12.2
1.3.0
diff --git a/pom.xml b/pom.xml
index 81bd9b32..22f83428 100644
--- a/pom.xml
+++ b/pom.xml
@@ -61,13 +61,13 @@
com.google.guava
guava
- 33.3.1-jre
+ 33.4.8-jre
org.json
json
- 20240303
+ 20250517
org.htmlparser
@@ -141,7 +141,7 @@
commons-io
commons-io
- 2.18.0
+ 2.19.0
@@ -162,7 +162,7 @@
org.apache.maven.plugins
maven-compiler-plugin
- 2.3.2
+ 3.14.0
8
8
@@ -265,6 +265,15 @@
+
+ jdk9-plus
+
+ [9,)
+
+
+ 8
+
+
diff --git a/src/main/java/org/archive/io/ArchiveFileConstants.java b/src/main/java/org/archive/io/ArchiveFileConstants.java
deleted file mode 100644
index b1a39194..00000000
--- a/src/main/java/org/archive/io/ArchiveFileConstants.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is part of the Heritrix web crawler (crawler.archive.org).
- *
- * Licensed to the Internet Archive (IA) by one or more individual
- * contributors.
- *
- * The IA licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.archive.io;
-
-@Deprecated
-public interface ArchiveFileConstants extends org.archive.format.ArchiveFileConstants {
-}
diff --git a/src/main/java/org/archive/io/ArchiveReader.java b/src/main/java/org/archive/io/ArchiveReader.java
index 0038cccf..449cdc24 100644
--- a/src/main/java/org/archive/io/ArchiveReader.java
+++ b/src/main/java/org/archive/io/ArchiveReader.java
@@ -42,13 +42,15 @@
import com.google.common.io.CountingInputStream;
+import static org.archive.format.ArchiveFileConstants.*;
+
/**
* Reader for an Archive file of Archive {@link ArchiveRecord}s.
* @author stack
* @version $Date$ $Version$
*/
-public abstract class ArchiveReader implements ArchiveFileConstants, Iterable, Closeable {
+public abstract class ArchiveReader implements Iterable, Closeable {
/**
* Is this Archive file compressed?
*/
@@ -601,8 +603,7 @@ public String getStrippedFileName() {
*/
public static String getStrippedFileName(String name,
final String dotFileExtension) {
- name = stripExtension(name,
- ArchiveFileConstants.DOT_COMPRESSED_FILE_EXTENSION);
+ name = stripExtension(name, DOT_COMPRESSED_FILE_EXTENSION);
return stripExtension(name, dotFileExtension);
}
@@ -699,7 +700,7 @@ public boolean outputRecord(final String format)
boolean result = true;
if (format.equals(CDX)) {
System.out.println(get().outputCdx(getStrippedFileName()));
- } else if(format.equals(ArchiveFileConstants.DUMP)) {
+ } else if(format.equals(DUMP)) {
// No point digesting if dumping content.
setDigest(false);
get().dump();
diff --git a/src/main/java/org/archive/io/ArchiveReaderFactory.java b/src/main/java/org/archive/io/ArchiveReaderFactory.java
index 17f14d3a..bc316893 100644
--- a/src/main/java/org/archive/io/ArchiveReaderFactory.java
+++ b/src/main/java/org/archive/io/ArchiveReaderFactory.java
@@ -33,6 +33,7 @@
import org.archive.url.UsableURI;
import org.archive.util.FileUtils;
+import static org.archive.format.ArchiveFileConstants.*;
/**
* Factory that returns an Archive file Reader.
@@ -40,7 +41,7 @@
* @author stack
* @version $Date$ $Revision$
*/
-public class ArchiveReaderFactory implements ArchiveFileConstants {
+public class ArchiveReaderFactory {
// Static block to enable S3 URLs
static {
if (System.getProperty("java.protocol.handler.pkgs") != null) {
diff --git a/src/main/java/org/archive/io/ArchiveRecord.java b/src/main/java/org/archive/io/ArchiveRecord.java
index 63bfe628..4bd1fa02 100644
--- a/src/main/java/org/archive/io/ArchiveRecord.java
+++ b/src/main/java/org/archive/io/ArchiveRecord.java
@@ -25,6 +25,7 @@
import java.security.NoSuchAlgorithmException;
import java.util.logging.Level;
+import org.archive.format.ArchiveFileConstants;
import org.archive.util.Base32;
/**
diff --git a/src/main/java/org/archive/io/GZIPMembersInputStream.java b/src/main/java/org/archive/io/GZIPMembersInputStream.java
deleted file mode 100644
index 35fb9e90..00000000
--- a/src/main/java/org/archive/io/GZIPMembersInputStream.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * This file is part of the Heritrix web crawler (crawler.archive.org).
- *
- * Licensed to the Internet Archive (IA) by one or more individual
- * contributors.
- *
- * The IA licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.archive.io;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-/**
- * @deprecated use {@link org.archive.util.zip.GZIPMembersInputStream}
- */
-@Deprecated
-public class GZIPMembersInputStream extends org.archive.util.zip.GZIPMembersInputStream {
-
- public GZIPMembersInputStream(InputStream in) throws IOException {
- super(in);
- }
-
- public GZIPMembersInputStream(InputStream in, int size) throws IOException {
- super(in, size);
- }
-
-}
\ No newline at end of file
diff --git a/src/main/java/org/archive/io/GzipHeader.java b/src/main/java/org/archive/io/GzipHeader.java
deleted file mode 100644
index 6b8263bc..00000000
--- a/src/main/java/org/archive/io/GzipHeader.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * This file is part of the Heritrix web crawler (crawler.archive.org).
- *
- * Licensed to the Internet Archive (IA) by one or more individual
- * contributors.
- *
- * The IA licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.archive.io;
-
-/**
- * @deprecated use {@link org.archive.util.zip.GzipHeader}
- */
-@Deprecated
-public class GzipHeader extends org.archive.util.zip.GzipHeader {
-}
diff --git a/src/main/java/org/archive/io/HeaderedArchiveRecord.java b/src/main/java/org/archive/io/HeaderedArchiveRecord.java
index ac4b82f6..809a9e54 100644
--- a/src/main/java/org/archive/io/HeaderedArchiveRecord.java
+++ b/src/main/java/org/archive/io/HeaderedArchiveRecord.java
@@ -27,7 +27,7 @@
import java.io.PrintStream;
import org.archive.format.http.HttpHeader;
-import org.archive.io.arc.ARCConstants;
+import org.archive.format.arc.ARCConstants;
import org.archive.util.LaxHttpParser;
/**
diff --git a/src/main/java/org/archive/io/NoGzipMagicException.java b/src/main/java/org/archive/io/NoGzipMagicException.java
deleted file mode 100644
index 27d1058a..00000000
--- a/src/main/java/org/archive/io/NoGzipMagicException.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * This file is part of the Heritrix web crawler (crawler.archive.org).
- *
- * Licensed to the Internet Archive (IA) by one or more individual
- * contributors.
- *
- * The IA licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.archive.io;
-
-/**
- * @deprecated use {@link org.archive.util.zip.NoGzipMagicException}
- */
-@Deprecated
-public class NoGzipMagicException extends org.archive.util.zip.NoGzipMagicException {
-}
diff --git a/src/main/java/org/archive/io/WriterPool.java b/src/main/java/org/archive/io/WriterPool.java
index db184c5f..79da16c0 100644
--- a/src/main/java/org/archive/io/WriterPool.java
+++ b/src/main/java/org/archive/io/WriterPool.java
@@ -30,6 +30,7 @@
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.archive.format.ArchiveFileConstants;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
@@ -215,7 +216,7 @@ public synchronized void invalidateFile(WriterPoolMember f)
// gets attention.
File file = f.getFile();
file.renameTo(new File(file.getAbsoluteFile() +
- WriterPoolMember.INVALID_SUFFIX));
+ ArchiveFileConstants.INVALID_SUFFIX));
}
/**
diff --git a/src/main/java/org/archive/io/WriterPoolMember.java b/src/main/java/org/archive/io/WriterPoolMember.java
index e10d443b..a488354a 100644
--- a/src/main/java/org/archive/io/WriterPoolMember.java
+++ b/src/main/java/org/archive/io/WriterPoolMember.java
@@ -38,6 +38,7 @@
import org.archive.util.FileUtils;
import org.archive.util.PropertyUtils;
+import static org.archive.format.ArchiveFileConstants.*;
/**
@@ -48,7 +49,7 @@
* @author stack
* @version $Date$ $Revision$
*/
-public abstract class WriterPoolMember implements ArchiveFileConstants {
+public abstract class WriterPoolMember {
private final Logger logger = Logger.getLogger(this.getClass().getName());
public static final String UTF8 = "UTF-8";
diff --git a/src/main/java/org/archive/io/arc/ARCConstants.java b/src/main/java/org/archive/io/arc/ARCConstants.java
deleted file mode 100644
index c44cfef7..00000000
--- a/src/main/java/org/archive/io/arc/ARCConstants.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * This file is part of the Heritrix web crawler (crawler.archive.org).
- *
- * Licensed to the Internet Archive (IA) by one or more individual
- * contributors.
- *
- * The IA licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.archive.io.arc;
-
-
-/**
- * Constants used by ARC files and in ARC file processing.
- *
- * @author stack
- * @deprecated
- */
-public interface ARCConstants extends org.archive.format.arc.ARCConstants {
-}
diff --git a/src/main/java/org/archive/io/arc/ARCReader.java b/src/main/java/org/archive/io/arc/ARCReader.java
index 7f85cc2a..c9a88415 100644
--- a/src/main/java/org/archive/io/arc/ARCReader.java
+++ b/src/main/java/org/archive/io/arc/ARCReader.java
@@ -43,6 +43,7 @@
import org.archive.io.WriterPoolMember;
import org.archive.util.ArchiveUtils;
+import static org.archive.format.arc.ARCConstants.*;
/**
* Get an iterator on an ARC file or get a record by absolute position.
@@ -66,7 +67,7 @@
* @version $Date$ $Revision$
*/
public abstract class ARCReader extends ArchiveReader
-implements ARCConstants, Closeable {
+implements Closeable {
private final Logger logger = Logger.getLogger(ARCReader.class.getName());
/**
diff --git a/src/main/java/org/archive/io/arc/ARCReaderFactory.java b/src/main/java/org/archive/io/arc/ARCReaderFactory.java
index 44437ed7..d2f10842 100644
--- a/src/main/java/org/archive/io/arc/ARCReaderFactory.java
+++ b/src/main/java/org/archive/io/arc/ARCReaderFactory.java
@@ -40,6 +40,7 @@
import com.google.common.io.CountingInputStream;
+import static org.archive.format.arc.ARCConstants.*;
/**
* Factory that returns an ARCReader.
@@ -48,8 +49,7 @@
*
* @author stack
*/
-public class ARCReaderFactory extends ArchiveReaderFactory
-implements ARCConstants {
+public class ARCReaderFactory extends ArchiveReaderFactory {
/**
* This factory instance.
*/
diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java
index d3c036ba..dafc63b6 100644
--- a/src/main/java/org/archive/io/arc/ARCRecord.java
+++ b/src/main/java/org/archive/io/arc/ARCRecord.java
@@ -42,12 +42,14 @@
import org.archive.util.LaxHttpParser;
import org.archive.util.TextUtils;
+import static org.archive.format.arc.ARCConstants.*;
+
/**
* An ARC file record.
* Does not compass the ARCRecord metadata line, just the record content.
* @author stack
*/
-public class ARCRecord extends ArchiveRecord implements ARCConstants {
+public class ARCRecord extends ArchiveRecord {
/**
* Http status code.
*
@@ -590,7 +592,7 @@ private InputStream readHttpHeader() throws IOException {
}
statusLine = new String(statusBytes, 0,
- statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING);
+ statusBytes.length - eolCharCount, DEFAULT_ENCODING);
// If a null or DELETED break immediately
if ((statusLine == null) || statusLine.startsWith("DELETED")) {
@@ -681,8 +683,7 @@ private InputStream readHttpHeader() throws IOException {
// Read the status line. Don't let it into the parseHeaders function.
// It doesn't know what to do with it.
bais.read(statusBytes, 0, statusBytes.length);
- this.httpHeaders = LaxHttpParser.parseHeaders(bais,
- ARCConstants.DEFAULT_ENCODING);
+ this.httpHeaders = LaxHttpParser.parseHeaders(bais, DEFAULT_ENCODING);
this.getMetaData().setStatusCode(Integer.toString(getStatusCode()));
bais.reset();
return bais;
diff --git a/src/main/java/org/archive/io/arc/ARCRecordMetaData.java b/src/main/java/org/archive/io/arc/ARCRecordMetaData.java
index 02b368e4..2a187477 100644
--- a/src/main/java/org/archive/io/arc/ARCRecordMetaData.java
+++ b/src/main/java/org/archive/io/arc/ARCRecordMetaData.java
@@ -27,13 +27,14 @@
import org.archive.io.ArchiveRecordHeader;
+import static org.archive.format.arc.ARCConstants.*;
/**
* An immutable class to hold an ARC record meta data.
*
* @author stack
*/
-public class ARCRecordMetaData implements ArchiveRecordHeader, ARCConstants {
+public class ARCRecordMetaData implements ArchiveRecordHeader {
/**
* Map of record header fields.
*
diff --git a/src/main/java/org/archive/io/arc/ARCUtils.java b/src/main/java/org/archive/io/arc/ARCUtils.java
index 985457e2..5bcb4cc3 100644
--- a/src/main/java/org/archive/io/arc/ARCUtils.java
+++ b/src/main/java/org/archive/io/arc/ARCUtils.java
@@ -32,7 +32,9 @@
import org.archive.util.zip.GzipHeader;
import org.archive.util.zip.NoGzipMagicException;
-public class ARCUtils implements ARCConstants {
+import static org.archive.format.arc.ARCConstants.*;
+
+public class ARCUtils {
/**
* @param pathOrUri Path or URI to extract arc filename from.
* @return Extracted arc file name.
diff --git a/src/main/java/org/archive/io/arc/ARCWriter.java b/src/main/java/org/archive/io/arc/ARCWriter.java
index c7042943..82d13e9f 100644
--- a/src/main/java/org/archive/io/arc/ARCWriter.java
+++ b/src/main/java/org/archive/io/arc/ARCWriter.java
@@ -42,6 +42,7 @@
import org.archive.util.DevUtils;
import org.archive.util.MimetypeUtils;
+import static org.archive.format.arc.ARCConstants.*;
/**
* Write ARC files.
@@ -110,7 +111,7 @@
*
* @author stack
*/
-public class ARCWriter extends WriterPoolMember implements ARCConstants, Closeable {
+public class ARCWriter extends WriterPoolMember implements Closeable {
private static final Logger logger =
Logger.getLogger(ARCWriter.class.getName());
diff --git a/src/main/java/org/archive/io/warc/WARCConstants.java b/src/main/java/org/archive/io/warc/WARCConstants.java
deleted file mode 100644
index 83cc8a6d..00000000
--- a/src/main/java/org/archive/io/warc/WARCConstants.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is part of the Heritrix web crawler (crawler.archive.org).
- *
- * Licensed to the Internet Archive (IA) by one or more individual
- * contributors.
- *
- * The IA licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.archive.io.warc;
-
-@Deprecated
-public interface WARCConstants extends org.archive.format.warc.WARCConstants {
-}
diff --git a/src/main/java/org/archive/io/warc/WARCReader.java b/src/main/java/org/archive/io/warc/WARCReader.java
index a34854ef..f9b41af7 100644
--- a/src/main/java/org/archive/io/warc/WARCReader.java
+++ b/src/main/java/org/archive/io/warc/WARCReader.java
@@ -35,13 +35,15 @@
import org.archive.io.ArchiveReader;
import org.archive.io.ArchiveRecord;
+import static org.archive.format.warc.WARCConstants.*;
+
/**
* WARCReader.
* Go via {@link WARCReaderFactory} to get instance.
* @author stack
* @version $Date: 2006-11-27 18:03:03 -0800 (Mon, 27 Nov 2006) $ $Version$
*/
-public class WARCReader extends ArchiveReader implements WARCConstants {
+public class WARCReader extends ArchiveReader {
protected WARCReader() {
super();
}
diff --git a/src/main/java/org/archive/io/warc/WARCReaderFactory.java b/src/main/java/org/archive/io/warc/WARCReaderFactory.java
index c3e5baa0..881da869 100644
--- a/src/main/java/org/archive/io/warc/WARCReaderFactory.java
+++ b/src/main/java/org/archive/io/warc/WARCReaderFactory.java
@@ -30,13 +30,14 @@
import org.archive.io.ArchiveReader;
import org.archive.io.ArchiveReaderFactory;
import org.archive.io.ArchiveRecord;
-import org.archive.io.warc.WARCConstants;
import org.archive.util.ArchiveUtils;
import org.archive.util.FileUtils;
import org.archive.util.zip.GZIPMembersInputStream;
import com.google.common.io.CountingInputStream;
+import static org.archive.format.warc.WARCConstants.*;
+
/**
* Factory for WARC Readers.
* Figures whether to give out a compressed file Reader or an uncompressed
@@ -44,8 +45,7 @@
* @author stack
* @version $Date: 2006-08-23 17:59:04 -0700 (Wed, 23 Aug 2006) $ $Version$
*/
-public class WARCReaderFactory extends ArchiveReaderFactory
-implements WARCConstants {
+public class WARCReaderFactory extends ArchiveReaderFactory {
private static final WARCReaderFactory factory = new WARCReaderFactory();
/**
diff --git a/src/main/java/org/archive/io/warc/WARCRecord.java b/src/main/java/org/archive/io/warc/WARCRecord.java
index cf106270..21f662ea 100644
--- a/src/main/java/org/archive/io/warc/WARCRecord.java
+++ b/src/main/java/org/archive/io/warc/WARCRecord.java
@@ -34,13 +34,17 @@
import org.archive.io.ArchiveRecordHeader;
import org.archive.util.LaxHttpParser;
+import static org.archive.format.ArchiveFileConstants.ABSOLUTE_OFFSET_KEY;
+import static org.archive.format.ArchiveFileConstants.READER_IDENTIFIER_FIELD_KEY;
+import static org.archive.format.warc.WARCConstants.*;
+
/**
* A WARC file Record.
*
* @author stack
*/
-public class WARCRecord extends ArchiveRecord implements WARCConstants {
+public class WARCRecord extends ArchiveRecord {
private Pattern WHITESPACE = Pattern.compile("\\s");
/**
diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java
index 982b8bc4..1e6135c8 100644
--- a/src/main/java/org/archive/io/warc/WARCWriter.java
+++ b/src/main/java/org/archive/io/warc/WARCWriter.java
@@ -37,12 +37,14 @@
import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
-import org.archive.io.ArchiveFileConstants;
+import org.archive.format.ArchiveFileConstants;
import org.archive.io.UTF8Bytes;
import org.archive.io.WriterPoolMember;
import org.archive.util.ArchiveUtils;
import org.archive.util.anvl.Element;
+import static org.archive.format.warc.WARCConstants.*;
+
/**
* WARC implementation.
@@ -56,8 +58,7 @@
* @author stack
* @version $Revision: 4604 $ $Date: 2006-09-05 22:38:18 -0700 (Tue, 05 Sep 2006) $
*/
-public class WARCWriter extends WriterPoolMember
-implements WARCConstants {
+public class WARCWriter extends WriterPoolMember {
public static final String TOTALS = "totals";
public static final String SIZE_ON_DISK = "sizeOnDisk";
public static final String TOTAL_BYTES = "totalBytes";
@@ -343,9 +344,9 @@ public URI writeWarcinfoRecord(String filename, final String description)
recordInfo.setMimetype("application/warc-fields");
// Strip .open suffix if present.
- if (filename.endsWith(WriterPoolMember.OCCUPIED_SUFFIX)) {
+ if (filename.endsWith(ArchiveFileConstants.OCCUPIED_SUFFIX)) {
filename = filename.substring(0,
- filename.length() - WriterPoolMember.OCCUPIED_SUFFIX.length());
+ filename.length() - ArchiveFileConstants.OCCUPIED_SUFFIX.length());
}
recordInfo.addExtraHeader(HEADER_KEY_FILENAME, filename);
if (description != null && description.length() > 0) {
diff --git a/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java b/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java
deleted file mode 100644
index 3d4d8581..00000000
--- a/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package org.archive.url;
-
-/**
- * @deprecated use AggressiveIACanonicalizerRules
- */
-public class DefaultIACanonicalizerRules extends AggressiveIACanonicalizerRules {
-}
diff --git a/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java b/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java
deleted file mode 100644
index 3d1f985d..00000000
--- a/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package org.archive.url;
-
-/**
- * @deprecated use AggressiveIAURLCanonicalizer
- */
-public class DefaultIAURLCanonicalizer extends AggressiveIAURLCanonicalizer {
-}
diff --git a/src/main/java/org/archive/url/GoogleURLCanonicalizer.java b/src/main/java/org/archive/url/GoogleURLCanonicalizer.java
deleted file mode 100644
index 388db8aa..00000000
--- a/src/main/java/org/archive/url/GoogleURLCanonicalizer.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package org.archive.url;
-
-/**
- * @deprecated use {@link BasicURLCanonicalizer}
- */
-public class GoogleURLCanonicalizer extends BasicURLCanonicalizer {
-}
diff --git a/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java b/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java
index cd579eb0..830b7b92 100644
--- a/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java
+++ b/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java
@@ -1,10 +1,10 @@
package org.archive.url;
public class NonMassagingIAURLCanonicalizer implements URLCanonicalizer {
- private static final GoogleURLCanonicalizer google =
- new GoogleURLCanonicalizer();
+ private static final BasicURLCanonicalizer basic =
+ new BasicURLCanonicalizer();
private static CanonicalizeRules nonMassagingRules =
- new DefaultIACanonicalizerRules();
+ new AggressiveIACanonicalizerRules();
static {
nonMassagingRules.setRule(CanonicalizeRules.HOST_SETTINGS,
CanonicalizeRules.HOST_LOWERCASE);
@@ -14,7 +14,7 @@ public class NonMassagingIAURLCanonicalizer implements URLCanonicalizer {
public void canonicalize(HandyURL url) {
// just google's stuff, followed by the IA default stuff:
- google.canonicalize(url);
+ basic.canonicalize(url);
ia.canonicalize(url);
}
}
diff --git a/src/main/java/org/archive/url/WaybackURLKeyMaker.java b/src/main/java/org/archive/url/WaybackURLKeyMaker.java
index 99fb92e9..56f51b49 100644
--- a/src/main/java/org/archive/url/WaybackURLKeyMaker.java
+++ b/src/main/java/org/archive/url/WaybackURLKeyMaker.java
@@ -5,7 +5,7 @@
public class WaybackURLKeyMaker implements URLKeyMaker {
// URLCanonicalizer canonicalizer = new NonMassagingIAURLCanonicalizer();
- URLCanonicalizer canonicalizer = new DefaultIAURLCanonicalizer();
+ URLCanonicalizer canonicalizer = new AggressiveIAURLCanonicalizer();
public URLCanonicalizer getCanonicalizer() {
return canonicalizer;
diff --git a/src/main/java/org/archive/util/DevUtils.java b/src/main/java/org/archive/util/DevUtils.java
index d630a0b1..f2a1d044 100644
--- a/src/main/java/org/archive/util/DevUtils.java
+++ b/src/main/java/org/archive/util/DevUtils.java
@@ -78,15 +78,6 @@ public static String extraInfo() {
return sw.toString();
}
- /**
- * Nothing to see here, move along.
- * @deprecated This method was never used.
- */
- @Deprecated
- public static void betterPrintStack(RuntimeException re) {
- re.printStackTrace(System.err);
- }
-
/**
* Send this JVM process a SIGQUIT; giving a thread dump and possibly
* a heap histogram (if using -XX:+PrintClassHistogram).
diff --git a/src/main/java/org/archive/util/Recorder.java b/src/main/java/org/archive/util/Recorder.java
index 61cbf871..e67cfb48 100644
--- a/src/main/java/org/archive/util/Recorder.java
+++ b/src/main/java/org/archive/util/Recorder.java
@@ -351,16 +351,6 @@ public void setContentEncoding(String contentEncoding) {
public String getContentEncoding() {
return this.contentEncoding;
}
-
-
- /**
- * @return
- * @throws IOException
- * @deprecated use getContentReplayCharSequence
- */
- public ReplayCharSequence getReplayCharSequence() throws IOException {
- return getContentReplayCharSequence();
- }
/**
* @return A ReplayCharSequence. Caller may call
diff --git a/src/main/java/org/archive/util/Reporter.java b/src/main/java/org/archive/util/Reporter.java
index 3f4ea5e5..dd21b53d 100644
--- a/src/main/java/org/archive/util/Reporter.java
+++ b/src/main/java/org/archive/util/Reporter.java
@@ -32,15 +32,6 @@ public interface Reporter {
*/
public void reportTo(PrintWriter writer) throws IOException;
- /**
- * Write a short single-line summary report
- *
- * @param pw writer to receive report
- */
- @Deprecated
- public void shortReportLineTo(PrintWriter pw) throws IOException;
-
-
/**
* @return Same data that's in the single line report, as key-value pairs
*/
diff --git a/src/main/java/org/archive/util/anvl/ANVLRecord.java b/src/main/java/org/archive/util/anvl/ANVLRecord.java
index 06603914..e548f432 100644
--- a/src/main/java/org/archive/util/anvl/ANVLRecord.java
+++ b/src/main/java/org/archive/util/anvl/ANVLRecord.java
@@ -72,11 +72,6 @@ public ANVLRecord(Collection extends Element> c) {
super(c);
}
- /** @deprecated */
- public ANVLRecord(int initialCapacity) {
- super();
- }
-
public boolean addLabel(final String l) {
return super.add(new Element(new Label(l)));
}
diff --git a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java
index 07548b4c..954da636 100644
--- a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java
+++ b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java
@@ -31,7 +31,7 @@
import org.junit.jupiter.api.io.TempDir;
import static org.junit.jupiter.api.Assertions.assertEquals;
-
+import static org.archive.format.arc.ARCConstants.*;
/**
* Test ARCWriterPool
@@ -119,7 +119,7 @@ private WriterPoolSettings getSettings(final boolean isCompressed) {
return new WriterPoolSettingsData(
"TEST",
"${prefix}-${timestamp17}-${serialno}-${heritrix.hostname}",
- ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE,
+ DEFAULT_MAX_ARC_FILE_SIZE,
isCompressed,
Arrays.asList(files),
null);
diff --git a/src/test/java/org/archive/io/arc/ARCWriterTest.java b/src/test/java/org/archive/io/arc/ARCWriterTest.java
index 84539391..ca300697 100644
--- a/src/test/java/org/archive/io/arc/ARCWriterTest.java
+++ b/src/test/java/org/archive/io/arc/ARCWriterTest.java
@@ -49,6 +49,7 @@
import static org.junit.jupiter.api.Assertions.*;
+import static org.archive.format.arc.ARCConstants.*;
/**
* Test ARCWriter class.
@@ -58,7 +59,7 @@
*
* @author stack
*/
-public class ARCWriterTest implements ARCConstants {
+public class ARCWriterTest {
/**
* Utility class for writing bad ARCs (with trailing junk)
*/
diff --git a/src/test/java/org/archive/io/warc/WARCWriterTest.java b/src/test/java/org/archive/io/warc/WARCWriterTest.java
index 1039119e..c0ace5f0 100644
--- a/src/test/java/org/archive/io/warc/WARCWriterTest.java
+++ b/src/test/java/org/archive/io/warc/WARCWriterTest.java
@@ -44,12 +44,14 @@
import static org.junit.jupiter.api.Assertions.*;
+import static org.archive.format.warc.WARCConstants.*;
+
/**
* Test Writer and Reader.
* @author stack
* @version $Date: 2006-08-29 19:35:48 -0700 (Tue, 29 Aug 2006) $ $Version$
*/
-public class WARCWriterTest implements WARCConstants {
+public class WARCWriterTest {
private static final AtomicInteger SERIAL_NO = new AtomicInteger();
@@ -153,7 +155,7 @@ private void writeWarcinfoRecord(WARCWriter writer)
recordInfo.setContentStream(new ByteArrayInputStream(bytes));
recordInfo.setContentLength((long) bytes.length);
- final URI recordid = writer.generateRecordId(WARCWriter.TYPE, WARCRecordType.warcinfo.toString());
+ final URI recordid = writer.generateRecordId(TYPE, WARCRecordType.warcinfo.toString());
recordInfo.setRecordId(recordid);
writer.writeRecord(recordInfo);
diff --git a/src/test/java/org/archive/url/IAURLCanonicalizerTest.java b/src/test/java/org/archive/url/IAURLCanonicalizerTest.java
index 974bdd22..aecddb3b 100644
--- a/src/test/java/org/archive/url/IAURLCanonicalizerTest.java
+++ b/src/test/java/org/archive/url/IAURLCanonicalizerTest.java
@@ -10,7 +10,7 @@ public class IAURLCanonicalizerTest {
@Test
public void testFull() throws URISyntaxException {
- IAURLCanonicalizer iaC = new IAURLCanonicalizer(new DefaultIACanonicalizerRules());
+ IAURLCanonicalizer iaC = new IAURLCanonicalizer(new AggressiveIACanonicalizerRules());
compCan(iaC,"http://www.archive.org:80/","http://archive.org/");
compCan(iaC,"https://www.archive.org:80/","https://archive.org:80/");
compCan(iaC,"http://www.archive.org:443/","http://archive.org:443/");
@@ -63,7 +63,7 @@ public void testGetDefaultPort() {
@Test
public void testStripSessionId() throws URISyntaxException {
- IAURLCanonicalizer iaC = new IAURLCanonicalizer(new DefaultIACanonicalizerRules());
+ IAURLCanonicalizer iaC = new IAURLCanonicalizer(new AggressiveIACanonicalizerRules());
compCan(iaC,
"http://www.nsf.gov/statistics/sed/2009/SED_2009.zip?CFID=14387305&CFTOKEN=72942008&jsessionid=f030eacc7e49c4ca0b077922347418418766",
"http://nsf.gov/statistics/sed/2009/sed_2009.zip?jsessionid=f030eacc7e49c4ca0b077922347418418766");