diff --git a/CHANGES.md b/CHANGES.md index 478238bf..19c26b2f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,73 @@ -Unreleased ----------- - -#### Dependency upgrades +2.0.0 +----- +### Removals + +#### Removed Apache HttpClient 3.1 + +`HTTPSeekableLineReaderFactory` and `ZipNumBlockLoader` now default to HttpClient 4.3. + +| Removed | Replacement | +|-----------------------------------------------------------|--------------------------------------| +| `org.apache.commons.httpclient.URIException` | `org.archive.url.URIException` | +| `org.apache.commons.httpclient.Header` | `org.archive.format.http.HttpHeader` | +| `org.archive.httpclient.HttpRecorderGetMethod` | | +| `org.archive.httpclient.HttpRecorderMethod` | | +| `org.archive.httpclient.HttpRecorderPostMethod` | | +| `org.archive.httpclient.SingleHttpConnectionManager` | | +| `org.archive.httpclient.ThreadLocalHttpConnectionManager` | | + +#### Removed deprecated versions of renamed classes + +| Removed | Replacement | +|-----------------------------------------------|--------------------------------------------------| +| `org.archive.io.ArchiveFileConstants` | `org.archive.format.ArchiveFileConstants` | +| `org.archive.io.GzipHeader` | `org.archive.util.zip.GzipHeader` | +| `org.archive.io.GZIPMembersInputStream` | `org.archive.util.zip.GZIPMembersInputStream` | +| `org.archive.io.NoGzipMagicException` | `org.archive.util.zip.NoGzipMagicException` | +| `org.archive.io.arc.ARCConstants` | `org.archive.format.arc.ARCConstants` | +| `org.archive.io.warc.WARCConstants` | `org.archive.format.warc.WARCConstants` | +| `org.archive.url.DefaultIACanonicalizerRules` | `org.archive.url.AggressiveIACanonicalizerRules` | +| `org.archive.url.DefaultIAURLCanonicalizer` | `org.archive.url.AggressiveIAURLCanonicalizer` | +| `org.archive.url.GoogleURLCanonicalizer` | `org.archive.url.BasicURLCanonicalizer` | + +#### Removed deprecated methods + +| Removed | Replacement | 
+|-----------------------------------------------|-------------------------------------------| +| `ANVLRecord(int)` | `ANVLRecord()` | +| `DevUtils.betterPrintStack(RuntimeException)` | `Throwable.printStackTrace()` | +| `Recorder.getReplayCharSequence()` | `Recorder.getContentReplayCharSequence()` | +| `Reporter.shortReportLineTo(PrintWriter)` | `Reporter.reportTo(PrintWriter)` | + +##### Removed usages of constant interfaces + +Static imports should be used instead. + +* `ArchiveFileConstants` is no longer implemented by: + * `ArchiveReader` + * `ArchiveReaderFactory` + * `WARCWriter` + * `WriterPool` + * `WriterPoolMember` +* `ARCConstants` is no longer implemented by: + * `ARCReader` + * `ARCReaderFactory` + * `ARCRecord` + * `ARCRecordMetaData` + * `ARCUtils` + * `ARCWriter` +* `WARCConstants` is no longer implemented by: + * `WARCReader` + * `WARCReaderFactory` + * `WARCRecord` + * `WARCWriter` + +### Dependency upgrades + +- **commons-io**: 2.18.0 → 2.19.0 +- **guava**: 33.3.1-jre → 33.4.8-jre +- **json**: 20240303 → 20250517 - **junit**: 4.13.2 → 5.12.2 1.3.0 diff --git a/pom.xml b/pom.xml index 81bd9b32..22f83428 100644 --- a/pom.xml +++ b/pom.xml @@ -61,13 +61,13 @@ com.google.guava guava - 33.3.1-jre + 33.4.8-jre org.json json - 20240303 + 20250517 org.htmlparser @@ -141,7 +141,7 @@ commons-io commons-io - 2.18.0 + 2.19.0 @@ -162,7 +162,7 @@ org.apache.maven.plugins maven-compiler-plugin - 2.3.2 + 3.14.0 8 8 @@ -265,6 +265,15 @@ + + jdk9-plus + + [9,) + + + 8 + + diff --git a/src/main/java/org/archive/io/ArchiveFileConstants.java b/src/main/java/org/archive/io/ArchiveFileConstants.java deleted file mode 100644 index b1a39194..00000000 --- a/src/main/java/org/archive/io/ArchiveFileConstants.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. 
- * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.io; - -@Deprecated -public interface ArchiveFileConstants extends org.archive.format.ArchiveFileConstants { -} diff --git a/src/main/java/org/archive/io/ArchiveReader.java b/src/main/java/org/archive/io/ArchiveReader.java index 0038cccf..449cdc24 100644 --- a/src/main/java/org/archive/io/ArchiveReader.java +++ b/src/main/java/org/archive/io/ArchiveReader.java @@ -42,13 +42,15 @@ import com.google.common.io.CountingInputStream; +import static org.archive.format.ArchiveFileConstants.*; + /** * Reader for an Archive file of Archive {@link ArchiveRecord}s. * @author stack * @version $Date$ $Version$ */ -public abstract class ArchiveReader implements ArchiveFileConstants, Iterable, Closeable { +public abstract class ArchiveReader implements Iterable, Closeable { /** * Is this Archive file compressed? 
*/ @@ -601,8 +603,7 @@ public String getStrippedFileName() { */ public static String getStrippedFileName(String name, final String dotFileExtension) { - name = stripExtension(name, - ArchiveFileConstants.DOT_COMPRESSED_FILE_EXTENSION); + name = stripExtension(name, DOT_COMPRESSED_FILE_EXTENSION); return stripExtension(name, dotFileExtension); } @@ -699,7 +700,7 @@ public boolean outputRecord(final String format) boolean result = true; if (format.equals(CDX)) { System.out.println(get().outputCdx(getStrippedFileName())); - } else if(format.equals(ArchiveFileConstants.DUMP)) { + } else if(format.equals(DUMP)) { // No point digesting if dumping content. setDigest(false); get().dump(); diff --git a/src/main/java/org/archive/io/ArchiveReaderFactory.java b/src/main/java/org/archive/io/ArchiveReaderFactory.java index 17f14d3a..bc316893 100644 --- a/src/main/java/org/archive/io/ArchiveReaderFactory.java +++ b/src/main/java/org/archive/io/ArchiveReaderFactory.java @@ -33,6 +33,7 @@ import org.archive.url.UsableURI; import org.archive.util.FileUtils; +import static org.archive.format.ArchiveFileConstants.*; /** * Factory that returns an Archive file Reader. 
@@ -40,7 +41,7 @@ * @author stack * @version $Date$ $Revision$ */ -public class ArchiveReaderFactory implements ArchiveFileConstants { +public class ArchiveReaderFactory { // Static block to enable S3 URLs static { if (System.getProperty("java.protocol.handler.pkgs") != null) { diff --git a/src/main/java/org/archive/io/ArchiveRecord.java b/src/main/java/org/archive/io/ArchiveRecord.java index 63bfe628..4bd1fa02 100644 --- a/src/main/java/org/archive/io/ArchiveRecord.java +++ b/src/main/java/org/archive/io/ArchiveRecord.java @@ -25,6 +25,7 @@ import java.security.NoSuchAlgorithmException; import java.util.logging.Level; +import org.archive.format.ArchiveFileConstants; import org.archive.util.Base32; /** diff --git a/src/main/java/org/archive/io/GZIPMembersInputStream.java b/src/main/java/org/archive/io/GZIPMembersInputStream.java deleted file mode 100644 index 35fb9e90..00000000 --- a/src/main/java/org/archive/io/GZIPMembersInputStream.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.archive.io; - -import java.io.IOException; -import java.io.InputStream; - -/** - * @deprecated use {@link org.archive.util.zip.GZIPMembersInputStream} - */ -@Deprecated -public class GZIPMembersInputStream extends org.archive.util.zip.GZIPMembersInputStream { - - public GZIPMembersInputStream(InputStream in) throws IOException { - super(in); - } - - public GZIPMembersInputStream(InputStream in, int size) throws IOException { - super(in, size); - } - -} \ No newline at end of file diff --git a/src/main/java/org/archive/io/GzipHeader.java b/src/main/java/org/archive/io/GzipHeader.java deleted file mode 100644 index 6b8263bc..00000000 --- a/src/main/java/org/archive/io/GzipHeader.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.archive.io; - -/** - * @deprecated use {@link org.archive.util.zip.GzipHeader} - */ -@Deprecated -public class GzipHeader extends org.archive.util.zip.GzipHeader { -} diff --git a/src/main/java/org/archive/io/HeaderedArchiveRecord.java b/src/main/java/org/archive/io/HeaderedArchiveRecord.java index ac4b82f6..809a9e54 100644 --- a/src/main/java/org/archive/io/HeaderedArchiveRecord.java +++ b/src/main/java/org/archive/io/HeaderedArchiveRecord.java @@ -27,7 +27,7 @@ import java.io.PrintStream; import org.archive.format.http.HttpHeader; -import org.archive.io.arc.ARCConstants; +import org.archive.format.arc.ARCConstants; import org.archive.util.LaxHttpParser; /** diff --git a/src/main/java/org/archive/io/NoGzipMagicException.java b/src/main/java/org/archive/io/NoGzipMagicException.java deleted file mode 100644 index 27d1058a..00000000 --- a/src/main/java/org/archive/io/NoGzipMagicException.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.archive.io; - -/** - * @deprecated use {@link org.archive.util.zip.NoGzipMagicException} - */ -@Deprecated -public class NoGzipMagicException extends org.archive.util.zip.NoGzipMagicException { -} diff --git a/src/main/java/org/archive/io/WriterPool.java b/src/main/java/org/archive/io/WriterPool.java index db184c5f..79da16c0 100644 --- a/src/main/java/org/archive/io/WriterPool.java +++ b/src/main/java/org/archive/io/WriterPool.java @@ -30,6 +30,7 @@ import java.util.logging.Level; import java.util.logging.Logger; +import org.archive.format.ArchiveFileConstants; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -215,7 +216,7 @@ public synchronized void invalidateFile(WriterPoolMember f) // gets attention. File file = f.getFile(); file.renameTo(new File(file.getAbsoluteFile() + - WriterPoolMember.INVALID_SUFFIX)); + ArchiveFileConstants.INVALID_SUFFIX)); } /** diff --git a/src/main/java/org/archive/io/WriterPoolMember.java b/src/main/java/org/archive/io/WriterPoolMember.java index e10d443b..a488354a 100644 --- a/src/main/java/org/archive/io/WriterPoolMember.java +++ b/src/main/java/org/archive/io/WriterPoolMember.java @@ -38,6 +38,7 @@ import org.archive.util.FileUtils; import org.archive.util.PropertyUtils; +import static org.archive.format.ArchiveFileConstants.*; /** @@ -48,7 +49,7 @@ * @author stack * @version $Date$ $Revision$ */ -public abstract class WriterPoolMember implements ArchiveFileConstants { +public abstract class WriterPoolMember { private final Logger logger = Logger.getLogger(this.getClass().getName()); public static final String UTF8 = "UTF-8"; diff --git a/src/main/java/org/archive/io/arc/ARCConstants.java b/src/main/java/org/archive/io/arc/ARCConstants.java deleted file mode 100644 index c44cfef7..00000000 --- a/src/main/java/org/archive/io/arc/ARCConstants.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). 
- * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.archive.io.arc; - - -/** - * Constants used by ARC files and in ARC file processing. - * - * @author stack - * @deprecated - */ -public interface ARCConstants extends org.archive.format.arc.ARCConstants { -} diff --git a/src/main/java/org/archive/io/arc/ARCReader.java b/src/main/java/org/archive/io/arc/ARCReader.java index 7f85cc2a..c9a88415 100644 --- a/src/main/java/org/archive/io/arc/ARCReader.java +++ b/src/main/java/org/archive/io/arc/ARCReader.java @@ -43,6 +43,7 @@ import org.archive.io.WriterPoolMember; import org.archive.util.ArchiveUtils; +import static org.archive.format.arc.ARCConstants.*; /** * Get an iterator on an ARC file or get a record by absolute position. 
@@ -66,7 +67,7 @@ * @version $Date$ $Revision$ */ public abstract class ARCReader extends ArchiveReader -implements ARCConstants, Closeable { +implements Closeable { private final Logger logger = Logger.getLogger(ARCReader.class.getName()); /** diff --git a/src/main/java/org/archive/io/arc/ARCReaderFactory.java b/src/main/java/org/archive/io/arc/ARCReaderFactory.java index 44437ed7..d2f10842 100644 --- a/src/main/java/org/archive/io/arc/ARCReaderFactory.java +++ b/src/main/java/org/archive/io/arc/ARCReaderFactory.java @@ -40,6 +40,7 @@ import com.google.common.io.CountingInputStream; +import static org.archive.format.arc.ARCConstants.*; /** * Factory that returns an ARCReader. @@ -48,8 +49,7 @@ * * @author stack */ -public class ARCReaderFactory extends ArchiveReaderFactory -implements ARCConstants { +public class ARCReaderFactory extends ArchiveReaderFactory { /** * This factory instance. */ diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java index d3c036ba..dafc63b6 100644 --- a/src/main/java/org/archive/io/arc/ARCRecord.java +++ b/src/main/java/org/archive/io/arc/ARCRecord.java @@ -42,12 +42,14 @@ import org.archive.util.LaxHttpParser; import org.archive.util.TextUtils; +import static org.archive.format.arc.ARCConstants.*; + /** * An ARC file record. * Does not compass the ARCRecord metadata line, just the record content. * @author stack */ -public class ARCRecord extends ArchiveRecord implements ARCConstants { +public class ARCRecord extends ArchiveRecord { /** * Http status code. 
* @@ -590,7 +592,7 @@ private InputStream readHttpHeader() throws IOException { } statusLine = new String(statusBytes, 0, - statusBytes.length - eolCharCount, ARCConstants.DEFAULT_ENCODING); + statusBytes.length - eolCharCount, DEFAULT_ENCODING); // If a null or DELETED break immediately if ((statusLine == null) || statusLine.startsWith("DELETED")) { @@ -681,8 +683,7 @@ private InputStream readHttpHeader() throws IOException { // Read the status line. Don't let it into the parseHeaders function. // It doesn't know what to do with it. bais.read(statusBytes, 0, statusBytes.length); - this.httpHeaders = LaxHttpParser.parseHeaders(bais, - ARCConstants.DEFAULT_ENCODING); + this.httpHeaders = LaxHttpParser.parseHeaders(bais, DEFAULT_ENCODING); this.getMetaData().setStatusCode(Integer.toString(getStatusCode())); bais.reset(); return bais; diff --git a/src/main/java/org/archive/io/arc/ARCRecordMetaData.java b/src/main/java/org/archive/io/arc/ARCRecordMetaData.java index 02b368e4..2a187477 100644 --- a/src/main/java/org/archive/io/arc/ARCRecordMetaData.java +++ b/src/main/java/org/archive/io/arc/ARCRecordMetaData.java @@ -27,13 +27,14 @@ import org.archive.io.ArchiveRecordHeader; +import static org.archive.format.arc.ARCConstants.*; /** * An immutable class to hold an ARC record meta data. * * @author stack */ -public class ARCRecordMetaData implements ArchiveRecordHeader, ARCConstants { +public class ARCRecordMetaData implements ArchiveRecordHeader { /** * Map of record header fields. 
* diff --git a/src/main/java/org/archive/io/arc/ARCUtils.java b/src/main/java/org/archive/io/arc/ARCUtils.java index 985457e2..5bcb4cc3 100644 --- a/src/main/java/org/archive/io/arc/ARCUtils.java +++ b/src/main/java/org/archive/io/arc/ARCUtils.java @@ -32,7 +32,9 @@ import org.archive.util.zip.GzipHeader; import org.archive.util.zip.NoGzipMagicException; -public class ARCUtils implements ARCConstants { +import static org.archive.format.arc.ARCConstants.*; + +public class ARCUtils { /** * @param pathOrUri Path or URI to extract arc filename from. * @return Extracted arc file name. diff --git a/src/main/java/org/archive/io/arc/ARCWriter.java b/src/main/java/org/archive/io/arc/ARCWriter.java index c7042943..82d13e9f 100644 --- a/src/main/java/org/archive/io/arc/ARCWriter.java +++ b/src/main/java/org/archive/io/arc/ARCWriter.java @@ -42,6 +42,7 @@ import org.archive.util.DevUtils; import org.archive.util.MimetypeUtils; +import static org.archive.format.arc.ARCConstants.*; /** * Write ARC files. @@ -110,7 +111,7 @@ * * @author stack */ -public class ARCWriter extends WriterPoolMember implements ARCConstants, Closeable { +public class ARCWriter extends WriterPoolMember implements Closeable { private static final Logger logger = Logger.getLogger(ARCWriter.class.getName()); diff --git a/src/main/java/org/archive/io/warc/WARCConstants.java b/src/main/java/org/archive/io/warc/WARCConstants.java deleted file mode 100644 index 83cc8a6d..00000000 --- a/src/main/java/org/archive/io/warc/WARCConstants.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * This file is part of the Heritrix web crawler (crawler.archive.org). - * - * Licensed to the Internet Archive (IA) by one or more individual - * contributors. - * - * The IA licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.archive.io.warc; - -@Deprecated -public interface WARCConstants extends org.archive.format.warc.WARCConstants { -} diff --git a/src/main/java/org/archive/io/warc/WARCReader.java b/src/main/java/org/archive/io/warc/WARCReader.java index a34854ef..f9b41af7 100644 --- a/src/main/java/org/archive/io/warc/WARCReader.java +++ b/src/main/java/org/archive/io/warc/WARCReader.java @@ -35,13 +35,15 @@ import org.archive.io.ArchiveReader; import org.archive.io.ArchiveRecord; +import static org.archive.format.warc.WARCConstants.*; + /** * WARCReader. * Go via {@link WARCReaderFactory} to get instance. 
* @author stack * @version $Date: 2006-11-27 18:03:03 -0800 (Mon, 27 Nov 2006) $ $Version$ */ -public class WARCReader extends ArchiveReader implements WARCConstants { +public class WARCReader extends ArchiveReader { protected WARCReader() { super(); } diff --git a/src/main/java/org/archive/io/warc/WARCReaderFactory.java b/src/main/java/org/archive/io/warc/WARCReaderFactory.java index c3e5baa0..881da869 100644 --- a/src/main/java/org/archive/io/warc/WARCReaderFactory.java +++ b/src/main/java/org/archive/io/warc/WARCReaderFactory.java @@ -30,13 +30,14 @@ import org.archive.io.ArchiveReader; import org.archive.io.ArchiveReaderFactory; import org.archive.io.ArchiveRecord; -import org.archive.io.warc.WARCConstants; import org.archive.util.ArchiveUtils; import org.archive.util.FileUtils; import org.archive.util.zip.GZIPMembersInputStream; import com.google.common.io.CountingInputStream; +import static org.archive.format.warc.WARCConstants.*; + /** * Factory for WARC Readers. * Figures whether to give out a compressed file Reader or an uncompressed @@ -44,8 +45,7 @@ * @author stack * @version $Date: 2006-08-23 17:59:04 -0700 (Wed, 23 Aug 2006) $ $Version$ */ -public class WARCReaderFactory extends ArchiveReaderFactory -implements WARCConstants { +public class WARCReaderFactory extends ArchiveReaderFactory { private static final WARCReaderFactory factory = new WARCReaderFactory(); /** diff --git a/src/main/java/org/archive/io/warc/WARCRecord.java b/src/main/java/org/archive/io/warc/WARCRecord.java index cf106270..21f662ea 100644 --- a/src/main/java/org/archive/io/warc/WARCRecord.java +++ b/src/main/java/org/archive/io/warc/WARCRecord.java @@ -34,13 +34,17 @@ import org.archive.io.ArchiveRecordHeader; import org.archive.util.LaxHttpParser; +import static org.archive.format.ArchiveFileConstants.ABSOLUTE_OFFSET_KEY; +import static org.archive.format.ArchiveFileConstants.READER_IDENTIFIER_FIELD_KEY; +import static org.archive.format.warc.WARCConstants.*; + /** * A WARC file 
Record. * * @author stack */ -public class WARCRecord extends ArchiveRecord implements WARCConstants { +public class WARCRecord extends ArchiveRecord { private Pattern WHITESPACE = Pattern.compile("\\s"); /** diff --git a/src/main/java/org/archive/io/warc/WARCWriter.java b/src/main/java/org/archive/io/warc/WARCWriter.java index 982b8bc4..1e6135c8 100644 --- a/src/main/java/org/archive/io/warc/WARCWriter.java +++ b/src/main/java/org/archive/io/warc/WARCWriter.java @@ -37,12 +37,14 @@ import java.util.logging.Logger; import org.apache.commons.lang.StringUtils; -import org.archive.io.ArchiveFileConstants; +import org.archive.format.ArchiveFileConstants; import org.archive.io.UTF8Bytes; import org.archive.io.WriterPoolMember; import org.archive.util.ArchiveUtils; import org.archive.util.anvl.Element; +import static org.archive.format.warc.WARCConstants.*; + /** * WARC implementation. @@ -56,8 +58,7 @@ * @author stack * @version $Revision: 4604 $ $Date: 2006-09-05 22:38:18 -0700 (Tue, 05 Sep 2006) $ */ -public class WARCWriter extends WriterPoolMember -implements WARCConstants { +public class WARCWriter extends WriterPoolMember { public static final String TOTALS = "totals"; public static final String SIZE_ON_DISK = "sizeOnDisk"; public static final String TOTAL_BYTES = "totalBytes"; @@ -343,9 +344,9 @@ public URI writeWarcinfoRecord(String filename, final String description) recordInfo.setMimetype("application/warc-fields"); // Strip .open suffix if present. 
- if (filename.endsWith(WriterPoolMember.OCCUPIED_SUFFIX)) { + if (filename.endsWith(ArchiveFileConstants.OCCUPIED_SUFFIX)) { filename = filename.substring(0, - filename.length() - WriterPoolMember.OCCUPIED_SUFFIX.length()); + filename.length() - ArchiveFileConstants.OCCUPIED_SUFFIX.length()); } recordInfo.addExtraHeader(HEADER_KEY_FILENAME, filename); if (description != null && description.length() > 0) { diff --git a/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java b/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java deleted file mode 100644 index 3d4d8581..00000000 --- a/src/main/java/org/archive/url/DefaultIACanonicalizerRules.java +++ /dev/null @@ -1,7 +0,0 @@ -package org.archive.url; - -/** - * @deprecated use AggressiveIACanonicalizerRules - */ -public class DefaultIACanonicalizerRules extends AggressiveIACanonicalizerRules { -} diff --git a/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java b/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java deleted file mode 100644 index 3d1f985d..00000000 --- a/src/main/java/org/archive/url/DefaultIAURLCanonicalizer.java +++ /dev/null @@ -1,7 +0,0 @@ -package org.archive.url; - -/** - * @deprecated use AggressiveIAURLCanonicalizer - */ -public class DefaultIAURLCanonicalizer extends AggressiveIAURLCanonicalizer { -} diff --git a/src/main/java/org/archive/url/GoogleURLCanonicalizer.java b/src/main/java/org/archive/url/GoogleURLCanonicalizer.java deleted file mode 100644 index 388db8aa..00000000 --- a/src/main/java/org/archive/url/GoogleURLCanonicalizer.java +++ /dev/null @@ -1,7 +0,0 @@ -package org.archive.url; - -/** - * @deprecated use {@link BasicURLCanonicalizer} - */ -public class GoogleURLCanonicalizer extends BasicURLCanonicalizer { -} diff --git a/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java b/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java index cd579eb0..830b7b92 100644 --- 
a/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java +++ b/src/main/java/org/archive/url/NonMassagingIAURLCanonicalizer.java @@ -1,10 +1,10 @@ package org.archive.url; public class NonMassagingIAURLCanonicalizer implements URLCanonicalizer { - private static final GoogleURLCanonicalizer google = - new GoogleURLCanonicalizer(); + private static final BasicURLCanonicalizer basic = + new BasicURLCanonicalizer(); private static CanonicalizeRules nonMassagingRules = - new DefaultIACanonicalizerRules(); + new AggressiveIACanonicalizerRules(); static { nonMassagingRules.setRule(CanonicalizeRules.HOST_SETTINGS, CanonicalizeRules.HOST_LOWERCASE); @@ -14,7 +14,7 @@ public class NonMassagingIAURLCanonicalizer implements URLCanonicalizer { public void canonicalize(HandyURL url) { // just google's stuff, followed by the IA default stuff: - google.canonicalize(url); + basic.canonicalize(url); ia.canonicalize(url); } } diff --git a/src/main/java/org/archive/url/WaybackURLKeyMaker.java b/src/main/java/org/archive/url/WaybackURLKeyMaker.java index 99fb92e9..56f51b49 100644 --- a/src/main/java/org/archive/url/WaybackURLKeyMaker.java +++ b/src/main/java/org/archive/url/WaybackURLKeyMaker.java @@ -5,7 +5,7 @@ public class WaybackURLKeyMaker implements URLKeyMaker { // URLCanonicalizer canonicalizer = new NonMassagingIAURLCanonicalizer(); - URLCanonicalizer canonicalizer = new DefaultIAURLCanonicalizer(); + URLCanonicalizer canonicalizer = new AggressiveIAURLCanonicalizer(); public URLCanonicalizer getCanonicalizer() { return canonicalizer; diff --git a/src/main/java/org/archive/util/DevUtils.java b/src/main/java/org/archive/util/DevUtils.java index d630a0b1..f2a1d044 100644 --- a/src/main/java/org/archive/util/DevUtils.java +++ b/src/main/java/org/archive/util/DevUtils.java @@ -78,15 +78,6 @@ public static String extraInfo() { return sw.toString(); } - /** - * Nothing to see here, move along. - * @deprecated This method was never used. 
- */ - @Deprecated - public static void betterPrintStack(RuntimeException re) { - re.printStackTrace(System.err); - } - /** * Send this JVM process a SIGQUIT; giving a thread dump and possibly * a heap histogram (if using -XX:+PrintClassHistogram). diff --git a/src/main/java/org/archive/util/Recorder.java b/src/main/java/org/archive/util/Recorder.java index 61cbf871..e67cfb48 100644 --- a/src/main/java/org/archive/util/Recorder.java +++ b/src/main/java/org/archive/util/Recorder.java @@ -351,16 +351,6 @@ public void setContentEncoding(String contentEncoding) { public String getContentEncoding() { return this.contentEncoding; } - - - /** - * @return - * @throws IOException - * @deprecated use getContentReplayCharSequence - */ - public ReplayCharSequence getReplayCharSequence() throws IOException { - return getContentReplayCharSequence(); - } /** * @return A ReplayCharSequence. Caller may call diff --git a/src/main/java/org/archive/util/Reporter.java b/src/main/java/org/archive/util/Reporter.java index 3f4ea5e5..dd21b53d 100644 --- a/src/main/java/org/archive/util/Reporter.java +++ b/src/main/java/org/archive/util/Reporter.java @@ -32,15 +32,6 @@ public interface Reporter { */ public void reportTo(PrintWriter writer) throws IOException; - /** - * Write a short single-line summary report - * - * @param pw writer to receive report - */ - @Deprecated - public void shortReportLineTo(PrintWriter pw) throws IOException; - - /** * @return Same data that's in the single line report, as key-value pairs */ diff --git a/src/main/java/org/archive/util/anvl/ANVLRecord.java b/src/main/java/org/archive/util/anvl/ANVLRecord.java index 06603914..e548f432 100644 --- a/src/main/java/org/archive/util/anvl/ANVLRecord.java +++ b/src/main/java/org/archive/util/anvl/ANVLRecord.java @@ -72,11 +72,6 @@ public ANVLRecord(Collection c) { super(c); } - /** @deprecated */ - public ANVLRecord(int initialCapacity) { - super(); - } - public boolean addLabel(final String l) { return super.add(new 
Element(new Label(l))); } diff --git a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java index 07548b4c..954da636 100644 --- a/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java +++ b/src/test/java/org/archive/io/arc/ARCWriterPoolTest.java @@ -31,7 +31,7 @@ import org.junit.jupiter.api.io.TempDir; import static org.junit.jupiter.api.Assertions.assertEquals; - +import static org.archive.format.arc.ARCConstants.*; /** * Test ARCWriterPool @@ -119,7 +119,7 @@ private WriterPoolSettings getSettings(final boolean isCompressed) { return new WriterPoolSettingsData( "TEST", "${prefix}-${timestamp17}-${serialno}-${heritrix.hostname}", - ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE, + DEFAULT_MAX_ARC_FILE_SIZE, isCompressed, Arrays.asList(files), null); diff --git a/src/test/java/org/archive/io/arc/ARCWriterTest.java b/src/test/java/org/archive/io/arc/ARCWriterTest.java index 84539391..ca300697 100644 --- a/src/test/java/org/archive/io/arc/ARCWriterTest.java +++ b/src/test/java/org/archive/io/arc/ARCWriterTest.java @@ -49,6 +49,7 @@ import static org.junit.jupiter.api.Assertions.*; +import static org.archive.format.arc.ARCConstants.*; /** * Test ARCWriter class. @@ -58,7 +59,7 @@ * * @author stack */ -public class ARCWriterTest implements ARCConstants { +public class ARCWriterTest { /** * Utility class for writing bad ARCs (with trailing junk) */ diff --git a/src/test/java/org/archive/io/warc/WARCWriterTest.java b/src/test/java/org/archive/io/warc/WARCWriterTest.java index 1039119e..c0ace5f0 100644 --- a/src/test/java/org/archive/io/warc/WARCWriterTest.java +++ b/src/test/java/org/archive/io/warc/WARCWriterTest.java @@ -44,12 +44,14 @@ import static org.junit.jupiter.api.Assertions.*; +import static org.archive.format.warc.WARCConstants.*; + /** * Test Writer and Reader. 
* @author stack * @version $Date: 2006-08-29 19:35:48 -0700 (Tue, 29 Aug 2006) $ $Version$ */ -public class WARCWriterTest implements WARCConstants { +public class WARCWriterTest { private static final AtomicInteger SERIAL_NO = new AtomicInteger(); @@ -153,7 +155,7 @@ private void writeWarcinfoRecord(WARCWriter writer) recordInfo.setContentStream(new ByteArrayInputStream(bytes)); recordInfo.setContentLength((long) bytes.length); - final URI recordid = writer.generateRecordId(WARCWriter.TYPE, WARCRecordType.warcinfo.toString()); + final URI recordid = writer.generateRecordId(TYPE, WARCRecordType.warcinfo.toString()); recordInfo.setRecordId(recordid); writer.writeRecord(recordInfo); diff --git a/src/test/java/org/archive/url/IAURLCanonicalizerTest.java b/src/test/java/org/archive/url/IAURLCanonicalizerTest.java index 974bdd22..aecddb3b 100644 --- a/src/test/java/org/archive/url/IAURLCanonicalizerTest.java +++ b/src/test/java/org/archive/url/IAURLCanonicalizerTest.java @@ -10,7 +10,7 @@ public class IAURLCanonicalizerTest { @Test public void testFull() throws URISyntaxException { - IAURLCanonicalizer iaC = new IAURLCanonicalizer(new DefaultIACanonicalizerRules()); + IAURLCanonicalizer iaC = new IAURLCanonicalizer(new AggressiveIACanonicalizerRules()); compCan(iaC,"http://www.archive.org:80/","http://archive.org/"); compCan(iaC,"https://www.archive.org:80/","https://archive.org:80/"); compCan(iaC,"http://www.archive.org:443/","http://archive.org:443/"); @@ -63,7 +63,7 @@ public void testGetDefaultPort() { @Test public void testStripSessionId() throws URISyntaxException { - IAURLCanonicalizer iaC = new IAURLCanonicalizer(new DefaultIACanonicalizerRules()); + IAURLCanonicalizer iaC = new IAURLCanonicalizer(new AggressiveIACanonicalizerRules()); compCan(iaC, "http://www.nsf.gov/statistics/sed/2009/SED_2009.zip?CFID=14387305&CFTOKEN=72942008&jsessionid=f030eacc7e49c4ca0b077922347418418766", 
"http://nsf.gov/statistics/sed/2009/sed_2009.zip?jsessionid=f030eacc7e49c4ca0b077922347418418766");