From 483bd4f25c4607fc3a1d08922d25c08f8b6d9f7c Mon Sep 17 00:00:00 2001
From: Hande Celikkanat <7702228+handecelikkanat@users.noreply.github.com>
Date: Thu, 18 Jun 2026 22:05:03 +0300
Subject: [PATCH 1/2] chore(pom.xml): migrate to AWS SDK v2
---
pom.xml | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/pom.xml b/pom.xml
index 69b7487..e031aad 100644
--- a/pom.xml
+++ b/pom.xml
@@ -40,7 +40,7 @@ under the License.
UTF-8
3.6.0
2.8.8
- 1.12.797
+ 2.46.7
2.21.3
1.6
5.23.0
@@ -50,6 +50,18 @@ under the License.
true
+
+
+
+ software.amazon.awssdk
+ bom
+ ${aws.version}
+ pom
+ import
+
+
+
+
org.apache.stormcrawler
@@ -86,9 +98,8 @@ under the License.
- com.amazonaws
- aws-java-sdk-s3
- ${aws.version}
+ software.amazon.awssdk
+ s3
From 89a826fda4e92700fdbcf56e3885b945752f6e17 Mon Sep 17 00:00:00 2001
From: Hande Celikkanat <7702228+handecelikkanat@users.noreply.github.com>
Date: Mon, 22 Jun 2026 18:00:49 +0300
Subject: [PATCH 2/2] chore(FastURLFilter.java): migrate to AWS SDK v2
---
.../stormcrawler/filter/FastURLFilter.java | 34 ++++++++++++-------
1 file changed, 22 insertions(+), 12 deletions(-)
diff --git a/src/main/java/org/commoncrawl/stormcrawler/filter/FastURLFilter.java b/src/main/java/org/commoncrawl/stormcrawler/filter/FastURLFilter.java
index 9b2ee32..881f4ad 100644
--- a/src/main/java/org/commoncrawl/stormcrawler/filter/FastURLFilter.java
+++ b/src/main/java/org/commoncrawl/stormcrawler/filter/FastURLFilter.java
@@ -16,11 +16,6 @@
*/
package org.commoncrawl.stormcrawler.filter;
-import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3ClientBuilder;
-import com.amazonaws.services.s3.model.GetObjectRequest;
-import com.amazonaws.services.s3.model.ObjectMetadata;
-import com.amazonaws.services.s3.model.S3Object;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.JsonNode;
@@ -46,6 +41,10 @@
import org.apache.stormcrawler.util.ConfUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.GetObjectRequest;
+import software.amazon.awssdk.services.s3.model.HeadObjectRequest;
+import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
/**
* Version of the FastURLFilter that can load from a text representation instead of the JSON that
@@ -170,11 +169,11 @@ public void run() {
@Override
public void loadJSONResources() throws Exception {
InputStream inputStream = null;
- AmazonS3 s3client = null;
+ S3Client s3client = null;
try {
if (getResourceFile().startsWith("s3://")) {
// try loading from S3
- s3client = AmazonS3ClientBuilder.standard().build();
+ s3client = S3Client.builder().build();
java.net.URI uri = new java.net.URI(getResourceFile());
String bucketName = uri.getHost();
@@ -182,8 +181,14 @@ public void loadJSONResources() throws Exception {
String path = uri.getPath().substring(1);
// optimisation - avoid a full reload if the resource has not changed
- ObjectMetadata metadata = s3client.getObjectMetadata(bucketName, path);
- final String ETAG = metadata.getETag();
+ HeadObjectResponse headResponse =
+ s3client.headObject(
+ HeadObjectRequest.builder()
+ .bucket(bucketName)
+ .key(path)
+ .build()
+ );
+ final String ETAG = headResponse.eTag();
if (ETAG != null && ETAG.equals(resourceETAG)) {
LOG.info("Unchanged ETAG for {} - skipping reload", getResourceFile());
return;
@@ -191,8 +196,13 @@ public void loadJSONResources() throws Exception {
resourceETAG = ETAG;
}
- final S3Object object = s3client.getObject(new GetObjectRequest(bucketName, path));
- inputStream = object.getObjectContent();
+ inputStream =
+ s3client.getObject(
+ GetObjectRequest.builder()
+ .bucket(bucketName)
+ .key(path)
+ .build()
+ );
} else {
inputStream = getClass().getClassLoader().getResourceAsStream(getResourceFile());
if (inputStream == null) {
@@ -210,7 +220,7 @@ public void loadJSONResources() throws Exception {
inputStream.close();
}
if (s3client != null) {
- s3client.shutdown();
+ s3client.close();
}
}
}