From 053fc1d2a1b8189a18bccf817c87ca761bfe30d9 Mon Sep 17 00:00:00 2001
From: Sebastian Nagel
Date: Tue, 14 Apr 2026 19:06:57 +0200
Subject: [PATCH] WAT extractor not to fail on metadata records without WARC-Target-URI

The WARC spec does not require a WARC-Target-URI for metadata records.
The WAT extractor should not fail if a metadata record has no target URI,
but simply not add one to the JSON blob.
---
 src/main/java/org/archive/extract/WATExtractorOutput.java | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/main/java/org/archive/extract/WATExtractorOutput.java b/src/main/java/org/archive/extract/WATExtractorOutput.java
index 621656b7..f695796f 100644
--- a/src/main/java/org/archive/extract/WATExtractorOutput.java
+++ b/src/main/java/org/archive/extract/WATExtractorOutput.java
@@ -152,6 +152,9 @@ private void writeWARC(OutputStream recOut, MetaData md) throws IOException {
 		String targetURI;
 		if(warcType.equals("warcinfo")) {
 			targetURI = JSONUtils.extractSingle(md, "Envelope.WARC-Header-Metadata.WARC-Filename");
+		} else if (warcType.equals("metadata")) {
+			// WARC-Target-URI is optional in metadata records, so do not use extractOrIO here
+			targetURI = JSONUtils.extractSingle(md, "Envelope.WARC-Header-Metadata.WARC-Target-URI");
 		} else {
 			targetURI = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Target-URI");
 		}