Skip to content

Commit 21c5cc4

Browse files
authored
Merge pull request iipc#89 from sebastian-nagel/webarchive-commons-88
WAT extractor: do not fail on missing WARC-Filename in warcinfo record
2 parents 3d0cdd7 + 04e1039 commit 21c5cc4

3 files changed

Lines changed: 12 additions & 2 deletions

File tree

CHANGES.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
+1.1.10
+------
+* [WAT extractor: do not fail on missing WARC-Filename in warcinfo record](https://github.com/iipc/webarchive-commons/pull/89)
+* [ExtractingParseObserver: extract rel, hreflang and type attributes](https://github.com/iipc/webarchive-commons/pull/86)
+* [ExtractingParseObserver: extract links from onClick attributes](https://github.com/iipc/webarchive-commons/pull/85)
+* [Update TravisCI config](https://github.com/iipc/webarchive-commons/pull/83)
+
 1.1.9
 -----
 * [Use commons-collections v3.2.2 to avoid v3.2.1 vulnerability](https://github.com/iipc/webarchive-commons/pull/77)

src/main/java/org/archive/extract/WATExtractorOutput.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ private void writeWARC(OutputStream recOut, MetaData md) throws IOException {
 String warcType = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Type");
 String targetURI;
 if(warcType.equals("warcinfo")) {
-targetURI = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Filename");
+targetURI = JSONUtils.extractSingle(md, "Envelope.WARC-Header-Metadata.WARC-Filename");
 } else {
 targetURI = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Target-URI");
 }

src/main/java/org/archive/format/warc/WARCRecordWriter.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,10 @@ public void writeJSONMetadataRecord( OutputStream out,
 {
 HttpHeaders headers = new HttpHeaders();
 headers.add(HEADER_KEY_TYPE, WARCRecordType.metadata.name());
-headers.add(HEADER_KEY_URI, targetURI);
+if (targetURI != null) {
+// WARC-Target-URI is optional in metadata records
+headers.add(HEADER_KEY_URI, targetURI);
+}
 headers.add(HEADER_KEY_DATE, DateUtils.getLog14Date(originalDate));
 headers.add(HEADER_KEY_ID, makeRecordId());
 headers.add(HEADER_KEY_REFERS_TO, origRecordId);

0 commit comments

Comments
 (0)