diff --git a/CHANGES.md b/CHANGES.md index dcb598d9..bf985ada 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,10 @@ +1.1.10 +------ +* [WAT extractor: do not fail on missing WARC-Filename in warcinfo record](https://github.com/iipc/webarchive-commons/pull/89) +* [ExtractingParseObserver: extract rel, hreflang and type attributes](https://github.com/iipc/webarchive-commons/pull/86) +* [ExtractingParseObserver: extract links from onClick attributes](https://github.com/iipc/webarchive-commons/pull/85) +* [Update TravisCI config](https://github.com/iipc/webarchive-commons/pull/83) + 1.1.9 ----- * [Use commons-collections v3.2.2 to avoid v3.2.1 vulnerability](https://github.com/iipc/webarchive-commons/pull/77) diff --git a/src/main/java/org/archive/extract/WATExtractorOutput.java b/src/main/java/org/archive/extract/WATExtractorOutput.java index 3bcfa924..4b5f72ed 100644 --- a/src/main/java/org/archive/extract/WATExtractorOutput.java +++ b/src/main/java/org/archive/extract/WATExtractorOutput.java @@ -151,7 +151,7 @@ private void writeWARC(OutputStream recOut, MetaData md) throws IOException { String warcType = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Type"); String targetURI; if(warcType.equals("warcinfo")) { - targetURI = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Filename"); + targetURI = JSONUtils.extractSingle(md, "Envelope.WARC-Header-Metadata.WARC-Filename"); } else { targetURI = extractOrIO(md, "Envelope.WARC-Header-Metadata.WARC-Target-URI"); } diff --git a/src/main/java/org/archive/format/warc/WARCRecordWriter.java b/src/main/java/org/archive/format/warc/WARCRecordWriter.java index 0aab83b7..3278b289 100644 --- a/src/main/java/org/archive/format/warc/WARCRecordWriter.java +++ b/src/main/java/org/archive/format/warc/WARCRecordWriter.java @@ -88,7 +88,10 @@ public void writeJSONMetadataRecord( OutputStream out, { HttpHeaders headers = new HttpHeaders(); headers.add(HEADER_KEY_TYPE, WARCRecordType.metadata.name()); - headers.add(HEADER_KEY_URI, targetURI); + if (targetURI != null) { + // WARC-Target-URI is optional in metadata records + headers.add(HEADER_KEY_URI, targetURI); + } headers.add(HEADER_KEY_DATE, DateUtils.getLog14Date(originalDate)); headers.add(HEADER_KEY_ID, makeRecordId()); headers.add(HEADER_KEY_REFERS_TO, origRecordId);