diff --git a/CHANGES.md b/CHANGES.md index 70f9b052..3c9f4c8b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +1.1.7 +----- +* [Store origin-code of ARC file header](https://github.com/iipc/webarchive-commons/pull/52/) + 1.1.6 ----- * [Handle empty String argument in CharsetDetector.trimAttrValue](https://github.com/iipc/webarchive-commons/pull/49) diff --git a/src/main/java/org/archive/format/ArchiveFileConstants.java b/src/main/java/org/archive/format/ArchiveFileConstants.java index b0b8aa66..89e1308c 100644 --- a/src/main/java/org/archive/format/ArchiveFileConstants.java +++ b/src/main/java/org/archive/format/ArchiveFileConstants.java @@ -44,6 +44,11 @@ public interface ArchiveFileConstants { * Key for the Archive File version field. */ public static final String VERSION_FIELD_KEY = "version"; + + /** + * Key for the Archive File origin-code field. This value is often hard-coded, so use with care. + */ + public static final String ORIGIN_FIELD_KEY = "origin"; /** * Key for the Archive File length field. @@ -80,7 +85,7 @@ public interface ArchiveFileConstants { * Key for the Archive Record absolute offset into Archive file. */ public static final String ABSOLUTE_OFFSET_KEY = "absolute-offset"; - + public static final String READER_IDENTIFIER_FIELD_KEY = "reader-identifier"; diff --git a/src/main/java/org/archive/format/arc/ARCConstants.java b/src/main/java/org/archive/format/arc/ARCConstants.java index a336ddeb..5987b49f 100755 --- a/src/main/java/org/archive/format/arc/ARCConstants.java +++ b/src/main/java/org/archive/format/arc/ARCConstants.java @@ -196,7 +196,7 @@ public interface ARCConstants extends ArchiveFileConstants { .asList(new String[] { URL_FIELD_KEY, IP_HEADER_FIELD_KEY, DATE_FIELD_KEY, MIMETYPE_FIELD_KEY, LENGTH_FIELD_KEY, VERSION_FIELD_KEY, - ABSOLUTE_OFFSET_KEY }); + ORIGIN_FIELD_KEY, ABSOLUTE_OFFSET_KEY }); /** * Minimum possible record length. diff --git a/src/main/java/org/archive/io/arc/ARCRecord.java b/src/main/java/org/archive/io/arc/ARCRecord.java index 21bea07c..2d9c9bf4 100644 --- a/src/main/java/org/archive/io/arc/ARCRecord.java +++ b/src/main/java/org/archive/io/arc/ARCRecord.java @@ -200,7 +200,7 @@ public ARCRecord(InputStream in, ArchiveRecordHeader metaData, public ARCRecord(InputStream in, final String identifier, final long offset, boolean digest, boolean strict, final boolean parseHttpHeaders, - final boolean isAlignedOnFirstRecord, String version) + final boolean isAlignedOnFirstRecord, String version) throws IOException { super(in, null, 0, digest, strict); setHeader(parseHeaders(in, identifier, offset, strict, isAlignedOnFirstRecord, version)); @@ -243,6 +243,7 @@ private ArchiveRecordHeader parseHeaders(final InputStream in, getTokenizedHeaderLine(in, firstLineValues); int bodyOffset = 0; + String origin = ""; if (offset == 0 && isAlignedOnFirstRecord) { // If offset is zero and we were aligned at first record on // creation (See #alignedOnFirstRecord for more on this), then no @@ -263,6 +264,7 @@ private ArchiveRecordHeader parseHeaders(final InputStream in, bodyOffset += getTokenizedHeaderLine(in, secondLineValues); version = ((String)secondLineValues.get(0) + "." + (String)secondLineValues.get(1)); + origin = (String)secondLineValues.get(2); // Just read over the 3rd line. We used to parse it and use // values found here but now we just hardcode them to avoid // having to read this 3rd line even for random arc file accesses. @@ -271,7 +273,8 @@ private ArchiveRecordHeader parseHeaders(final InputStream in, } setBodyOffset(bodyOffset); - return computeMetaData(this.headerFieldNameKeys, firstLineValues, version, offset, identifier); + return computeMetaData(this.headerFieldNameKeys, firstLineValues, + version, origin, offset, identifier); } /** @@ -362,7 +365,8 @@ private int getTokenizedHeaderLine(final InputStream stream, * @exception IOException If no. of keys doesn't match no. of values. */ private ARCRecordMetaData computeMetaData(List keys, - List values, String v, long offset, final String identifier) + List values, String v, String origin, + long offset, final String identifier) throws IOException { if (keys.size() != values.size()) { List originalValues = values; @@ -423,6 +427,7 @@ private ARCRecordMetaData computeMetaData(List keys, } headerFields.put(VERSION_FIELD_KEY, v); + headerFields.put(ORIGIN_FIELD_KEY, origin); headerFields.put(ABSOLUTE_OFFSET_KEY, new Long(offset)); return new ARCRecordMetaData(identifier, headerFields); @@ -832,4 +837,4 @@ protected String getDigest4Cdx(ArchiveRecordHeader h) { } return (result != null) ? result: super.getDigest4Cdx(h); } -} \ No newline at end of file +} diff --git a/src/main/java/org/archive/io/arc/ARCRecordMetaData.java b/src/main/java/org/archive/io/arc/ARCRecordMetaData.java index 3f617041..02b368e4 100644 --- a/src/main/java/org/archive/io/arc/ARCRecordMetaData.java +++ b/src/main/java/org/archive/io/arc/ARCRecordMetaData.java @@ -168,6 +168,13 @@ public String getVersion() { return (String)this.headerFields.get(VERSION_FIELD_KEY); } + /** + * @return Arcfile origin code. + */ + public String getOrigin() { + return (String)this.headerFields.get(ORIGIN_FIELD_KEY); + } + /** * @return Offset into arcfile at which this record begins. */ @@ -264,4 +271,4 @@ public int getContentBegin() { protected void setContentBegin(final int offset) { this.contentBegin = offset; } -} \ No newline at end of file +}