Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ The <action> type attribute can be add,update,fix,remove.
<!-- ADD -->
<action type="add" dev="ggregory" due-to="Inkeet, Gary Gregory, Wolff Bock von Wuelfingen" issue="CODEC-326">Add Base58 support.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add BaseNCodecInputStream.AbstracBuilder.setByteArray(byte[]).</action>
<action type="add" issue="CODEC-335" dev="pkarwasz" due-to="Piotr P. Karwasz">Add DigestUtils.gitBlob() and DigestUtils.gitTree() to compute Git blob and tree object identifiers.</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.apache.commons:commons-parent from 96 to 97.</action>
</release>
Expand Down
132 changes: 132 additions & 0 deletions src/main/java/org/apache/commons/codec/digest/DigestUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,24 @@
package org.apache.commons.codec.digest;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.TreeSet;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.binary.StringUtils;
Expand Down Expand Up @@ -139,6 +146,131 @@ public static byte[] digest(final MessageDigest messageDigest, final RandomAcces
return updateDigest(messageDigest, data).digest();
}

/**
 * Computes a generalized Git blob identifier for the content of a byte array.
 *
 * <p>The identifier is computed in the way described by the
 * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash
 * algorithm: the digest is fed a blob header derived from the data length, followed by the data itself.</p>
 *
 * <p>When the hash algorithm is SHA-1, the identifier is identical to the Git blob identifier and the SWHID contents identifier.</p>
 *
 * @param messageDigest The MessageDigest to use (for example SHA-1).
 * @param data Data to digest.
 * @return A generalized Git blob identifier.
 * @since 1.22.0
 */
public static byte[] gitBlob(final MessageDigest messageDigest, final byte[] data) {
    // The header must be hashed before the payload.
    final byte[] header = gitBlobPrefix(data.length);
    updateDigest(messageDigest, header);
    return digest(messageDigest, data);
}

/**
 * Reads through a file and returns a generalized Git blob identifier for its content.
 *
 * <p>The identifier is computed in the way described by the
 * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#52-contents">SWHID contents identifier</a>, but it can use any hash
 * algorithm: the digest is fed a blob header derived from the file size, followed by the file content.</p>
 *
 * <p>When the hash algorithm is SHA-1, the identifier is identical to the Git blob identifier and the SWHID contents identifier.</p>
 *
 * @param messageDigest The MessageDigest to use (for example SHA-1).
 * @param data The file to digest.
 * @param options Options specifying how the file is opened.
 * @return A generalized Git blob identifier.
 * @throws IOException On error accessing the file.
 * @since 1.22.0
 */
public static byte[] gitBlob(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException {
    // The size is queried up front because the header precedes the content in the digest input.
    final long fileSize = Files.size(data);
    final byte[] header = gitBlobPrefix(fileSize);
    updateDigest(messageDigest, header);
    return updateDigest(messageDigest, data, options).digest();
}

/**
 * Builds the header that precedes the content when hashing a blob: the word {@code blob}, a space, the decimal size and a NUL byte.
 *
 * @param dataSize The size of the blob content in bytes.
 * @return The UTF-8 encoded header.
 */
private static byte[] gitBlobPrefix(final long dataSize) {
    final String header = "blob " + dataSize + "\0";
    return header.getBytes(StandardCharsets.UTF_8);
}

/**
 * Returns a generalized Git tree identifier for a collection of directory entries.
 *
 * <p>The identifier is computed in the way described by the
 * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash
 * algorithm.</p>
 *
 * <p>When the hash algorithm is SHA-1, the identifier is identical to the Git tree identifier and the SWHID directory identifier.</p>
 *
 * <p>The entries are copied into a sorted set, so they are serialized in their natural order and entries that compare as equal are only written
 * once.</p>
 *
 * @param messageDigest The MessageDigest to use (for example SHA-1).
 * @param entries The directory entries.
 * @return A generalized Git tree identifier.
 */
static byte[] gitTree(final MessageDigest messageDigest, final Collection<GitDirectoryEntry> entries) {
    final TreeSet<GitDirectoryEntry> ordered = new TreeSet<>(entries);
    // The serialized entries are buffered first because the header needs their total length.
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    for (final GitDirectoryEntry current : ordered) {
        final byte[] encoded = current.toTreeEntryBytes();
        buffer.write(encoded, 0, encoded.length);
    }
    final byte[] content = buffer.toByteArray();
    updateDigest(messageDigest, gitTreePrefix(content.length));
    return updateDigest(messageDigest, content).digest();
}

/**
 * Walks a directory and returns a generalized Git tree identifier for it.
 *
 * <p>The identifier is computed in the way described by the
 * <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID directory identifier</a>, but it can use any hash
 * algorithm.</p>
 *
 * <p>When the hash algorithm is SHA-1, the identifier is identical to the Git tree identifier and the SWHID directory identifier.</p>
 *
 * <p>Each child of the directory contributes one tree entry:</p>
 * <ul>
 * <li>a sub-directory is hashed recursively as a tree,</li>
 * <li>a symbolic link is <em>not</em> followed: it is hashed as a blob whose content is the link target path,</li>
 * <li>any other file is hashed as a blob of its content.</li>
 * </ul>
 *
 * @param messageDigest The MessageDigest to use (for example SHA-1).
 * @param data The directory to digest.
 * @param options Options specifying how regular files are opened.
 * @return A generalized Git tree identifier.
 * @throws IOException On error accessing the directory or one of its children.
 * @since 1.22.0
 */
public static byte[] gitTree(final MessageDigest messageDigest, final Path data, final OpenOption... options) throws IOException {
    final List<GitDirectoryEntry> entries = new ArrayList<>();
    try (DirectoryStream<Path> files = Files.newDirectoryStream(data)) {
        for (final Path path : files) {
            final GitDirectoryEntry.Type type = getGitDirectoryEntryType(path);
            final byte[] rawObjectId;
            if (type == GitDirectoryEntry.Type.DIRECTORY) {
                rawObjectId = gitTree(messageDigest, path, options);
            } else if (type == GitDirectoryEntry.Type.SYMBOLIC_LINK) {
                // Git stores a symbolic link as a blob containing the link target path, not the content of the file it points to.
                // Following the link would also fail for dangling links and for links to directories.
                // NOTE(review): on file systems whose separator is not '/', the target string may need normalizing - confirm.
                final String target = Files.readSymbolicLink(path).toString();
                rawObjectId = gitBlob(messageDigest, target.getBytes(StandardCharsets.UTF_8));
            } else {
                rawObjectId = gitBlob(messageDigest, path, options);
            }
            entries.add(new GitDirectoryEntry(path, type, rawObjectId));
        }
    }
    return gitTree(messageDigest, entries);
}

/**
 * Classifies a file as one of the {@link GitDirectoryEntry.Type} values.
 *
 * <p>The checks run in a fixed order and the first match wins: symbolic link, directory, executable file; anything else is a regular file.</p>
 *
 * @param path The file to check.
 * @return A {@link GitDirectoryEntry.Type}
 */
private static GitDirectoryEntry.Type getGitDirectoryEntryType(final Path path) {
    final GitDirectoryEntry.Type result;
    // The symbolic link check comes first so that a link is reported as a link, whatever it points to.
    if (Files.isSymbolicLink(path)) {
        result = GitDirectoryEntry.Type.SYMBOLIC_LINK;
    } else if (Files.isDirectory(path)) {
        result = GitDirectoryEntry.Type.DIRECTORY;
    } else if (Files.isExecutable(path)) {
        result = GitDirectoryEntry.Type.EXECUTABLE;
    } else {
        result = GitDirectoryEntry.Type.REGULAR;
    }
    return result;
}

/**
 * Builds the header that precedes the serialized entries when hashing a tree: the word {@code tree}, a space, the decimal size and a NUL byte.
 *
 * @param dataSize The total size of the serialized tree entries in bytes.
 * @return The UTF-8 encoded header.
 */
private static byte[] gitTreePrefix(final long dataSize) {
    final String header = "tree " + dataSize + "\0";
    return header.getBytes(StandardCharsets.UTF_8);
}

/**
* Gets a {@code MessageDigest} for the given {@code algorithm}.
*
Expand Down
158 changes: 158 additions & 0 deletions src/main/java/org/apache/commons/codec/digest/GitDirectoryEntry.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.codec.digest;

import java.nio.charset.StandardCharsets;
import java.nio.file.Path;

/**
 * Represents a single entry in a Git tree object.
 *
 * <p>A Git tree object encodes a directory snapshot. Each entry holds:</p>
 * <ul>
 * <li>a {@link Type} that determines the Unix file mode (e.g. {@code 100644} for a regular file),</li>
 * <li>the entry name (file or directory name, without a path separator),</li>
 * <li>the raw object id of the referenced blob or sub-tree.</li>
 * </ul>
 *
 * <p>Entries are ordered by {@link #compareTo} using Git's tree-sort rule: names are compared byte by byte in their UTF-8 encoding, and directory names
 * are compared as if they ended with {@code '/'}. For example the directory {@code foo} sorts after the file {@code foo.txt} (because {@code '.'}
 * precedes {@code '/'}), although the plain string {@code foo} would sort before {@code foo.txt}.</p>
 *
 * <p>Call {@link #toTreeEntryBytes()} to obtain the binary encoding that Git feeds to its hash function when computing the tree object identifier.</p>
 *
 * @see <a href="https://git-scm.com/book/en/v2/Git-Internals-Git-Objects">Git Internals &ndash; Git Objects</a>
 * @see <a href="https://www.swhid.org/swhid-specification/v1.2/5.Core_identifiers/#53-directories">SWHID Directory Identifier</a>
 */
class GitDirectoryEntry implements Comparable<GitDirectoryEntry> {

    /**
     * The entry name (file or directory name, no path separator).
     */
    private final String name;

    /**
     * The UTF-8 encoded key used for ordering entries within a tree object.
     *
     * <p>Git appends {@code '/'} to directory names before comparing.</p>
     */
    private final byte[] sortKey;

    /**
     * The Git object type, which determines the Unix file-mode prefix.
     */
    private final Type type;

    /**
     * The raw object id of the referenced blob or sub-tree.
     */
    private final byte[] rawObjectId;

    private GitDirectoryEntry(final String name, final Type type, final byte[] rawObjectId) {
        this.name = name;
        this.type = type;
        final String key = type == Type.DIRECTORY ? name + "/" : name;
        // Encoded once here so that compareTo can compare raw bytes.
        this.sortKey = key.getBytes(StandardCharsets.UTF_8);
        this.rawObjectId = rawObjectId;
    }

    /**
     * Creates an entry named after the last element of the given path.
     *
     * @param path The path whose file name becomes the entry name.
     * @param type The type of the entry.
     * @param rawObjectId The raw object id of the referenced blob or sub-tree; the array is stored as is, not copied.
     */
    GitDirectoryEntry(final Path path, final Type type, final byte[] rawObjectId) {
        this(path.getFileName().toString(), type, rawObjectId);
    }

    /**
     * Returns the binary encoding of this entry as it appears inside a Git tree object.
     *
     * <p>The format follows the Git tree entry layout:</p>
     * <pre>
     * &lt;mode&gt; SP &lt;name&gt; NUL &lt;raw-object-id&gt;
     * </pre>
     *
     * <p>The name is encoded as UTF-8. The length of the object id is whatever was passed to the constructor (20 bytes for SHA-1).</p>
     *
     * @return the binary tree-entry encoding; never {@code null}
     */
    byte[] toTreeEntryBytes() {
        final byte[] modeBytes = type.mode;
        final byte[] nameBytes = name.getBytes(StandardCharsets.UTF_8);
        // Two extra bytes: the space after the mode and the NUL after the name.
        final byte[] result = new byte[modeBytes.length + nameBytes.length + rawObjectId.length + 2];
        int offset = 0;
        System.arraycopy(modeBytes, 0, result, offset, modeBytes.length);
        offset += modeBytes.length;
        result[offset++] = ' ';
        System.arraycopy(nameBytes, 0, result, offset, nameBytes.length);
        offset += nameBytes.length;
        result[offset++] = '\0';
        System.arraycopy(rawObjectId, 0, result, offset, rawObjectId.length);
        return result;
    }

    /**
     * Compares two entries using Git's tree-sort rule.
     *
     * <p>The sort keys are compared as unsigned bytes of their UTF-8 encoding. This differs from {@link String#compareTo(String)}, which compares
     * UTF-16 code units and therefore orders supplementary characters before BMP characters in the range U+E000 to U+FFFF.</p>
     *
     * <p>Note that the ordering takes the entry type into account (directories get a trailing {@code '/'}), whereas {@link #equals(Object)} only
     * considers the name.</p>
     *
     * @param o The entry to compare with.
     * @return a negative integer, zero, or a positive integer as this entry sorts before, equal to, or after the given entry.
     */
    @Override
    public int compareTo(final GitDirectoryEntry o) {
        final byte[] left = sortKey;
        final byte[] right = o.sortKey;
        final int common = Math.min(left.length, right.length);
        for (int i = 0; i < common; i++) {
            // Mask to compare as unsigned values; the difference of two values in [0, 255] cannot overflow.
            final int diff = (left[i] & 0xFF) - (right[i] & 0xFF);
            if (diff != 0) {
                return diff;
            }
        }
        return left.length - right.length;
    }

    @Override
    public int hashCode() {
        return name.hashCode();
    }

    /**
     * Tests whether the given object is an entry with the same name; type and object id are not compared.
     *
     * @param obj The object to compare with.
     * @return {@code true} if {@code obj} is a {@code GitDirectoryEntry} with an equal name.
     */
    @Override
    public boolean equals(final Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof GitDirectoryEntry)) {
            return false;
        }
        final GitDirectoryEntry other = (GitDirectoryEntry) obj;
        return name.equals(other.name);
    }

    /**
     * The type of a Git tree entry, which maps to a Unix file-mode string.
     *
     * <p>Git encodes the file type and permission bits as an ASCII octal string that precedes the entry name in the binary tree format. The values
     * defined here cover the four entry types that Git itself produces.</p>
     *
     * <p>This enum is package-private. If it were made public, {@link #mode} would need to be wrapped in an immutable copy to prevent external
     * mutation.</p>
     */
    enum Type {

        /**
         * A sub-directory (Git sub-tree)
         */
        DIRECTORY("40000"),

        /**
         * An executable file
         */
        EXECUTABLE("100755"),

        /**
         * A regular (non-executable) file
         */
        REGULAR("100644"),

        /**
         * A symbolic link
         */
        SYMBOLIC_LINK("120000");

        /**
         * The ASCII-encoded octal mode string as it appears in the binary tree entry.
         */
        private final byte[] mode;

        Type(final String mode) {
            this.mode = mode.getBytes(StandardCharsets.US_ASCII);
        }
    }
}
Loading
Loading