Skip to content

Commit

Permalink
Add go wrapper around git diff-tree --raw -r -M
Browse files Browse the repository at this point in the history
 * Implemented calling git diff-tree
 * Ensures wrapper function is called with valid arguments
 * Parses output into go struct, using strong typing when possible
  • Loading branch information
McRaeAlex committed Jan 24, 2025
1 parent e94f37f commit da24f58
Show file tree
Hide file tree
Showing 4 changed files with 566 additions and 14 deletions.
16 changes: 3 additions & 13 deletions modules/git/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,9 @@ func parseLsTreeLine(line []byte) (*LsTreeEntry, error) {
entry.Size = optional.Some(size)
}

switch string(entryMode) {
case "100644":
entry.EntryMode = EntryModeBlob
case "100755":
entry.EntryMode = EntryModeExec
case "120000":
entry.EntryMode = EntryModeSymlink
case "160000":
entry.EntryMode = EntryModeCommit
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
entry.EntryMode = EntryModeTree
default:
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
entry.EntryMode, err = ParseEntryMode(string(entryMode))
if err != nil || entry.EntryMode == EntryModeNoEntry {
return nil, fmt.Errorf("invalid ls-tree output (invalid mode): %q, err: %w", line, err)
}

entry.ID, err = NewIDFromString(string(entryObjectID))
Expand Down
27 changes: 26 additions & 1 deletion modules/git/tree_entry_mode.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,20 @@

package git

import "strconv"
import (
"fmt"
"strconv"
)

// EntryMode the type of the object in the git tree
type EntryMode int

// There are only a few file modes in Git. They look like unix file modes, but they can only be
// one of these.
const (
// EntryModeNoEntry is used when a file was added or removed in a commit. For an added file,
// the base commit does not have the file in its tree, so a mode of 0o000000 is used.
EntryModeNoEntry EntryMode = 0o000000
// EntryModeBlob
EntryModeBlob EntryMode = 0o100644
// EntryModeExec
Expand All @@ -33,3 +39,22 @@ func ToEntryMode(value string) EntryMode {
v, _ := strconv.ParseInt(value, 8, 32)
return EntryMode(v)
}

// ParseEntryMode converts the six-character octal mode string printed by git into the
// matching EntryMode constant.
//
// Only the fixed set of modes listed below is accepted; any other string yields an error
// together with a zero EntryMode.
func ParseEntryMode(mode string) (EntryMode, error) {
	var parsed EntryMode

	switch mode {
	case "000000":
		parsed = EntryModeNoEntry
	case "100644":
		parsed = EntryModeBlob
	case "100755":
		parsed = EntryModeExec
	case "120000":
		parsed = EntryModeSymlink
	case "160000":
		parsed = EntryModeCommit
	case "040000", "040755":
		// git uses 040000 for tree object, but some users may get 040755 for unknown reasons
		parsed = EntryModeTree
	default:
		return 0, fmt.Errorf("unparsable entry mode: %s", mode)
	}

	return parsed, nil
}
219 changes: 219 additions & 0 deletions services/gitdiff/git_diff_tree.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
// Copyright 2025 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package gitdiff

import (
"bufio"
"context"
"fmt"
"strings"

"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
)

// DiffTree is the parsed result of a `git diff-tree` run between two commits.
type DiffTree struct {
// Files holds the records parsed from the diff-tree output, one per non-empty output line.
Files []*DiffTreeRecord
}

// DiffTreeRecord describes one changed file as reported by a single line of
// `git diff-tree --raw` output. "Base" refers to the old side of the diff and
// "Head" to the new side.
type DiffTreeRecord struct {
	// Status is one of 'added', 'deleted', 'modified', 'renamed', 'copied'
	Status string

	// HeadPath and BasePath are the file's path on the head and base side respectively.
	// They differ for renames.
	HeadPath, BasePath string
	// HeadMode and BaseMode are the git entry modes on the head and base side.
	HeadMode, BaseMode git.EntryMode
	// HeadBlobID and BaseBlobID are the object ID strings taken verbatim from the
	// diff-tree output for the head and base side.
	HeadBlobID, BaseBlobID string
}

// GetDiffTree returns the files that changed between the two commits, wrapped in a DiffTree.
// Argument validation and the actual git invocation are delegated to runGitDiffTree; any
// error from it is returned unchanged.
func GetDiffTree(ctx context.Context, gitRepo *git.Repository, baseSha, headSha string) (*DiffTree, error) {
	records, err := runGitDiffTree(ctx, gitRepo, baseSha, headSha)
	if err != nil {
		return nil, err
	}

	tree := &DiffTree{Files: records}
	return tree, nil
}

// runGitDiffTree validates the two revisions, runs
// `git diff-tree --raw -r --find-renames <base> <head>` in the repository directory and
// parses its standard output into records.
//
// A failure of the git command is logged as a warning and returned to the caller.
func runGitDiffTree(ctx context.Context, gitRepo *git.Repository, baseSha, headSha string) ([]*DiffTreeRecord, error) {
	baseID, headID, err := validateGitDiffTreeArguments(gitRepo, baseSha, headSha)
	if err != nil {
		return nil, err
	}

	runOpts := &git.RunOpts{Dir: gitRepo.Path}
	rawOutput, _, runErr := git.NewCommand(ctx, "diff-tree", "--raw", "-r", "--find-renames").
		AddDynamicArguments(baseID, headID).
		RunStdString(runOpts)
	if runErr == nil {
		return parseGitDiffTree(rawOutput)
	}

	log.Warn("git diff-tree: %v", runErr)
	return nil, runErr
}

// validateGitDiffTreeArguments resolves baseSha and headSha into the IDs that are passed to
// `git diff-tree`, returned as (base, head, error).
//
// Rules:
//   - headSha must be non-empty and must resolve to a commit in gitRepo.
//   - A non-empty baseSha must resolve to a commit in gitRepo.
//   - An empty baseSha falls back to the first parent of the head commit, or to the object
//     format's empty tree when the head commit has no parents.
//
// Errors from the repository lookups are wrapped with %w so callers can still inspect the
// underlying cause.
func validateGitDiffTreeArguments(gitRepo *git.Repository, baseSha, headSha string) (string, string, error) {
	// an empty head is an error
	if headSha == "" {
		return "", "", fmt.Errorf("headSha is empty")
	}

	// a head commit that doesn't exist is an error
	headCommit, err := gitRepo.GetCommit(headSha)
	if err != nil {
		return "", "", fmt.Errorf("failed to get commit headSha: %w", err)
	}
	headCommitID := headCommit.ID.String()

	// an explicit base must resolve to an existing commit
	if baseSha != "" {
		baseCommit, err := gitRepo.GetCommit(baseSha)
		if err != nil {
			return "", "", fmt.Errorf("failed to get base commit %s: %w", baseSha, err)
		}
		return baseCommit.ID.String(), headCommitID, nil
	}

	// the base is empty, so use the parent of the head commit; if the head commit has no
	// parent, diff against the empty tree instead. This can happen when we are generating a
	// diff against an orphaned commit.
	if headCommit.ParentCount() == 0 {
		objectFormat, err := gitRepo.GetObjectFormat()
		if err != nil {
			return "", "", err
		}
		return objectFormat.EmptyTree().String(), headCommitID, nil
	}

	baseCommit, err := headCommit.Parent(0)
	if err != nil {
		return "", "", fmt.Errorf("baseSha is '', attempted to use parent of commit %s, got error: %w", headCommitID, err)
	}
	return baseCommit.ID.String(), headCommitID, nil
}

// parseGitDiffTree turns the full output of `git diff-tree --raw -r --find-renames` into a
// slice of records, one per non-empty line.
//
// Each line is of the form:
//
//	:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<path>
//
// or for renames:
//
//	:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<old_path>\t<new_path>
//
// See: <https://git-scm.com/docs/git-diff-tree#_raw_output_format> for more details.
//
// Empty output yields an empty, non-nil slice. The first line that fails to parse aborts
// the whole call with that error.
func parseGitDiffTree(output string) ([]*DiffTreeRecord, error) {
	records := []*DiffTreeRecord{}

	scanner := bufio.NewScanner(strings.NewReader(output))
	for scanner.Scan() {
		rawLine := scanner.Text()
		if rawLine == "" {
			continue
		}

		parsed, err := parseGitDiffTreeLine(rawLine)
		if err != nil {
			return nil, err
		}
		records = append(records, parsed)
	}

	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}
	return records, nil
}

// parseGitDiffTreeLine parses one line of `git diff-tree --raw` output into a DiffTreeRecord.
//
// The line is split at the first tab into a metadata section and a path section:
//
//	:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<path>
//	:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<src_path>\t<dst_path>
//
// The two-path form is emitted by git for renames (R<score>) and copies (C<score>); for
// those statuses exactly two paths are required and they become BasePath and HeadPath.
// For every other status the first path is used for both sides.
//
// An error is returned when the tab separator is missing, fewer than five metadata fields
// are present, a mode or status letter is not recognised, or a rename/copy line does not
// carry exactly two paths.
//
// NOTE(review): paths are used as printed; any quoting git applies to unusual path names
// is not undone here — confirm whether callers need `-z` output instead.
func parseGitDiffTreeLine(line string) (*DiffTreeRecord, error) {
	line = strings.TrimPrefix(line, ":")
	splitSections := strings.SplitN(line, "\t", 2)
	if len(splitSections) < 2 {
		return nil, fmt.Errorf("unparsable output for diff --raw: `%s`", line)
	}

	fields := strings.Fields(splitSections[0])
	if len(fields) < 5 {
		return nil, fmt.Errorf("unparsable output for diff --raw: `%s`, expected 5 space delimited values got %d", line, len(fields))
	}

	baseMode, err := git.ParseEntryMode(fields[0])
	if err != nil {
		return nil, err
	}

	headMode, err := git.ParseEntryMode(fields[1])
	if err != nil {
		return nil, err
	}

	baseBlobID := fields[2]
	headBlobID := fields[3]

	status, err := statusFromLetter(fields[4])
	if err != nil {
		return nil, err
	}

	filePaths := strings.Split(splitSections[1], "\t")

	// Default: a single path that applies to both sides of the diff.
	basePath, headPath := filePaths[0], filePaths[0]

	// Renames and copies both list the source path followed by the destination path.
	if status == "renamed" || status == "copied" {
		if len(filePaths) != 2 {
			return nil, fmt.Errorf("unparsable output for diff --raw: `%s`, expected 2 paths found %d", line, len(filePaths))
		}
		headPath = filePaths[1]
	}

	return &DiffTreeRecord{
		Status:     status,
		BaseMode:   baseMode,
		HeadMode:   headMode,
		BaseBlobID: baseBlobID,
		HeadBlobID: headBlobID,
		BasePath:   basePath,
		HeadPath:   headPath,
	}, nil
}

// statusFromLetter maps the status column of a raw diff line to its long-form name.
//
// Only the first byte of letter is inspected, so "R100" and "C75" map to "renamed" and
// "copied" with the similarity score ignored. An empty string or an unrecognised first
// byte results in an error and an empty status.
func statusFromLetter(letter string) (string, error) {
	if letter == "" {
		return "", fmt.Errorf("empty status letter")
	}

	var status string
	switch code := letter[0]; code {
	case 'A':
		status = "added"
	case 'D':
		status = "deleted"
	case 'M':
		status = "modified"
	case 'R':
		// This is of the form "R<score>" but we are choosing to ignore the score
		status = "renamed"
	case 'C':
		// This is of the form "C<score>" but we are choosing to ignore the score
		status = "copied"
	default:
		return "", fmt.Errorf("unknown status letter: '%s'", letter)
	}

	return status, nil
}
Loading

0 comments on commit da24f58

Please sign in to comment.