From da24f58a5d5d87a01bc699b53b46b8d711f3b4a0 Mon Sep 17 00:00:00 2001 From: Alexander McRae Date: Thu, 23 Jan 2025 12:24:24 -0800 Subject: [PATCH] Add go wrapper around git diff-tree --raw -r -M * Implemented calling git diff-tree * Ensures wrapper function is called with valid arguments * Parses output into go struct, using strong typing when possible --- modules/git/parse.go | 16 +- modules/git/tree_entry_mode.go | 27 ++- services/gitdiff/git_diff_tree.go | 219 +++++++++++++++++ services/gitdiff/git_diff_tree_test.go | 318 +++++++++++++++++++++++++ 4 files changed, 566 insertions(+), 14 deletions(-) create mode 100644 services/gitdiff/git_diff_tree.go create mode 100644 services/gitdiff/git_diff_tree_test.go diff --git a/modules/git/parse.go b/modules/git/parse.go index eb26632cc0e5c..a7f5c58e8969d 100644 --- a/modules/git/parse.go +++ b/modules/git/parse.go @@ -46,19 +46,9 @@ func parseLsTreeLine(line []byte) (*LsTreeEntry, error) { entry.Size = optional.Some(size) } - switch string(entryMode) { - case "100644": - entry.EntryMode = EntryModeBlob - case "100755": - entry.EntryMode = EntryModeExec - case "120000": - entry.EntryMode = EntryModeSymlink - case "160000": - entry.EntryMode = EntryModeCommit - case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons - entry.EntryMode = EntryModeTree - default: - return nil, fmt.Errorf("unknown type: %v", string(entryMode)) + entry.EntryMode, err = ParseEntryMode(string(entryMode)) + if err != nil || entry.EntryMode == EntryModeNoEntry { + return nil, fmt.Errorf("invalid ls-tree output (invalid mode): %q, err: %w", line, err) } entry.ID, err = NewIDFromString(string(entryObjectID)) diff --git a/modules/git/tree_entry_mode.go b/modules/git/tree_entry_mode.go index a399118cf855e..ec4487549df53 100644 --- a/modules/git/tree_entry_mode.go +++ b/modules/git/tree_entry_mode.go @@ -3,7 +3,10 @@ package git -import "strconv" +import ( + "fmt" + "strconv" +) // EntryMode the 
type of the object in the git tree type EntryMode int @@ -11,6 +14,9 @@ type EntryMode int // There are only a few file modes in Git. They look like unix file modes, but they can only be // one of these. const ( + // EntryModeNoEntry is possible if the file was added or removed in a commit. In the case of + // added the base commit will not have the file in its tree so a mode of 0o000000 is used. + EntryModeNoEntry EntryMode = 0o000000 // EntryModeBlob EntryModeBlob EntryMode = 0o100644 // EntryModeExec @@ -33,3 +39,22 @@ func ToEntryMode(value string) EntryMode { v, _ := strconv.ParseInt(value, 8, 32) return EntryMode(v) } + +func ParseEntryMode(mode string) (EntryMode, error) { + switch mode { + case "000000": + return EntryModeNoEntry, nil + case "100644": + return EntryModeBlob, nil + case "100755": + return EntryModeExec, nil + case "120000": + return EntryModeSymlink, nil + case "160000": + return EntryModeCommit, nil + case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons + return EntryModeTree, nil + default: + return 0, fmt.Errorf("unparsable entry mode: %s", mode) + } +} diff --git a/services/gitdiff/git_diff_tree.go b/services/gitdiff/git_diff_tree.go new file mode 100644 index 0000000000000..fb66555bc9e45 --- /dev/null +++ b/services/gitdiff/git_diff_tree.go @@ -0,0 +1,219 @@ +// Copyright 2025 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT
+
+package gitdiff
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"strings"
+
+	"code.gitea.io/gitea/modules/git"
+	"code.gitea.io/gitea/modules/log"
+)
+
+// DiffTree holds the records parsed from `git diff-tree --raw -r --find-renames`
+// for a pair of commits, one record per changed file.
+type DiffTree struct {
+	Files []*DiffTreeRecord
+}
+
+// DiffTreeRecord describes one changed file, i.e. one line of the raw diff output.
+type DiffTreeRecord struct {
+	// Status is one of 'added', 'deleted', 'modified', 'renamed', 'copied', 'typechanged'
+	Status string
+
+	// HeadPath and BasePath are identical unless the file was renamed or copied,
+	// in which case BasePath is the source and HeadPath the destination.
+	HeadPath string
+	BasePath string
+	// The mode is git.EntryModeNoEntry on the side where the file does not exist.
+	HeadMode git.EntryMode
+	BaseMode git.EntryMode
+	// Blob ids exactly as printed by git.
+	HeadBlobID string
+	BaseBlobID string
+}
+
+// GetDiffTree returns the files that changed between baseSha and headSha.
+//
+// headSha must resolve to a commit. If baseSha is empty, the first parent of the
+// head commit is used as base, or the empty tree when the head commit has no parent.
+func GetDiffTree(ctx context.Context, gitRepo *git.Repository, baseSha, headSha string) (*DiffTree, error) {
+	gitDiffTreeRecords, err := runGitDiffTree(ctx, gitRepo, baseSha, headSha)
+	if err != nil {
+		return nil, err
+	}
+
+	return &DiffTree{
+		Files: gitDiffTreeRecords,
+	}, nil
+}
+
+// runGitDiffTree resolves the two revisions, runs `git diff-tree` in the repository
+// directory and parses the raw output into records.
+func runGitDiffTree(ctx context.Context, gitRepo *git.Repository, baseSha, headSha string) ([]*DiffTreeRecord, error) {
+	baseCommitID, headCommitID, err := validateGitDiffTreeArguments(gitRepo, baseSha, headSha)
+	if err != nil {
+		return nil, err
+	}
+
+	// NOTE(review): paths are taken verbatim from the output; if git C-quotes unusual
+	// path names (see core.quotePath) they would reach callers still quoted. Confirm
+	// the effective git configuration or switch to -z output.
+	cmd := git.NewCommand(ctx, "diff-tree", "--raw", "-r", "--find-renames").AddDynamicArguments(baseCommitID, headCommitID)
+	stdout, _, runErr := cmd.RunStdString(&git.RunOpts{Dir: gitRepo.Path})
+	if runErr != nil {
+		log.Warn("git diff-tree: %v", runErr)
+		return nil, runErr
+	}
+
+	return parseGitDiffTree(stdout)
+}
+
+// validateGitDiffTreeArguments resolves baseSha and headSha to the object ids handed
+// to `git diff-tree`, returned in the order (base, head).
+func validateGitDiffTreeArguments(gitRepo *git.Repository, baseSha, headSha string) (string, string, error) {
+	// if the head is empty it's an error
+	if headSha == "" {
+		return "", "", fmt.Errorf("headSha is empty")
+	}
+
+	// if the head commit doesn't exist it's an error
+	headCommit, err := gitRepo.GetCommit(headSha)
+	if err != nil {
+		// %w keeps the underlying error available to errors.Is / errors.As
+		return "", "", fmt.Errorf("failed to get commit headSha: %w", err)
+	}
+	headCommitID := headCommit.ID.String()
+
+	// if the base is empty we should use the parent of the head commit
+	if baseSha == "" {
+		// if the headCommit has no parent we diff against the empty tree
+		// this can happen when we are generating a diff against an orphaned commit
+		if headCommit.ParentCount() == 0 {
+			objectFormat, err := gitRepo.GetObjectFormat()
+			if err != nil {
+				return "", "", err
+			}
+
+			return objectFormat.EmptyTree().String(), headCommitID, nil
+		}
+
+		baseCommit, err := headCommit.Parent(0)
+		if err != nil {
+			return "", "", fmt.Errorf("baseSha is '', attempted to use parent of commit %s, got error: %w", headCommitID, err)
+		}
+		return baseCommit.ID.String(), headCommitID, nil
+	}
+
+	// try and get the base commit
+	baseCommit, err := gitRepo.GetCommit(baseSha)
+	// propagate the error if we couldn't get the base commit
+	if err != nil {
+		return "", "", fmt.Errorf("failed to get base commit %s: %w", baseSha, err)
+	}
+
+	return baseCommit.ID.String(), headCommitID, nil
+}
+
+// parseGitDiffTree turns the complete raw output into records, skipping blank lines.
+// It returns an empty, non-nil slice when there is nothing to report and fails on
+// the first line that cannot be parsed.
+func parseGitDiffTree(output string) ([]*DiffTreeRecord, error) {
+	/*
+		The output of `git diff-tree --raw -r --find-renames` is of the form:
+
+		:old_mode new_mode old_sha new_sha status\tpath
+
+		or for renames:
+
+		:old_mode new_mode old_sha new_sha status\told_path\tnew_path
+
+		See: https://git-scm.com/docs/git-diff-tree#_raw_output_format for more details
+	*/
+	if output == "" {
+		return []*DiffTreeRecord{}, nil
+	}
+
+	results := make([]*DiffTreeRecord, 0)
+
+	lines := bufio.NewScanner(strings.NewReader(output))
+	for lines.Scan() {
+		line := lines.Text()
+
+		if len(line) == 0 {
+			continue
+		}
+
+		record, err := parseGitDiffTreeLine(line)
+		if err != nil {
+			return nil, err
+		}
+
+		results = append(results, record)
+	}
+
+	if err := lines.Err(); err != nil {
+		return nil, err
+	}
+
+	return results, nil
+}
+
+// parseGitDiffTreeLine parses a single raw diff line: five space separated fields
+// (base mode, head mode, base blob, head blob, status), a tab, and then one path,
+// or two tab separated paths (source, destination) for renames and copies.
+func parseGitDiffTreeLine(line string) (*DiffTreeRecord, error) {
+	line = strings.TrimPrefix(line, ":")
+	splitSections := strings.SplitN(line, "\t", 2)
+	if len(splitSections) < 2 {
+		return nil, fmt.Errorf("unparsable output for diff --raw: `%s`", line)
+	}
+
+	fields := strings.Fields(splitSections[0])
+	if len(fields) < 5 {
+		return nil, fmt.Errorf("unparsable output for diff --raw: `%s`, expected 5 space delimited values got %d", line, len(fields))
+	}
+
+	baseMode, err := git.ParseEntryMode(fields[0])
+	if err != nil {
+		return nil, err
+	}
+
+	headMode, err := git.ParseEntryMode(fields[1])
+	if err != nil {
+		return nil, err
+	}
+
+	baseBlobID := fields[2]
+	headBlobID := fields[3]
+
+	status, err := statusFromLetter(fields[4])
+	if err != nil {
+		return nil, err
+	}
+
+	filePaths := strings.Split(splitSections[1], "\t")
+
+	var headPath, basePath string
+	switch status {
+	case "renamed", "copied":
+		// both statuses carry a source and a destination path
+		if len(filePaths) != 2 {
+			return nil, fmt.Errorf("unparsable output for diff --raw: `%s`, expected 2 paths found %d", line, len(filePaths))
+		}
+		basePath = filePaths[0]
+		headPath = filePaths[1]
+	default:
+		basePath = filePaths[0]
+		headPath = filePaths[0]
+	}
+
+	return &DiffTreeRecord{
+		Status:     status,
+		BaseMode:   baseMode,
+		HeadMode:   headMode,
+		BaseBlobID: baseBlobID,
+		HeadBlobID: headBlobID,
+		BasePath:   basePath,
+		HeadPath:   headPath,
+	}, nil
+}
+
+// statusFromLetter maps the status column of a raw diff line to its long name.
+// Only the first byte is inspected, so a trailing score (e.g. "R087") is ignored.
+func statusFromLetter(letter string) (string, error) {
+	if len(letter) < 1 {
+		return "", fmt.Errorf("empty status letter")
+	}
+	switch letter[0] {
+	case 'A':
+		return "added", nil
+	case 'D':
+		return "deleted", nil
+	case 'M':
+		return "modified", nil
+	case 'R':
+		// This is of the form "R" followed by a score but we are choosing to ignore the score
+		return "renamed", nil
+	case 'C':
+		// This is of the form "C" followed by a score but we are choosing to ignore the score
+		return "copied", nil
+	case 'T':
+		// the type of the entry changed (e.g. regular file to symlink)
+		return "typechanged", nil
+	default:
+		return "", fmt.Errorf("unknown status letter: '%s'", letter)
+	}
+}
diff --git a/services/gitdiff/git_diff_tree_test.go b/services/gitdiff/git_diff_tree_test.go
new file mode 100644
index 0000000000000..ac703d5a4d35c
--- /dev/null
+++ b/services/gitdiff/git_diff_tree_test.go
@@ -0,0 +1,318 @@
+// Copyright 2025 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package gitdiff
+
+import (
+	"testing"
+
+	"code.gitea.io/gitea/models/db"
+	"code.gitea.io/gitea/modules/git"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestGitDiffTree runs GetDiffTree against the repository in testdata and compares
+// the parsed records with the expected ones.
+func TestGitDiffTree(t *testing.T) {
+	test := []struct {
+		Name     string
+		RepoPath string
+		BaseSha  string
+		HeadSha  string
+		Expected *DiffTree
+	}{
+		{
+			Name:     "happy path",
+			RepoPath: "./testdata/academic-module",
+			BaseSha:  "4d3d22609b895d43c2ad21096dc44a875ead8248",
+			HeadSha:  "559c156f8e0178b71cb44355428f24001b08fc68",
+			Expected: &DiffTree{
+				Files: []*DiffTreeRecord{
+					{
+						Status:     "modified",
+						HeadPath:   "Http/Controllers/CurriculumController.php",
+						BasePath:   "Http/Controllers/CurriculumController.php",
+						HeadMode:   git.EntryModeBlob,
+						BaseMode:   git.EntryModeBlob,
+						HeadBlobID: "cb993acce67a5d43d40f0fd321f6be903ed945c2",
+						BaseBlobID: "0b64a81851e374dcf25348a8f2c337e8993715a5",
+					},
+					{
+						Status:     "modified",
+						HeadPath:   "Http/Controllers/PeopleController.php",
+						BasePath:   "Http/Controllers/PeopleController.php",
+						HeadMode:   git.EntryModeBlob,
+						BaseMode:   git.EntryModeBlob,
+						HeadBlobID: "b805a865244ca2615203f7f878fdefe69abf3054",
+						BaseBlobID: "942504b968c56022543915e08e19781d63f03ab6",
+					},
+					{
+						Status:     "modified",
+						HeadPath:   "Http/Controllers/ProgramController.php",
+						BasePath:   "Http/Controllers/ProgramController.php",
+						HeadMode:   git.EntryModeBlob,
+						BaseMode:   git.EntryModeBlob,
+						HeadBlobID: "b21611d5cf3e0d2af82791a8d70a2357f8517c48",
+						BaseBlobID: "cc0c2f4f511b04f94ef6b2e08de9db8b74092d18",
+					},
+					{
+						Status:     "modified",
+						HeadPath:   "Http/Controllers/ProgramDirectorController.php",
+						BasePath:   "Http/Controllers/ProgramDirectorController.php",
+						HeadMode:   git.EntryModeBlob,
+						BaseMode:   git.EntryModeBlob,
+						HeadBlobID: "1f41dc7d61ef1f85ad3f26814d077ea48376808a",
+						BaseBlobID: "70eefec0be7e0aff7877ed6290acfd0ca7417c79",
+					},
+				},
+			},
+		},
+		{
+			Name:     "first commit (no parent)",
+			RepoPath: "./testdata/academic-module",
+			HeadSha:  "07901f79ee86272fa8935f2fe546273adaf02c89",
+			Expected: &DiffTree{
+				Files: []*DiffTreeRecord{
+					{
+						Status:     "added",
+						HeadPath:   "README.md",
+						BasePath:   "README.md",
+						HeadMode:   git.EntryModeBlob,
+						BaseMode:   git.EntryModeNoEntry,
+						HeadBlobID: "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391",
+						BaseBlobID: "0000000000000000000000000000000000000000",
+					},
+				},
+			},
+		},
+		{
+			Name:     "base and head same",
+			RepoPath: "./testdata/academic-module",
+			BaseSha:  "07901f79ee86272fa8935f2fe546273adaf02c89",
+			HeadSha:  "07901f79ee86272fa8935f2fe546273adaf02c89",
+			Expected: &DiffTree{
+				Files: []*DiffTreeRecord{},
+			},
+		},
+		{
+			Name:     "file renamed",
+			RepoPath: "./testdata/academic-module",
+			HeadSha:  "6b8722c210ee91853f77b7bb8b4b3ce706088a03",
+			Expected: &DiffTree{
+				Files: []*DiffTreeRecord{
+					{
+						Status:     "renamed",
+						HeadPath:   "Database/Seeders/AcademicDatabaseSeeder.php",
+						BasePath:   "Database/Seeders/AdminDatabaseSeeder.php",
+						HeadMode:   git.EntryModeBlob,
+						BaseMode:   git.EntryModeBlob,
+						HeadBlobID: "97248f79a90aaf81fe7fd74b33c1cb182dd41783",
+						BaseBlobID: "c8a055cfb45cd39747292983ad1797ceab40f5b1",
+					},
+				},
+			},
+		},
+	}
+
+	for _, tt := range test {
+		t.Run(tt.Name, func(t *testing.T) {
+			gitRepo, err := git.OpenRepository(git.DefaultContext, tt.RepoPath)
+			// stop right here if the repository cannot be opened: gitRepo is unusable then
+			require.NoError(t, err)
+			defer gitRepo.Close()
+
+			diffPaths, err := GetDiffTree(db.DefaultContext, gitRepo, tt.BaseSha, tt.HeadSha)
+			require.NoError(t, err)
+
+			assert.Equal(t, tt.Expected, diffPaths)
+		})
+	}
+}
+
+// TestGitDiffTreeErrors checks that GetDiffTree returns an error and a nil result
+// for a missing head, an unknown head and an unknown base.
+func TestGitDiffTreeErrors(t *testing.T) {
+	test := []struct {
+		Name     string
+		RepoPath string
+		BaseSha  string
+		HeadSha  string
+	}{
+		{
+			Name:     "head doesn't exist",
+			RepoPath: "./testdata/academic-module",
+			BaseSha:  "4d3d22609b895d43c2ad21096dc44a875ead8248",
+			HeadSha:  "asdfasdfasdf",
+		},
+		{
+			Name:     "base doesn't exist",
+			RepoPath: "./testdata/academic-module",
+			BaseSha:  "asdfasdfasdf",
+			HeadSha:  "07901f79ee86272fa8935f2fe546273adaf02c89",
+		},
+		{
+			Name:     "head not set",
+			RepoPath: "./testdata/academic-module",
+			BaseSha:  "07901f79ee86272fa8935f2fe546273adaf02c89",
+		},
+	}
+
+	for _, tt := range test {
+		t.Run(tt.Name, func(t *testing.T) {
+			gitRepo, err := git.OpenRepository(git.DefaultContext, tt.RepoPath)
+			// stop right here if the repository cannot be opened: gitRepo is unusable then
+			require.NoError(t, err)
+			defer gitRepo.Close()
+
+			diffPaths, err := GetDiffTree(db.DefaultContext, gitRepo, tt.BaseSha, tt.HeadSha)
+			assert.Error(t, err)
+			assert.Nil(t, diffPaths)
+		})
+	}
+}
+
+// TestParseGitDiffTree feeds literal raw diff output to parseGitDiffTree; no
+// repository is involved.
+func TestParseGitDiffTree(t *testing.T) {
+	test := []struct {
+		Name      string
+		GitOutput string
+		Expected  []*DiffTreeRecord
+	}{
+		{
+			Name:      "file change",
+			GitOutput: ":100644 100644 64e43d23bcd08db12563a0a4d84309cadb437e1a 5dbc7792b5bb228647cfcc8dfe65fc649119dedc M\tResources/views/curriculum/edit.blade.php",
+			Expected: []*DiffTreeRecord{
+				{
+					Status:     "modified",
+					HeadPath:   "Resources/views/curriculum/edit.blade.php",
+					BasePath:   "Resources/views/curriculum/edit.blade.php",
+					HeadMode:   git.EntryModeBlob,
+					BaseMode:   git.EntryModeBlob,
+					HeadBlobID: "5dbc7792b5bb228647cfcc8dfe65fc649119dedc",
+					BaseBlobID: "64e43d23bcd08db12563a0a4d84309cadb437e1a",
+				},
+			},
+		},
+		{
+			Name:      "file added",
+			GitOutput: ":000000 100644 0000000000000000000000000000000000000000 0063162fb403db15ceb0517b34ab782e4e58b619 A\tResources/views/class/index.blade.php",
+			Expected: []*DiffTreeRecord{
+				{
+					Status:     "added",
+					HeadPath:   "Resources/views/class/index.blade.php",
+					BasePath:   "Resources/views/class/index.blade.php",
+					HeadMode:   git.EntryModeBlob,
+					BaseMode:   git.EntryModeNoEntry,
+					HeadBlobID: "0063162fb403db15ceb0517b34ab782e4e58b619",
+					BaseBlobID: "0000000000000000000000000000000000000000",
+				},
+			},
+		},
+		{
+			Name:      "file deleted",
+			GitOutput: ":100644 000000 bac4286303c8c0017ea2f0a48c561ddcc0330a14 0000000000000000000000000000000000000000 D\tResources/views/classes/index.blade.php",
+			Expected: []*DiffTreeRecord{
+				{
+					Status:     "deleted",
+					HeadPath:   "Resources/views/classes/index.blade.php",
+					BasePath:   "Resources/views/classes/index.blade.php",
+					HeadMode:   git.EntryModeNoEntry,
+					BaseMode:   git.EntryModeBlob,
+					HeadBlobID: "0000000000000000000000000000000000000000",
+					BaseBlobID: "bac4286303c8c0017ea2f0a48c561ddcc0330a14",
+				},
+			},
+		},
+		{
+			Name:      "file renamed",
+			GitOutput: ":100644 100644 c8a055cfb45cd39747292983ad1797ceab40f5b1 97248f79a90aaf81fe7fd74b33c1cb182dd41783 R087\tDatabase/Seeders/AdminDatabaseSeeder.php\tDatabase/Seeders/AcademicDatabaseSeeder.php",
+			Expected: []*DiffTreeRecord{
+				{
+					Status:     "renamed",
+					HeadPath:   "Database/Seeders/AcademicDatabaseSeeder.php",
+					BasePath:   "Database/Seeders/AdminDatabaseSeeder.php",
+					HeadMode:   git.EntryModeBlob,
+					BaseMode:   git.EntryModeBlob,
+					HeadBlobID: "97248f79a90aaf81fe7fd74b33c1cb182dd41783",
+					BaseBlobID: "c8a055cfb45cd39747292983ad1797ceab40f5b1",
+				},
+			},
+		},
+		{
+			Name:      "no changes",
+			GitOutput: ``,
+			Expected:  []*DiffTreeRecord{},
+		},
+		{
+			Name: "multiple changes",
+			GitOutput: ":000000 100644 0000000000000000000000000000000000000000 db736b44533a840981f1f17b7029d0f612b69550 A\tHttp/Controllers/ClassController.php\n" +
+				":100644 000000 9a4d2344d4d0145db7c91b3f3e123c74367d4ef4 0000000000000000000000000000000000000000 D\tHttp/Controllers/ClassesController.php\n" +
+				":100644 100644 f060d6aede65d423f49e7dc248dfa0d8835ef920 b82c8e39a3602dedadb44669956d6eb5b6a7cc86 M\tHttp/Controllers/ProgramDirectorController.php\n",
+			Expected: []*DiffTreeRecord{
+				{
+					Status:     "added",
+					HeadPath:   "Http/Controllers/ClassController.php",
+					BasePath:   "Http/Controllers/ClassController.php",
+					HeadMode:   git.EntryModeBlob,
+					BaseMode:   git.EntryModeNoEntry,
+					HeadBlobID: "db736b44533a840981f1f17b7029d0f612b69550",
+					BaseBlobID: "0000000000000000000000000000000000000000",
+				},
+				{
+					Status:     "deleted",
+					HeadPath:   "Http/Controllers/ClassesController.php",
+					BasePath:   "Http/Controllers/ClassesController.php",
+					HeadMode:   git.EntryModeNoEntry,
+					BaseMode:   git.EntryModeBlob,
+					HeadBlobID: "0000000000000000000000000000000000000000",
+					BaseBlobID: "9a4d2344d4d0145db7c91b3f3e123c74367d4ef4",
+				},
+				{
+					Status:     "modified",
+					HeadPath:   "Http/Controllers/ProgramDirectorController.php",
+					BasePath:   "Http/Controllers/ProgramDirectorController.php",
+					HeadMode:   git.EntryModeBlob,
+					BaseMode:   git.EntryModeBlob,
+					HeadBlobID: "b82c8e39a3602dedadb44669956d6eb5b6a7cc86",
+					BaseBlobID: "f060d6aede65d423f49e7dc248dfa0d8835ef920",
+				},
+			},
+		},
+		{
+			Name: "spaces in file path",
+			GitOutput: ":000000 100644 0000000000000000000000000000000000000000 db736b44533a840981f1f17b7029d0f612b69550 A\tHttp /Controllers/Class Controller.php\n" +
+				":100644 000000 9a4d2344d4d0145db7c91b3f3e123c74367d4ef4 0000000000000000000000000000000000000000 D\tHttp/Cont rollers/Classes Controller.php\n" +
+				":100644 100644 f060d6aede65d423f49e7dc248dfa0d8835ef920 b82c8e39a3602dedadb44669956d6eb5b6a7cc86 R\tHttp/Controllers/Program Director Controller.php\tHttp/Cont rollers/ProgramDirectorController.php\n",
+			Expected: []*DiffTreeRecord{
+				{
+					Status:     "added",
+					HeadPath:   "Http /Controllers/Class Controller.php",
+					BasePath:   "Http /Controllers/Class Controller.php",
+					HeadMode:   git.EntryModeBlob,
+					BaseMode:   git.EntryModeNoEntry,
+					HeadBlobID: "db736b44533a840981f1f17b7029d0f612b69550",
+					BaseBlobID: "0000000000000000000000000000000000000000",
+				},
+				{
+					Status:     "deleted",
+					HeadPath:   "Http/Cont rollers/Classes Controller.php",
+					BasePath:   "Http/Cont rollers/Classes Controller.php",
+					HeadMode:   git.EntryModeNoEntry,
+					BaseMode:   git.EntryModeBlob,
+					HeadBlobID: "0000000000000000000000000000000000000000",
+					BaseBlobID: "9a4d2344d4d0145db7c91b3f3e123c74367d4ef4",
+				},
+				{
+					Status:     "renamed",
+					HeadPath:   "Http/Cont rollers/ProgramDirectorController.php",
+					BasePath:   "Http/Controllers/Program Director Controller.php",
+					HeadMode:   git.EntryModeBlob,
+					BaseMode:   git.EntryModeBlob,
+					HeadBlobID: "b82c8e39a3602dedadb44669956d6eb5b6a7cc86",
+					BaseBlobID: "f060d6aede65d423f49e7dc248dfa0d8835ef920",
+				},
+			},
+		},
+	}
+
+	for _, tt := range test {
+		t.Run(tt.Name, func(t *testing.T) {
+			entries, err := parseGitDiffTree(tt.GitOutput)
+			assert.NoError(t, err)
+			assert.Equal(t, tt.Expected, entries)
+		})
+	}
+}