Skip to content

Commit a02be75

Browse files
committed
Add go wrapper around git diff-tree --raw -r -M
* Implemented calling git diff-tree
* Ensures wrapper function is called with valid arguments
* Parses output into go struct, using strong typing when possible
* Modifies services/gitdiff/testdata/academic-module
  * Makes it a bare repo
  * Adds a branch which updates readme
  * Adds a branch which updates the webpack config
1 parent 77d14fb commit a02be75

16 files changed

+690
-16
lines changed

modules/git/parse.go

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,19 +46,9 @@ func parseLsTreeLine(line []byte) (*LsTreeEntry, error) {
4646
entry.Size = optional.Some(size)
4747
}
4848

49-
switch string(entryMode) {
50-
case "100644":
51-
entry.EntryMode = EntryModeBlob
52-
case "100755":
53-
entry.EntryMode = EntryModeExec
54-
case "120000":
55-
entry.EntryMode = EntryModeSymlink
56-
case "160000":
57-
entry.EntryMode = EntryModeCommit
58-
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
59-
entry.EntryMode = EntryModeTree
60-
default:
61-
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
49+
entry.EntryMode, err = ParseEntryMode(string(entryMode))
50+
if err != nil || entry.EntryMode == EntryModeNoEntry {
51+
return nil, fmt.Errorf("invalid ls-tree output (invalid mode): %q, err: %w", line, err)
6252
}
6353

6454
entry.ID, err = NewIDFromString(string(entryObjectID))

modules/git/tree_entry_mode.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,20 @@
33

44
package git
55

6-
import "strconv"
6+
import (
7+
"fmt"
8+
"strconv"
9+
)
710

811
// EntryMode the type of the object in the git tree
912
type EntryMode int
1013

1114
// There are only a few file modes in Git. They look like unix file modes, but they can only be
1215
// one of these.
1316
const (
17+
// EntryModeNoEntry is possible if the file was added or removed in a commit. When a file is
18+
// added, the base commit does not have the file in its tree, so a mode of 0o000000 is used.
19+
EntryModeNoEntry EntryMode = 0o000000
1420
// EntryModeBlob
1521
EntryModeBlob EntryMode = 0o100644
1622
// EntryModeExec
@@ -33,3 +39,22 @@ func ToEntryMode(value string) EntryMode {
3339
v, _ := strconv.ParseInt(value, 8, 32)
3440
return EntryMode(v)
3541
}
42+
43+
func ParseEntryMode(mode string) (EntryMode, error) {
44+
switch mode {
45+
case "000000":
46+
return EntryModeNoEntry, nil
47+
case "100644":
48+
return EntryModeBlob, nil
49+
case "100755":
50+
return EntryModeExec, nil
51+
case "120000":
52+
return EntryModeSymlink, nil
53+
case "160000":
54+
return EntryModeCommit, nil
55+
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
56+
return EntryModeTree, nil
57+
default:
58+
return 0, fmt.Errorf("unparsable entry mode: %s", mode)
59+
}
60+
}

services/gitdiff/git_diff_tree.go

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
// Copyright 2025 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package gitdiff
5+
6+
import (
7+
"bufio"
8+
"context"
9+
"fmt"
10+
"io"
11+
"strings"
12+
13+
"code.gitea.io/gitea/modules/git"
14+
"code.gitea.io/gitea/modules/log"
15+
)
16+
17+
type DiffTree struct {
18+
Files []*DiffTreeRecord
19+
}
20+
21+
type DiffTreeRecord struct {
22+
// Status is one of 'added', 'deleted', 'modified', 'renamed', 'copied', 'typechanged', 'unmerged', 'unknown'
23+
Status string
24+
25+
HeadPath string
26+
BasePath string
27+
HeadMode git.EntryMode
28+
BaseMode git.EntryMode
29+
HeadBlobID string
30+
BaseBlobID string
31+
}
32+
33+
// GetDiffTree returns the list of path of the files that have changed between the two commits.
34+
// If useMergeBase is true, the diff will be calculated using the merge base of the two commits.
35+
// This is the same behavior as using a three-dot diff in git diff.
36+
func GetDiffTree(ctx context.Context, gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) (*DiffTree, error) {
37+
gitDiffTreeRecords, err := runGitDiffTree(ctx, gitRepo, useMergeBase, baseSha, headSha)
38+
if err != nil {
39+
return nil, err
40+
}
41+
42+
return &DiffTree{
43+
Files: gitDiffTreeRecords,
44+
}, nil
45+
}
46+
47+
func runGitDiffTree(ctx context.Context, gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) ([]*DiffTreeRecord, error) {
48+
useMergeBase, baseCommitID, headCommitID, err := validateGitDiffTreeArguments(gitRepo, useMergeBase, baseSha, headSha)
49+
if err != nil {
50+
return nil, err
51+
}
52+
53+
cmd := git.NewCommand(ctx, "diff-tree", "--raw", "-r", "--find-renames", "--root")
54+
if useMergeBase {
55+
cmd.AddArguments("--merge-base")
56+
}
57+
cmd.AddDynamicArguments(baseCommitID, headCommitID)
58+
stdout, _, runErr := cmd.RunStdString(&git.RunOpts{Dir: gitRepo.Path})
59+
if runErr != nil {
60+
log.Warn("git diff-tree: %v", runErr)
61+
return nil, runErr
62+
}
63+
64+
return parseGitDiffTree(strings.NewReader(stdout))
65+
}
66+
67+
func validateGitDiffTreeArguments(gitRepo *git.Repository, useMergeBase bool, baseSha, headSha string) (bool, string, string, error) {
68+
// if the head is empty its an error
69+
if headSha == "" {
70+
return false, "", "", fmt.Errorf("headSha is empty")
71+
}
72+
73+
// if the head commit doesn't exist its and error
74+
headCommit, err := gitRepo.GetCommit(headSha)
75+
if err != nil {
76+
return false, "", "", fmt.Errorf("failed to get commit headSha: %v", err)
77+
}
78+
headCommitID := headCommit.ID.String()
79+
80+
// if the base is empty we should use the parent of the head commit
81+
if baseSha == "" {
82+
// if the headCommit has no parent we should use an empty commit
83+
// this can happen when we are generating a diff against an orphaned commit
84+
if headCommit.ParentCount() == 0 {
85+
objectFormat, err := gitRepo.GetObjectFormat()
86+
if err != nil {
87+
return false, "", "", err
88+
}
89+
90+
// We set use merge base to false because we have no base commit
91+
return false, objectFormat.EmptyTree().String(), headCommitID, nil
92+
}
93+
94+
baseCommit, err := headCommit.Parent(0)
95+
if err != nil {
96+
return false, "", "", fmt.Errorf("baseSha is '', attempted to use parent of commit %s, got error: %v", headCommit.ID.String(), err)
97+
}
98+
return useMergeBase, baseCommit.ID.String(), headCommitID, nil
99+
}
100+
101+
// try and get the base commit
102+
baseCommit, err := gitRepo.GetCommit(baseSha)
103+
// propagate the error if we couldn't get the base commit
104+
if err != nil {
105+
return useMergeBase, "", "", fmt.Errorf("failed to get base commit %s: %v", baseSha, err)
106+
}
107+
108+
return useMergeBase, baseCommit.ID.String(), headCommit.ID.String(), nil
109+
}
110+
111+
func parseGitDiffTree(gitOutput io.Reader) ([]*DiffTreeRecord, error) {
112+
/*
113+
The output of `git diff-tree --raw -r --find-renames` is of the form:
114+
115+
:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<path>
116+
117+
or for renames:
118+
119+
:<old_mode> <new_mode> <old_sha> <new_sha> <status>\t<old_path>\t<new_path>
120+
121+
See: <https://git-scm.com/docs/git-diff-tree#_raw_output_format> for more details
122+
*/
123+
results := make([]*DiffTreeRecord, 0)
124+
125+
lines := bufio.NewScanner(gitOutput)
126+
for lines.Scan() {
127+
line := lines.Text()
128+
129+
if len(line) == 0 {
130+
continue
131+
}
132+
133+
record, err := parseGitDiffTreeLine(line)
134+
if err != nil {
135+
return nil, err
136+
}
137+
138+
results = append(results, record)
139+
}
140+
141+
if err := lines.Err(); err != nil {
142+
return nil, err
143+
}
144+
145+
return results, nil
146+
}
147+
148+
func parseGitDiffTreeLine(line string) (*DiffTreeRecord, error) {
149+
line = strings.TrimPrefix(line, ":")
150+
splitSections := strings.SplitN(line, "\t", 2)
151+
if len(splitSections) < 2 {
152+
return nil, fmt.Errorf("unparsable output for diff --raw: `%s`)", line)
153+
}
154+
155+
fields := strings.Fields(splitSections[0])
156+
if len(fields) < 5 {
157+
return nil, fmt.Errorf("unparsable output for diff --raw: `%s`, expected 5 space delimited values got %d)", line, len(fields))
158+
}
159+
160+
baseMode, err := git.ParseEntryMode(fields[0])
161+
if err != nil {
162+
return nil, err
163+
}
164+
165+
headMode, err := git.ParseEntryMode(fields[1])
166+
if err != nil {
167+
return nil, err
168+
}
169+
170+
baseBlobID := fields[2]
171+
headBlobID := fields[3]
172+
173+
status, err := statusFromLetter(fields[4])
174+
if err != nil {
175+
return nil, err
176+
}
177+
178+
filePaths := strings.Split(splitSections[1], "\t")
179+
180+
var headPath, basePath string
181+
if status == "renamed" {
182+
if len(filePaths) != 2 {
183+
return nil, fmt.Errorf("unparsable output for diff --raw: `%s`, expected 2 paths found %d", line, len(filePaths))
184+
}
185+
basePath = filePaths[0]
186+
headPath = filePaths[1]
187+
} else {
188+
basePath = filePaths[0]
189+
headPath = filePaths[0]
190+
}
191+
192+
return &DiffTreeRecord{
193+
Status: status,
194+
BaseMode: baseMode,
195+
HeadMode: headMode,
196+
BaseBlobID: baseBlobID,
197+
HeadBlobID: headBlobID,
198+
BasePath: basePath,
199+
HeadPath: headPath,
200+
}, nil
201+
}
202+
203+
// statusFromLetter maps the status column of `git diff-tree --raw` to a descriptive word.
//
// Only the first byte is inspected, so the similarity score that follows 'R' and 'C'
// (for example "R100") is ignored. An empty string or an unrecognised first byte is an error.
func statusFromLetter(letter string) (string, error) {
	if letter == "" {
		return "", fmt.Errorf("empty status letter")
	}

	var status string
	switch code := letter[0]; code {
	case 'A':
		status = "added"
	case 'D':
		status = "deleted"
	case 'M':
		status = "modified"
	case 'R':
		// of the form "R<score>"; the score is deliberately dropped
		status = "renamed"
	case 'C':
		// of the form "C<score>"; the score is deliberately dropped
		status = "copied"
	case 'T':
		status = "typechanged"
	case 'U':
		status = "unmerged"
	case 'X':
		status = "unknown"
	default:
		return "", fmt.Errorf("unknown status letter: '%s'", letter)
	}

	return status, nil
}

0 commit comments

Comments
 (0)