Skip to content

Commit a94e494

Browse files
committed
Refactor iterate git tree
1 parent 1dbf0d7 commit a94e494

File tree

6 files changed

+105
-46
lines changed

6 files changed

+105
-46
lines changed

modules/actions/workflows.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package actions
55

66
import (
77
"bytes"
8+
"context"
89
"io"
910
"strings"
1011

@@ -55,7 +56,7 @@ func ListWorkflows(commit *git.Commit) (git.Entries, error) {
5556
return nil, err
5657
}
5758

58-
entries, err := tree.ListEntriesRecursiveFast()
59+
entries, err := tree.ListEntriesRecursiveFast(context.Background())
5960
if err != nil {
6061
return nil, err
6162
}

modules/git/parse_nogogit.go

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,15 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
2424
var sepSpace = []byte{' '}
2525

2626
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
27-
var err error
2827
entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
28+
return entries, iterateTreeEntries(data, ptree, func(entry *TreeEntry) error {
29+
entries = append(entries, entry)
30+
return nil
31+
})
32+
}
33+
34+
func iterateTreeEntries(data []byte, ptree *Tree, f func(entry *TreeEntry) error) error {
35+
var err error
2936
for pos := 0; pos < len(data); {
3037
// expect line to be of the form:
3138
// <mode> <type> <sha> <space-padded-size>\t<filename>
@@ -39,7 +46,7 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
3946
line := data[pos:posEnd]
4047
posTab := bytes.IndexByte(line, '\t')
4148
if posTab == -1 {
42-
return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line)
49+
return fmt.Errorf("invalid ls-tree output (no tab): %q", line)
4350
}
4451

4552
entry := new(TreeEntry)
@@ -69,27 +76,29 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
6976
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
7077
entry.entryMode = EntryModeTree
7178
default:
72-
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
79+
return fmt.Errorf("unknown type: %v", string(entryMode))
7380
}
7481

7582
entry.ID, err = NewIDFromString(string(entryObjectID))
7683
if err != nil {
77-
return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
84+
return fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
7885
}
7986

8087
if len(entryName) > 0 && entryName[0] == '"' {
8188
entry.name, err = strconv.Unquote(string(entryName))
8289
if err != nil {
83-
return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
90+
return fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
8491
}
8592
} else {
8693
entry.name = string(entryName)
8794
}
8895

8996
pos = posEnd + 1
90-
entries = append(entries, entry)
97+
if err := f(entry); err != nil {
98+
return err
99+
}
91100
}
92-
return entries, nil
101+
return nil
93102
}
94103

95104
func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd *bufio.Reader, sz int64) ([]*TreeEntry, error) {

modules/git/repo_language_stats_nogogit.go

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,6 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
5757

5858
tree := commit.Tree
5959

60-
entries, err := tree.ListEntriesRecursiveWithSize()
61-
if err != nil {
62-
return nil, err
63-
}
64-
6560
checker, deferable := repo.CheckAttributeReader(commitID)
6661
defer deferable()
6762

@@ -77,18 +72,18 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
7772
firstExcludedLanguage := ""
7873
firstExcludedLanguageSize := int64(0)
7974

80-
for _, f := range entries {
75+
if err := tree.IterateEntriesWithSize(func(f *TreeEntry) error {
8176
select {
8277
case <-repo.Ctx.Done():
83-
return sizes, repo.Ctx.Err()
78+
return repo.Ctx.Err()
8479
default:
8580
}
8681

8782
contentBuf.Reset()
8883
content = contentBuf.Bytes()
8984

9085
if f.Size() == 0 {
91-
continue
86+
return nil
9287
}
9388

9489
isVendored := optional.None[bool]()
@@ -101,22 +96,22 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
10196
if err == nil {
10297
isVendored = AttributeToBool(attrs, AttributeLinguistVendored)
10398
if isVendored.ValueOrDefault(false) {
104-
continue
99+
return nil
105100
}
106101

107102
isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated)
108103
if isGenerated.ValueOrDefault(false) {
109-
continue
104+
return nil
110105
}
111106

112107
isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation)
113108
if isDocumentation.ValueOrDefault(false) {
114-
continue
109+
return nil
115110
}
116111

117112
isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable)
118113
if !isDetectable.ValueOrDefault(true) {
119-
continue
114+
return nil
120115
}
121116

122117
hasLanguage := TryReadLanguageAttribute(attrs)
@@ -131,7 +126,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
131126

132127
// this language will always be added to the size
133128
sizes[language] += f.Size()
134-
continue
129+
return nil
135130
}
136131
}
137132
}
@@ -140,19 +135,19 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
140135
enry.IsDotFile(f.Name()) ||
141136
(!isDocumentation.Has() && enry.IsDocumentation(f.Name())) ||
142137
enry.IsConfiguration(f.Name()) {
143-
continue
138+
return nil
144139
}
145140

146141
// If content can not be read or file is too big just do detection by filename
147142

148143
if f.Size() <= bigFileSize {
149144
if err := writeID(f.ID.String()); err != nil {
150-
return nil, err
145+
return err
151146
}
152147
_, _, size, err := ReadBatchLine(batchReader)
153148
if err != nil {
154149
log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err)
155-
return nil, err
150+
return err
156151
}
157152

158153
sizeToRead := size
@@ -164,22 +159,22 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
164159

165160
_, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead))
166161
if err != nil {
167-
return nil, err
162+
return err
168163
}
169164
content = contentBuf.Bytes()
170165
if err := DiscardFull(batchReader, discard); err != nil {
171-
return nil, err
166+
return err
172167
}
173168
}
174169
if !isGenerated.Has() && enry.IsGenerated(f.Name(), content) {
175-
continue
170+
return nil
176171
}
177172

178173
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
179174
// - e.g. do all the detection tests using the filename first, before reading the content.
180175
language := analyze.GetCodeLanguage(f.Name(), content)
181176
if language == "" {
182-
continue
177+
return nil
183178
}
184179

185180
// group languages, such as Pug -> HTML; SCSS -> CSS
@@ -200,6 +195,9 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
200195
firstExcludedLanguage = language
201196
firstExcludedLanguageSize += f.Size()
202197
}
198+
return nil
199+
}); err != nil {
200+
return sizes, err
203201
}
204202

205203
// If there are no included languages add the first excluded language

modules/git/tree_nogogit.go

Lines changed: 67 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
package git
77

88
import (
9+
"bufio"
10+
"context"
911
"io"
1012
"strings"
1113
)
@@ -91,34 +93,83 @@ func (t *Tree) ListEntries() (Entries, error) {
9193

9294
// listEntriesRecursive returns all entries of current tree recursively including all subtrees
9395
// extraArgs could be "-l" to get the size, which is slower
94-
func (t *Tree) listEntriesRecursive(extraArgs TrustedCmdArgs) (Entries, error) {
96+
func (t *Tree) listEntriesRecursive(ctx context.Context, extraArgs TrustedCmdArgs) (Entries, error) {
9597
if t.entriesRecursiveParsed {
9698
return t.entriesRecursive, nil
9799
}
98100

99-
stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-r").
100-
AddArguments(extraArgs...).
101-
AddDynamicArguments(t.ID.String()).
102-
RunStdBytes(&RunOpts{Dir: t.repo.Path})
103-
if runErr != nil {
104-
return nil, runErr
105-
}
106-
107-
var err error
108-
t.entriesRecursive, err = parseTreeEntries(stdout, t)
101+
t.entriesRecursive = make([]*TreeEntry, 0)
102+
err := t.iterateEntriesRecursive(func(entry *TreeEntry) error {
103+
select {
104+
case <-ctx.Done():
105+
return ctx.Err()
106+
default:
107+
}
108+
t.entriesRecursive = append(t.entriesRecursive, entry)
109+
return nil
110+
}, extraArgs)
109111
if err == nil {
110112
t.entriesRecursiveParsed = true
111113
}
112-
113114
return t.entriesRecursive, err
114115
}
115116

116117
// ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size
117-
func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
118-
return t.listEntriesRecursive(nil)
118+
func (t *Tree) ListEntriesRecursiveFast(ctx context.Context) (Entries, error) {
119+
return t.listEntriesRecursive(ctx, nil)
119120
}
120121

121122
// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees, with size
122-
func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
123-
return t.listEntriesRecursive(TrustedCmdArgs{"--long"})
123+
func (t *Tree) ListEntriesRecursiveWithSize(ctx context.Context) (Entries, error) {
124+
return t.listEntriesRecursive(ctx, TrustedCmdArgs{"--long"})
125+
}
126+
127+
// iterateEntriesRecursive returns iterate entries of current tree recursively including all subtrees
128+
// extraArgs could be "-l" to get the size, which is slower
129+
func (t *Tree) iterateEntriesRecursive(f func(entry *TreeEntry) error, extraArgs TrustedCmdArgs) error {
130+
if t.entriesRecursiveParsed {
131+
return nil
132+
}
133+
134+
reader, writer := io.Pipe()
135+
done := make(chan error)
136+
137+
go func(done chan error, writer *io.PipeWriter, reader *io.PipeReader) {
138+
runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-r").
139+
AddArguments(extraArgs...).
140+
AddDynamicArguments(t.ID.String()).
141+
Run(&RunOpts{
142+
Dir: t.repo.Path,
143+
Stdout: writer,
144+
})
145+
146+
_ = writer.Close()
147+
_ = reader.Close()
148+
149+
done <- runErr
150+
}(done, writer, reader)
151+
152+
scanner := bufio.NewScanner(reader)
153+
for scanner.Scan() {
154+
if err := scanner.Err(); err != nil {
155+
return err
156+
}
157+
data := scanner.Bytes()
158+
if err := iterateTreeEntries(data, t, func(entry *TreeEntry) error {
159+
select {
160+
case runErr := <-done:
161+
return runErr
162+
default:
163+
return f(entry)
164+
}
165+
}); err != nil {
166+
return err
167+
}
168+
}
169+
t.entriesRecursiveParsed = true
170+
return nil
171+
}
172+
173+
func (t *Tree) IterateEntriesWithSize(f func(*TreeEntry) error) error {
174+
return t.iterateEntriesRecursive(f, TrustedCmdArgs{"--long"})
124175
}

routers/web/repo/treelist.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ func TreeList(ctx *context.Context) {
2121
return
2222
}
2323

24-
entries, err := tree.ListEntriesRecursiveFast()
24+
entries, err := tree.ListEntriesRecursiveFast(ctx)
2525
if err != nil {
2626
ctx.ServerError("ListEntriesRecursiveFast", err)
2727
return

services/repository/files/tree.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func GetTreeBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
4747
tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA)
4848
var entries git.Entries
4949
if recursive {
50-
entries, err = gitTree.ListEntriesRecursiveWithSize()
50+
entries, err = gitTree.ListEntriesRecursiveWithSize(ctx)
5151
} else {
5252
entries, err = gitTree.ListEntries()
5353
}

0 commit comments

Comments
 (0)