7
7
package git
8
8
9
9
import (
10
+ "bufio"
10
11
"bytes"
11
12
"io"
12
- "io/ioutil"
13
+ "math"
14
+ "strings"
13
15
14
16
"code.gitea.io/gitea/modules/analyze"
15
17
@@ -18,16 +20,60 @@ import (
18
20
19
21
// GetLanguageStats calculates language stats for git repository at specified commit
20
22
func (repo * Repository ) GetLanguageStats (commitID string ) (map [string ]int64 , error ) {
21
- // FIXME: We can be more efficient here...
22
- //
23
- // We're expecting that we will be reading a lot of blobs and the trees
24
- // Thus we should use a shared `cat-file --batch` to get all of this data
25
- // And keep the buffers around with resets as necessary.
26
- //
27
- // It's more complicated so...
28
- commit , err := repo .GetCommit (commitID )
23
+ // We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary.
24
+ // so let's create a batch stdin and stdout
25
+
26
+ batchStdinReader , batchStdinWriter := io .Pipe ()
27
+ batchStdoutReader , batchStdoutWriter := io .Pipe ()
28
+ defer func () {
29
+ _ = batchStdinReader .Close ()
30
+ _ = batchStdinWriter .Close ()
31
+ _ = batchStdoutReader .Close ()
32
+ _ = batchStdoutWriter .Close ()
33
+ }()
34
+
35
+ go func () {
36
+ stderr := strings.Builder {}
37
+ err := NewCommand ("cat-file" , "--batch" ).RunInDirFullPipeline (repo .Path , batchStdoutWriter , & stderr , batchStdinReader )
38
+ if err != nil {
39
+ _ = batchStdoutWriter .CloseWithError (ConcatenateError (err , (& stderr ).String ()))
40
+ _ = batchStdinReader .CloseWithError (ConcatenateError (err , (& stderr ).String ()))
41
+ } else {
42
+ _ = batchStdoutWriter .Close ()
43
+ _ = batchStdinReader .Close ()
44
+ }
45
+ }()
46
+
47
+ // For simplicity's sake we'll use a buffered reader
48
+ batchReader := bufio .NewReader (batchStdoutReader )
49
+
50
+ writeID := func (id string ) error {
51
+ _ , err := batchStdinWriter .Write ([]byte (id ))
52
+ if err != nil {
53
+ return err
54
+ }
55
+ _ , err = batchStdinWriter .Write ([]byte {'\n' })
56
+ return err
57
+ }
58
+
59
+ if err := writeID (commitID ); err != nil {
60
+ return nil , err
61
+ }
62
+ shaBytes , typ , size , err := ReadBatchLine (batchReader )
63
+ if typ != "commit" {
64
+ log ("Unable to get commit for: %s. Err: %v" , commitID , err )
65
+ return nil , ErrNotExist {commitID , "" }
66
+ }
67
+
68
+ sha , err := NewIDFromString (string (shaBytes ))
29
69
if err != nil {
30
- log ("Unable to get commit for: %s" , commitID )
70
+ log ("Unable to get commit for: %s. Err: %v" , commitID , err )
71
+ return nil , ErrNotExist {commitID , "" }
72
+ }
73
+
74
+ commit , err := CommitFromReader (repo , sha , io .LimitReader (batchReader , size ))
75
+ if err != nil {
76
+ log ("Unable to get commit for: %s. Err: %v" , commitID , err )
31
77
return nil , err
32
78
}
33
79
@@ -38,17 +84,45 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
38
84
return nil , err
39
85
}
40
86
87
+ contentBuf := bytes.Buffer {}
88
+ var content []byte
41
89
sizes := make (map [string ]int64 )
42
90
for _ , f := range entries {
91
+ contentBuf .Reset ()
92
+ content = contentBuf .Bytes ()
43
93
if f .Size () == 0 || enry .IsVendor (f .Name ()) || enry .IsDotFile (f .Name ()) ||
44
94
enry .IsDocumentation (f .Name ()) || enry .IsConfiguration (f .Name ()) {
45
95
continue
46
96
}
47
97
48
98
// If content can not be read or file is too big just do detection by filename
49
- var content [] byte
99
+
50
100
if f .Size () <= bigFileSize {
51
- content , _ = readFile (f , fileSizeLimit )
101
+ if err := writeID (f .ID .String ()); err != nil {
102
+ return nil , err
103
+ }
104
+ _ , _ , size , err := ReadBatchLine (batchReader )
105
+ if err != nil {
106
+ log ("Error reading blob: %s Err: %v" , f .ID .String (), err )
107
+ return nil , err
108
+ }
109
+
110
+ sizeToRead := size
111
+ discard := int64 (0 )
112
+ if size > fileSizeLimit {
113
+ sizeToRead = fileSizeLimit
114
+ discard = size - fileSizeLimit
115
+ }
116
+
117
+ _ , err = contentBuf .ReadFrom (io .LimitReader (batchReader , sizeToRead ))
118
+ if err != nil {
119
+ return nil , err
120
+ }
121
+ content = contentBuf .Bytes ()
122
+ err = discardFull (batchReader , discard )
123
+ if err != nil {
124
+ return nil , err
125
+ }
52
126
}
53
127
if enry .IsGenerated (f .Name (), content ) {
54
128
continue
@@ -86,24 +160,20 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
86
160
return sizes , nil
87
161
}
88
162
89
- func readFile (entry * TreeEntry , limit int64 ) ([]byte , error ) {
90
- // FIXME: We can probably be a little more efficient here... see above
91
- r , err := entry .Blob ().DataAsync ()
92
- if err != nil {
93
- return nil , err
94
- }
95
- defer r .Close ()
96
-
97
- if limit <= 0 {
98
- return ioutil .ReadAll (r )
163
// discardFull skips exactly discard bytes from rd.
//
// bufio.Reader.Discard takes an int, which may be only 32 bits wide, so the
// request is issued in chunks of at most math.MaxInt32 bytes. Every chunk is
// clamped, so the int64 -> int conversion can never truncate, however large
// discard is.
//
// It returns nil once all requested bytes have been skipped (or immediately
// when discard is <= 0). Otherwise it returns the first error reported by
// rd.Discard, for example io.EOF when the stream ends early.
func discardFull(rd *bufio.Reader, discard int64) error {
	for discard > 0 {
		// Clamp each request so that int(chunk) is safe on every platform.
		chunk := discard
		if chunk > math.MaxInt32 {
			chunk = math.MaxInt32
		}

		n, err := rd.Discard(int(chunk))
		// Account for the bytes skipped even when Discard reports an error.
		discard -= int64(n)
		if err != nil {
			return err
		}
	}
	return nil
}
0 commit comments