From 5171a39346f45c833c679e7a106b9362068858dd Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 22 Aug 2021 13:55:45 +0100 Subject: [PATCH 01/12] Create repo attribute reader Signed-off-by: Andrew Thornton --- modules/git/repo_attribute.go | 273 ++++++++++++++++++++++++++++- modules/git/repo_attribute_test.go | 159 +++++++++++++++++ 2 files changed, 429 insertions(+), 3 deletions(-) create mode 100644 modules/git/repo_attribute_test.go diff --git a/modules/git/repo_attribute.go b/modules/git/repo_attribute.go index aa5e4c10e70d8..829b83f33426c 100644 --- a/modules/git/repo_attribute.go +++ b/modules/git/repo_attribute.go @@ -6,7 +6,11 @@ package git import ( "bytes" + "context" "fmt" + "io" + "strconv" + "strings" ) // CheckAttributeOpts represents the possible options to CheckAttribute @@ -21,7 +25,7 @@ type CheckAttributeOpts struct { func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[string]string, error) { err := LoadGitVersion() if err != nil { - return nil, fmt.Errorf("Git version missing: %v", err) + return nil, fmt.Errorf("git version missing: %v", err) } stdOut := new(bytes.Buffer) @@ -55,13 +59,14 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[ cmd := NewCommand(cmdArgs...) if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil { - return nil, fmt.Errorf("Failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String()) + return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String()) } + // FIXME: This is incorrect on versions < 1.8.5 fields := bytes.Split(stdOut.Bytes(), []byte{'\000'}) if len(fields)%3 != 1 { - return nil, fmt.Errorf("Wrong number of fields in return from check-attr") + return nil, fmt.Errorf("wrong number of fields in return from check-attr") } var name2attribute2info = make(map[string]map[string]string) @@ -80,3 +85,265 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[ return name2attribute2info, nil } + +// CheckAttributeReader provides a reader for check-attribute content that can be long running +type CheckAttributeReader struct { + // params + Attributes []string + Repo *Repository + IndexFile string + + stdinReader *io.PipeReader + stdinWriter *io.PipeWriter + stdOut attributeWriter + cmd *Command + env []string + ctx context.Context + cancel context.CancelFunc + running chan struct{} +} + +// Init initializes the cmd +func (c *CheckAttributeReader) Init(ctx context.Context) error { + c.running = make(chan struct{}) + cmdArgs := []string{"check-attr", "--stdin", "-z"} + + if len(c.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil { + cmdArgs = append(cmdArgs, "--cached") + c.env = []string{"GIT_INDEX_FILE=" + c.IndexFile} + } + + if len(c.Attributes) > 0 { + cmdArgs = append(cmdArgs, c.Attributes...) + } else { + lw := new(nulSeparatedAttributeWriter) + lw.attributes = make(chan attributeTriple) + + c.stdOut = lw + c.stdOut.Close() + return fmt.Errorf("no provided Attributes to check") + } + + c.ctx, c.cancel = context.WithCancel(ctx) + c.cmd = NewCommandContext(c.ctx, cmdArgs...) + c.stdinReader, c.stdinWriter = io.Pipe() + + if CheckGitVersionAtLeast("1.8.5") == nil { + lw := new(nulSeparatedAttributeWriter) + lw.attributes = make(chan attributeTriple, 5) + + c.stdOut = lw + } else { + lw := new(lineSeparatedAttributeWriter) + lw.attributes = make(chan attributeTriple, 5) + + c.stdOut = lw + } + return nil +} + +// Run run cmd +func (c *CheckAttributeReader) Run() error { + stdErr := new(bytes.Buffer) + err := c.cmd.RunInDirTimeoutEnvFullPipelineFunc(c.env, -1, c.Repo.Path, c.stdOut, stdErr, c.stdinReader, func(_ context.Context, _ context.CancelFunc) error { + close(c.running) + return nil + }) + if err != nil && (err != context.Canceled || err != context.DeadlineExceeded) { + defer c.cancel() + c.stdOut.Close() + return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String()) + } + + return nil +} + +// CheckPath check attr for given path +func (c *CheckAttributeReader) CheckPath(path string) (map[string]string, error) { + select { + case <-c.ctx.Done(): + return nil, c.ctx.Err() + case <-c.running: + } + _, err := c.stdinWriter.Write([]byte(path + "\x00")) + if err != nil { + defer c.cancel() + return nil, err + } + + rs := make(map[string]string) + for range c.Attributes { + select { + case attr := <-c.stdOut.ReadAttribute(): + rs[attr.Attribute] = attr.Value + case <-c.ctx.Done(): + return nil, c.ctx.Err() + } + } + return rs, nil +} + +// Close close pip after use +func (c *CheckAttributeReader) Close() error { + select { + case <-c.running: + default: + close(c.running) + } + defer c.cancel() + return c.stdinWriter.Close() +} + +type attributeWriter interface { + io.WriteCloser + ReadAttribute() <-chan attributeTriple +} + +type attributeTriple struct { + Filename string + Attribute string + Value string +} + +type nulSeparatedAttributeWriter struct { + tmp []byte + attributes chan attributeTriple + working attributeTriple + pos int +} + +func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) { + if wr.attributes == nil { + wr.attributes = make(chan attributeTriple, 5) + } + + nulIdx := bytes.IndexByte(p, '\x00') + l, read := len(p), 0 + + for nulIdx >= 0 { + wr.tmp = append(wr.tmp, p[:nulIdx]...) + switch wr.pos { + case 0: + wr.working = attributeTriple{ + Filename: string(wr.tmp), + } + case 1: + wr.working.Attribute = string(wr.tmp) + case 2: + wr.working.Value = string(wr.tmp) + } + wr.tmp = wr.tmp[:0] + wr.pos++ + if wr.pos > 2 { + wr.attributes <- wr.working + wr.pos = 0 + } + read += nulIdx + 1 + if l > read { + p = p[nulIdx+1:] + nulIdx = bytes.IndexByte(p, '\x00') + } else { + return l, nil + } + } + wr.tmp = append(wr.tmp, p...) + return len(p), nil +} + +func (wr *nulSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple { + return wr.attributes +} + +func (wr *nulSeparatedAttributeWriter) Close() error { + close(wr.attributes) + return nil +} + +type lineSeparatedAttributeWriter struct { + tmp []byte + attributes chan attributeTriple +} + +func (wr *lineSeparatedAttributeWriter) Write(p []byte) (n int, err error) { + l := len(p) + + nlIdx := bytes.IndexByte(p, '\n') + for nlIdx >= 0 { + wr.tmp = append(wr.tmp, p[:nlIdx]...) + + if len(wr.tmp) == 0 { + // This should not happen + if len(p) > nlIdx+1 { + wr.tmp = wr.tmp[:0] + p = p[nlIdx+1:] + nlIdx = bytes.IndexByte(p, '\n') + continue + } else { + return l, nil + } + } + + working := attributeTriple{} + if wr.tmp[0] == '"' { + sb := new(strings.Builder) + remaining := string(wr.tmp[1:]) + for len(remaining) > 0 { + rn, _, tail, err := strconv.UnquoteChar(remaining, '"') + if err != nil { + if len(remaining) > 2 && remaining[0] == '"' && remaining[1] == ':' && remaining[2] == ' ' { + working.Filename = sb.String() + wr.tmp = []byte(remaining[3:]) + break + } + return l, fmt.Errorf("unexpected tail %s", string(remaining)) + } + _, _ = sb.WriteRune(rn) + remaining = tail + } + } else { + idx := bytes.IndexByte(wr.tmp, ':') + if idx < 0 { + return l, fmt.Errorf("unexpected input %s", string(wr.tmp)) + } + working.Filename = string(wr.tmp[:idx]) + if len(wr.tmp) < idx+2 { + return l, fmt.Errorf("unexpected input %s", string(wr.tmp)) + } + wr.tmp = wr.tmp[idx+2:] + } + + idx := bytes.IndexByte(wr.tmp, ':') + if idx < 0 { + return l, fmt.Errorf("unexpected input %s", string(wr.tmp)) + } + + working.Attribute = string(wr.tmp[:idx]) + if len(wr.tmp) < idx+2 { + return l, fmt.Errorf("unexpected input %s", string(wr.tmp)) + } + + working.Value = string(wr.tmp[idx+2:]) + + wr.attributes <- working + wr.tmp = wr.tmp[:0] + if len(p) > nlIdx+1 { + p = p[nlIdx+1:] + nlIdx = bytes.IndexByte(p, '\n') + continue + } else { + return l, nil + } + } + + wr.tmp = append(wr.tmp, p...) + return l, nil +} + +func (wr *lineSeparatedAttributeWriter) ReadAttribute() <-chan attributeTriple { + return wr.attributes +} + +func (wr *lineSeparatedAttributeWriter) Close() error { + close(wr.attributes) + return nil +} diff --git a/modules/git/repo_attribute_test.go b/modules/git/repo_attribute_test.go new file mode 100644 index 0000000000000..0b5137c33ba87 --- /dev/null +++ b/modules/git/repo_attribute_test.go @@ -0,0 +1,159 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package git + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { + wr := &nulSeparatedAttributeWriter{ + attributes: make(chan attributeTriple, 5), + } + + testStr := ".gitignore\"\n\x00linguist-vendored\x00unspecified\x00" + + n, err := wr.Write([]byte(testStr)) + + assert.Equal(t, n, len(testStr)) + assert.NoError(t, err) + select { + case attr := <-wr.ReadAttribute(): + assert.Equal(t, ".gitignore\"\n", attr.Filename) + assert.Equal(t, "linguist-vendored", attr.Attribute) + assert.Equal(t, "unspecified", attr.Value) + case <-time.After(100 * time.Millisecond): + assert.Fail(t, "took too long to read an attribute from the list") + } + // Write a second attribute again + n, err = wr.Write([]byte(testStr)) + + assert.Equal(t, n, len(testStr)) + assert.NoError(t, err) + + select { + case attr := <-wr.ReadAttribute(): + assert.Equal(t, ".gitignore\"\n", attr.Filename) + assert.Equal(t, "linguist-vendored", attr.Attribute) + assert.Equal(t, "unspecified", attr.Value) + case <-time.After(100 * time.Millisecond): + assert.Fail(t, "took too long to read an attribute from the list") + } + + //Write a partial attribute + _, err = wr.Write([]byte("incomplete-file")) + assert.NoError(t, err) + _, err = wr.Write([]byte("name\x00")) + assert.NoError(t, err) + + select { + case <-wr.ReadAttribute(): + assert.Fail(t, "There should not be an attribute ready to read") + case <-time.After(100 * time.Millisecond): + } + _, err = wr.Write([]byte("attribute\x00")) + assert.NoError(t, err) + select { + case <-wr.ReadAttribute(): + assert.Fail(t, "There should not be an attribute ready to read") + case <-time.After(100 * time.Millisecond): + } + + _, err = wr.Write([]byte("value\x00")) + assert.NoError(t, err) + + attr := <-wr.ReadAttribute() + assert.Equal(t, "incomplete-filename", attr.Filename) + assert.Equal(t, "attribute", attr.Attribute) + assert.Equal(t, "value", attr.Value) + + _, err = wr.Write([]byte("shouldbe.vendor\x00linguist-vendored\x00set\x00shouldbe.vendor\x00linguist-generated\x00unspecified\x00shouldbe.vendor\x00linguist-language\x00unspecified\x00")) + assert.NoError(t, err) + attr = <-wr.ReadAttribute() + assert.NoError(t, err) + assert.EqualValues(t, attributeTriple{ + Filename: "shouldbe.vendor", + Attribute: "linguist-vendored", + Value: "set", + }, attr) + attr = <-wr.ReadAttribute() + assert.NoError(t, err) + assert.EqualValues(t, attributeTriple{ + Filename: "shouldbe.vendor", + Attribute: "linguist-generated", + Value: "unspecified", + }, attr) + attr = <-wr.ReadAttribute() + assert.NoError(t, err) + assert.EqualValues(t, &attributeTriple{ + Filename: "shouldbe.vendor", + Attribute: "linguist-language", + Value: "unspecified", + }, attr) +} + +func Test_lineSeparatedAttributeWriter_ReadAttribute(t *testing.T) { + wr := &lineSeparatedAttributeWriter{ + attributes: make(chan attributeTriple, 5), + } + + testStr := `".gitignore\"\n": linguist-vendored: unspecified +` + n, err := wr.Write([]byte(testStr)) + + assert.Equal(t, n, len(testStr)) + assert.NoError(t, err) + + select { + case attr := <-wr.ReadAttribute(): + assert.Equal(t, ".gitignore\"\n", attr.Filename) + assert.Equal(t, "linguist-vendored", attr.Attribute) + assert.Equal(t, "unspecified", attr.Value) + case <-time.After(100 * time.Millisecond): + assert.Fail(t, "took too long to read an attribute from the list") + } + + // Write a second attribute again + n, err = wr.Write([]byte(testStr)) + + assert.Equal(t, n, len(testStr)) + assert.NoError(t, err) + + select { + case attr := <-wr.ReadAttribute(): + assert.Equal(t, ".gitignore\"\n", attr.Filename) + assert.Equal(t, "linguist-vendored", attr.Attribute) + assert.Equal(t, "unspecified", attr.Value) + case <-time.After(100 * time.Millisecond): + assert.Fail(t, "took too long to read an attribute from the list") + } + + //Write a partial attribute + _, err = wr.Write([]byte("incomplete-file")) + assert.NoError(t, err) + _, err = wr.Write([]byte("name: ")) + assert.NoError(t, err) + select { + case <-wr.ReadAttribute(): + assert.Fail(t, "There should not be an attribute ready to read") + case <-time.After(100 * time.Millisecond): + } + _, err = wr.Write([]byte("attribute: ")) + assert.NoError(t, err) + select { + case <-wr.ReadAttribute(): + assert.Fail(t, "There should not be an attribute ready to read") + case <-time.After(100 * time.Millisecond): + } + _, err = wr.Write([]byte("value\n")) + assert.NoError(t, err) + attr := <-wr.ReadAttribute() + assert.Equal(t, "incomplete-filename", attr.Filename) + assert.Equal(t, "attribute", attr.Attribute) + assert.Equal(t, "value", attr.Value) +} From cec3f3fd65221be0df898520a514713315ffa63f Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 22 Aug 2021 14:45:24 +0100 Subject: [PATCH 02/12] use .gitattributes in language stats Signed-off-by: Andrew Thornton --- modules/git/repo_index.go | 39 ++++++++++-- modules/git/repo_language_stats_gogit.go | 69 ++++++++++++++++++++- modules/git/repo_language_stats_nogogit.go | 72 +++++++++++++++++++++- 3 files changed, 171 insertions(+), 9 deletions(-) diff --git a/modules/git/repo_index.go b/modules/git/repo_index.go index 2c351e209fa7c..b301ff2437b54 100644 --- a/modules/git/repo_index.go +++ b/modules/git/repo_index.go @@ -6,11 +6,17 @@ package git import ( "bytes" + "context" + "io/ioutil" + "os" "strings" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/util" ) // ReadTreeToIndex reads a treeish to the index -func (repo *Repository) ReadTreeToIndex(treeish string) error { +func (repo *Repository) ReadTreeToIndex(treeish string, indexFilename ...string) error { if len(treeish) != 40 { res, err := NewCommand("rev-parse", "--verify", treeish).RunInDir(repo.Path) if err != nil { @@ -24,17 +30,42 @@ func (repo *Repository) ReadTreeToIndex(treeish string) error { if err != nil { return err } - return repo.readTreeToIndex(id) + return repo.readTreeToIndex(id, indexFilename...) } -func (repo *Repository) readTreeToIndex(id SHA1) error { - _, err := NewCommand("read-tree", id.String()).RunInDir(repo.Path) +func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error { + var env []string + if len(indexFilename) > 0 { + env = append(os.Environ(), "GIT_INDEX_FILE="+indexFilename[0]) + } + _, err := NewCommand("read-tree", id.String()).RunInDirWithEnv(repo.Path, env) if err != nil { return err } return nil } +// ReadTreeToTemporaryIndex reads a treeish to a temporary index file +func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) { + tmpIndex, err := ioutil.TempFile("", "index") + if err != nil { + return + } + filename = tmpIndex.Name() + cancel = func() { + err := util.Remove(filename) + if err != nil { + log.Error("failed to remove tmp index file: %v", err) + } + } + err = repo.ReadTreeToIndex(treeish, filename) + if err != nil { + defer cancel() + return "", func() {}, err + } + return +} + // EmptyIndex empties the index func (repo *Repository) EmptyIndex() error { _, err := NewCommand("read-tree", "--empty").RunInDir(repo.Path) diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go index 20a7b061f2107..7d9da3ca638b7 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/repo_language_stats_gogit.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. +//go:build gogit // +build gogit package git @@ -41,9 +42,73 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } + var checker *CheckAttributeReader + + if CheckGitVersionAtLeast("1.7.8") == nil { + indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) + if err == nil { + defer deleteTemporaryFile() + + checker = &CheckAttributeReader{ + Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"}, + Repo: repo, + IndexFile: indexFilename, + } + ctx, cancel := context.WithCancel(DefaultContext) + if err := checker.Init(ctx); err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + } else { + go func() { + err = checker.Run() + if err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + cancel() + } + }() + } + defer cancel() + } + } + sizes := make(map[string]int64) err = tree.Files().ForEach(func(f *object.File) error { - if f.Size == 0 || analyze.IsVendor(f.Name) || enry.IsDotFile(f.Name) || + if f.Size() == 0 { + continue + } + + notVendored := false + notGenerated := false + + if checker != nil { + attrs, err := checker.CheckPath(f.Name()) + if err == nil { + if vendored, has := attrs["linguist-vendor"]; has { + if vendored == "set" || vendored == "true" { + continue + } + notVendored = vendored == "false" + } + if generated, has := attrs["linguist-generated"]; has { + if generated == "set" || generated == "true" { + continue + } + notGenerated = generated == "false" + } + if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if group != "" { + language = group + } + + sizes[language] += f.Size() + + continue + } + } + } + + if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) || enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { return nil } @@ -53,7 +118,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err if f.Size <= bigFileSize { content, _ = readFile(f, fileSizeLimit) } - if enry.IsGenerated(f.Name, content) { + if !notGenerated && enry.IsGenerated(f.Name, content) { return nil } diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 1684f21d1675d..7217cd1139913 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. +//go:build !gogit // +build !gogit package git @@ -9,6 +10,7 @@ package git import ( "bufio" "bytes" + "context" "io" "math" @@ -61,13 +63,78 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } + var checker *CheckAttributeReader + + if CheckGitVersionAtLeast("1.7.8") == nil { + indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID) + if err == nil { + defer deleteTemporaryFile() + + checker = &CheckAttributeReader{ + Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"}, + Repo: repo, + IndexFile: indexFilename, + } + ctx, cancel := context.WithCancel(DefaultContext) + if err := checker.Init(ctx); err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + } else { + go func() { + err = checker.Run() + if err != nil { + log.Error("Unable to open checker for %s. Error: %v", commitID, err) + cancel() + } + }() + } + defer cancel() + } + } + contentBuf := bytes.Buffer{} var content []byte sizes := make(map[string]int64) for _, f := range entries { contentBuf.Reset() content = contentBuf.Bytes() - if f.Size() == 0 || analyze.IsVendor(f.Name()) || enry.IsDotFile(f.Name()) || + + if f.Size() == 0 { + continue + } + + notVendored := false + notGenerated := false + + if checker != nil { + attrs, err := checker.CheckPath(f.Name()) + if err == nil { + if vendored, has := attrs["linguist-vendor"]; has { + if vendored == "set" || vendored == "true" { + continue + } + notVendored = vendored == "false" + } + if generated, has := attrs["linguist-generated"]; has { + if generated == "set" || generated == "true" { + continue + } + notGenerated = generated == "false" + } + if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { + // group languages, such as Pug -> HTML; SCSS -> CSS + group := enry.GetLanguageGroup(language) + if group != "" { + language = group + } + + sizes[language] += f.Size() + + continue + } + } + } + + if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) || enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) { continue } @@ -101,11 +168,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } } - if enry.IsGenerated(f.Name(), content) { + if !notGenerated && enry.IsGenerated(f.Name(), content) { continue } - // TODO: Use .gitattributes file for linguist overrides // FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary? // - eg. do the all the detection tests using filename first before reading content. language := analyze.GetCodeLanguage(f.Name(), content) From 3955b240bd30643e94e3c668cab578da5f89746f Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 22 Aug 2021 19:57:17 +0100 Subject: [PATCH 03/12] Mark generated and vendored files and fold them by default Signed-off-by: Andrew Thornton --- modules/analyze/generated.go | 28 +++++++++++++ options/locale/locale_en-US.ini | 2 + services/gitdiff/gitdiff.go | 73 +++++++++++++++++++++++++++++++++ templates/repo/diff/box.tmpl | 14 ++++++- 4 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 modules/analyze/generated.go diff --git a/modules/analyze/generated.go b/modules/analyze/generated.go new file mode 100644 index 0000000000000..0f14d285452f5 --- /dev/null +++ b/modules/analyze/generated.go @@ -0,0 +1,28 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package analyze + +import ( + "path/filepath" + "strings" + + "github.com/go-enry/go-enry/v2/data" +) + +// IsGenerated returns whether or not path is a generated path. +func IsGenerated(path string) bool { + ext := strings.ToLower(filepath.Ext(path)) + if _, ok := data.GeneratedCodeExtensions[ext]; ok { + return true + } + + for _, m := range data.GeneratedCodeNameMatchers { + if m(path) { + return true + } + } + + return false +} diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 3eb38257768a4..7895c3582b9d9 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -2009,6 +2009,8 @@ diff.file_byte_size = Size diff.file_suppressed = File diff suppressed because it is too large diff.file_suppressed_line_too_long = File diff suppressed because one or more lines are too long diff.too_many_files = Some files were not shown because too many files changed in this diff +diff.generated = generated +diff.vendored = vendored diff.comment.placeholder = Leave a comment diff.comment.markdown_info = Styling with markdown is supported. diff.comment.add_single_comment = Add single comment diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index d50e41eb40279..cdafa1ab20612 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -22,6 +22,7 @@ import ( "strings" "code.gitea.io/gitea/models" + "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/highlight" @@ -591,6 +592,8 @@ type DiffFile struct { IsIncomplete bool IsIncompleteLineTooLong bool IsProtected bool + IsGenerated bool + IsVendored bool } // GetType returns type of diff file. @@ -1260,7 +1263,77 @@ func GetDiffRangeWithWhitespaceBehavior(repoPath, beforeCommitID, afterCommitID if err != nil { return nil, fmt.Errorf("ParsePatch: %v", err) } + + var checker *git.CheckAttributeReader + + if git.CheckGitVersionAtLeast("1.7.8") == nil { + indexFilename, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(afterCommitID) + if err == nil { + defer deleteTemporaryFile() + + checker = &git.CheckAttributeReader{ + Attributes: []string{"linguist-vendored", "linguist-generated"}, + Repo: gitRepo, + IndexFile: indexFilename, + } + ctx, cancel := context.WithCancel(git.DefaultContext) + if err := checker.Init(ctx); err != nil { + log.Error("Unable to open checker for %s. Error: %v", afterCommitID, err) + } else { + go func() { + err = checker.Run() + if err != nil { + log.Error("Unable to open checker for %s. Error: %v", afterCommitID, err) + cancel() + } else { + log.Info("Done") + } + }() + } + defer func() { + log.Info("Cancelling the diff context") + cancel() + }() + } + } + for _, diffFile := range diff.Files { + + gotVendor := false + gotGenerated := false + if checker != nil { + log.Info("Checking %s", diffFile.Name) + attrs, err := checker.CheckPath(diffFile.Name) + log.Info("%v, %v", attrs, err) + if err == nil { + if vendored, has := attrs["linguist-vendored"]; has { + if vendored == "set" || vendored == "true" { + diffFile.IsVendored = true + gotVendor = true + } else { + gotVendor = vendored == "false" + } + } + if generated, has := attrs["linguist-generated"]; has { + if generated == "set" || generated == "true" { + diffFile.IsGenerated = true + gotGenerated = true + } else { + gotGenerated = generated == "false" + } + } + } else { + log.Error("Unexpected error: %v", err) + } + } + + if !gotVendor { + diffFile.IsVendored = analyze.IsVendor(diffFile.Name) + } + if !gotGenerated { + diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name) + } + tailSection := diffFile.GetTailSection(gitRepo, beforeCommitID, afterCommitID) if tailSection != nil { diffFile.Sections = append(diffFile.Sections, tailSection) diff --git a/templates/repo/diff/box.tmpl b/templates/repo/diff/box.tmpl index 1ca2dcc4d8144..0780a00c014e0 100644 --- a/templates/repo/diff/box.tmpl +++ b/templates/repo/diff/box.tmpl @@ -49,11 +49,15 @@ {{$isImage := or (call $.IsBlobAnImage $blobBase) (call $.IsBlobAnImage $blobHead)}} {{$isCsv := (call $.IsCsvFile $file)}} {{$showFileViewToggle := or $isImage (and (not $file.IsIncomplete) $isCsv)}} -
+

- {{svg "octicon-chevron-down" 18}} + {{if or $file.IsGenerated $file.IsVendored}} + {{svg "octicon-chevron-right" 18}} + {{else}} + {{svg "octicon-chevron-down" 18}} + {{end}}
{{if $file.IsBin}} @@ -65,6 +69,12 @@ {{end}}
{{if $file.IsRenamed}}{{$file.OldName}} → {{end}}{{$file.Name}}{{if .IsLFSFile}} ({{$.i18n.Tr "repo.stored_lfs"}}){{end}} + {{if $file.IsGenerated}} + {{$.i18n.Tr "repo.diff.generated"}} + {{end}} + {{if $file.IsVendored}} + {{$.i18n.Tr "repo.diff.vendored"}} + {{end}}
{{if $showFileViewToggle}} From 07e2ece15fee74ddbb5953caa242fe2cd12e4d36 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 22 Aug 2021 20:26:07 +0100 Subject: [PATCH 04/12] fixup! use .gitattributes in language stats --- modules/git/repo_language_stats_gogit.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go index 7d9da3ca638b7..bcb80d759ccfa 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/repo_language_stats_gogit.go @@ -9,6 +9,7 @@ package git import ( "bytes" + "context" "io" "io/ioutil" From 933a977aa6819795bff26bcb08b3bd44bd384557 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 23 Aug 2021 01:34:04 +0100 Subject: [PATCH 05/12] fixup! use .gitattributes in language stats --- modules/git/repo_language_stats_gogit.go | 17 +++++++++-------- modules/git/repo_language_stats_nogogit.go | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go index bcb80d759ccfa..94fb7edbb82cb 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/repo_language_stats_gogit.go @@ -14,6 +14,7 @@ import ( "io/ioutil" "code.gitea.io/gitea/modules/analyze" + "code.gitea.io/gitea/modules/log" "github.com/go-enry/go-enry/v2" "github.com/go-git/go-git/v5" @@ -73,25 +74,25 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err sizes := make(map[string]int64) err = tree.Files().ForEach(func(f *object.File) error { - if f.Size() == 0 { - continue + if f.Size == 0 { + return nil } notVendored := false notGenerated := false if checker != nil { - attrs, err := checker.CheckPath(f.Name()) + attrs, err := checker.CheckPath(f.Name) if err == nil { - if vendored, has := attrs["linguist-vendor"]; has { + if vendored, has := attrs["linguist-vendored"]; has { if vendored == "set" || vendored == "true" { - continue + return nil } notVendored = vendored == "false" } if generated, has := attrs["linguist-generated"]; has { if generated == "set" || generated == "true" { - continue + return nil } notGenerated = generated == "false" } @@ -102,9 +103,9 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err language = group } - sizes[language] += f.Size() + sizes[language] += f.Size - continue + return nil } } } diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 7217cd1139913..5cc7fd9d36e92 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -108,7 +108,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err if checker != nil { attrs, err := checker.CheckPath(f.Name()) if err == nil { - if vendored, has := attrs["linguist-vendor"]; has { + if vendored, has := attrs["linguist-vendored"]; has { if vendored == "set" || vendored == "true" { continue } From 212133ffa3f28136624216644322a7aa28d2139a Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 23 Aug 2021 09:58:16 +0100 Subject: [PATCH 06/12] as per lunny and fix broken test Signed-off-by: Andrew Thornton --- modules/git/repo_attribute_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/git/repo_attribute_test.go b/modules/git/repo_attribute_test.go index 0b5137c33ba87..92d1a78fa4dad 100644 --- a/modules/git/repo_attribute_test.go +++ b/modules/git/repo_attribute_test.go @@ -1,4 +1,4 @@ -// Copyright 2019 The Gitea Authors. All rights reserved. +// Copyright 2021 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. @@ -90,7 +90,7 @@ func Test_nulSeparatedAttributeWriter_ReadAttribute(t *testing.T) { }, attr) attr = <-wr.ReadAttribute() assert.NoError(t, err) - assert.EqualValues(t, &attributeTriple{ + assert.EqualValues(t, attributeTriple{ Filename: "shouldbe.vendor", Attribute: "linguist-language", Value: "unspecified", From e6cac7d556a226b226137ecab0b2165e16719921 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 23 Aug 2021 18:34:00 +0100 Subject: [PATCH 07/12] fix tests Signed-off-by: Andrew Thornton --- modules/git/repo_attribute.go | 30 +++++++++++++++++++----------- services/gitdiff/gitdiff.go | 17 ++++++++++------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/modules/git/repo_attribute.go b/modules/git/repo_attribute.go index 829b83f33426c..9c6bcefcacee5 100644 --- a/modules/git/repo_attribute.go +++ b/modules/git/repo_attribute.go @@ -9,6 +9,7 @@ import ( "context" "fmt" "io" + "os" "strconv" "strings" ) @@ -92,9 +93,10 @@ type CheckAttributeReader struct { Attributes []string Repo *Repository IndexFile string + WorkTree string - stdinReader *io.PipeReader - stdinWriter *io.PipeWriter + stdinReader io.ReadCloser + stdinWriter *os.File stdOut attributeWriter cmd *Command env []string @@ -113,8 +115,13 @@ func (c *CheckAttributeReader) Init(ctx context.Context) error { c.env = []string{"GIT_INDEX_FILE=" + c.IndexFile} } + if len(c.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil { + c.env = []string{"GIT_WORK_TREE=" + c.WorkTree} + } + if len(c.Attributes) > 0 { cmdArgs = append(cmdArgs, c.Attributes...) + cmdArgs = append(cmdArgs, "--") } else { lw := new(nulSeparatedAttributeWriter) lw.attributes = make(chan attributeTriple) @@ -126,7 +133,11 @@ func (c *CheckAttributeReader) Init(ctx context.Context) error { c.ctx, c.cancel = context.WithCancel(ctx) c.cmd = NewCommandContext(c.ctx, cmdArgs...) - c.stdinReader, c.stdinWriter = io.Pipe() + var err error + c.stdinReader, c.stdinWriter, err = os.Pipe() + if err != nil { + return err + } if CheckGitVersionAtLeast("1.8.5") == nil { lw := new(nulSeparatedAttributeWriter) @@ -149,9 +160,9 @@ func (c *CheckAttributeReader) Run() error { close(c.running) return nil }) - if err != nil && (err != context.Canceled || err != context.DeadlineExceeded) { - defer c.cancel() - c.stdOut.Close() + defer c.cancel() + _ = c.stdOut.Close() + if err != nil && c.ctx.Err() != nil && err.Error() != "signal: killed" { return fmt.Errorf("failed to run attr-check. Error: %w\nStderr: %s", err, stdErr.String()) } @@ -166,6 +177,7 @@ func (c *CheckAttributeReader) CheckPath(path string) (map[string]string, error) case <-c.running: } _, err := c.stdinWriter.Write([]byte(path + "\x00")) + _ = c.stdinWriter.Sync() if err != nil { defer c.cancel() return nil, err @@ -213,13 +225,9 @@ type nulSeparatedAttributeWriter struct { } func (wr *nulSeparatedAttributeWriter) Write(p []byte) (n int, err error) { - if wr.attributes == nil { - wr.attributes = make(chan attributeTriple, 5) - } - - nulIdx := bytes.IndexByte(p, '\x00') l, read := len(p), 0 + nulIdx := bytes.IndexByte(p, '\x00') for nulIdx >= 0 { wr.tmp = append(wr.tmp, p[:nulIdx]...) switch wr.pos { diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index cdafa1ab20612..b77425c2224cb 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -30,6 +30,7 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/util" "github.com/sergi/go-diff/diffmatchpatch" stdcharset "golang.org/x/net/html/charset" @@ -1270,11 +1271,18 @@ func GetDiffRangeWithWhitespaceBehavior(repoPath, beforeCommitID, afterCommitID indexFilename, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(afterCommitID) if err == nil { defer deleteTemporaryFile() + workdir, err := ioutil.TempDir("", "empty-work-dir") + if err != nil { + log.Error("Unable to create temporary directory: %v", err) + return nil, err + } + defer util.RemoveAll(workdir) checker = &git.CheckAttributeReader{ Attributes: []string{"linguist-vendored", "linguist-generated"}, Repo: gitRepo, IndexFile: indexFilename, + WorkTree: workdir, } ctx, cancel := context.WithCancel(git.DefaultContext) if err := checker.Init(ctx); err != nil { @@ -1282,16 +1290,13 @@ func GetDiffRangeWithWhitespaceBehavior(repoPath, beforeCommitID, afterCommitID } else { go func() { err = checker.Run() - if err != nil { + if err != nil && err != ctx.Err() { log.Error("Unable to open checker for %s. Error: %v", afterCommitID, err) - cancel() - } else { - log.Info("Done") } + cancel() }() } defer func() { - log.Info("Cancelling the diff context") cancel() }() } @@ -1302,9 +1307,7 @@ func GetDiffRangeWithWhitespaceBehavior(repoPath, beforeCommitID, afterCommitID gotVendor := false gotGenerated := false if checker != nil { - log.Info("Checking %s", diffFile.Name) attrs, err := checker.CheckPath(diffFile.Name) - log.Info("%v, %v", attrs, err) if err == nil { if vendored, has := attrs["linguist-vendored"]; has { if vendored == "set" || vendored == "true" { From d0f173cae02359ab680db1cc778c770f18396020 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 23 Aug 2021 19:05:04 +0100 Subject: [PATCH 08/12] placate lint Signed-off-by: Andrew Thornton --- services/gitdiff/gitdiff.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index b77425c2224cb..43aa8ad44cd5b 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -1276,7 +1276,9 @@ func GetDiffRangeWithWhitespaceBehavior(repoPath, beforeCommitID, afterCommitID log.Error("Unable to create temporary directory: %v", err) return nil, err } - defer util.RemoveAll(workdir) + defer func() { + _ = util.RemoveAll(workdir) + }() checker = &git.CheckAttributeReader{ Attributes: []string{"linguist-vendored", "linguist-generated"}, From eb662aa4cc5d5463b6afb2c447884db8101ebaaf Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Fri, 27 Aug 2021 09:52:25 +0100 Subject: [PATCH 09/12] only hide generated not vendored Signed-off-by: Andrew Thornton --- templates/repo/diff/box.tmpl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/templates/repo/diff/box.tmpl b/templates/repo/diff/box.tmpl index 0780a00c014e0..367d329b89db6 100644 --- a/templates/repo/diff/box.tmpl +++ b/templates/repo/diff/box.tmpl @@ -49,11 +49,11 @@ {{$isImage := or (call $.IsBlobAnImage $blobBase) (call $.IsBlobAnImage $blobHead)}} {{$isCsv := (call $.IsCsvFile $file)}} {{$showFileViewToggle := or $isImage (and (not $file.IsIncomplete) $isCsv)}} -
+
From b61dc7d64f02a69122e62d1f2537e78c530be5f8 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Fri, 27 Aug 2021 10:29:49 +0100 Subject: [PATCH 10/12] make whole of file clickable to fold Signed-off-by: Andrew Thornton --- templates/repo/diff/box.tmpl | 4 ++-- web_src/js/index.js | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/templates/repo/diff/box.tmpl b/templates/repo/diff/box.tmpl index 367d329b89db6..21565cd44818d 100644 --- a/templates/repo/diff/box.tmpl +++ b/templates/repo/diff/box.tmpl @@ -51,8 +51,8 @@ {{$showFileViewToggle := or $isImage (and (not $file.IsIncomplete) $isCsv)}}

-
- +
+ {{if $file.IsGenerated}} {{svg "octicon-chevron-right" 18}} {{else}} diff --git a/web_src/js/index.js b/web_src/js/index.js index 7e4970c3a062b..6e84a04ee59c1 100644 --- a/web_src/js/index.js +++ b/web_src/js/index.js @@ -2324,8 +2324,9 @@ function initCodeView() { } $(document).on('click', '.fold-file', ({currentTarget}) => { const box = currentTarget.closest('.file-content'); + const chevron = currentTarget.querySelector('a.chevron'); const folded = box.dataset.folded !== 'true'; - currentTarget.innerHTML = svg(`octicon-chevron-${folded ? 'right' : 'down'}`, 18); + chevron.innerHTML = svg(`octicon-chevron-${folded ? 'right' : 'down'}`, 18); box.dataset.folded = String(folded); }); $(document).on('click', '.blob-excerpt', async ({currentTarget}) => { From 1836ae213cca7f585fbfacbfe446425ae7878551 Mon Sep 17 00:00:00 2001 From: zeripath Date: Wed, 1 Sep 2021 08:25:45 +0100 Subject: [PATCH 11/12] as per lafriks --- modules/git/repo_language_stats_gogit.go | 2 +- modules/git/repo_language_stats_nogogit.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go index 94fb7edbb82cb..3abce1f0773de 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/repo_language_stats_gogit.go @@ -99,7 +99,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { // group languages, such as Pug -> HTML; SCSS -> CSS group := enry.GetLanguageGroup(language) - if group != "" { + if len(group) == 0 { language = group } diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 5cc7fd9d36e92..c3b96ea841e06 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -123,7 +123,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { // group languages, such as Pug -> HTML; SCSS -> CSS group := enry.GetLanguageGroup(language) - if group != "" { + if len(group) == 0 { language = group } From 6d26918f6ae0753adef5d84357a109e5a64a7f4e Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 9 Sep 2021 19:27:05 +0100 Subject: [PATCH 12/12] handle sync error Signed-off-by: Andrew Thornton --- modules/git/repo_attribute.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/modules/git/repo_attribute.go b/modules/git/repo_attribute.go index 9c6bcefcacee5..0bd7d7e49c947 100644 --- a/modules/git/repo_attribute.go +++ b/modules/git/repo_attribute.go @@ -176,9 +176,13 @@ func (c *CheckAttributeReader) CheckPath(path string) (map[string]string, error) return nil, c.ctx.Err() case <-c.running: } - _, err := c.stdinWriter.Write([]byte(path + "\x00")) - _ = c.stdinWriter.Sync() - if err != nil { + + if _, err := c.stdinWriter.Write([]byte(path + "\x00")); err != nil { + defer c.cancel() + return nil, err + } + + if err := c.stdinWriter.Sync(); err != nil { defer c.cancel() return nil, err }