Skip to content

Respect Co-authored-by trailers in the contributors graph #30925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions modules/util/commit_trailers.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package util

import (
"errors"
"net/mail"
)

// ParseCommitTrailerValueWithAuthor parses a commit trailer value that contains author data.
// Note that it only parses the value and does not consider the trailer key i.e. we just
// parse something like the following:
//
// Foo Bar <foobar@example.com>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like it's a standard format, why not use mail.ParseAddress directly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I originally was worried about using this since I wasn't too sure of what mail.ParseAddress accepted (i.e. wasn't familiar with RFC 5322) as input but looking at it now it seems to be a good solution compared to having us parsing the input manually.

It may still make sense to have a utility function around since it looks like we can accept a trailer with no name e.g.:

addrStr := "<hello@world>"
addr, _ := mail.ParseAddress(addrStr)
fmt.Printf("%s", addr.Address)
// hello@world
// Program exited.

Here we can throw an error if addr.Name is empty.

func ParseCommitTrailerValueWithAuthor(value string) (name, email string, err error) {
addr, err := mail.ParseAddress(value)
if err != nil {
return name, email, err
}

if addr.Name == "" {
return name, email, errors.New("commit trailer missing name")
}

name = addr.Name
email = addr.Address

return name, email, nil
}
41 changes: 41 additions & 0 deletions modules/util/commit_trailers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package util

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestParseCommitTrailerValueWithAuthor(t *testing.T) {
cases := []struct {
input string
shouldBeError bool
expectedName string
expectedEmail string
}{
{"Foo Bar <foobar@example.com", true, "", ""},
{"Foo Bar foobar@example.com>", true, "", ""},
{"Foo Bar <>", true, "", ""},
{"Foo Bar <invalid-email-address>", true, "", ""},
{"<foobar@example.com>", true, "", ""},
{" <foobar@example.com>", true, "", ""},
{"Foo Bar <foobar@example.com>", false, "Foo Bar", "foobar@example.com"},
{" Foo Bar <foobar@example.com>", false, "Foo Bar", "foobar@example.com"},
}

for n, c := range cases {
name, email, err := ParseCommitTrailerValueWithAuthor(c.input)
if c.shouldBeError {
assert.Error(t, err, "case %d should be a syntax error", n)
} else {
if err != nil {
assert.Fail(t, "did not expect an error: %v", err)
}
assert.Equal(t, c.expectedName, name, "case %d should have correct name", n)
assert.Equal(t, c.expectedEmail, email, "case %d should have correct email", n)
}
}
}
187 changes: 119 additions & 68 deletions services/repository/contributors_graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/util"
)

const (
Expand Down Expand Up @@ -53,10 +54,11 @@ type ContributorData struct {
Weeks map[int64]*WeekData `json:"weeks"`
}

// ExtendedCommitStats contains information for commit stats with author data
// ExtendedCommitStats contains information for commit stats with both author and coauthors data
type ExtendedCommitStats struct {
Author *api.CommitUser `json:"author"`
Stats *api.CommitStats `json:"stats"`
Author *api.CommitUser `json:"author"`
CoAuthors []*api.CommitUser `json:"co_authors"`
Stats *api.CommitStats `json:"stats"`
}

const layout = time.DateOnly
Expand Down Expand Up @@ -125,8 +127,7 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int
_ = stdoutWriter.Close()
}()

gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as", "--reverse")
// AddOptionFormat("--max-count=%d", limit)
gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as%n%(trailers:key=Co-authored-by,valueonly=true)", "--reverse")
Copy link
Member

@silverwind silverwind May 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked compat on this trailers option and it does appear to be present in git 2.27 at least which is the oldest version where git's site has docs for, so it's likely fine to use:

https://git-scm.com/docs/git-log/2.27.0

gitCmd.AddDynamicArguments(baseCommit.ID.String())

var extendedCommitStats []*ExtendedCommitStats
Expand All @@ -150,6 +151,34 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int
authorEmail := strings.TrimSpace(scanner.Text())
scanner.Scan()
date := strings.TrimSpace(scanner.Text())

var coAuthors []*api.CommitUser
emailSet := map[string]bool{}
for scanner.Scan() {
line := scanner.Text()
if line == "" {
// There should be an empty line before we read the commit stats line.
break
}
coAuthorName, coAuthorEmail, err := util.ParseCommitTrailerValueWithAuthor(line)
if authorEmail == coAuthorEmail {
// Authors shouldn't be co-authors too.
continue
}
if err != nil {
continue
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better to have a log here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how frequent it is for commits to have invalid Co-authored-by trailers but if it won't cause a problem for admins we could have this as maybe an INFO/DEBUG level log

}
if _, exists := emailSet[coAuthorEmail]; exists {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have a hash map here to account trailers with duplicate emails. Is it safe that this hash map is unbounded? If so, should we just ignore all the co-author data for this commit if they reach some certain bound?

continue
}
emailSet[coAuthorEmail] = true
coAuthor := &api.CommitUser{
Identity: api.Identity{Name: coAuthorName, Email: coAuthorEmail},
Date: date,
}
coAuthors = append(coAuthors, coAuthor)
}

scanner.Scan()
stats := strings.TrimSpace(scanner.Text())
if authorName == "" || authorEmail == "" || date == "" || stats == "" {
Expand Down Expand Up @@ -184,7 +213,8 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int
},
Date: date,
},
Stats: &commitStats,
CoAuthors: coAuthors,
Stats: &commitStats,
}
extendedCommitStats = append(extendedCommitStats, res)
}
Expand Down Expand Up @@ -222,8 +252,6 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca
return
}

layout := time.DateOnly

unknownUserAvatarLink := user_model.NewGhostUser().AvatarLinkWithSize(ctx, 0)
contributorsCommitStats := make(map[string]*ContributorData)
contributorsCommitStats["total"] = &ContributorData{
Expand All @@ -237,67 +265,16 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca
if len(userEmail) == 0 {
continue
}
u, _ := user_model.GetUserByEmail(ctx, userEmail)
if u != nil {
// update userEmail with user's primary email address so
// that different mail addresses will linked to same account
userEmail = u.GetEmail()
}
// duplicated logic
if _, ok := contributorsCommitStats[userEmail]; !ok {
if u == nil {
avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0)
if avatarLink == "" {
avatarLink = unknownUserAvatarLink
}
contributorsCommitStats[userEmail] = &ContributorData{
Name: v.Author.Name,
AvatarLink: avatarLink,
Weeks: make(map[int64]*WeekData),
}
} else {
contributorsCommitStats[userEmail] = &ContributorData{
Name: u.DisplayName(),
Login: u.LowerName,
AvatarLink: u.AvatarLinkWithSize(ctx, 0),
HomeLink: u.HomeLink(),
Weeks: make(map[int64]*WeekData),
}
}
}
// Update user statistics
user := contributorsCommitStats[userEmail]
startingOfWeek, _ := findLastSundayBeforeDate(v.Author.Date)

val, _ := time.Parse(layout, startingOfWeek)
week := val.UnixMilli()

if user.Weeks[week] == nil {
user.Weeks[week] = &WeekData{
Additions: 0,
Deletions: 0,
Commits: 0,
Week: week,
}
}
if total.Weeks[week] == nil {
total.Weeks[week] = &WeekData{
Additions: 0,
Deletions: 0,
Commits: 0,
Week: week,
}

authorData := getContributorData(ctx, contributorsCommitStats, v.Author, unknownUserAvatarLink)
date := v.Author.Date
stats := v.Stats
updateUserAndOverallStats(stats, date, authorData, total, false)

for _, coAuthor := range v.CoAuthors {
coAuthorData := getContributorData(ctx, contributorsCommitStats, coAuthor, unknownUserAvatarLink)
updateUserAndOverallStats(stats, date, coAuthorData, total, true)
}
user.Weeks[week].Additions += v.Stats.Additions
user.Weeks[week].Deletions += v.Stats.Deletions
user.Weeks[week].Commits++
user.TotalCommits++

// Update overall statistics
total.Weeks[week].Additions += v.Stats.Additions
total.Weeks[week].Deletions += v.Stats.Deletions
total.Weeks[week].Commits++
total.TotalCommits++
}

_ = cache.PutJSON(cacheKey, contributorsCommitStats, contributorStatsCacheTimeout)
Expand All @@ -306,3 +283,77 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca
genDone <- struct{}{}
}
}

func getContributorData(ctx context.Context, contributorsCommitStats map[string]*ContributorData, user *api.CommitUser, defaultUserAvatarLink string) *ContributorData {
userEmail := user.Email
u, _ := user_model.GetUserByEmail(ctx, userEmail)
if u != nil {
// update userEmail with user's primary email address so
// that different mail addresses will linked to same account
userEmail = u.GetEmail()
}

if _, ok := contributorsCommitStats[userEmail]; !ok {
if u == nil {
avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0)
if avatarLink == "" {
avatarLink = defaultUserAvatarLink
}
contributorsCommitStats[userEmail] = &ContributorData{
Name: user.Name,
AvatarLink: avatarLink,
Weeks: make(map[int64]*WeekData),
}
} else {
contributorsCommitStats[userEmail] = &ContributorData{
Name: u.DisplayName(),
Login: u.LowerName,
AvatarLink: u.AvatarLinkWithSize(ctx, 0),
HomeLink: u.HomeLink(),
Weeks: make(map[int64]*WeekData),
}
}
}
return contributorsCommitStats[userEmail]
}

func updateUserAndOverallStats(stats *api.CommitStats, commitDate string, user, total *ContributorData, isCoAuthor bool) {
startingOfWeek, _ := findLastSundayBeforeDate(commitDate)

val, _ := time.Parse(layout, startingOfWeek)
week := val.UnixMilli()

if user.Weeks[week] == nil {
user.Weeks[week] = &WeekData{
Additions: 0,
Deletions: 0,
Commits: 0,
Week: week,
}
}
if total.Weeks[week] == nil {
total.Weeks[week] = &WeekData{
Additions: 0,
Deletions: 0,
Commits: 0,
Week: week,
}
}
// Update user statistics
user.Weeks[week].Additions += stats.Additions
user.Weeks[week].Deletions += stats.Deletions
user.Weeks[week].Commits++
user.TotalCommits++

if isCoAuthor {
// We would have or will count these additions/deletions/commits already when we encounter the original
// author of the commit. Let's avoid this duplication.
return
}

// Update overall statistics
total.Weeks[week].Additions += stats.Additions
total.Weeks[week].Deletions += stats.Deletions
total.Weeks[week].Commits++
total.TotalCommits++
}
Loading
Loading