-
-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Respect Co-authored-by trailers in the contributors graph #30925
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ea40999
99953fa
9ac9327
cce7812
25a9f12
8e3209b
fd0bf3b
6c713da
78fecfc
17205fa
22d8652
b880209
f2e114e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Copyright 2024 The Gitea Authors. All rights reserved. | ||
// SPDX-License-Identifier: MIT | ||
|
||
package util | ||
|
||
import ( | ||
"errors" | ||
"net/mail" | ||
) | ||
|
||
// ParseCommitTrailerValueWithAuthor parses a commit trailer value that contains author data. | ||
// Note that it only parses the value and does not consider the trailer key i.e. we just | ||
// parse something like the following: | ||
// | ||
// Foo Bar <foobar@example.com> | ||
func ParseCommitTrailerValueWithAuthor(value string) (name, email string, err error) { | ||
addr, err := mail.ParseAddress(value) | ||
if err != nil { | ||
return name, email, err | ||
} | ||
|
||
if addr.Name == "" { | ||
return name, email, errors.New("commit trailer missing name") | ||
} | ||
|
||
name = addr.Name | ||
email = addr.Address | ||
|
||
return name, email, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// Copyright 2024 The Gitea Authors. All rights reserved. | ||
// SPDX-License-Identifier: MIT | ||
|
||
package util | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestParseCommitTrailerValueWithAuthor(t *testing.T) { | ||
cases := []struct { | ||
input string | ||
shouldBeError bool | ||
expectedName string | ||
expectedEmail string | ||
}{ | ||
{"Foo Bar <foobar@example.com", true, "", ""}, | ||
{"Foo Bar foobar@example.com>", true, "", ""}, | ||
{"Foo Bar <>", true, "", ""}, | ||
{"Foo Bar <invalid-email-address>", true, "", ""}, | ||
{"<foobar@example.com>", true, "", ""}, | ||
{" <foobar@example.com>", true, "", ""}, | ||
{"Foo Bar <foobar@example.com>", false, "Foo Bar", "foobar@example.com"}, | ||
{" Foo Bar <foobar@example.com>", false, "Foo Bar", "foobar@example.com"}, | ||
} | ||
|
||
for n, c := range cases { | ||
name, email, err := ParseCommitTrailerValueWithAuthor(c.input) | ||
if c.shouldBeError { | ||
assert.Error(t, err, "case %d should be a syntax error", n) | ||
} else { | ||
if err != nil { | ||
assert.Fail(t, "did not expect an error: %v", err) | ||
} | ||
assert.Equal(t, c.expectedName, name, "case %d should have correct name", n) | ||
assert.Equal(t, c.expectedEmail, email, "case %d should have correct email", n) | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ import ( | |
"code.gitea.io/gitea/modules/graceful" | ||
"code.gitea.io/gitea/modules/log" | ||
api "code.gitea.io/gitea/modules/structs" | ||
"code.gitea.io/gitea/modules/util" | ||
) | ||
|
||
const ( | ||
|
@@ -53,10 +54,11 @@ type ContributorData struct { | |
Weeks map[int64]*WeekData `json:"weeks"` | ||
} | ||
|
||
// ExtendedCommitStats contains information for commit stats with author data | ||
// ExtendedCommitStats contains information for commit stats with both author and coauthors data | ||
type ExtendedCommitStats struct { | ||
Author *api.CommitUser `json:"author"` | ||
Stats *api.CommitStats `json:"stats"` | ||
Author *api.CommitUser `json:"author"` | ||
CoAuthors []*api.CommitUser `json:"co_authors"` | ||
Stats *api.CommitStats `json:"stats"` | ||
} | ||
|
||
const layout = time.DateOnly | ||
|
@@ -125,8 +127,7 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int | |
_ = stdoutWriter.Close() | ||
}() | ||
|
||
gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as", "--reverse") | ||
// AddOptionFormat("--max-count=%d", limit) | ||
gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as%n%(trailers:key=Co-authored-by,valueonly=true)", "--reverse") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I checked compat on this |
||
gitCmd.AddDynamicArguments(baseCommit.ID.String()) | ||
|
||
var extendedCommitStats []*ExtendedCommitStats | ||
|
@@ -150,6 +151,34 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int | |
authorEmail := strings.TrimSpace(scanner.Text()) | ||
scanner.Scan() | ||
date := strings.TrimSpace(scanner.Text()) | ||
|
||
var coAuthors []*api.CommitUser | ||
emailSet := map[string]bool{} | ||
for scanner.Scan() { | ||
line := scanner.Text() | ||
if line == "" { | ||
// There should be an empty line before we read the commit stats line. | ||
break | ||
} | ||
coAuthorName, coAuthorEmail, err := util.ParseCommitTrailerValueWithAuthor(line) | ||
if authorEmail == coAuthorEmail { | ||
// Authors shouldn't be co-authors too. | ||
continue | ||
} | ||
if err != nil { | ||
continue | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's better to have a log here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure how frequent it is for commits to have invalid |
||
} | ||
if _, exists := emailSet[coAuthorEmail]; exists { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have a hash map here to account trailers with duplicate emails. Is it safe that this hash map is unbounded? If so, should we just ignore all the co-author data for this commit if they reach some certain bound? |
||
continue | ||
} | ||
emailSet[coAuthorEmail] = true | ||
coAuthor := &api.CommitUser{ | ||
Identity: api.Identity{Name: coAuthorName, Email: coAuthorEmail}, | ||
Date: date, | ||
} | ||
coAuthors = append(coAuthors, coAuthor) | ||
} | ||
|
||
scanner.Scan() | ||
stats := strings.TrimSpace(scanner.Text()) | ||
if authorName == "" || authorEmail == "" || date == "" || stats == "" { | ||
|
@@ -184,7 +213,8 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int | |
}, | ||
Date: date, | ||
}, | ||
Stats: &commitStats, | ||
CoAuthors: coAuthors, | ||
Stats: &commitStats, | ||
} | ||
extendedCommitStats = append(extendedCommitStats, res) | ||
} | ||
|
@@ -222,8 +252,6 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca | |
return | ||
} | ||
|
||
layout := time.DateOnly | ||
|
||
unknownUserAvatarLink := user_model.NewGhostUser().AvatarLinkWithSize(ctx, 0) | ||
contributorsCommitStats := make(map[string]*ContributorData) | ||
contributorsCommitStats["total"] = &ContributorData{ | ||
|
@@ -237,67 +265,16 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca | |
if len(userEmail) == 0 { | ||
continue | ||
} | ||
u, _ := user_model.GetUserByEmail(ctx, userEmail) | ||
if u != nil { | ||
// update userEmail with user's primary email address so | ||
// that different mail addresses will linked to same account | ||
userEmail = u.GetEmail() | ||
} | ||
// duplicated logic | ||
if _, ok := contributorsCommitStats[userEmail]; !ok { | ||
if u == nil { | ||
avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0) | ||
if avatarLink == "" { | ||
avatarLink = unknownUserAvatarLink | ||
} | ||
contributorsCommitStats[userEmail] = &ContributorData{ | ||
Name: v.Author.Name, | ||
AvatarLink: avatarLink, | ||
Weeks: make(map[int64]*WeekData), | ||
} | ||
} else { | ||
contributorsCommitStats[userEmail] = &ContributorData{ | ||
Name: u.DisplayName(), | ||
Login: u.LowerName, | ||
AvatarLink: u.AvatarLinkWithSize(ctx, 0), | ||
HomeLink: u.HomeLink(), | ||
Weeks: make(map[int64]*WeekData), | ||
} | ||
} | ||
} | ||
// Update user statistics | ||
user := contributorsCommitStats[userEmail] | ||
startingOfWeek, _ := findLastSundayBeforeDate(v.Author.Date) | ||
|
||
val, _ := time.Parse(layout, startingOfWeek) | ||
week := val.UnixMilli() | ||
|
||
if user.Weeks[week] == nil { | ||
user.Weeks[week] = &WeekData{ | ||
Additions: 0, | ||
Deletions: 0, | ||
Commits: 0, | ||
Week: week, | ||
} | ||
} | ||
if total.Weeks[week] == nil { | ||
total.Weeks[week] = &WeekData{ | ||
Additions: 0, | ||
Deletions: 0, | ||
Commits: 0, | ||
Week: week, | ||
} | ||
|
||
authorData := getContributorData(ctx, contributorsCommitStats, v.Author, unknownUserAvatarLink) | ||
date := v.Author.Date | ||
stats := v.Stats | ||
updateUserAndOverallStats(stats, date, authorData, total, false) | ||
|
||
for _, coAuthor := range v.CoAuthors { | ||
coAuthorData := getContributorData(ctx, contributorsCommitStats, coAuthor, unknownUserAvatarLink) | ||
updateUserAndOverallStats(stats, date, coAuthorData, total, true) | ||
} | ||
user.Weeks[week].Additions += v.Stats.Additions | ||
user.Weeks[week].Deletions += v.Stats.Deletions | ||
user.Weeks[week].Commits++ | ||
user.TotalCommits++ | ||
|
||
// Update overall statistics | ||
total.Weeks[week].Additions += v.Stats.Additions | ||
total.Weeks[week].Deletions += v.Stats.Deletions | ||
total.Weeks[week].Commits++ | ||
total.TotalCommits++ | ||
} | ||
|
||
_ = cache.PutJSON(cacheKey, contributorsCommitStats, contributorStatsCacheTimeout) | ||
|
@@ -306,3 +283,77 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca | |
genDone <- struct{}{} | ||
} | ||
} | ||
|
||
func getContributorData(ctx context.Context, contributorsCommitStats map[string]*ContributorData, user *api.CommitUser, defaultUserAvatarLink string) *ContributorData { | ||
userEmail := user.Email | ||
u, _ := user_model.GetUserByEmail(ctx, userEmail) | ||
if u != nil { | ||
// update userEmail with user's primary email address so | ||
// that different mail addresses will linked to same account | ||
userEmail = u.GetEmail() | ||
} | ||
|
||
if _, ok := contributorsCommitStats[userEmail]; !ok { | ||
if u == nil { | ||
avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0) | ||
if avatarLink == "" { | ||
avatarLink = defaultUserAvatarLink | ||
} | ||
contributorsCommitStats[userEmail] = &ContributorData{ | ||
Name: user.Name, | ||
AvatarLink: avatarLink, | ||
Weeks: make(map[int64]*WeekData), | ||
} | ||
} else { | ||
contributorsCommitStats[userEmail] = &ContributorData{ | ||
Name: u.DisplayName(), | ||
Login: u.LowerName, | ||
AvatarLink: u.AvatarLinkWithSize(ctx, 0), | ||
HomeLink: u.HomeLink(), | ||
Weeks: make(map[int64]*WeekData), | ||
} | ||
} | ||
} | ||
return contributorsCommitStats[userEmail] | ||
} | ||
|
||
func updateUserAndOverallStats(stats *api.CommitStats, commitDate string, user, total *ContributorData, isCoAuthor bool) { | ||
startingOfWeek, _ := findLastSundayBeforeDate(commitDate) | ||
|
||
val, _ := time.Parse(layout, startingOfWeek) | ||
week := val.UnixMilli() | ||
|
||
if user.Weeks[week] == nil { | ||
user.Weeks[week] = &WeekData{ | ||
Additions: 0, | ||
Deletions: 0, | ||
Commits: 0, | ||
Week: week, | ||
} | ||
} | ||
if total.Weeks[week] == nil { | ||
total.Weeks[week] = &WeekData{ | ||
Additions: 0, | ||
Deletions: 0, | ||
Commits: 0, | ||
Week: week, | ||
} | ||
} | ||
// Update user statistics | ||
user.Weeks[week].Additions += stats.Additions | ||
user.Weeks[week].Deletions += stats.Deletions | ||
user.Weeks[week].Commits++ | ||
user.TotalCommits++ | ||
|
||
if isCoAuthor { | ||
// We would have or will count these additions/deletions/commits already when we encounter the original | ||
// author of the commit. Let's avoid this duplication. | ||
return | ||
} | ||
|
||
// Update overall statistics | ||
total.Weeks[week].Additions += stats.Additions | ||
total.Weeks[week].Deletions += stats.Deletions | ||
total.Weeks[week].Commits++ | ||
total.TotalCommits++ | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like it's a standard format, why not use
mail.ParseAddress
directly?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I originally was worried about using this since I wasn't too sure of what
mail.ParseAddress
accepted (i.e. wasn't familiar with RFC 5322) as input but looking at it now it seems to be a good solution compared to having us parsing the input manually.It may still make sense to have a utility function around since it looks like we can accept a trailer with no name e.g.:
Here we can throw an error if
addr.Name
is empty.