-
-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Improve sync performance for pull-mirrors #19125
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
6543
merged 25 commits into
go-gitea:main
from
petergardfjall:pull-mirror-tag-sync-optimization
Mar 31, 2022
Merged
Changes from all commits
Commits
Show all changes
25 commits
Select commit
Hold shift + click to select a range
aa3a762
optimize tag-release sync procedure for pull-mirrors
petergardfjall 33f5a5e
optimize tag-release sync procedure for pull-mirrors
petergardfjall 5cd3114
godoc
petergardfjall 4ce50be
please linter
petergardfjall d87be92
please linter: copyright notices
petergardfjall 0e67646
test foreachref.Format
petergardfjall 1fc0bb4
test foreachref.Parser
petergardfjall 6d30da1
stream git for-each-ref output to Parser
petergardfjall 5c91e3d
copyright header for test files
petergardfjall 2847d13
gofumpt
petergardfjall 5d6c962
explicitly ignore return value
petergardfjall 73bb43e
explicitly ignore return value (again)
petergardfjall 4274804
please linter: no encoding/json
petergardfjall 638d690
rename foreachref.Parser test function
petergardfjall 38a1b66
distinguish external imports
petergardfjall 20f6dc5
distinguish external imports part 2
petergardfjall a5220cf
sort prior to pagination
petergardfjall fcd1de6
include payload for annotated tags with signature
petergardfjall ef6352c
less verbose signature payload construction
petergardfjall 2f3908c
code cleanup: remove dead code
petergardfjall aa267d2
delimiters can be byte slices
petergardfjall 61eae9e
avoid closing writer twice
petergardfjall fd7f58c
avoid repetitive memory allocations
petergardfjall c1da4e0
make sure tests add newline to every reference in simulated output
petergardfjall b5ef68f
Merge branch 'main' into pull-mirror-tag-sync-optimization
wxiaoguang File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
// Copyright 2022 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package foreachref | ||
|
||
import ( | ||
"encoding/hex" | ||
"fmt" | ||
"io" | ||
"strings" | ||
) | ||
|
||
// Default delimiters used when rendering and parsing 'git for-each-ref'
// output.
var (
	// nullChar is a single NUL byte.
	nullChar = []byte{0x00}
	// dualNullChar is two consecutive NUL bytes.
	dualNullChar = []byte{0x00, 0x00}
)
|
||
// Format supports specifying and parsing an output format for 'git
// for-each-ref'. See git-for-each-ref(1) for available fields.
type Format struct {
	// fieldNames hold %(fieldname)s to be passed to the '--format' flag of
	// for-each-ref. See git-for-each-ref(1) for available fields.
	fieldNames []string

	// fieldDelim is the character sequence that is used to separate fields
	// for each reference. fieldDelim and refDelim should be selected to not
	// interfere with each other and to not be present in field values.
	fieldDelim []byte
	// fieldDelimStr is a string representation of fieldDelim. Used to save
	// us from repetitive reallocation whenever we need the delimiter as a
	// string.
	fieldDelimStr string
	// refDelim is the character sequence used to separate references from
	// each other in the output. fieldDelim and refDelim should be selected
	// to not interfere with each other and to not be present in field
	// values.
	refDelim []byte
}
|
||
// NewFormat creates a forEachRefFormat using the specified fieldNames. See | ||
// git-for-each-ref(1) for available fields. | ||
func NewFormat(fieldNames ...string) Format { | ||
return Format{ | ||
fieldNames: fieldNames, | ||
fieldDelim: nullChar, | ||
fieldDelimStr: string(nullChar), | ||
refDelim: dualNullChar, | ||
} | ||
} | ||
|
||
// Flag returns a for-each-ref --format flag value that captures the fieldNames. | ||
func (f Format) Flag() string { | ||
var formatFlag strings.Builder | ||
for i, field := range f.fieldNames { | ||
// field key and field value | ||
formatFlag.WriteString(fmt.Sprintf("%s %%(%s)", field, field)) | ||
|
||
if i < len(f.fieldNames)-1 { | ||
// note: escape delimiters to allow control characters as | ||
// delimiters. For example, '%00' for null character or '%0a' | ||
// for newline. | ||
formatFlag.WriteString(f.hexEscaped(f.fieldDelim)) | ||
} | ||
} | ||
formatFlag.WriteString(f.hexEscaped(f.refDelim)) | ||
return formatFlag.String() | ||
} | ||
|
||
// Parser returns a Parser capable of parsing 'git for-each-ref' output produced | ||
// with this Format. | ||
func (f Format) Parser(r io.Reader) *Parser { | ||
return NewParser(r, f) | ||
} | ||
|
||
// hexEscaped produces hex-escpaed characters from a string. For example, "\n\0" | ||
// would turn into "%0a%00". | ||
func (f Format) hexEscaped(delim []byte) string { | ||
escaped := "" | ||
for i := 0; i < len(delim); i++ { | ||
escaped += "%" + hex.EncodeToString([]byte{delim[i]}) | ||
} | ||
return escaped | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// Copyright 2022 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package foreachref_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"code.gitea.io/gitea/modules/git/foreachref" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestFormat_Flag(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
|
||
givenFormat foreachref.Format | ||
|
||
wantFlag string | ||
}{ | ||
{ | ||
name: "references are delimited by dual null chars", | ||
|
||
// no reference fields requested | ||
givenFormat: foreachref.NewFormat(), | ||
|
||
// only a reference delimiter field in --format | ||
wantFlag: "%00%00", | ||
}, | ||
|
||
{ | ||
name: "a field is a space-separated key-value pair", | ||
|
||
givenFormat: foreachref.NewFormat("refname:short"), | ||
|
||
// only a reference delimiter field | ||
wantFlag: "refname:short %(refname:short)%00%00", | ||
}, | ||
|
||
{ | ||
name: "fields are separated by a null char field-delimiter", | ||
|
||
givenFormat: foreachref.NewFormat("refname:short", "author"), | ||
|
||
wantFlag: "refname:short %(refname:short)%00author %(author)%00%00", | ||
}, | ||
|
||
{ | ||
name: "multiple fields", | ||
|
||
givenFormat: foreachref.NewFormat("refname:short", "objecttype", "objectname"), | ||
|
||
wantFlag: "refname:short %(refname:short)%00objecttype %(objecttype)%00objectname %(objectname)%00%00", | ||
}, | ||
} | ||
|
||
for _, test := range tests { | ||
tc := test // don't close over loop variable | ||
t.Run(tc.name, func(t *testing.T) { | ||
gotFlag := tc.givenFormat.Flag() | ||
|
||
require.Equal(t, tc.wantFlag, gotFlag, "unexpected for-each-ref --format string. wanted: '%s', got: '%s'", tc.wantFlag, gotFlag) | ||
}) | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
// Copyright 2022 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package foreachref | ||
petergardfjall marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
import ( | ||
"bufio" | ||
"bytes" | ||
"fmt" | ||
"io" | ||
"strings" | ||
) | ||
|
||
// Parser parses 'git for-each-ref' output according to a given output Format. | ||
type Parser struct { | ||
petergardfjall marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// tokenizes 'git for-each-ref' output into "reference paragraphs". | ||
scanner *bufio.Scanner | ||
|
||
// format represents the '--format' string that describes the expected | ||
// 'git for-each-ref' output structure. | ||
format Format | ||
|
||
// err holds the last encountered error during parsing. | ||
err error | ||
} | ||
|
||
// NewParser creates a 'git for-each-ref' output parser that will parse all | ||
// references in the provided Reader. The references in the output are assumed | ||
// to follow the specified Format. | ||
func NewParser(r io.Reader, format Format) *Parser { | ||
scanner := bufio.NewScanner(r) | ||
|
||
// in addition to the reference delimiter we specified in the --format, | ||
// `git for-each-ref` will always add a newline after every reference. | ||
refDelim := make([]byte, 0, len(format.refDelim)+1) | ||
refDelim = append(refDelim, format.refDelim...) | ||
refDelim = append(refDelim, '\n') | ||
|
||
// Split input into delimiter-separated "reference blocks". | ||
scanner.Split( | ||
func(data []byte, atEOF bool) (advance int, token []byte, err error) { | ||
// Scan until delimiter, marking end of reference. | ||
delimIdx := bytes.Index(data, refDelim) | ||
if delimIdx >= 0 { | ||
token := data[:delimIdx] | ||
advance := delimIdx + len(refDelim) | ||
return advance, token, nil | ||
} | ||
// If we're at EOF, we have a final, non-terminated reference. Return it. | ||
if atEOF { | ||
return len(data), data, nil | ||
} | ||
// Not yet a full field. Request more data. | ||
return 0, nil, nil | ||
}) | ||
|
||
return &Parser{ | ||
scanner: scanner, | ||
format: format, | ||
err: nil, | ||
} | ||
} | ||
|
||
// Next returns the next reference as a collection of key-value pairs. nil | ||
// denotes EOF but is also returned on errors. The Err method should always be | ||
// consulted after Next returning nil. | ||
// | ||
// It could, for example return something like: | ||
// | ||
// { "objecttype": "tag", "refname:short": "v1.16.4", "object": "f460b7543ed500e49c133c2cd85c8c55ee9dbe27" } | ||
// | ||
func (p *Parser) Next() map[string]string { | ||
if !p.scanner.Scan() { | ||
return nil | ||
} | ||
fields, err := p.parseRef(p.scanner.Text()) | ||
if err != nil { | ||
p.err = err | ||
return nil | ||
} | ||
return fields | ||
} | ||
|
||
// Err returns the latest encountered parsing error. | ||
func (p *Parser) Err() error { | ||
return p.err | ||
} | ||
|
||
// parseRef parses out all key-value pairs from a single reference block, such as | ||
// | ||
// "objecttype tag\0refname:short v1.16.4\0object f460b7543ed500e49c133c2cd85c8c55ee9dbe27" | ||
// | ||
func (p *Parser) parseRef(refBlock string) (map[string]string, error) { | ||
if refBlock == "" { | ||
// must be at EOF | ||
return nil, nil | ||
} | ||
|
||
fieldValues := make(map[string]string) | ||
|
||
fields := strings.Split(refBlock, p.format.fieldDelimStr) | ||
if len(fields) != len(p.format.fieldNames) { | ||
return nil, fmt.Errorf("unexpected number of reference fields: wanted %d, was %d", | ||
len(fields), len(p.format.fieldNames)) | ||
} | ||
for i, field := range fields { | ||
field = strings.TrimSpace(field) | ||
|
||
var fieldKey string | ||
var fieldVal string | ||
firstSpace := strings.Index(field, " ") | ||
if firstSpace > 0 { | ||
fieldKey = field[:firstSpace] | ||
fieldVal = field[firstSpace+1:] | ||
} else { | ||
// could be the case if the requested field had no value | ||
fieldKey = field | ||
} | ||
|
||
// enforce the format order of fields | ||
if p.format.fieldNames[i] != fieldKey { | ||
return nil, fmt.Errorf("unexpected field name at position %d: wanted: '%s', was: '%s'", | ||
i, p.format.fieldNames[i], fieldKey) | ||
} | ||
|
||
fieldValues[fieldKey] = fieldVal | ||
} | ||
|
||
return fieldValues, nil | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.