
Commit ce88e34

randall77 authored and gopherbot committed
cmd/compile: allocate backing store for append on the stack
When appending, if the backing store doesn't escape and a constant-sized
backing store is big enough, use a constant-sized stack-allocated backing
store instead of allocating it from the heap.

cmd/go is <0.1% bigger.

As an example of how this helps, if you edit strings/strings.go:FieldsFunc
to replace

    spans := make([]span, 0, 32)

with

    var spans []span

then this CL removes the first 2 allocations that are part of the growth
sequence:

                              │    base     │                 exp                 │
                              │  allocs/op  │  allocs/op   vs base                │
FieldsFunc/ASCII/16-24          3.000 ± ∞ ¹   2.000 ± ∞ ¹  -33.33% (p=0.008 n=5)
FieldsFunc/ASCII/256-24         7.000 ± ∞ ¹   5.000 ± ∞ ¹  -28.57% (p=0.008 n=5)
FieldsFunc/ASCII/4096-24       11.000 ± ∞ ¹   9.000 ± ∞ ¹  -18.18% (p=0.008 n=5)
FieldsFunc/ASCII/65536-24       18.00 ± ∞ ¹   16.00 ± ∞ ¹  -11.11% (p=0.008 n=5)
FieldsFunc/ASCII/1048576-24     30.00 ± ∞ ¹   28.00 ± ∞ ¹   -6.67% (p=0.008 n=5)
FieldsFunc/Mixed/16-24          2.000 ± ∞ ¹   2.000 ± ∞ ¹        ~ (p=1.000 n=5)
FieldsFunc/Mixed/256-24         7.000 ± ∞ ¹   5.000 ± ∞ ¹  -28.57% (p=0.008 n=5)
FieldsFunc/Mixed/4096-24       11.000 ± ∞ ¹   9.000 ± ∞ ¹  -18.18% (p=0.008 n=5)
FieldsFunc/Mixed/65536-24       18.00 ± ∞ ¹   16.00 ± ∞ ¹  -11.11% (p=0.008 n=5)
FieldsFunc/Mixed/1048576-24     30.00 ± ∞ ¹   28.00 ± ∞ ¹   -6.67% (p=0.008 n=5)

(Of course, people have spotted and fixed a bunch of allocation sites like
this, but now we're ~automatically doing it everywhere going forward.)

No significant increases in frame sizes in cmd/go.

Change-Id: I301c4d9676667eacdae0058960321041d173751a
Reviewed-on: https://go-review.googlesource.com/c/go/+/664299
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Keith Randall <khr@golang.org>
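[Editor's illustration] To make the FieldsFunc example concrete, here is a hedged sketch of the shape of code that benefits (a simplified stand-in for strings.FieldsFunc, not the actual source): a slice that starts nil, is used only locally, and grows through append. With this CL, its first backing store (up to the 32-byte threshold) can live on the stack.

type span struct {
    start int
    end   int
}

// fieldsFunc is a simplified stand-in for strings.FieldsFunc.
// spans never escapes: only its elements are read to build the result,
// so escape analysis can mark the append "does not escape" and the
// compiler can place the initial backing store on the stack.
func fieldsFunc(s string, f func(rune) bool) []string {
    var spans []span // was: make([]span, 0, 32)
    start := -1
    for i, r := range s {
        if f(r) {
            if start >= 0 {
                spans = append(spans, span{start, i})
                start = -1
            }
        } else if start < 0 {
            start = i
        }
    }
    if start >= 0 {
        spans = append(spans, span{start, len(s)})
    }
    out := make([]string, len(spans))
    for i, sp := range spans {
        out[i] = s[sp.start:sp.end]
    }
    return out
}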
1 parent 3baf53a commit ce88e34

File tree

13 files changed: +255 −81 lines

src/cmd/compile/internal/escape/call.go

Lines changed: 8 additions & 0 deletions

@@ -159,6 +159,14 @@ func (e *escape) call(ks []hole, call ir.Node) {
         }
         e.discard(call.RType)
 
+        // Model the new backing store that might be allocated by append.
+        // Its address flows to the result.
+        // Users of escape analysis can look at the escape information for OAPPEND
+        // and use that to decide where to allocate the backing store.
+        backingStore := e.spill(ks[0], call)
+        // As we have a boolean to prevent reuse, we can treat these allocations as outside any loops.
+        backingStore.dst.loopDepth = 0
+
     case ir.OCOPY:
         call := call.(*ir.BinaryExpr)
         argument(e.mutatorHole(), call.X)
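[Editor's note] The loopDepth = 0 line deserves a word: normally an allocation inside a loop must be modeled per iteration, but because the SSA lowering (further below) guards the stack backing store with a once-per-call "used" flag, the store can be claimed at most once no matter how often the append executes. A hedged illustration (hypothetical function, not from the CL):

func sumSquares(n int) int {
    var xs []int // xs itself does not escape sumSquares
    for i := 0; i < n; i++ {
        // The append sits inside a loop, but only the first growth
        // from an empty slice can claim the stack backing store;
        // later growths fall back to growslice as usual.
        xs = append(xs, i*i)
    }
    total := 0
    for _, v := range xs {
        total += v
    }
    return total
}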

src/cmd/compile/internal/escape/escape.go

Lines changed: 10 additions & 2 deletions

@@ -306,7 +306,11 @@ func (b *batch) finish(fns []*ir.Func) {
             }
         } else {
             if base.Flag.LowerM != 0 && !goDeferWrapper {
-                base.WarnfAt(n.Pos(), "%v escapes to heap", n)
+                if n.Op() == ir.OAPPEND {
+                    base.WarnfAt(n.Pos(), "append escapes to heap")
+                } else {
+                    base.WarnfAt(n.Pos(), "%v escapes to heap", n)
+                }
             }
             if logopt.Enabled() {
                 var e_curfn *ir.Func // TODO(mdempsky): Fix.
@@ -316,7 +320,11 @@ func (b *batch) finish(fns []*ir.Func) {
         n.SetEsc(ir.EscHeap)
     } else {
         if base.Flag.LowerM != 0 && n.Op() != ir.ONAME && !goDeferWrapper {
-            base.WarnfAt(n.Pos(), "%v does not escape", n)
+            if n.Op() == ir.OAPPEND {
+                base.WarnfAt(n.Pos(), "append does not escape")
+            } else {
+                base.WarnfAt(n.Pos(), "%v does not escape", n)
+            }
         }
         n.SetEsc(ir.EscNone)
         if !loc.hasAttr(attrPersists) {
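[Editor's note] With these diagnostics, -m output now names the append itself instead of printing the whole expression. A hedged example of what a build might report (file name and positions are illustrative):

$ go build -gcflags=-m example.go
./example.go:7:15: append does not escape
./example.go:14:13: append escapes to heap

The first form means the backing store is eligible for stack allocation; the second means the appended-to slice leaks, so the backing store must stay on the heap.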

src/cmd/compile/internal/ssagen/ssa.go

Lines changed: 122 additions & 1 deletion

@@ -1054,6 +1054,9 @@
     // They are all (OffPtr (Select0 (runtime call))) and have the correct types,
     // but the offsets are not set yet, and the type of the runtime call is also not final.
     pendingHeapAllocations []*ssa.Value
+
+    // First argument of append calls that could be stack allocated.
+    appendTargets map[ir.Node]bool
 }
 
 type funcLine struct {
@@ -3735,6 +3738,7 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value {
 
     // Add number of new elements to length.
     nargs := s.constInt(types.Types[types.TINT], int64(len(n.Args)-1))
+    oldLen := l
     l = s.newValue2(s.ssaOp(ir.OADD, types.Types[types.TINT]), types.Types[types.TINT], l, nargs)
 
     // Decide if we need to grow
@@ -3754,6 +3758,123 @@
     b.AddEdgeTo(grow)
     b.AddEdgeTo(assign)
 
+    // If the result of the append does not escape, we can use
+    // a stack-allocated backing store if len is small enough.
+    // A stack-allocated backing store could be used at every
+    // append that qualifies, but we limit it in some cases to
+    // avoid wasted code and stack space.
+    // TODO: handle ... append case.
+    maxStackSize := int64(base.Debug.VariableMakeThreshold)
+    if !inplace && n.Esc() == ir.EscNone && et.Size() > 0 && et.Size() <= maxStackSize && base.Flag.N == 0 && base.VariableMakeHash.MatchPos(n.Pos(), nil) && !s.appendTargets[sn] {
+        // if l <= K {
+        //   if !used {
+        //     if oldLen == 0 {
+        //       var store [K]T
+        //       s = store[:l:K]
+        //       used = true
+        //     }
+        //   }
+        // }
+        // ... if we didn't use the stack backing store, call growslice ...
+        //
+        // oldLen==0 is not strictly necessary, but requiring it means
+        // we don't have to worry about copying existing elements.
+        // Allowing oldLen>0 would add complication. Worth it? I would guess not.
+        //
+        // TODO: instead of the used boolean, we could insist that this only applies
+        // to monotonic slices, those which once they have >0 entries never go back
+        // to 0 entries. Then oldLen==0 is enough.
+        //
+        // We also do this for append(x, ...) once for every x.
+        // It is ok to do it more often, but it is probably helpful only for
+        // the first instance. TODO: this could use more tuning. Using ir.Node
+        // as the key works for *ir.Name instances but probably nothing else.
+        if s.appendTargets == nil {
+            s.appendTargets = map[ir.Node]bool{}
+        }
+        s.appendTargets[sn] = true
+
+        K := maxStackSize / et.Size() // rounds down
+        KT := types.NewArray(et, K)
+        KT.SetNoalg(true)
+        types.CalcArraySize(KT)
+        // Align more than naturally for the type KT. See issue 73199.
+        align := types.NewArray(types.Types[types.TUINTPTR], 0)
+        types.CalcArraySize(align)
+        storeTyp := types.NewStruct([]*types.Field{
+            {Sym: types.BlankSym, Type: align},
+            {Sym: types.BlankSym, Type: KT},
+        })
+        storeTyp.SetNoalg(true)
+        types.CalcStructSize(storeTyp)
+
+        usedTestBlock := s.f.NewBlock(ssa.BlockPlain)
+        oldLenTestBlock := s.f.NewBlock(ssa.BlockPlain)
+        bodyBlock := s.f.NewBlock(ssa.BlockPlain)
+        growSlice := s.f.NewBlock(ssa.BlockPlain)
+
+        // Make "used" boolean.
+        tBool := types.Types[types.TBOOL]
+        used := typecheck.TempAt(n.Pos(), s.curfn, tBool)
+        s.defvars[s.f.Entry.ID][used] = s.constBool(false) // initialize this variable at fn entry
+
+        // Make backing store variable.
+        tInt := types.Types[types.TINT]
+        backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp)
+        backingStore.SetAddrtaken(true)
+
+        // if l <= K
+        s.startBlock(grow)
+        kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, K))
+        b := s.endBlock()
+        b.Kind = ssa.BlockIf
+        b.SetControl(kTest)
+        b.AddEdgeTo(usedTestBlock)
+        b.AddEdgeTo(growSlice)
+        b.Likely = ssa.BranchLikely
+
+        // if !used
+        s.startBlock(usedTestBlock)
+        usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(used))
+        b = s.endBlock()
+        b.Kind = ssa.BlockIf
+        b.SetControl(usedTest)
+        b.AddEdgeTo(oldLenTestBlock)
+        b.AddEdgeTo(growSlice)
+        b.Likely = ssa.BranchLikely
+
+        // if oldLen == 0
+        s.startBlock(oldLenTestBlock)
+        oldLenTest := s.newValue2(s.ssaOp(ir.OEQ, tInt), tBool, oldLen, s.constInt(tInt, 0))
+        b = s.endBlock()
+        b.Kind = ssa.BlockIf
+        b.SetControl(oldLenTest)
+        b.AddEdgeTo(bodyBlock)
+        b.AddEdgeTo(growSlice)
+        b.Likely = ssa.BranchLikely
+
+        // var store struct { _ [0]uintptr; arr [K]T }
+        s.startBlock(bodyBlock)
+        if et.HasPointers() {
+            s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, backingStore, s.mem())
+        }
+        addr := s.addr(backingStore)
+        s.zero(storeTyp, addr)
+
+        // s = store.arr[:l:K]
+        s.vars[ptrVar] = addr
+        s.vars[lenVar] = l // nargs would also be ok because of the oldLen==0 test.
+        s.vars[capVar] = s.constInt(tInt, K)
+
+        // used = true
+        s.assign(used, s.constBool(true), false, 0)
+        b = s.endBlock()
+        b.AddEdgeTo(assign)
+
+        // New block to use for growslice call.
+        grow = growSlice
+    }
+
     // Call growslice
     s.startBlock(grow)
     taddr := s.expr(n.Fun)
@@ -3816,7 +3937,7 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value {
     }
 
     // Write args into slice.
-    oldLen := s.newValue2(s.ssaOp(ir.OSUB, types.Types[types.TINT]), types.Types[types.TINT], l, nargs)
+    oldLen = s.newValue2(s.ssaOp(ir.OSUB, types.Types[types.TINT]), types.Types[types.TINT], l, nargs)
     p2 := s.newValue2(ssa.OpPtrIndex, pt, p, oldLen)
     for i, arg := range args {
         addr := s.newValue2(ssa.OpPtrIndex, pt, p2, s.constInt(types.Types[types.TINT], int64(i)))
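[Editor's illustration] Putting the pieces together, the block structure built above amounts to the following source-level paraphrase. This is a hedged sketch: the real transformation happens on SSA, K is computed per element type (the 32-byte threshold divided by element size, so 4 for int64), "store" and "used" are hidden temporaries in the enclosing function's frame made explicit here so the sketch compiles, and the doubling growth in the else branch is a simplification of growslice.

package main

import "fmt"

const K = 4 // VariableMakeThreshold (32 bytes) / 8-byte element size

// frame makes the compiler-generated temporaries explicit.
type frame struct {
    store [K]int64 // stack backing store; claimed at most once
    used  bool     // false at function entry
}

func appendOne(f *frame, s []int64, v int64) []int64 {
    newLen := len(s) + 1
    if newLen > cap(s) { // the pre-existing "do we need to grow" test
        if newLen <= K && !f.used && len(s) == 0 {
            // if l <= K && !used && oldLen == 0: claim the stack array.
            f.store = [K]int64{} // zeroed, like s.zero(storeTyp, addr)
            s = f.store[:0:K]    // s = store.arr[:l:K]; append sets the length below
            f.used = true
        } else {
            // Otherwise call growslice (growth policy simplified here).
            t := make([]int64, len(s), 2*newLen)
            copy(t, s)
            s = t
        }
    }
    return append(s, v) // write args into the (possibly new) backing store
}

func main() {
    var f frame
    var s []int64
    for i := int64(1); i <= 6; i++ {
        s = appendOne(&f, s, i)
    }
    fmt.Println(s, cap(s)) // [1 2 3 4 5 6] 10: spilled to the heap once past K
}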

src/cmd/compile/internal/types/size.go

Lines changed: 59 additions & 46 deletions

@@ -388,52 +388,8 @@ func CalcSize(t *Type) {
         if t.Elem() == nil {
             break
         }
-
-        CalcSize(t.Elem())
-        t.SetNotInHeap(t.Elem().NotInHeap())
-        if t.Elem().width != 0 {
-            cap := (uint64(MaxWidth) - 1) / uint64(t.Elem().width)
-            if uint64(t.NumElem()) > cap {
-                base.Errorf("type %L larger than address space", t)
-            }
-        }
-        w = t.NumElem() * t.Elem().width
-        t.align = t.Elem().align
-
-        // ABIInternal only allows "trivial" arrays (i.e., length 0 or 1)
-        // to be passed by register.
-        switch t.NumElem() {
-        case 0:
-            t.intRegs = 0
-            t.floatRegs = 0
-        case 1:
-            t.intRegs = t.Elem().intRegs
-            t.floatRegs = t.Elem().floatRegs
-        default:
-            t.intRegs = math.MaxUint8
-            t.floatRegs = math.MaxUint8
-        }
-        switch a := t.Elem().alg; a {
-        case AMEM, ANOEQ, ANOALG:
-            t.setAlg(a)
-        default:
-            switch t.NumElem() {
-            case 0:
-                // We checked above that the element type is comparable.
-                t.setAlg(AMEM)
-            case 1:
-                // Single-element array is same as its lone element.
-                t.setAlg(a)
-            default:
-                t.setAlg(ASPECIAL)
-            }
-        }
-        if t.NumElem() > 0 {
-            x := PtrDataSize(t.Elem())
-            if x > 0 {
-                t.ptrBytes = t.Elem().width*(t.NumElem()-1) + x
-            }
-        }
+        CalcArraySize(t)
+        w = t.width
 
     case TSLICE:
         if t.Elem() == nil {
@@ -586,6 +542,63 @@ func CalcStructSize(t *Type) {
     }
 }
 
+// CalcArraySize calculates the size of t,
+// filling in t.width, t.align, t.alg, and t.ptrBytes,
+// even if size calculation is otherwise disabled.
+func CalcArraySize(t *Type) {
+    elem := t.Elem()
+    n := t.NumElem()
+    CalcSize(elem)
+    t.SetNotInHeap(elem.NotInHeap())
+    if elem.width != 0 {
+        cap := (uint64(MaxWidth) - 1) / uint64(elem.width)
+        if uint64(n) > cap {
+            base.Errorf("type %L larger than address space", t)
+        }
+    }
+
+    t.width = elem.width * n
+    t.align = elem.align
+    // ABIInternal only allows "trivial" arrays (i.e., length 0 or 1)
+    // to be passed by register.
+    switch n {
+    case 0:
+        t.intRegs = 0
+        t.floatRegs = 0
+    case 1:
+        t.intRegs = elem.intRegs
+        t.floatRegs = elem.floatRegs
+    default:
+        t.intRegs = math.MaxUint8
+        t.floatRegs = math.MaxUint8
+    }
+    t.alg = AMEM // default
+    if t.Noalg() {
+        t.setAlg(ANOALG)
+    }
+    switch a := elem.alg; a {
+    case AMEM, ANOEQ, ANOALG:
+        t.setAlg(a)
+    default:
+        switch n {
+        case 0:
+            // We checked above that the element type is comparable.
+            t.setAlg(AMEM)
+        case 1:
+            // Single-element array is same as its lone element.
+            t.setAlg(a)
+        default:
+            t.setAlg(ASPECIAL)
+        }
+    }
+    if n > 0 {
+        x := PtrDataSize(elem)
+        if x > 0 {
+            t.ptrBytes = elem.width*(n-1) + x
+        }
+    }
+}
+
 func (t *Type) widthCalculated() bool {
     return t.align > 0
 }
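[Editor's note] One subtlety about the storeTyp built in ssa.go and sized by CalcStructSize here: the leading zero-length [0]uintptr field adds no bytes but raises the struct's alignment to that of uintptr, which is the over-alignment wanted by issue 73199. That property is plain Go and easy to check; a small hedged demonstration:

package main

import (
    "fmt"
    "unsafe"
)

func main() {
    type bare [32]byte // K elements of a type needing only 1-byte alignment
    type padded struct {
        _   [0]uintptr // no size, but uintptr alignment
        arr [32]byte
    }
    fmt.Println(unsafe.Alignof(bare{}), unsafe.Sizeof(bare{}))     // 1 32
    fmt.Println(unsafe.Alignof(padded{}), unsafe.Sizeof(padded{})) // 8 32 on 64-bit
}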

src/runtime/runtime_test.go

Lines changed: 25 additions & 1 deletion

@@ -9,6 +9,7 @@ import (
     "fmt"
     "internal/cpu"
     "internal/runtime/atomic"
+    "internal/testenv"
     "io"
     "math/bits"
     . "runtime"
@@ -307,7 +308,7 @@ func TestTrailingZero(t *testing.T) {
     }
 }
 
-func TestAppendGrowth(t *testing.T) {
+func TestAppendGrowthHeap(t *testing.T) {
     var x []int64
     check := func(want int) {
         if cap(x) != want {
@@ -324,6 +325,29 @@ func TestAppendGrowthHeap(t *testing.T) {
             want = 2 * i
         }
     }
+    Escape(&x[0]) // suppress stack-allocated backing store
+}
+
+func TestAppendGrowthStack(t *testing.T) {
+    var x []int64
+    check := func(want int) {
+        if cap(x) != want {
+            t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
+        }
+    }
+
+    check(0)
+    want := 32 / 8 // 32 is the default for cmd/compile/internal/base.DebugFlags.VariableMakeThreshold
+    if Raceenabled || testenv.OptimizationOff() {
+        want = 1
+    }
+    for i := 1; i <= 100; i++ {
+        x = append(x, 1)
+        check(want)
+        if i&(i-1) == 0 {
+            want = max(want, 2*i)
+        }
+    }
 }
 
 var One = []int64{1}
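[Editor's illustration] Beyond the runtime's capacity checks, the allocation saving itself can be observed with testing.AllocsPerRun. A hedged sketch of such a test (hypothetical, not part of this CL; it assumes optimizations are enabled, no race detector, and elements that fit the 32-byte threshold):

package demo

import "testing"

var sinkLen int // package-level sink: uses the slice without leaking it

func TestSmallAppendStaysOnStack(t *testing.T) {
    allocs := testing.AllocsPerRun(100, func() {
        var s []int64
        s = append(s, 1) // first growth may claim the stack backing store
        sinkLen = len(s)
    })
    if allocs != 0 {
        t.Errorf("got %v allocs/op, want 0", allocs)
    }
}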

test/escape2.go

Lines changed: 4 additions & 4 deletions

@@ -494,13 +494,13 @@ func foo70(mv1 *MV, m M) { // ERROR "leaking param: m$" "leaking param: mv1$"
 
 func foo71(x *int) []*int { // ERROR "leaking param: x$"
     var y []*int
-    y = append(y, x)
+    y = append(y, x) // ERROR "append escapes to heap"
     return y
 }
 
 func foo71a(x int) []*int { // ERROR "moved to heap: x$"
     var y []*int
-    y = append(y, &x)
+    y = append(y, &x) // ERROR "append escapes to heap"
     return y
 }
 
@@ -860,12 +860,12 @@ func foo104(x []*int) { // ERROR "leaking param content: x"
 
 // does not leak x but does leak content
 func foo105(x []*int) { // ERROR "leaking param content: x"
-    _ = append(y, x...)
+    _ = append(y, x...) // ERROR "append does not escape"
 }
 
 // does leak x
 func foo106(x *int) { // ERROR "leaking param: x$"
-    _ = append(y, x)
+    _ = append(y, x) // ERROR "append does not escape"
 }
 
 func foo107(x *int) map[*int]*int { // ERROR "leaking param: x$"
