Skip to content

Commit cc41b65

Browse files
committed
fixup! 🐛 Refactor manager to avoid race conditions and provide clean shutdown
Signed-off-by: Vince Prignano <vincepri@vmware.com>
1 parent 38f1b90 commit cc41b65

File tree

2 files changed

+10
-25
lines changed

2 files changed

+10
-25
lines changed

pkg/manager/internal.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -567,16 +567,16 @@ func (cm *controllerManager) engageStopProcedure(stopComplete <-chan struct{}) e
567567
cm.logger.Info("Stopping and waiting for non leader election runnables")
568568
cm.runnables.Others.StopAndWait(cm.shutdownCtx)
569569

570+
// Stop all the leader election runnables, which includes reconcilers.
571+
cm.logger.Info("Stopping and waiting for leader election runnables")
572+
cm.runnables.LeaderElection.StopAndWait(cm.shutdownCtx)
573+
570574
// Stop the caches before the leader election runnables, this is an important
571575
// step to make sure that we don't race with the reconcilers by receiving more events
572576
// from the API servers and enqueueing them.
573577
cm.logger.Info("Stopping and waiting for caches")
574578
cm.runnables.Caches.StopAndWait(cm.shutdownCtx)
575579

576-
// Stop all the leader election runnables, which includes reconcilers.
577-
cm.logger.Info("Stopping and waiting for leader election runnables")
578-
cm.runnables.LeaderElection.StopAndWait(cm.shutdownCtx)
579-
580580
// Webhooks should come last, as they might be still serving some requests.
581581
cm.logger.Info("Stopping and waiting for webhooks")
582582
cm.runnables.Webhooks.StopAndWait(cm.shutdownCtx)

pkg/manager/runnable_group.go

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -54,29 +54,15 @@ func newRunnables(errChan chan error) *runnables {
5454
// The runnables added before Start are started when Start is called.
5555
// The runnables added after Start are started directly.
5656
func (r *runnables) Add(fn Runnable, ready runnableCheck) error {
57+
if ready == nil {
58+
// If we don't have a readiness check, always return true.
59+
ready = func(_ context.Context) bool { return true }
60+
}
61+
5762
switch runnable := fn.(type) {
5863
case hasCache:
5964
return r.Caches.Add(fn, func(ctx context.Context) bool {
60-
// Run the ready check for the cache a fixed number of times
61-
// backing off a bit; this is to give time to the runnables
62-
// to start up before their health check returns true.
63-
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
64-
Duration: 10 * time.Millisecond,
65-
Steps: 10,
66-
Factor: 1.0,
67-
}, func() (bool, error) {
68-
for i := 0; i < 10; i++ {
69-
<-time.After(time.Duration(i) * 10 * time.Millisecond)
70-
if !runnable.GetCache().WaitForCacheSync(ctx) {
71-
continue
72-
}
73-
return true, nil
74-
}
75-
return false, nil
76-
}); err != nil {
77-
return false
78-
}
79-
return true
65+
return ready(ctx) && runnable.GetCache().WaitForCacheSync(ctx)
8066
})
8167
case *webhook.Server:
8268
return r.Webhooks.Add(fn, ready)
@@ -259,7 +245,6 @@ func (r *runnableGroup) Add(rn Runnable, ready runnableCheck) error {
259245
}
260246
r.stop.RUnlock()
261247

262-
// If we don't have a readiness check, always return true.
263248
if ready == nil {
264249
ready = func(_ context.Context) bool { return true }
265250
}

0 commit comments

Comments
 (0)