Skip to content

Commit abd9acb

Browse files
committed
Add timeout and retry logic for finding NGINX PID file (nginx#676)
* Add timeout and retry logic for the NGINX pid file * Add timeout and retry logic for the NGINX pid file
1 parent 6cfcf67 commit abd9acb

File tree

2 files changed

+86
-10
lines changed

2 files changed

+86
-10
lines changed

internal/nginx/runtime/manager.go

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,27 @@ package runtime
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
7+
"io/fs"
68
"os"
79
"strconv"
810
"strings"
911
"syscall"
1012
"time"
13+
14+
"k8s.io/apimachinery/pkg/util/wait"
1115
)
1216

13-
const pidFile = "/etc/nginx/nginx.pid"
17+
const (
18+
pidFile = "/etc/nginx/nginx.pid"
19+
pidFileTimeout = 5 * time.Second
20+
)
1421

15-
type readFileFunc func(string) ([]byte, error)
22+
type (
23+
readFileFunc func(string) ([]byte, error)
24+
checkFileFunc func(string) (fs.FileInfo, error)
25+
)
1626

1727
//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . Manager
1828

@@ -31,13 +41,8 @@ func NewManagerImpl() *ManagerImpl {
3141
}
3242

3343
func (m *ManagerImpl) Reload(ctx context.Context) error {
34-
// FIXME(pleshakov): Before reload attempt, make sure NGINX is running.
35-
// If the gateway container starts before NGINX container (which is possible),
36-
// then it is possible that a reload can be attempted when NGINX is not running yet.
37-
// Make sure to prevent this case, so we don't get an error.
38-
3944
// We find the main NGINX PID on every reload because it will change if the NGINX container is restarted.
40-
pid, err := findMainProcess(os.ReadFile)
45+
pid, err := findMainProcess(ctx, os.Stat, os.ReadFile, pidFileTimeout)
4146
if err != nil {
4247
return fmt.Errorf("failed to find NGINX main process: %w", err)
4348
}
@@ -65,7 +70,33 @@ func (m *ManagerImpl) Reload(ctx context.Context) error {
6570
return nil
6671
}
6772

68-
func findMainProcess(readFile readFileFunc) (int, error) {
73+
func findMainProcess(
74+
ctx context.Context,
75+
checkFile checkFileFunc,
76+
readFile readFileFunc,
77+
timeout time.Duration,
78+
) (int, error) {
79+
ctx, cancel := context.WithTimeout(ctx, timeout)
80+
defer cancel()
81+
82+
err := wait.PollUntilContextCancel(
83+
ctx,
84+
1*time.Second,
85+
true, /* poll immediately */
86+
func(ctx context.Context) (bool, error) {
87+
_, err := checkFile(pidFile)
88+
if err == nil {
89+
return true, nil
90+
}
91+
if !errors.Is(err, fs.ErrNotExist) {
92+
return false, err
93+
}
94+
return false, nil
95+
})
96+
if err != nil {
97+
return 0, err
98+
}
99+
69100
content, err := readFile(pidFile)
70101
if err != nil {
71102
return 0, err

internal/nginx/runtime/manager_test.go

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
package runtime
22

33
import (
4+
"context"
45
"errors"
6+
"io/fs"
57
"testing"
8+
"time"
69
)
710

811
func TestFindMainProcess(t *testing.T) {
@@ -18,40 +21,82 @@ func TestFindMainProcess(t *testing.T) {
1821
return nil, errors.New("error")
1922
}
2023

24+
checkFileFuncGen := func(content fs.FileInfo) checkFileFunc {
25+
return func(name string) (fs.FileInfo, error) {
26+
if name != pidFile {
27+
return nil, errors.New("error")
28+
}
29+
return content, nil
30+
}
31+
}
32+
checkFileError := func(string) (fs.FileInfo, error) {
33+
return nil, errors.New("error")
34+
}
35+
var testFileInfo fs.FileInfo
36+
ctx := context.Background()
37+
cancellingCtx, cancel := context.WithCancel(ctx)
38+
time.AfterFunc(1*time.Millisecond, cancel)
39+
2140
tests := []struct {
41+
ctx context.Context
2242
readFile readFileFunc
43+
checkFile checkFileFunc
2344
msg string
2445
expected int
2546
expectError bool
2647
}{
2748
{
49+
ctx: ctx,
2850
readFile: readFileFuncGen([]byte("1\n")),
51+
checkFile: checkFileFuncGen(testFileInfo),
2952
expected: 1,
3053
expectError: false,
3154
msg: "normal case",
3255
},
3356
{
57+
ctx: ctx,
3458
readFile: readFileFuncGen([]byte("")),
59+
checkFile: checkFileFuncGen(testFileInfo),
3560
expected: 0,
3661
expectError: true,
3762
msg: "empty file content",
3863
},
3964
{
65+
ctx: ctx,
4066
readFile: readFileFuncGen([]byte("not a number")),
67+
checkFile: checkFileFuncGen(testFileInfo),
4168
expected: 0,
4269
expectError: true,
4370
msg: "bad file content",
4471
},
4572
{
73+
ctx: ctx,
4674
readFile: readFileError,
75+
checkFile: checkFileFuncGen(testFileInfo),
4776
expected: 0,
4877
expectError: true,
4978
msg: "cannot read file",
5079
},
80+
{
81+
ctx: ctx,
82+
readFile: readFileFuncGen([]byte("1\n")),
83+
checkFile: checkFileError,
84+
expected: 0,
85+
expectError: true,
86+
msg: "cannot find pid file",
87+
},
88+
{
89+
ctx: cancellingCtx,
90+
readFile: readFileFuncGen([]byte("1\n")),
91+
checkFile: checkFileError,
92+
expected: 0,
93+
expectError: true,
94+
msg: "context canceled",
95+
},
5196
}
5297

5398
for _, test := range tests {
54-
result, err := findMainProcess(test.readFile)
99+
result, err := findMainProcess(test.ctx, test.checkFile, test.readFile, 2*time.Millisecond)
55100

56101
if result != test.expected {
57102
t.Errorf("findMainProcess() returned %d but expected %d for case %q", result, test.expected, test.msg)

0 commit comments

Comments
 (0)