Skip to content

Commit 9c4fe3f

Browse files
committed
Refactor container restart command to use job
1 parent 978dd44 commit 9c4fe3f

File tree

3 files changed

+77
-47
lines changed

3 files changed

+77
-47
lines changed

tests/framework/resourcemanager.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ func (rm *ResourceManager) readAndHandleObjects(
159159
files []string,
160160
) error {
161161
for _, file := range files {
162-
data, err := rm.getFileContents(file)
162+
data, err := rm.GetFileContents(file)
163163
if err != nil {
164164
return err
165165
}
@@ -187,9 +187,9 @@ func (rm *ResourceManager) readAndHandleObjects(
187187
return nil
188188
}
189189

190-
// getFileContents takes a string that can either be a local file
190+
// GetFileContents takes a string that can either be a local file
191191
// path or an https:// URL to YAML manifests and provides the contents.
192-
func (rm *ResourceManager) getFileContents(file string) (*bytes.Buffer, error) {
192+
func (rm *ResourceManager) GetFileContents(file string) (*bytes.Buffer, error) {
193193
if strings.HasPrefix(file, "http://") {
194194
return nil, fmt.Errorf("data can't be retrieved from %s: http is not supported, use https", file)
195195
} else if strings.HasPrefix(file, "https://") {
@@ -314,7 +314,7 @@ func (rm *ResourceManager) waitForRoutesToBeReady(ctx context.Context, namespace
314314

315315
var numParents, readyCount int
316316
for _, route := range routeList.Items {
317-
numParents += len(route.Status.Parents)
317+
numParents += len(route.Status.Parents) // extract from the parentref not the status.
318318
for _, parent := range route.Status.Parents {
319319
for _, cond := range parent.Conditions {
320320
if cond.Type == string(v1.RouteConditionAccepted) && cond.Status == metav1.ConditionTrue {

tests/suite/graceful_recovery_test.go

Lines changed: 46 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,26 @@ package suite
33
import (
44
"context"
55
"errors"
6+
"fmt"
67
"net/http"
7-
"os/exec"
88
"strings"
99
"time"
1010

1111
. "github.com/onsi/ginkgo/v2"
1212
. "github.com/onsi/gomega"
13+
v1 "k8s.io/api/batch/v1"
1314
coordination "k8s.io/api/coordination/v1"
1415
core "k8s.io/api/core/v1"
1516
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1617
"k8s.io/apimachinery/pkg/types"
1718
"k8s.io/apimachinery/pkg/util/wait"
1819
"sigs.k8s.io/controller-runtime/pkg/client"
20+
"sigs.k8s.io/yaml"
1921

2022
"github.com/nginxinc/nginx-gateway-fabric/tests/framework"
2123
)
2224

2325
const (
24-
// FIXME(bjee19): Find an automated way to keep the version updated here similar to dependabot.
25-
// https://github.com/nginxinc/nginx-gateway-fabric/issues/1665
26-
debugImage = "busybox:1.28"
27-
2826
teaURL = "https://cafe.example.com/tea"
2927
coffeeURL = "http://cafe.example.com/coffee"
3028
nginxContainerName = "nginx"
@@ -72,8 +70,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov
7270
leaseName, err := getLeaderElectionLeaseHolderName()
7371
Expect(err).ToNot(HaveOccurred())
7472

75-
output, err := restartNGFProcess()
76-
Expect(err).ToNot(HaveOccurred(), string(output))
73+
restartNGFProcess()
7774

7875
checkContainerLogsForErrors(podNames[0])
7976

@@ -104,8 +101,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov
104101
leaseName, err := getLeaderElectionLeaseHolderName()
105102
Expect(err).ToNot(HaveOccurred())
106103

107-
output, err := restartNginxContainer()
108-
Expect(err).ToNot(HaveOccurred(), string(output))
104+
restartNginxContainer()
109105

110106
checkContainerLogsForErrors(podNames[0])
111107

@@ -129,64 +125,40 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov
129125
})
130126
})
131127

132-
func restartNginxContainer() ([]byte, error) {
128+
func restartNginxContainer() {
133129
podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout)
134130
Expect(err).ToNot(HaveOccurred())
135131
Expect(podNames).ToNot(BeEmpty())
136132

137133
restartCount, err := getContainerRestartCount(nginxContainerName, podNames[0])
138134
Expect(err).ToNot(HaveOccurred())
139135

140-
output, err := exec.Command( // nolint:gosec
141-
"kubectl",
142-
"exec",
143-
"-n",
144-
ngfNamespace,
145-
podNames[0],
146-
"--container",
147-
"nginx",
148-
"--",
149-
"sh",
150-
"-c",
151-
"$(PID=$(pgrep -f \"[n]ginx: master process\") && kill -9 $PID)").CombinedOutput()
152-
if err != nil {
153-
return output, err
154-
}
136+
job, err := runNodeDebuggerJob(podNames[0], "PID=$(pgrep -f \"[n]ginx: master process\") && kill -9 $PID")
137+
Expect(err).ToNot(HaveOccurred())
155138

156139
err = waitForContainerRestart(podNames[0], nginxContainerName, restartCount)
157140
Expect(err).ToNot(HaveOccurred())
158141

159-
return nil, nil
142+
err = resourceManager.Delete([]client.Object{job})
143+
Expect(err).ToNot(HaveOccurred())
160144
}
161145

162-
func restartNGFProcess() ([]byte, error) {
146+
func restartNGFProcess() {
163147
podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout)
164148
Expect(err).ToNot(HaveOccurred())
165149
Expect(podNames).ToNot(BeEmpty())
166150

167151
restartCount, err := getContainerRestartCount(ngfContainerName, podNames[0])
168152
Expect(err).ToNot(HaveOccurred())
169153

170-
output, err := exec.Command( // nolint:gosec
171-
"kubectl",
172-
"debug",
173-
"-n",
174-
ngfNamespace,
175-
podNames[0],
176-
"--image="+debugImage,
177-
"--target=nginx-gateway",
178-
"--",
179-
"sh",
180-
"-c",
181-
"$(PID=$(pgrep -f \"/[u]sr/bin/gateway\") && kill -9 $PID)").CombinedOutput()
182-
if err != nil {
183-
return output, err
184-
}
154+
job, err := runNodeDebuggerJob(podNames[0], "PID=$(pgrep -f \"/[u]sr/bin/gateway\") && kill -9 $PID")
155+
Expect(err).ToNot(HaveOccurred())
185156

186157
err = waitForContainerRestart(podNames[0], ngfContainerName, restartCount)
187158
Expect(err).ToNot(HaveOccurred())
188159

189-
return nil, nil
160+
err = resourceManager.Delete([]client.Object{job})
161+
Expect(err).ToNot(HaveOccurred())
190162
}
191163

192164
func waitForContainerRestart(ngfPodName string, containerName string, currentRestartCount int) error {
@@ -355,3 +327,34 @@ func getContainerRestartCount(containerName, ngfPodName string) (int, error) {
355327

356328
return restartCount, nil
357329
}
330+
331+
func runNodeDebuggerJob(ngfPodName, jobScript string) (*v1.Job, error) {
332+
ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout)
333+
defer cancel()
334+
335+
var ngfPod core.Pod
336+
if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: ngfPodName}, &ngfPod); err != nil {
337+
return nil, errors.New("could not retrieve ngfPod")
338+
}
339+
340+
b, err := resourceManager.GetFileContents("graceful-recovery/node-debugger-job.yaml")
341+
if err != nil {
342+
return nil, errors.New("error processing node debugger job file")
343+
}
344+
345+
job := &v1.Job{}
346+
_ = v1.AddToScheme(resourceManager.K8sClient.Scheme())
347+
if err = yaml.Unmarshal(b.Bytes(), job); err != nil {
348+
return nil, errors.New("error with yaml unmarshal")
349+
}
350+
351+
job.Spec.Template.Spec.NodeSelector["kubernetes.io/hostname"] = ngfPod.Spec.NodeName
352+
job.Spec.Template.Spec.Containers[0].Args = []string{jobScript}
353+
job.Namespace = ngfNamespace
354+
355+
if err = resourceManager.Apply([]client.Object{job}); err != nil {
356+
return nil, fmt.Errorf("errored in applying job: %w", err)
357+
}
358+
359+
return job, nil
360+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
apiVersion: batch/v1
2+
kind: Job
3+
metadata:
4+
name: node-debugger-job
5+
spec:
6+
template:
7+
spec:
8+
hostPID: true
9+
hostIPC: true
10+
nodeSelector:
11+
kubernetes.io/hostname: ""
12+
containers:
13+
- name: node-debugger-container
14+
image: ubuntu
15+
command: ["/bin/bash", "-c"]
16+
args: []
17+
securityContext:
18+
privileged: true
19+
volumeMounts:
20+
- name: host-fs
21+
mountPath: /mnt/host
22+
volumes:
23+
- name: host-fs
24+
hostPath:
25+
path: /
26+
type: Directory
27+
restartPolicy: Never

0 commit comments

Comments
 (0)