Skip to content

Commit 8b2900e

Browse files
authored
Add readiness probe (#1047)
Problem: The NKG Pod would report Ready as soon as it started, which could lead to traffic being sent too soon before nginx was actually configured. Solution: Add a readiness probe that will report Ready once the controller has written its first config to nginx (or has nothing to do on startup). Also changed the metrics "disable" helm flag to "enable" to be consistent and easier to read.
1 parent 7871727 commit 8b2900e

File tree

19 files changed

+419
-127
lines changed

19 files changed

+419
-127
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ build-nkg-debug-image: debug-build build-nkg-image ## Build NKG image with debug
135135
generate-manifests: ## Generate manifests using Helm.
136136
cp $(CHART_DIR)/crds/* $(MANIFEST_DIR)/crds/
137137
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) $(HELM_TEMPLATE_EXTRA_ARGS_FOR_ALL_MANIFESTS_FILE) -n nginx-gateway | cat $(strip $(MANIFEST_DIR))/namespace.yaml - > $(strip $(MANIFEST_DIR))/nginx-gateway.yaml
138-
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set metrics.disable=true -n nginx-gateway -s templates/deployment.yaml > conformance/provisioner/static-deployment.yaml
138+
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set metrics.enable=false -n nginx-gateway -s templates/deployment.yaml > conformance/provisioner/static-deployment.yaml
139139
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) -n nginx-gateway -s templates/service.yaml > $(strip $(MANIFEST_DIR))/service/loadbalancer.yaml
140140
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set service.annotations.'service\.beta\.kubernetes\.io\/aws-load-balancer-type'="nlb" -n nginx-gateway -s templates/service.yaml > $(strip $(MANIFEST_DIR))/service/loadbalancer-aws-nlb.yaml
141141
helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set service.type=NodePort --set service.externalTrafficPolicy="" -n nginx-gateway -s templates/service.yaml > $(strip $(MANIFEST_DIR))/service/nodeport.yaml

cmd/gateway/commands.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"go.uber.org/zap"
1010
"k8s.io/apimachinery/pkg/types"
1111
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
12+
"sigs.k8s.io/controller-runtime/pkg/log"
1213
ctlrZap "sigs.k8s.io/controller-runtime/pkg/log/zap"
1314

1415
"github.com/nginxinc/nginx-kubernetes-gateway/internal/mode/provisioner"
@@ -42,11 +43,16 @@ var (
4243
updateGCStatus bool
4344
disableMetrics bool
4445
metricsSecure bool
46+
disableHealth bool
4547

4648
metricsListenPort = intValidatingValue{
4749
validator: validatePort,
4850
value: 9113,
4951
}
52+
healthListenPort = intValidatingValue{
53+
validator: validatePort,
54+
value: 8081,
55+
}
5056
gateway = namespacedNameValue{}
5157
configName = stringValidatingValue{
5258
validator: validateResourceName,
@@ -92,6 +98,11 @@ func createStaticModeCommand() *cobra.Command {
9298
"commit", commit,
9399
"date", date,
94100
)
101+
log.SetLogger(logger)
102+
103+
if err := ensureNoPortCollisions(metricsListenPort.value, healthListenPort.value); err != nil {
104+
return fmt.Errorf("error validating ports: %w", err)
105+
}
95106

96107
podIP := os.Getenv("POD_IP")
97108
if err := validateIP(podIP); err != nil {
@@ -126,6 +137,10 @@ func createStaticModeCommand() *cobra.Command {
126137
GatewayNsName: gwNsName,
127138
UpdateGatewayClassStatus: updateGCStatus,
128139
MetricsConfig: metricsConfig,
140+
HealthConfig: config.HealthConfig{
141+
Enabled: !disableHealth,
142+
Port: healthListenPort.value,
143+
},
129144
}
130145

131146
if err := static.StartManager(conf); err != nil {
@@ -171,7 +186,7 @@ func createStaticModeCommand() *cobra.Command {
171186
cmd.Flags().Var(
172187
&metricsListenPort,
173188
"metrics-port",
174-
"Set the port where the metrics are exposed. Format: [1023 - 65535]",
189+
"Set the port where the metrics are exposed. Format: [1024 - 65535]",
175190
)
176191

177192
cmd.Flags().BoolVar(
@@ -182,6 +197,19 @@ func createStaticModeCommand() *cobra.Command {
182197
" Please note that this endpoint will be secured with a self-signed certificate.",
183198
)
184199

200+
cmd.Flags().BoolVar(
201+
&disableHealth,
202+
"health-disable",
203+
false,
204+
"Disable running the health probe server.",
205+
)
206+
207+
cmd.Flags().Var(
208+
&healthListenPort,
209+
"health-port",
210+
"Set the port where the health probe server is exposed. Format: [1024 - 65535]",
211+
)
212+
185213
return cmd
186214
}
187215

cmd/gateway/commands_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ func TestStaticModeCmdFlagValidation(t *testing.T) {
121121
"--metrics-port=9114",
122122
"--metrics-disable",
123123
"--metrics-secure-serving",
124+
"--health-port=8081",
125+
"--health-disable",
124126
},
125127
wantErr: false,
126128
},
@@ -214,6 +216,33 @@ func TestStaticModeCmdFlagValidation(t *testing.T) {
214216
expectedErrPrefix: `invalid argument "999" for "--metrics-secure-serving" flag: strconv.ParseBool:` +
215217
` parsing "999": invalid syntax`,
216218
},
219+
{
220+
name: "health-port is invalid type",
221+
args: []string{
222+
"--health-port=invalid", // not an int
223+
},
224+
wantErr: true,
225+
expectedErrPrefix: `invalid argument "invalid" for "--health-port" flag: failed to parse int value:` +
226+
` strconv.ParseInt: parsing "invalid": invalid syntax`,
227+
},
228+
{
229+
name: "health-port is outside of range",
230+
args: []string{
231+
"--health-port=999", // outside of range
232+
},
233+
wantErr: true,
234+
expectedErrPrefix: `invalid argument "999" for "--health-port" flag:` +
235+
` port outside of valid port range [1024 - 65535]: 999`,
236+
},
237+
{
238+
name: "health-disable is not a bool",
239+
args: []string{
240+
"--health-disable=999", // not a bool
241+
},
242+
wantErr: true,
243+
expectedErrPrefix: `invalid argument "999" for "--health-disable" flag: strconv.ParseBool:` +
244+
` parsing "999": invalid syntax`,
245+
},
217246
}
218247

219248
for _, test := range tests {

cmd/gateway/validation.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,17 @@ func validatePort(port int) error {
107107
}
108108
return nil
109109
}
110+
111+
// ensureNoPortCollisions checks if the same port has been defined multiple times
112+
func ensureNoPortCollisions(ports ...int) error {
113+
seen := make(map[int]struct{})
114+
115+
for _, port := range ports {
116+
if _, ok := seen[port]; ok {
117+
return fmt.Errorf("port %d has been defined multiple times", port)
118+
}
119+
seen[port] = struct{}{}
120+
}
121+
122+
return nil
123+
}

cmd/gateway/validation_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,3 +331,10 @@ func TestValidatePort(t *testing.T) {
331331
})
332332
}
333333
}
334+
335+
func TestEnsureNoPortCollisions(t *testing.T) {
336+
g := NewWithT(t)
337+
338+
g.Expect(ensureNoPortCollisions(9113, 8081)).To(Succeed())
339+
g.Expect(ensureNoPortCollisions(9113, 9113)).ToNot(Succeed())
340+
}

conformance/provisioner/static-deployment.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ spec:
2929
- --gatewayclass=nginx
3030
- --config=nginx-gateway-config
3131
- --metrics-disable
32+
- --health-port=8081
3233
env:
3334
- name: POD_IP
3435
valueFrom:
@@ -41,6 +42,15 @@ spec:
4142
image: ghcr.io/nginxinc/nginx-kubernetes-gateway:edge
4243
imagePullPolicy: Always
4344
name: nginx-gateway
45+
ports:
46+
- name: health
47+
containerPort: 8081
48+
readinessProbe:
49+
httpGet:
50+
path: /readyz
51+
port: health
52+
initialDelaySeconds: 3
53+
periodSeconds: 1
4454
securityContext:
4555
allowPrivilegeEscalation: false
4656
capabilities:

deploy/helm-chart/README.md

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -132,25 +132,28 @@ kubectl delete -f https://github.com/kubernetes-sigs/gateway-api/releases/downlo
132132

133133
The following tables lists the configurable parameters of the NGINX Kubernetes Gateway chart and their default values.
134134

135-
| Parameter | Description | Default Value |
136-
|--------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
137-
| `nginxGateway.image.repository` | The repository for the NGINX Kubernetes Gateway image. | ghcr.io/nginxinc/nginx-kubernetes-gateway |
138-
| `nginxGateway.image.tag` | The tag for the NGINX Kubernetes Gateway image. | edge |
139-
| `nginxGateway.image.pullPolicy` | The `imagePullPolicy` for the NGINX Kubernetes Gateway image. | Always |
140-
| `nginxGateway.gatewayClassName` | The name of the GatewayClass for the NGINX Kubernetes Gateway deployment. | nginx |
141-
| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | gateway.nginx.org/nginx-gateway-controller |
142-
| `nginxGateway.kind` | The kind of the NGINX Kubernetes Gateway installation - currently, only Deployment is supported. | deployment |
143-
|`nginxGateway.config` | The dynamic configuration for the control plane that is contained in the NginxGateway resource | [See nginxGateway.config section](values.yaml) |
144-
| `nginx.image.repository` | The repository for the NGINX image. | ghcr.io/nginxinc/nginx-kubernetes-gateway/nginx |
145-
| `nginx.image.tag` | The tag for the NGINX image. | edge |
146-
| `nginx.image.pullPolicy` | The `imagePullPolicy` for the NGINX image. | Always |
147-
| `serviceAccount.annotations` | The `annotations` for the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | {} |
148-
| `serviceAccount.name` | Name of the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | Autogenerated |
149-
| `service.create` | Creates a service to expose the NGINX Kubernetes Gateway pods. | true |
150-
| `service.type` | The type of service to create for the NGINX Kubernetes Gateway. | Loadbalancer |
151-
| `service.externalTrafficPolicy` | The `externalTrafficPolicy` of the service. The value `Local` preserves the client source IP. | Local |
152-
| `service.annotations` | The `annotations` of the NGINX Kubernetes Gateway service. | {} |
153-
| `service.ports` | A list of ports to expose through the NGINX Kubernetes Gateway service. Update it to match the listener ports from your Gateway resource. Follows the conventional Kubernetes yaml syntax for service ports. | [ port: 80, targetPort: 80, protocol: TCP, name: http; port: 443, targetPort: 443, protocol: TCP, name: https ] |
154-
| `metrics.disable` | Disable exposing metrics in the Prometheus format. |false |
155-
| `metrics.port` | Set the port where the Prometheus metrics are exposed. Format: [1024 - 65535] |9113 |
156-
| `metrics.secure` | Enable serving metrics via https. By default metrics are served via http. Please note that this endpoint will be secured with a self-signed certificate. |false |
135+
| Parameter | Description | Default Value |
136+
|-----------|-------------|---------------|
137+
| `nginxGateway.image.repository` | The repository for the NGINX Kubernetes Gateway image. | ghcr.io/nginxinc/nginx-kubernetes-gateway |
138+
| `nginxGateway.image.tag` | The tag for the NGINX Kubernetes Gateway image. | edge |
139+
| `nginxGateway.image.pullPolicy` | The `imagePullPolicy` for the NGINX Kubernetes Gateway image. | Always |
140+
| `nginxGateway.gatewayClassName` | The name of the GatewayClass for the NGINX Kubernetes Gateway deployment. | nginx |
141+
| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | gateway.nginx.org/nginx-gateway-controller |
142+
| `nginxGateway.kind` | The kind of the NGINX Kubernetes Gateway installation - currently, only Deployment is supported. | deployment |
143+
| `nginxGateway.config` | The dynamic configuration for the control plane that is contained in the NginxGateway resource. | [See nginxGateway.config section](values.yaml) |
144+
| `nginxGateway.readinessProbe.enable` | Enable the /readyz endpoint on the control plane. | true |
145+
| `nginxGateway.readinessProbe.port` | Port in which the readiness endpoint is exposed. | 8081 |
146+
| `nginxGateway.readinessProbe.initialDelaySeconds` | The number of seconds after the Pod has started before the readiness probes are initiated. | 3 |
147+
| `nginx.image.repository` | The repository for the NGINX image. | ghcr.io/nginxinc/nginx-kubernetes-gateway/nginx |
148+
| `nginx.image.tag` | The tag for the NGINX image. | edge |
149+
| `nginx.image.pullPolicy` | The `imagePullPolicy` for the NGINX image. | Always |
150+
| `serviceAccount.annotations` | The `annotations` for the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | {} |
151+
| `serviceAccount.name` | Name of the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | Autogenerated |
152+
| `service.create` | Creates a service to expose the NGINX Kubernetes Gateway pods. | true |
153+
| `service.type` | The type of service to create for the NGINX Kubernetes Gateway. | Loadbalancer |
154+
| `service.externalTrafficPolicy` | The `externalTrafficPolicy` of the service. The value `Local` preserves the client source IP. | Local |
155+
| `service.annotations` | The `annotations` of the NGINX Kubernetes Gateway service. | {} |
156+
| `service.ports` | A list of ports to expose through the NGINX Kubernetes Gateway service. Update it to match the listener ports from your Gateway resource. Follows the conventional Kubernetes yaml syntax for service ports. | [ port: 80, targetPort: 80, protocol: TCP, name: http; port: 443, targetPort: 443, protocol: TCP, name: https ] |
157+
| `metrics.enable` | Enable exposing metrics in the Prometheus format. | true |
158+
| `metrics.port` | Set the port where the Prometheus metrics are exposed. Format: [1024 - 65535] | 9113 |
159+
| `metrics.secure` | Enable serving metrics via https. By default metrics are served via http. Please note that this endpoint will be secured with a self-signed certificate. | false |

deploy/helm-chart/templates/deployment.yaml

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ spec:
1616
metadata:
1717
labels:
1818
{{- include "nginx-gateway.selectorLabels" . | nindent 8 }}
19-
{{- if not .Values.metrics.disable }}
19+
{{- if .Values.metrics.enable }}
2020
annotations:
2121
prometheus.io/scrape: "true"
2222
prometheus.io/port: "{{ .Values.metrics.port }}"
@@ -31,14 +31,19 @@ spec:
3131
- --gateway-ctlr-name={{ .Values.nginxGateway.gatewayControllerName }}
3232
- --gatewayclass={{ .Values.nginxGateway.gatewayClassName }}
3333
- --config={{ include "nginx-gateway.config-name" . }}
34-
{{- if not .Values.metrics.disable }}
34+
{{- if .Values.metrics.enable }}
3535
- --metrics-port={{ .Values.metrics.port }}
3636
{{- if .Values.metrics.secure }}
3737
- --metrics-secure-serving
3838
{{- end }}
3939
{{- else }}
4040
- --metrics-disable
4141
{{- end }}
42+
{{- if .Values.nginxGateway.readinessProbe.enable }}
43+
- --health-port={{ .Values.nginxGateway.readinessProbe.port }}
44+
{{- else }}
45+
- --health-disable
46+
{{- end }}
4247
env:
4348
- name: POD_IP
4449
valueFrom:
@@ -51,11 +56,21 @@ spec:
5156
image: {{ .Values.nginxGateway.image.repository }}:{{ .Values.nginxGateway.image.tag | default .Chart.AppVersion }}
5257
imagePullPolicy: {{ .Values.nginxGateway.image.pullPolicy }}
5358
name: nginx-gateway
54-
{{- if not .Values.metrics.disable }}
5559
ports:
60+
{{- if .Values.metrics.enable }}
5661
- name: metrics
5762
containerPort: {{ .Values.metrics.port }}
5863
{{- end }}
64+
{{- if .Values.nginxGateway.readinessProbe.enable }}
65+
- name: health
66+
containerPort: {{ .Values.nginxGateway.readinessProbe.port }}
67+
readinessProbe:
68+
httpGet:
69+
path: /readyz
70+
port: health
71+
initialDelaySeconds: {{ .Values.nginxGateway.readinessProbe.initialDelaySeconds }}
72+
periodSeconds: 1
73+
{{- end }}
5974
securityContext:
6075
allowPrivilegeEscalation: false
6176
capabilities:

0 commit comments

Comments
 (0)