diff --git a/Makefile b/Makefile index 670c7b3427..5fd725e3b3 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,8 @@ TAG ?= $(VERSION:v%=%)## The tag of the image. For example, 0.3.0 TARGET ?= local## The target of the build. Possible values: local and container KIND_KUBE_CONFIG=$${HOME}/.kube/kind/config## The location of the kind kubeconfig OUT_DIR ?= $(shell pwd)/build/out## The folder where the binary will be stored -ARCH ?= amd64## The architecture of the image and/or binary. For example: amd64 or arm64 +GOARCH ?= amd64## The architecture of the image and/or binary. For example: amd64 or arm64 +GOOS ?= linux## The OS of the image and/or binary. For example: linux or darwin override HELM_TEMPLATE_COMMON_ARGS += --set creator=template --set nameOverride=nginx-gateway## The common options for the Helm template command. override HELM_TEMPLATE_EXTRA_ARGS_FOR_ALL_MANIFESTS_FILE += --set service.create=false## The options to be passed to the full Helm templating command only. override NGINX_DOCKER_BUILD_OPTIONS += --build-arg NJS_DIR=$(NJS_DIR) --build-arg NGINX_CONF_DIR=$(NGINX_CONF_DIR) @@ -35,11 +36,11 @@ build-images: build-nkg-image build-nginx-image ## Build the NKG and nginx docke .PHONY: build-nkg-image build-nkg-image: check-for-docker build ## Build the NKG docker image - docker build --platform linux/$(ARCH) --target $(strip $(TARGET)) -f build/Dockerfile -t $(strip $(PREFIX)):$(strip $(TAG)) . + docker build --platform linux/$(GOARCH) --target $(strip $(TARGET)) -f build/Dockerfile -t $(strip $(PREFIX)):$(strip $(TAG)) . .PHONY: build-nginx-image build-nginx-image: check-for-docker ## Build the custom nginx image - docker build --platform linux/$(ARCH) $(strip $(NGINX_DOCKER_BUILD_OPTIONS)) -f build/Dockerfile.nginx -t $(strip $(NGINX_PREFIX)):$(strip $(TAG)) . + docker build --platform linux/$(GOARCH) $(strip $(NGINX_DOCKER_BUILD_OPTIONS)) -f build/Dockerfile.nginx -t $(strip $(NGINX_PREFIX)):$(strip $(TAG)) . .PHONY: check-for-docker check-for-docker: ## Check if Docker is installed @@ -49,13 +50,13 @@ check-for-docker: ## Check if Docker is installed build: ## Build the binary ifeq (${TARGET},local) @go version || (code=$$?; printf "\033[0;31mError\033[0m: unable to build locally\n"; exit $$code) - CGO_ENABLED=0 GOOS=linux GOARCH=$(ARCH) go build -trimpath -a -ldflags "$(GO_LINKER_FLAGS)" $(ADDITIONAL_GO_BUILD_FLAGS) -o $(OUT_DIR)/gateway github.com/nginxinc/nginx-kubernetes-gateway/cmd/gateway + CGO_ENABLED=0 GOOS=$(GOOS) GOARCH=$(GOARCH) go build -trimpath -a -ldflags "$(GO_LINKER_FLAGS)" $(ADDITIONAL_GO_BUILD_FLAGS) -o $(OUT_DIR)/gateway github.com/nginxinc/nginx-kubernetes-gateway/cmd/gateway endif .PHONY: build-goreleaser build-goreleaser: ## Build the binary using GoReleaser @goreleaser -v || (code=$$?; printf "\033[0;31mError\033[0m: there was a problem with GoReleaser. Follow the docs to install it https://goreleaser.com/install\n"; exit $$code) - GOOS=linux GOPATH=$(shell go env GOPATH) GOARCH=$(ARCH) goreleaser build --clean --snapshot --single-target + GOOS=linux GOPATH=$(shell go env GOPATH) GOARCH=$(GOARCH) goreleaser build --clean --snapshot --single-target .PHONY: generate generate: ## Run go generate diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go index 505939bce6..32eb9fa0ec 100644 --- a/cmd/gateway/commands.go +++ b/cmd/gateway/commands.go @@ -25,7 +25,6 @@ const ( gatewayCtrlNameFlag = "gateway-ctlr-name" gatewayCtrlNameUsageFmt = `The name of the Gateway controller. ` + `The controller name must be of the form: DOMAIN/PATH. The controller's domain is '%s'` - gatewayFlag = "gateway" ) var ( @@ -38,25 +37,6 @@ var ( gatewayClassName = stringValidatingValue{ validator: validateResourceName, } - - // Backing values for static subcommand cli flags. - updateGCStatus bool - disableMetrics bool - metricsSecure bool - disableHealth bool - - metricsListenPort = intValidatingValue{ - validator: validatePort, - value: 9113, - } - healthListenPort = intValidatingValue{ - validator: validatePort, - value: 8081, - } - gateway = namespacedNameValue{} - configName = stringValidatingValue{ - validator: validateResourceName, - } ) func createRootCommand() *cobra.Command { @@ -85,6 +65,46 @@ func createRootCommand() *cobra.Command { } func createStaticModeCommand() *cobra.Command { + // flag names + const ( + gatewayFlag = "gateway" + configFlag = "config" + updateGCStatusFlag = "update-gatewayclass-status" + metricsDisableFlag = "metrics-disable" + metricsSecureFlag = "metrics-secure-serving" + metricsPortFlag = "metrics-port" + healthDisableFlag = "health-disable" + healthPortFlag = "health-port" + leaderElectionDisableFlag = "leader-election-disable" + leaderElectionLockNameFlag = "leader-election-lock-name" + ) + + // flag values + var ( + updateGCStatus bool + gateway = namespacedNameValue{} + configName = stringValidatingValue{ + validator: validateResourceName, + } + disableMetrics bool + metricsSecure bool + metricsListenPort = intValidatingValue{ + validator: validatePort, + value: 9113, + } + disableHealth bool + healthListenPort = intValidatingValue{ + validator: validatePort, + value: 8081, + } + + disableLeaderElection bool + leaderElectionLockName = stringValidatingValue{ + validator: validateResourceName, + value: "nginx-gateway-leader-election-lock", + } + ) + cmd := &cobra.Command{ Use: "static-mode", Short: "Configure NGINX in the scope of a single Gateway resource", @@ -109,9 +129,14 @@ func createStaticModeCommand() *cobra.Command { return fmt.Errorf("error validating POD_IP environment variable: %w", err) } - namespace := os.Getenv("MY_NAMESPACE") + namespace := os.Getenv("POD_NAMESPACE") if namespace == "" { - return errors.New("MY_NAMESPACE environment variable must be set") + return errors.New("POD_NAMESPACE environment variable must be set") + } + + podName := os.Getenv("POD_NAME") + if podName == "" { + return errors.New("POD_NAME environment variable must be set") } var gwNsName *types.NamespacedName @@ -119,13 +144,6 @@ func createStaticModeCommand() *cobra.Command { gwNsName = &gateway.value } - metricsConfig := config.MetricsConfig{} - if !disableMetrics { - metricsConfig.Enabled = true - metricsConfig.Port = metricsListenPort.value - metricsConfig.Secure = metricsSecure - } - conf := config.Config{ GatewayCtlrName: gatewayCtlrName.value, ConfigName: configName.String(), @@ -136,11 +154,20 @@ func createStaticModeCommand() *cobra.Command { Namespace: namespace, GatewayNsName: gwNsName, UpdateGatewayClassStatus: updateGCStatus, - MetricsConfig: metricsConfig, HealthConfig: config.HealthConfig{ Enabled: !disableHealth, Port: healthListenPort.value, }, + MetricsConfig: config.MetricsConfig{ + Enabled: !disableMetrics, + Port: metricsListenPort.value, + Secure: metricsSecure, + }, + LeaderElection: config.LeaderElection{ + Enabled: !disableLeaderElection, + LockName: leaderElectionLockName.String(), + Identity: podName, + }, } if err := static.StartManager(conf); err != nil { @@ -163,7 +190,7 @@ func createStaticModeCommand() *cobra.Command { cmd.Flags().VarP( &configName, - "config", + configFlag, "c", `The name of the NginxGateway resource to be used for this controller's dynamic configuration.`+ ` Lives in the same Namespace as the controller.`, @@ -171,27 +198,27 @@ func createStaticModeCommand() *cobra.Command { cmd.Flags().BoolVar( &updateGCStatus, - "update-gatewayclass-status", + updateGCStatusFlag, true, "Update the status of the GatewayClass resource.", ) cmd.Flags().BoolVar( &disableMetrics, - "metrics-disable", + metricsDisableFlag, false, "Disable exposing metrics in the Prometheus format.", ) cmd.Flags().Var( &metricsListenPort, - "metrics-port", + metricsPortFlag, "Set the port where the metrics are exposed. Format: [1024 - 65535]", ) cmd.Flags().BoolVar( &metricsSecure, - "metrics-secure-serving", + metricsSecureFlag, false, "Enable serving metrics via https. By default metrics are served via http."+ " Please note that this endpoint will be secured with a self-signed certificate.", @@ -199,17 +226,33 @@ func createStaticModeCommand() *cobra.Command { cmd.Flags().BoolVar( &disableHealth, - "health-disable", + healthDisableFlag, false, "Disable running the health probe server.", ) cmd.Flags().Var( &healthListenPort, - "health-port", + healthPortFlag, "Set the port where the health probe server is exposed. Format: [1024 - 65535]", ) + cmd.Flags().BoolVar( + &disableLeaderElection, + leaderElectionDisableFlag, + false, + "Disable leader election. Leader election is used to avoid multiple replicas of the NGINX Kubernetes Gateway"+ + " reporting the status of the Gateway API resources. If disabled, "+ + "all replicas of NGINX Kubernetes Gateway will update the statuses of the Gateway API resources.", + ) + + cmd.Flags().Var( + &leaderElectionLockName, + leaderElectionLockNameFlag, + "The name of the leader election lock. "+ + "A Lease object with this name will be created in the same Namespace as the controller.", + ) + return cmd } diff --git a/cmd/gateway/commands_test.go b/cmd/gateway/commands_test.go index f6a4006e3f..5f545520ce 100644 --- a/cmd/gateway/commands_test.go +++ b/cmd/gateway/commands_test.go @@ -123,6 +123,8 @@ func TestStaticModeCmdFlagValidation(t *testing.T) { "--metrics-secure-serving", "--health-port=8081", "--health-disable", + "--leader-election-lock-name=my-lock", + "--leader-election-disable=false", }, wantErr: false, }, @@ -243,6 +245,22 @@ func TestStaticModeCmdFlagValidation(t *testing.T) { expectedErrPrefix: `invalid argument "999" for "--health-disable" flag: strconv.ParseBool:` + ` parsing "999": invalid syntax`, }, + { + name: "leader-election-lock-name is set to invalid string", + args: []string{ + "--leader-election-lock-name=!@#$", + }, + wantErr: true, + expectedErrPrefix: `invalid argument "!@#$" for "--leader-election-lock-name" flag: invalid format`, + }, + { + name: "leader-election-disable is set to empty string", + args: []string{ + "--leader-election-disable=", + }, + wantErr: true, + expectedErrPrefix: `invalid argument "" for "--leader-election-disable" flag: strconv.ParseBool`, + }, } for _, test := range tests { diff --git a/conformance/provisioner/static-deployment.yaml b/conformance/provisioner/static-deployment.yaml index 38c66a247f..fa8c1b1415 100644 --- a/conformance/provisioner/static-deployment.yaml +++ b/conformance/provisioner/static-deployment.yaml @@ -10,7 +10,6 @@ metadata: app.kubernetes.io/instance: nginx-gateway app.kubernetes.io/version: "edge" spec: - # We only support a single replica for now replicas: 1 selector: matchLabels: @@ -30,15 +29,20 @@ spec: - --config=nginx-gateway-config - --metrics-disable - --health-port=8081 + - --leader-election-lock-name=nginx-gateway-leader-election env: - name: POD_IP valueFrom: fieldRef: fieldPath: status.podIP - - name: MY_NAMESPACE + - name: POD_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name image: ghcr.io/nginxinc/nginx-kubernetes-gateway:edge imagePullPolicy: Always name: nginx-gateway diff --git a/deploy/helm-chart/README.md b/deploy/helm-chart/README.md index 88f41ae6f5..815e1f1535 100644 --- a/deploy/helm-chart/README.md +++ b/deploy/helm-chart/README.md @@ -12,9 +12,9 @@ This chart deploys the NGINX Kubernetes Gateway in your Kubernetes cluster. ### Installing the Gateway API resources > Note: The Gateway API resources from the standard channel (the CRDs and the validating webhook) must be installed -before deploying NGINX Kubernetes Gateway. If they are already installed in your cluster, please ensure they are the -correct version as supported by the NGINX Kubernetes Gateway - -[see the Technical Specifications](../../README.md#technical-specifications). +> before deploying NGINX Kubernetes Gateway. If they are already installed in your cluster, please ensure they are +> the correct version as supported by the NGINX Kubernetes Gateway - +> [see the Technical Specifications](../../README.md#technical-specifications). To install the Gateway resources from [the Gateway API repo](https://github.com/kubernetes-sigs/gateway-api), run: @@ -58,6 +58,7 @@ helm install my-release . --create-namespace --wait -n nginx-gateway ``` ## Upgrading the Chart + ### Upgrading the Gateway Resources Before you upgrade a release, ensure the Gateway API resources are the correct version as supported by the NGINX @@ -71,8 +72,8 @@ kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/downloa ### Upgrading the CRDs -Helm does not upgrade the NGINX Kubernetes Gateway CRDs during a release upgrade. Before you upgrade a release, -you must [pull the chart](#pulling-the-chart) from GitHub and run the following command to upgrade the CRDs: +Helm does not upgrade the NGINX Kubernetes Gateway CRDs during a release upgrade. Before you upgrade a release, you +must [pull the chart](#pulling-the-chart) from GitHub and run the following command to upgrade the CRDs: ```shell kubectl apply -f crds/ @@ -92,8 +93,8 @@ To upgrade the release `my-release`, run: helm upgrade my-release oci://ghcr.io/nginxinc/charts/nginx-kubernetes-gateway -n nginx-gateway ``` -This will upgrade to the latest stable release. To upgrade to the latest version from the `main` branch, -specify the `--version 0.0.0-edge` flag when upgrading. +This will upgrade to the latest stable release. To upgrade to the latest version from the `main` branch, specify +the `--version 0.0.0-edge` flag when upgrading. ### Upgrading the Chart from the Sources @@ -118,7 +119,7 @@ These commands remove all the Kubernetes components associated with the release ### Uninstalling the Gateway Resources ->**Warning: This command will delete all the corresponding custom resources in your cluster across all namespaces! +> **Warning: This command will delete all the corresponding custom resources in your cluster across all namespaces! Please ensure there are no custom resources that you want to keep and there are no other Gateway API implementations running in the cluster!** @@ -132,28 +133,31 @@ kubectl delete -f https://github.com/kubernetes-sigs/gateway-api/releases/downlo The following tables lists the configurable parameters of the NGINX Kubernetes Gateway chart and their default values. -| Parameter | Description | Default Value | -|-----------|-------------|---------------| -| `nginxGateway.image.repository` | The repository for the NGINX Kubernetes Gateway image. | ghcr.io/nginxinc/nginx-kubernetes-gateway | -| `nginxGateway.image.tag` | The tag for the NGINX Kubernetes Gateway image. | edge | -| `nginxGateway.image.pullPolicy` | The `imagePullPolicy` for the NGINX Kubernetes Gateway image. | Always | -| `nginxGateway.gatewayClassName` | The name of the GatewayClass for the NGINX Kubernetes Gateway deployment. | nginx | -| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | gateway.nginx.org/nginx-gateway-controller | -| `nginxGateway.kind` | The kind of the NGINX Kubernetes Gateway installation - currently, only Deployment is supported. | deployment | -| `nginxGateway.config` | The dynamic configuration for the control plane that is contained in the NginxGateway resource. | [See nginxGateway.config section](values.yaml) | -| `nginxGateway.readinessProbe.enable` | Enable the /readyz endpoint on the control plane. | true | -| `nginxGateway.readinessProbe.port` | Port in which the readiness endpoint is exposed. | 8081 | -| `nginxGateway.readinessProbe.initialDelaySeconds` | The number of seconds after the Pod has started before the readiness probes are initiated. | 3 | -| `nginx.image.repository` | The repository for the NGINX image. | ghcr.io/nginxinc/nginx-kubernetes-gateway/nginx | -| `nginx.image.tag` | The tag for the NGINX image. | edge | -| `nginx.image.pullPolicy` | The `imagePullPolicy` for the NGINX image. | Always | -| `serviceAccount.annotations` | The `annotations` for the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | {} | -| `serviceAccount.name` | Name of the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | Autogenerated | -| `service.create` | Creates a service to expose the NGINX Kubernetes Gateway pods. | true | -| `service.type` | The type of service to create for the NGINX Kubernetes Gateway. | Loadbalancer | -| `service.externalTrafficPolicy` | The `externalTrafficPolicy` of the service. The value `Local` preserves the client source IP. | Local | -| `service.annotations` | The `annotations` of the NGINX Kubernetes Gateway service. | {} | -| `service.ports` | A list of ports to expose through the NGINX Kubernetes Gateway service. Update it to match the listener ports from your Gateway resource. Follows the conventional Kubernetes yaml syntax for service ports. | [ port: 80, targetPort: 80, protocol: TCP, name: http; port: 443, targetPort: 443, protocol: TCP, name: https ] | -| `metrics.enable` | Enable exposing metrics in the Prometheus format. | true | -| `metrics.port` | Set the port where the Prometheus metrics are exposed. Format: [1024 - 65535] | 9113 | -| `metrics.secure` | Enable serving metrics via https. By default metrics are served via http. Please note that this endpoint will be secured with a self-signed certificate. | false | +| Parameter | Description | Default Value | +|---------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------| +| `nginxGateway.image.repository` | The repository for the NGINX Kubernetes Gateway image. | ghcr.io/nginxinc/nginx-kubernetes-gateway | +| `nginxGateway.image.tag` | The tag for the NGINX Kubernetes Gateway image. | edge | +| `nginxGateway.image.pullPolicy` | The `imagePullPolicy` for the NGINX Kubernetes Gateway image. | Always | +| `nginxGateway.gatewayClassName` | The name of the GatewayClass for the NGINX Kubernetes Gateway deployment. | nginx | +| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | gateway.nginx.org/nginx-gateway-controller | +| `nginxGateway.kind` | The kind of the NGINX Kubernetes Gateway installation - currently, only Deployment is supported. | deployment | +| `nginxGateway.config` | The dynamic configuration for the control plane that is contained in the NginxGateway resource | [See nginxGateway.config section](values.yaml) | +| `nginxGateway.readinessProbe.enable` | Enable the /readyz endpoint on the control plane. | true | +| `nginxGateway.readinessProbe.port` | Port in which the readiness endpoint is exposed. | 8081 | +| `nginxGateway.readinessProbe.initialDelaySeconds` | The number of seconds after the Pod has started before the readiness probes are initiated. | 3 | +| `nginxGateway.replicaCount` | The number of replicas of the NGINX Kubernetes Gateway Deployment. | 1 | +| `nginxGateway.leaderElection.enable` | Enable leader election. Leader election is used to avoid multiple replicas of the NGINX Kubernetes Gateway reporting the status of the Gateway API resources. | true | +| `nginxGateway.leaderElection.lockName` | The name of the leader election lock. A Lease object with this name will be created in the same Namespace as the controller. | Autogenerated | +| `nginx.image.repository` | The repository for the NGINX image. | ghcr.io/nginxinc/nginx-kubernetes-gateway/nginx | +| `nginx.image.tag` | The tag for the NGINX image. | edge | +| `nginx.image.pullPolicy` | The `imagePullPolicy` for the NGINX image. | Always | +| `serviceAccount.annotations` | The `annotations` for the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | {} | +| `serviceAccount.name` | Name of the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | Autogenerated | +| `service.create` | Creates a service to expose the NGINX Kubernetes Gateway pods. | true | +| `service.type` | The type of service to create for the NGINX Kubernetes Gateway. | Loadbalancer | +| `service.externalTrafficPolicy` | The `externalTrafficPolicy` of the service. The value `Local` preserves the client source IP. | Local | +| `service.annotations` | The `annotations` of the NGINX Kubernetes Gateway service. | {} | +| `service.ports` | A list of ports to expose through the NGINX Kubernetes Gateway service. Update it to match the listener ports from your Gateway resource. Follows the conventional Kubernetes yaml syntax for service ports. | [ port: 80, targetPort: 80, protocol: TCP, name: http; port: 443, targetPort: 443, protocol: TCP, name: https ] | +| `metrics.disable` | Disable exposing metrics in the Prometheus format. | false | +| `metrics.port` | Set the port where the Prometheus metrics are exposed. Format: [1024 - 65535] | 9113 | +| `metrics.secure` | Enable serving metrics via https. By default metrics are served via http. Please note that this endpoint will be secured with a self-signed certificate. | false | diff --git a/deploy/helm-chart/templates/_helpers.tpl b/deploy/helm-chart/templates/_helpers.tpl index acdc0b16e9..f94eeb379f 100644 --- a/deploy/helm-chart/templates/_helpers.tpl +++ b/deploy/helm-chart/templates/_helpers.tpl @@ -66,3 +66,14 @@ Create the name of the ServiceAccount to use {{- define "nginx-gateway.serviceAccountName" -}} {{- default (include "nginx-gateway.fullname" .) .Values.serviceAccount.name }} {{- end }} + +{{/* +Expand leader election lock name. +*/}} +{{- define "nginx-gateway.leaderElectionName" -}} +{{- if .Values.nginxGateway.leaderElection.lockName -}} +{{ .Values.nginxGateway.leaderElection.lockName }} +{{- else -}} +{{- printf "%s-%s" (include "nginx-gateway.fullname" .) "leader-election" -}} +{{- end -}} +{{- end -}} diff --git a/deploy/helm-chart/templates/deployment.yaml b/deploy/helm-chart/templates/deployment.yaml index 6bd2536008..d1561cacfa 100644 --- a/deploy/helm-chart/templates/deployment.yaml +++ b/deploy/helm-chart/templates/deployment.yaml @@ -7,8 +7,7 @@ metadata: labels: {{- include "nginx-gateway.labels" . | nindent 4 }} spec: - # We only support a single replica for now - replicas: 1 + replicas: {{ .Values.nginxGateway.replicaCount }} selector: matchLabels: {{- include "nginx-gateway.selectorLabels" . | nindent 6 }} @@ -44,15 +43,24 @@ spec: {{- else }} - --health-disable {{- end }} + {{- if .Values.nginxGateway.leaderElection.enable }} + - --leader-election-lock-name={{ include "nginx-gateway.leaderElectionName" . }} + {{- else }} + - --leader-election-disable + {{- end }} env: - name: POD_IP valueFrom: fieldRef: fieldPath: status.podIP - - name: MY_NAMESPACE + - name: POD_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name image: {{ .Values.nginxGateway.image.repository }}:{{ .Values.nginxGateway.image.tag | default .Chart.AppVersion }} imagePullPolicy: {{ .Values.nginxGateway.image.pullPolicy }} name: nginx-gateway diff --git a/deploy/helm-chart/templates/rbac.yaml b/deploy/helm-chart/templates/rbac.yaml index 0c245f4ee5..405c34525e 100644 --- a/deploy/helm-chart/templates/rbac.yaml +++ b/deploy/helm-chart/templates/rbac.yaml @@ -70,6 +70,16 @@ rules: - nginxgateways/status verbs: - update +{{- if .Values.nginxGateway.leaderElection.enable }} +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update +{{- end }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/deploy/helm-chart/values.yaml b/deploy/helm-chart/values.yaml index 613c6d27f4..c45b488d8a 100644 --- a/deploy/helm-chart/values.yaml +++ b/deploy/helm-chart/values.yaml @@ -13,6 +13,17 @@ nginxGateway: logging: ## Log level. Supported values "info", "debug", "error". level: info + ## The number of replicas of the NGINX Kubernetes Gateway Deployment. + replicaCount: 1 + ## The configuration for leader election. + leaderElection: + ## Enable leader election. Leader election is used to avoid multiple replicas of the NGINX Kubernetes Gateway + ## reporting the status of the Gateway API resources. If not enabled, all replicas of NGINX Kubernetes Gateway + ## will update the statuses of the Gateway API resources. + enable: true + ## The name of the leader election lock. A Lease object with this name will be created in the same Namespace as + ## the controller. Autogenerated if not set or set to "". + lockName: "" ## Defines the settings for the control plane readiness probe. This probe returns Ready when the controller ## has started and configured NGINX to serve traffic. diff --git a/deploy/manifests/nginx-gateway.yaml b/deploy/manifests/nginx-gateway.yaml index 4c94453bfc..8adf00fc94 100644 --- a/deploy/manifests/nginx-gateway.yaml +++ b/deploy/manifests/nginx-gateway.yaml @@ -81,6 +81,14 @@ rules: - nginxgateways/status verbs: - update +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - update --- # Source: nginx-kubernetes-gateway/templates/rbac.yaml apiVersion: rbac.authorization.k8s.io/v1 @@ -111,7 +119,6 @@ metadata: app.kubernetes.io/instance: nginx-gateway app.kubernetes.io/version: "edge" spec: - # We only support a single replica for now replicas: 1 selector: matchLabels: @@ -134,15 +141,20 @@ spec: - --config=nginx-gateway-config - --metrics-port=9113 - --health-port=8081 + - --leader-election-lock-name=nginx-gateway-leader-election env: - name: POD_IP valueFrom: fieldRef: fieldPath: status.podIP - - name: MY_NAMESPACE + - name: POD_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name image: ghcr.io/nginxinc/nginx-kubernetes-gateway:edge imagePullPolicy: Always name: nginx-gateway diff --git a/docs/architecture.md b/docs/architecture.md index ebe95b83fa..007d605e87 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -40,10 +40,11 @@ The figure shows: cluster through the Kubernetes API by creating Kubernetes objects. - *Clients A* and *Clients B* connect to *Applications A* and *B*, respectively. This applications have been deployed by the corresponding users. -- The *NKG Pod*, [deployed by *Cluster Operator*](/docs/installation.md) in the Namespace *nginx-gateway*. This Pod -consists of two containers: `NGINX` and `NKG`. The *NKG* container interacts with the Kubernetes API to retrieve the -most up-to-date Gateway API resources created within the cluster. It then dynamically configures the *NGINX* -container based on these resources, ensuring proper alignment between the cluster state and the NGINX configuration. +- The *NKG Pod*, [deployed by *Cluster Operator*](/docs/installation.md) in the Namespace *nginx-gateway*. For +scalability and availability, you can have multiple replicas. This Pod consists of two containers: `NGINX` and `NKG`. +The *NKG* container interacts with the Kubernetes API to retrieve the most up-to-date Gateway API resources created +within the cluster. It then dynamically configures the *NGINX* container based on these resources, ensuring proper +alignment between the cluster state and the NGINX configuration. - *Gateway AB*, created by *Cluster Operator*, requests a point where traffic can be translated to Services within the cluster. This Gateway includes a listener with a hostname `*.example.com`. Application Developers have the ability to attach their application's routes to this Gateway if their application's hostname matches `*.example.com`. @@ -69,7 +70,7 @@ Next, let's explore the NKG Pod. ## The NGINX Kubernetes Gateway Pod -The NGINX Kubernetes Gateway consists of three containers: +The NGINX Kubernetes Gateway consists of two containers: 1. `nginx`: the data plane. Consists of an NGINX master process and NGINX worker processes. The master process controls the worker processes. The worker processes handle the client traffic and load balance the traffic to the backend @@ -94,8 +95,11 @@ these components. The following list provides a description of each connection, along with its corresponding type indicated in parentheses. To enhance readability, the suffix "process" has been omitted from the process descriptions below. -1. (HTTPS) *NKG* reads the *Kubernetes API* to get the latest versions of the resources in the cluster and writes to the -API to update the handled resources' statuses and emit events. +1. (HTTPS) + - Read: *NKG* reads the *Kubernetes API* to get the latest versions of the resources in the cluster. + - Write: *NKG* writes to the *Kubernetes API* to update the handled resources' statuses and emit events. If there's + more than one replica of *NKG* and [leader election](/deploy/helm-chart/README.md#configuration) is enabled, only + the *NKG* Pod that is leading will write statuses to the *Kubernetes API*. 2. (HTTP, HTTPS) *Prometheus* fetches the `controller-runtime` and NGINX metrics via an HTTP endpoint that *NKG* exposes. The default is :9113/metrics. Note: Prometheus is not required by NKG, the endpoint can be turned off. 3. (File I/O) diff --git a/docs/cli-help.md b/docs/cli-help.md index 60154b694c..0bb4985388 100644 --- a/docs/cli-help.md +++ b/docs/cli-help.md @@ -14,14 +14,17 @@ Usage: Flags: -| Name | Type | Description | -|------|------|-------------| -| `gateway-ctlr-name` | `string` | The name of the Gateway controller. The controller name must be of the form: `DOMAIN/PATH`. The controller's domain is `gateway.nginx.org`. | -| `gatewayclass` | `string` | The name of the GatewayClass resource. Every NGINX Gateway must have a unique corresponding GatewayClass resource. | +| Name | Type | Description | +|------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `gateway-ctlr-name` | `string` | The name of the Gateway controller. The controller name must be of the form: `DOMAIN/PATH`. The controller's domain is `gateway.nginx.org`. | +| `gatewayclass` | `string` | The name of the GatewayClass resource. Every NGINX Gateway must have a unique corresponding GatewayClass resource. | | `gateway` | `string` | The namespaced name of the Gateway resource to use. Must be of the form: `NAMESPACE/NAME`. If not specified, the control plane will process all Gateways for the configured GatewayClass. However, among them, it will choose the oldest resource by creation timestamp. If the timestamps are equal, it will choose the resource that appears first in alphabetical order by {namespace}/{name}. | -| `config` | `string` | The name of the NginxGateway resource to be used for this controller's dynamic configuration. Lives in the same Namespace as the controller. | -| `metrics-disable` | `bool` | Disable exposing metrics in the Prometheus format. (default false) | -| `metrics-port` | `int` | Sets the port where the Prometheus metrics are exposed. Format: `[1024 - 65535]` (default `9113`) | -| `metrics-secure-serving` | `bool` | Configures if the metrics endpoint should be secured using https. Please note that this endpoint will be secured with a self-signed certificate. (default false) | -| `health-disable` | `bool` | Disable running the health probe server. (default false) | -| `health-port` | `int` | Set the port where the health probe server is exposed. Format: `[1024 - 65535]` (default `8081`) | +| `config` | `string` | The name of the NginxGateway resource to be used for this controller's dynamic configuration. Lives in the same Namespace as the controller. | +| `metrics-disable` | `bool` | Disable exposing metrics in the Prometheus format. (default false) | +| `metrics-listen-port` | `int` | Sets the port where the Prometheus metrics are exposed. Format: `[1024 - 65535]` (default `9113`) | +| `metrics-secure-serving` | `bool` | Configures if the metrics endpoint should be secured using https. Please note that this endpoint will be secured with a self-signed certificate. (default false) | +| `update-gatewayclass-status` | `bool` | Update the status of the GatewayClass resource. (default true) | +| `health-disable` | `bool` | Disable running the health probe server. (default false) | +| `health-port` | `int` | Set the port where the health probe server is exposed. Format: `[1024 - 65535]` (default `8081`) | +| `leader-election-disable` | `bool` | Disable leader election. Leader election is used to avoid multiple replicas of the NGINX Kubernetes Gateway reporting the status of the Gateway API resources. If disabled, all replicas of NGINX Kubernetes Gateway will update the statuses of the Gateway API resources. (default false) | +| `leader-election-lock-name` | `string` | The name of the leader election lock. A Lease object with this name will be created in the same Namespace as the controller. (default "nginx-gateway-leader-election-lock") | diff --git a/internal/framework/status/statuses.go b/internal/framework/status/statuses.go index 8de537d981..9da49c4b9f 100644 --- a/internal/framework/status/statuses.go +++ b/internal/framework/status/statuses.go @@ -4,9 +4,41 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/gateway-api/apis/v1beta1" + nkgAPI "github.com/nginxinc/nginx-kubernetes-gateway/apis/v1alpha1" "github.com/nginxinc/nginx-kubernetes-gateway/internal/framework/conditions" ) +// Status is the status of one or more Kubernetes resources that the StatusUpdater will update. +type Status interface { + // APIGroup returns the GroupName of the resources contained in the status + APIGroup() string +} + +// GatewayAPIStatuses holds the status-related information about Gateway API resources. +type GatewayAPIStatuses struct { + GatewayClassStatuses GatewayClassStatuses + GatewayStatuses GatewayStatuses + HTTPRouteStatuses HTTPRouteStatuses +} + +func (g GatewayAPIStatuses) APIGroup() string { + return v1beta1.GroupName +} + +// NginxGatewayStatus holds status-related information about the NginxGateway resource. +type NginxGatewayStatus struct { + // NsName is the NamespacedName of the NginxGateway resource. + NsName types.NamespacedName + // Conditions is the list of conditions for this NginxGateway. + Conditions []conditions.Condition + // ObservedGeneration is the generation of the resource that was processed. + ObservedGeneration int64 +} + +func (n *NginxGatewayStatus) APIGroup() string { + return nkgAPI.GroupName +} + // ListenerStatuses holds the statuses of listeners where the key is the name of a listener in the Gateway resource. type ListenerStatuses map[string]ListenerStatus @@ -19,14 +51,6 @@ type GatewayStatuses map[types.NamespacedName]GatewayStatus // GatewayClassStatuses holds the statuses of GatewayClasses where the key is the namespaced name of a GatewayClass. type GatewayClassStatuses map[types.NamespacedName]GatewayClassStatus -// Statuses holds the status-related information about resources. -type Statuses struct { - GatewayClassStatuses GatewayClassStatuses - GatewayStatuses GatewayStatuses - HTTPRouteStatuses HTTPRouteStatuses - NginxGatewayStatus *NginxGatewayStatus -} - // GatewayStatus holds the status of the winning Gateway resource. type GatewayStatus struct { // ListenerStatuses holds the statuses of listeners defined on the Gateway. @@ -72,13 +96,3 @@ type GatewayClassStatus struct { // ObservedGeneration is the generation of the resource that was processed. ObservedGeneration int64 } - -// NginxGatewayStatus holds status-related information about the NginxGateway resource. -type NginxGatewayStatus struct { - // NsName is the NamespacedName of the NginxGateway resource. - NsName types.NamespacedName - // Conditions is the list of conditions for this NginxGateway. - Conditions []conditions.Condition - // ObservedGeneration is the generation of the resource that was processed. - ObservedGeneration int64 -} diff --git a/internal/framework/status/statusfakes/fake_updater.go b/internal/framework/status/statusfakes/fake_updater.go index 53c2002b02..423b4f6584 100644 --- a/internal/framework/status/statusfakes/fake_updater.go +++ b/internal/framework/status/statusfakes/fake_updater.go @@ -9,21 +9,86 @@ import ( ) type FakeUpdater struct { - UpdateStub func(context.Context, status.Statuses) + DisableStub func() + disableMutex sync.RWMutex + disableArgsForCall []struct { + } + EnableStub func(context.Context) + enableMutex sync.RWMutex + enableArgsForCall []struct { + arg1 context.Context + } + UpdateStub func(context.Context, status.Status) updateMutex sync.RWMutex updateArgsForCall []struct { arg1 context.Context - arg2 status.Statuses + arg2 status.Status } invocations map[string][][]interface{} invocationsMutex sync.RWMutex } -func (fake *FakeUpdater) Update(arg1 context.Context, arg2 status.Statuses) { +func (fake *FakeUpdater) Disable() { + fake.disableMutex.Lock() + fake.disableArgsForCall = append(fake.disableArgsForCall, struct { + }{}) + stub := fake.DisableStub + fake.recordInvocation("Disable", []interface{}{}) + fake.disableMutex.Unlock() + if stub != nil { + fake.DisableStub() + } +} + +func (fake *FakeUpdater) DisableCallCount() int { + fake.disableMutex.RLock() + defer fake.disableMutex.RUnlock() + return len(fake.disableArgsForCall) +} + +func (fake *FakeUpdater) DisableCalls(stub func()) { + fake.disableMutex.Lock() + defer fake.disableMutex.Unlock() + fake.DisableStub = stub +} + +func (fake *FakeUpdater) Enable(arg1 context.Context) { + fake.enableMutex.Lock() + fake.enableArgsForCall = append(fake.enableArgsForCall, struct { + arg1 context.Context + }{arg1}) + stub := fake.EnableStub + fake.recordInvocation("Enable", []interface{}{arg1}) + fake.enableMutex.Unlock() + if stub != nil { + fake.EnableStub(arg1) + } +} + +func (fake *FakeUpdater) EnableCallCount() int { + fake.enableMutex.RLock() + defer fake.enableMutex.RUnlock() + return len(fake.enableArgsForCall) +} + +func (fake *FakeUpdater) EnableCalls(stub func(context.Context)) { + fake.enableMutex.Lock() + defer fake.enableMutex.Unlock() + fake.EnableStub = stub +} + +func (fake *FakeUpdater) EnableArgsForCall(i int) context.Context { + fake.enableMutex.RLock() + defer fake.enableMutex.RUnlock() + argsForCall := fake.enableArgsForCall[i] + return argsForCall.arg1 +} + +func (fake *FakeUpdater) Update(arg1 context.Context, arg2 status.Status) { fake.updateMutex.Lock() fake.updateArgsForCall = append(fake.updateArgsForCall, struct { arg1 context.Context - arg2 status.Statuses + arg2 status.Status }{arg1, arg2}) stub := fake.UpdateStub fake.recordInvocation("Update", []interface{}{arg1, arg2}) @@ -39,13 +104,13 @@ func (fake *FakeUpdater) UpdateCallCount() int { return len(fake.updateArgsForCall) } -func (fake *FakeUpdater) UpdateCalls(stub func(context.Context, status.Statuses)) { +func (fake *FakeUpdater) UpdateCalls(stub func(context.Context, status.Status)) { fake.updateMutex.Lock() defer fake.updateMutex.Unlock() fake.UpdateStub = stub } -func (fake *FakeUpdater) UpdateArgsForCall(i int) (context.Context, status.Statuses) { +func (fake *FakeUpdater) UpdateArgsForCall(i int) (context.Context, status.Status) { fake.updateMutex.RLock() defer fake.updateMutex.RUnlock() argsForCall := fake.updateArgsForCall[i] @@ -55,6 +120,10 @@ func (fake *FakeUpdater) UpdateArgsForCall(i int) (context.Context, status.Statu func (fake *FakeUpdater) Invocations() map[string][][]interface{} { fake.invocationsMutex.RLock() defer fake.invocationsMutex.RUnlock() + fake.disableMutex.RLock() + defer fake.disableMutex.RUnlock() + fake.enableMutex.RLock() + defer fake.enableMutex.RUnlock() fake.updateMutex.RLock() defer fake.updateMutex.RUnlock() copiedInvocations := map[string][][]interface{}{} diff --git a/internal/framework/status/updater.go b/internal/framework/status/updater.go index 49c29651c1..c11dc7a8a1 100644 --- a/internal/framework/status/updater.go +++ b/internal/framework/status/updater.go @@ -2,6 +2,8 @@ package status import ( "context" + "fmt" + "sync" "github.com/go-logr/logr" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -15,9 +17,17 @@ import ( //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . Updater // Updater updates statuses of the Gateway API resources. +// Updater can be disabled. In this case, it will stop updating the statuses of resources, while +// always saving the statuses of the last Update call. This is used to support multiple replicas of +// control plane being able to run simultaneously where only the leader will update statuses. type Updater interface { // Update updates the statuses of the resources. - Update(context.Context, Statuses) + Update(context.Context, Status) + // Enable enables status updates. The updater will update the statuses in Kubernetes API to ensure they match the + // statuses of the last Update invocation. + Enable(ctx context.Context) + // Disable disables status updates. + Disable() } // UpdaterConfig holds configuration parameters for Updater. @@ -36,58 +46,140 @@ type UpdaterConfig struct { PodIP string // UpdateGatewayClassStatus enables updating the status of the GatewayClass resource. UpdateGatewayClassStatus bool + // LeaderElectionEnabled indicates whether Leader Election is enabled. + // If it is not enabled, the updater will always write statuses to the Kubernetes API. + LeaderElectionEnabled bool } -// updaterImpl updates statuses of the Gateway API resources. +// UpdaterImpl updates statuses of the Gateway API resources. // // It has the following limitations: // -// (1) It doesn't understand the leader election. Only the leader must report the statuses of the resources. Otherwise, -// multiple replicas will step on each other when trying to report statuses for the same resources. +// (1) It is not smart. It will update the status of a resource (make an API call) even if it hasn't changed. // -// (2) It is not smart. It will update the status of a resource (make an API call) even if it hasn't changed. -// -// (3) It is synchronous, which means the status reporter can slow down the event loop. +// (2) It is synchronous, which means the status reporter can slow down the event loop. // Consider the following cases: // (a) Sometimes the Gateway will need to update statuses of all resources it handles, which could be ~1000. Making 1000 // status API calls sequentially will take time. // (b) k8s API can become slow or even timeout. This will increase every update status API call. -// Making updaterImpl asynchronous will prevent it from adding variable delays to the event loop. +// Making UpdaterImpl asynchronous will prevent it from adding variable delays to the event loop. // -// (4) It doesn't retry on failures. This means there is a chance that some resources will not have up-to-do statuses. +// (3) It doesn't retry on failures. This means there is a chance that some resources will not have up-to-do statuses. // Statuses are important part of the Gateway API, so we need to ensure that the Gateway always keep the resources // statuses up-to-date. // -// (5) It doesn't clear the statuses of a resources that are no longer handled by the Gateway. For example, if +// (4) It doesn't clear the statuses of a resources that are no longer handled by the Gateway. For example, if // an HTTPRoute resource no longer has the parentRef to the Gateway resources, the Gateway must update the status // of the resource to remove the status about the removed parentRef. // -// (6) If another controllers changes the status of the Gateway/HTTPRoute resource so that the information set by our +// (5) If another controllers changes the status of the Gateway/HTTPRoute resource so that the information set by our // Gateway is removed, our Gateway will not restore the status until the EventLoop invokes the StatusUpdater as a // result of processing some other new change to a resource(s). // FIXME(pleshakov): Make updater production ready // https://github.com/nginxinc/nginx-kubernetes-gateway/issues/691 -// To support new resources, updaterImpl needs to be modified. Consider making updaterImpl extendable, so that it +// UpdaterImpl needs to be modified to support new resources. Consider making UpdaterImpl extendable, so that it // goes along the Open-closed principle. -type updaterImpl struct { - cfg UpdaterConfig +type UpdaterImpl struct { + lastStatuses lastStatuses + cfg UpdaterConfig + isLeader bool + + lock sync.Mutex +} + +// lastStatuses hold the last saved statuses. Used when leader election is enabled to write the last saved statuses on +// a leader change. +type lastStatuses struct { + nginxGateway *NginxGatewayStatus + gatewayAPI GatewayAPIStatuses +} + +// Enable writes the last saved statuses for the Gateway API resources. +// Used in leader election when the Pod starts leading. It's possible that during a leader change, +// some statuses are missed. This will ensure that the latest statuses are written when a new leader takes over. +func (upd *UpdaterImpl) Enable(ctx context.Context) { + defer upd.lock.Unlock() + upd.lock.Lock() + + upd.isLeader = true + + upd.cfg.Logger.Info("Writing last statuses") + upd.updateGatewayAPI(ctx, upd.lastStatuses.gatewayAPI) + upd.updateNginxGateway(ctx, upd.lastStatuses.nginxGateway) +} + +func (upd *UpdaterImpl) Disable() { + defer upd.lock.Unlock() + upd.lock.Lock() + + upd.isLeader = false } // NewUpdater creates a new Updater. -func NewUpdater(cfg UpdaterConfig) Updater { - return &updaterImpl{ +func NewUpdater(cfg UpdaterConfig) *UpdaterImpl { + return &UpdaterImpl{ cfg: cfg, + // If leader election is enabled then we should not start running as a leader. Instead, + // we wait for Enable to be invoked by the Leader Elector goroutine. + isLeader: !cfg.LeaderElectionEnabled, } } -func (upd *updaterImpl) Update(ctx context.Context, statuses Statuses) { +func (upd *UpdaterImpl) Update(ctx context.Context, status Status) { // FIXME(pleshakov) Merge the new Conditions in the status with the existing Conditions // https://github.com/nginxinc/nginx-kubernetes-gateway/issues/558 + defer upd.lock.Unlock() + upd.lock.Lock() + + switch s := status.(type) { + case *NginxGatewayStatus: + upd.updateNginxGateway(ctx, s) + case GatewayAPIStatuses: + upd.updateGatewayAPI(ctx, s) + default: + panic(fmt.Sprintf("unknown status type %T with group name %s", s, status.APIGroup())) + } +} + +func (upd *UpdaterImpl) updateNginxGateway(ctx context.Context, status *NginxGatewayStatus) { + upd.lastStatuses.nginxGateway = status + + if !upd.isLeader { + upd.cfg.Logger.Info("Skipping updating Nginx Gateway status because not leader") + return + } + + upd.cfg.Logger.Info("Updating Nginx Gateway status") + + if status != nil { + upd.writeStatuses(ctx, status.NsName, &nkgAPI.NginxGateway{}, func(object client.Object) { + ng := object.(*nkgAPI.NginxGateway) + ng.Status = nkgAPI.NginxGatewayStatus{ + Conditions: convertConditions( + status.Conditions, + status.ObservedGeneration, + upd.cfg.Clock.Now(), + ), + } + }) + } +} + +func (upd *UpdaterImpl) updateGatewayAPI(ctx context.Context, statuses GatewayAPIStatuses) { + upd.lastStatuses.gatewayAPI = statuses + + if !upd.isLeader { + upd.cfg.Logger.Info("Skipping updating Gateway API status because not leader") + return + } + + upd.cfg.Logger.Info("Updating Gateway API statuses") + if upd.cfg.UpdateGatewayClassStatus { for nsname, gcs := range statuses.GatewayClassStatuses { - upd.update(ctx, nsname, &v1beta1.GatewayClass{}, func(object client.Object) { + upd.writeStatuses(ctx, nsname, &v1beta1.GatewayClass{}, func(object client.Object) { gc := object.(*v1beta1.GatewayClass) gc.Status = prepareGatewayClassStatus(gcs, upd.cfg.Clock.Now()) }, @@ -96,7 +188,7 @@ func (upd *updaterImpl) Update(ctx context.Context, statuses Statuses) { } for nsname, gs := range statuses.GatewayStatuses { - upd.update(ctx, nsname, &v1beta1.Gateway{}, func(object client.Object) { + upd.writeStatuses(ctx, nsname, &v1beta1.Gateway{}, func(object client.Object) { gw := object.(*v1beta1.Gateway) gw.Status = prepareGatewayStatus(gs, upd.cfg.PodIP, upd.cfg.Clock.Now()) }) @@ -109,7 +201,7 @@ func (upd *updaterImpl) Update(ctx context.Context, statuses Statuses) { default: } - upd.update(ctx, nsname, &v1beta1.HTTPRoute{}, func(object client.Object) { + upd.writeStatuses(ctx, nsname, &v1beta1.HTTPRoute{}, func(object client.Object) { hr := object.(*v1beta1.HTTPRoute) // statuses.GatewayStatus is never nil when len(statuses.HTTPRouteStatuses) > 0 hr.Status = prepareHTTPRouteStatus( @@ -119,23 +211,9 @@ func (upd *updaterImpl) Update(ctx context.Context, statuses Statuses) { ) }) } - - ngStatus := statuses.NginxGatewayStatus - if ngStatus != nil { - upd.update(ctx, ngStatus.NsName, &nkgAPI.NginxGateway{}, func(object client.Object) { - ng := object.(*nkgAPI.NginxGateway) - ng.Status = nkgAPI.NginxGatewayStatus{ - Conditions: convertConditions( - ngStatus.Conditions, - ngStatus.ObservedGeneration, - upd.cfg.Clock.Now(), - ), - } - }) - } } -func (upd *updaterImpl) update( +func (upd *UpdaterImpl) writeStatuses( ctx context.Context, nsname types.NamespacedName, obj client.Object, diff --git a/internal/framework/status/updater_test.go b/internal/framework/status/updater_test.go index 0e7f9b7b41..c3ee8876e9 100644 --- a/internal/framework/status/updater_test.go +++ b/internal/framework/status/updater_test.go @@ -2,6 +2,7 @@ package status_test import ( "context" + "sync" "time" . "github.com/onsi/ginkgo/v2" @@ -21,6 +22,12 @@ import ( staticConds "github.com/nginxinc/nginx-kubernetes-gateway/internal/mode/static/state/conditions" ) +type unsupportedStatus struct{} + +func (u unsupportedStatus) APIGroup() string { + return "unsupported" +} + var _ = Describe("Updater", func() { const gcName = "my-class" @@ -61,7 +68,7 @@ var _ = Describe("Updater", func() { } var ( - updater status.Updater + updater *status.UpdaterImpl gc *v1beta1.GatewayClass gw, ignoredGw *v1beta1.Gateway hr *v1beta1.HTTPRoute @@ -72,8 +79,8 @@ var _ = Describe("Updater", func() { Value: "1.2.3.4", } - createStatuses = func(gens generations) status.Statuses { - return status.Statuses{ + createGwAPIStatuses = func(gens generations) status.GatewayAPIStatuses { + return status.GatewayAPIStatuses{ GatewayClassStatuses: status.GatewayClassStatuses{ {Name: gcName}: { ObservedGeneration: gens.gatewayClass, @@ -109,14 +116,17 @@ var _ = Describe("Updater", func() { }, }, }, - NginxGatewayStatus: &status.NginxGatewayStatus{ - NsName: types.NamespacedName{ - Namespace: "nginx-gateway", - Name: "nginx-gateway-config", - }, - ObservedGeneration: 3, - Conditions: status.CreateTestConditions("Test"), + } + } + + createNGStatus = func(gen int64) *status.NginxGatewayStatus { + return &status.NginxGatewayStatus{ + NsName: types.NamespacedName{ + Namespace: "nginx-gateway", + Name: "nginx-gateway-config", }, + ObservedGeneration: gen, + Conditions: status.CreateTestConditions("Test"), } } @@ -221,6 +231,22 @@ var _ = Describe("Updater", func() { }, } } + + createExpectedNGWithGeneration = func(gen int64) *nkgAPI.NginxGateway { + return &nkgAPI.NginxGateway{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "nginx-gateway", + Name: "nginx-gateway-config", + }, + TypeMeta: metav1.TypeMeta{ + Kind: "NginxGateway", + APIVersion: "gateway.nginx.org/v1alpha1", + }, + Status: nkgAPI.NginxGatewayStatus{ + Conditions: status.CreateExpectedAPIConditions("Test", gen, fakeClockTime), + }, + } + } ) BeforeAll(func() { @@ -293,8 +319,8 @@ var _ = Describe("Updater", func() { Expect(client.Create(context.Background(), ng)).Should(Succeed()) }) - It("should update statuses", func() { - updater.Update(context.Background(), createStatuses(generations{ + It("should update gateway API statuses", func() { + updater.Update(context.Background(), createGwAPIStatuses(generations{ gatewayClass: 1, gateways: 1, })) @@ -352,21 +378,13 @@ var _ = Describe("Updater", func() { Expect(helpers.Diff(expectedHR, latestHR)).To(BeEmpty()) }) + It("should update nginx gateway status", func() { + updater.Update(context.Background(), createNGStatus(1)) + }) + It("should have the updated status of NginxGateway in the API server", func() { latestNG := &nkgAPI.NginxGateway{} - expectedNG := &nkgAPI.NginxGateway{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "nginx-gateway", - Name: "nginx-gateway-config", - }, - TypeMeta: metav1.TypeMeta{ - Kind: "NginxGateway", - APIVersion: "gateway.nginx.org/v1alpha1", - }, - Status: nkgAPI.NginxGatewayStatus{ - Conditions: status.CreateExpectedAPIConditions("Test", 3, fakeClockTime), - }, - } + expectedNG := createExpectedNGWithGeneration(1) err := client.Get( context.Background(), @@ -383,7 +401,7 @@ var _ = Describe("Updater", func() { It("should update statuses with canceled context - function normally returns", func() { ctx, cancel := context.WithCancel(context.Background()) cancel() - updater.Update(ctx, createStatuses(generations{ + updater.Update(ctx, createGwAPIStatuses(generations{ gatewayClass: 2, gateways: 2, })) @@ -452,6 +470,162 @@ var _ = Describe("Updater", func() { Expect(helpers.Diff(expectedHR, latestHR)).To(BeEmpty()) }) }) + When("the Pod is not the current leader", func() { + It("should not update any statuses", func() { + updater.Disable() + updater.Update(context.Background(), createGwAPIStatuses(generations{ + gateways: 3, + })) + updater.Update(context.Background(), createNGStatus(2)) + }) + + It("should not have the updated status of Gateway in the API server", func() { + latestGw := &v1beta1.Gateway{} + // testing that the generation has not changed from 2 to 3 + expectedGw := createExpectedGwWithGeneration(2) + + err := client.Get( + context.Background(), + types.NamespacedName{Namespace: "test", Name: "gateway"}, + latestGw, + ) + Expect(err).Should(Not(HaveOccurred())) + + expectedGw.ResourceVersion = latestGw.ResourceVersion + + Expect(helpers.Diff(expectedGw, latestGw)).To(BeEmpty()) + }) + + It("should not have the updated status of the Nginx Gateway resource in the API server", func() { + latestNG := &nkgAPI.NginxGateway{} + expectedNG := createExpectedNGWithGeneration(1) + + err := client.Get( + context.Background(), + types.NamespacedName{Namespace: "nginx-gateway", Name: "nginx-gateway-config"}, + latestNG, + ) + Expect(err).Should(Not(HaveOccurred())) + + expectedNG.ResourceVersion = latestNG.ResourceVersion + + Expect(helpers.Diff(expectedNG, latestNG)).To(BeEmpty()) + }) + }) + When("the Pod starts leading", func() { + It("writes the last statuses", func() { + updater.Enable(context.Background()) + }) + + It("should have the updated status of Gateway in the API server", func() { + latestGw := &v1beta1.Gateway{} + expectedGw := createExpectedGwWithGeneration(3) + + err := client.Get( + context.Background(), + types.NamespacedName{Namespace: "test", Name: "gateway"}, + latestGw, + ) + Expect(err).Should(Not(HaveOccurred())) + + expectedGw.ResourceVersion = latestGw.ResourceVersion + + Expect(helpers.Diff(expectedGw, latestGw)).To(BeEmpty()) + }) + + It("should have the updated status of the Nginx Gateway resource in the API server", func() { + latestNG := &nkgAPI.NginxGateway{} + expectedNG := createExpectedNGWithGeneration(2) + + err := client.Get( + context.Background(), + types.NamespacedName{Namespace: "nginx-gateway", Name: "nginx-gateway-config"}, + latestNG, + ) + Expect(err).Should(Not(HaveOccurred())) + + expectedNG.ResourceVersion = latestNG.ResourceVersion + + Expect(helpers.Diff(expectedNG, latestNG)).To(BeEmpty()) + }) + }) + + When("the Pod is the current leader", func() { + It("should update Gateway API statuses", func() { + updater.Update(context.Background(), createGwAPIStatuses(generations{ + gateways: 4, + })) + }) + + It("should have the updated status of Gateway in the API server", func() { + latestGw := &v1beta1.Gateway{} + expectedGw := createExpectedGwWithGeneration(4) + + err := client.Get( + context.Background(), + types.NamespacedName{Namespace: "test", Name: "gateway"}, + latestGw, + ) + Expect(err).Should(Not(HaveOccurred())) + + expectedGw.ResourceVersion = latestGw.ResourceVersion + + Expect(helpers.Diff(expectedGw, latestGw)).To(BeEmpty()) + }) + + It("should update Nginx Gateway status", func() { + updater.Update(context.Background(), createNGStatus(3)) + }) + It("should have the updated status of Nginx Gateway in the API server", func() { + latestNG := &nkgAPI.NginxGateway{} + expectedNG := createExpectedNGWithGeneration(3) + + err := client.Get( + context.Background(), + types.NamespacedName{Namespace: "nginx-gateway", Name: "nginx-gateway-config"}, + latestNG, + ) + Expect(err).Should(Not(HaveOccurred())) + + expectedNG.ResourceVersion = latestNG.ResourceVersion + + Expect(helpers.Diff(expectedNG, latestNG)).To(BeEmpty()) + }) + It("updates and writes last statuses synchronously", func() { + wg := &sync.WaitGroup{} + ctx := context.Background() + + // spin up 10 goroutines that Update and 10 that WriteLastStatuses + // and make sure that 20 updates were made to the Gateway resource. + for i := 0; i < 10; i++ { + wg.Add(2) + go func() { + updater.Update(ctx, createGwAPIStatuses(generations{gateways: 5})) + wg.Done() + }() + + go func() { + updater.Enable(ctx) + wg.Done() + }() + } + + wg.Wait() + + latestGw := &v1beta1.Gateway{} + + err := client.Get( + context.Background(), + types.NamespacedName{Namespace: "test", Name: "gateway"}, + latestGw, + ) + Expect(err).Should(Not(HaveOccurred())) + + // Before this test there were 5 updates to the Gateway resource. + // So now the resource version should equal 25. + Expect(latestGw.ResourceVersion).To(Equal("25")) + }) + }) }) Describe("Skip GatewayClass updates", Ordered, func() { @@ -489,7 +663,7 @@ var _ = Describe("Updater", func() { It("should not update GatewayClass status", func() { updater.Update( context.Background(), - status.Statuses{ + status.GatewayAPIStatuses{ GatewayClassStatuses: status.GatewayClassStatuses{ {Name: gcName}: { ObservedGeneration: 1, @@ -507,4 +681,24 @@ var _ = Describe("Updater", func() { Expect(latestGc.Status).To(BeZero()) }) }) + + Describe("Edge cases", func() { + It("panics on update if status type is unknown", func() { + updater := status.NewUpdater(status.UpdaterConfig{ + GatewayCtlrName: gatewayCtrlName, + GatewayClassName: gcName, + Client: client, + Logger: zap.New(), + Clock: fakeClock, + PodIP: "1.2.3.4", + UpdateGatewayClassStatus: true, + }) + + update := func() { + updater.Update(context.Background(), unsupportedStatus{}) + } + + Expect(update).Should(Panic()) + }) + }) }) diff --git a/internal/mode/provisioner/deployment.go b/internal/mode/provisioner/deployment.go index 9068c694bc..091052245e 100644 --- a/internal/mode/provisioner/deployment.go +++ b/internal/mode/provisioner/deployment.go @@ -2,6 +2,7 @@ package provisioner import ( "fmt" + "strings" v1 "k8s.io/api/apps/v1" "k8s.io/apimachinery/pkg/types" @@ -22,11 +23,21 @@ func prepareDeployment(depYAML []byte, id string, gwNsName types.NamespacedName) dep.Spec.Selector.MatchLabels["app"] = id dep.Spec.Template.ObjectMeta.Labels["app"] = id - extraArgs := []string{ + finalArgs := []string{ "--gateway=" + gwNsName.String(), "--update-gatewayclass-status=false", } - dep.Spec.Template.Spec.Containers[0].Args = append(dep.Spec.Template.Spec.Containers[0].Args, extraArgs...) + + for _, arg := range dep.Spec.Template.Spec.Containers[0].Args { + if strings.Contains(arg, "leader-election-lock-name") { + lockNameArg := "--leader-election-lock-name=" + gwNsName.Name + finalArgs = append(finalArgs, lockNameArg) + } else { + finalArgs = append(finalArgs, arg) + } + } + + dep.Spec.Template.Spec.Containers[0].Args = finalArgs return dep, nil } diff --git a/internal/mode/provisioner/handler.go b/internal/mode/provisioner/handler.go index e7e69d7713..3dafc663a5 100644 --- a/internal/mode/provisioner/handler.go +++ b/internal/mode/provisioner/handler.go @@ -54,7 +54,7 @@ func newEventHandler( } func (h *eventHandler) setGatewayClassStatuses(ctx context.Context) { - statuses := status.Statuses{ + statuses := status.GatewayAPIStatuses{ GatewayClassStatuses: make(status.GatewayClassStatuses), } diff --git a/internal/mode/provisioner/handler_test.go b/internal/mode/provisioner/handler_test.go index bf6549f636..2ef1bddff3 100644 --- a/internal/mode/provisioner/handler_test.go +++ b/internal/mode/provisioner/handler_test.go @@ -144,6 +144,8 @@ var _ = Describe("handler", func() { expectedGwFlag := fmt.Sprintf("--gateway=%s", gwNsName.String()) Expect(dep.Spec.Template.Spec.Containers[0].Args).To(ContainElement(expectedGwFlag)) Expect(dep.Spec.Template.Spec.Containers[0].Args).To(ContainElement("--update-gatewayclass-status=false")) + expectedLockFlag := fmt.Sprintf("--leader-election-lock-name=%s", gwNsName.Name) + Expect(dep.Spec.Template.Spec.Containers[0].Args).To(ContainElement(expectedLockFlag)) } itShouldPanicWhenUpsertingGateway := func(gwNsName types.NamespacedName) { diff --git a/internal/mode/static/build_statuses.go b/internal/mode/static/build_statuses.go index 4b8c7aae77..82ef6edf4b 100644 --- a/internal/mode/static/build_statuses.go +++ b/internal/mode/static/build_statuses.go @@ -15,9 +15,9 @@ type nginxReloadResult struct { error error } -// buildStatuses builds status.Statuses from a Graph. -func buildStatuses(graph *graph.Graph, nginxReloadRes nginxReloadResult) status.Statuses { - statuses := status.Statuses{ +// buildGatewayAPIStatuses builds status.Statuses from a Graph. +func buildGatewayAPIStatuses(graph *graph.Graph, nginxReloadRes nginxReloadResult) status.GatewayAPIStatuses { + statuses := status.GatewayAPIStatuses{ HTTPRouteStatuses: make(status.HTTPRouteStatuses), } diff --git a/internal/mode/static/build_statuses_test.go b/internal/mode/static/build_statuses_test.go index f0283ded20..b84c3f0bf2 100644 --- a/internal/mode/static/build_statuses_test.go +++ b/internal/mode/static/build_statuses_test.go @@ -135,7 +135,7 @@ func TestBuildStatuses(t *testing.T) { Routes: routes, } - expected := status.Statuses{ + expected := status.GatewayAPIStatuses{ GatewayClassStatuses: status.GatewayClassStatuses{ {Name: ""}: { ObservedGeneration: 1, @@ -196,7 +196,7 @@ func TestBuildStatuses(t *testing.T) { g := NewWithT(t) var nginxReloadRes nginxReloadResult - result := buildStatuses(graph, nginxReloadRes) + result := buildGatewayAPIStatuses(graph, nginxReloadRes) g.Expect(helpers.Diff(expected, result)).To(BeEmpty()) } @@ -246,7 +246,7 @@ func TestBuildStatusesNginxErr(t *testing.T) { Routes: routes, } - expected := status.Statuses{ + expected := status.GatewayAPIStatuses{ GatewayClassStatuses: status.GatewayClassStatuses{}, GatewayStatuses: status.GatewayStatuses{ {Namespace: "test", Name: "gateway"}: { @@ -288,7 +288,7 @@ func TestBuildStatusesNginxErr(t *testing.T) { g := NewWithT(t) nginxReloadRes := nginxReloadResult{error: errors.New("test error")} - result := buildStatuses(graph, nginxReloadRes) + result := buildGatewayAPIStatuses(graph, nginxReloadRes) g.Expect(helpers.Diff(expected, result)).To(BeEmpty()) } diff --git a/internal/mode/static/config/config.go b/internal/mode/static/config/config.go index de92fdfd2c..0a6910afc7 100644 --- a/internal/mode/static/config/config.go +++ b/internal/mode/static/config/config.go @@ -7,23 +7,25 @@ import ( ) type Config struct { - // GatewayCtlrName is the name of this controller. - GatewayCtlrName string - // ConfigName is the name of the NginxGateway resource for this controller. - ConfigName string - // Logger is the Zap Logger used by all components. - Logger logr.Logger // AtomicLevel is an atomically changeable, dynamic logging level. AtomicLevel zap.AtomicLevel // GatewayNsName is the namespaced name of a Gateway resource that the Gateway will use. // The Gateway will ignore all other Gateway resources. GatewayNsName *types.NamespacedName + // Logger is the Zap Logger used by all components. + Logger logr.Logger + // GatewayCtlrName is the name of this controller. + GatewayCtlrName string + // ConfigName is the name of the NginxGateway resource for this controller. + ConfigName string // GatewayClassName is the name of the GatewayClass resource that the Gateway will use. GatewayClassName string // PodIP is the IP address of this Pod. PodIP string // Namespace is the Namespace of this Pod. Namespace string + // LeaderElection contains the configuration for leader election. + LeaderElection LeaderElection // UpdateGatewayClassStatus enables updating the status of the GatewayClass resource. UpdateGatewayClassStatus bool // MetricsConfig specifies the metrics config. @@ -49,3 +51,13 @@ type HealthConfig struct { // Enabled is the flag for toggling the health probe server on or off. Enabled bool } + +// LeaderElection contains the configuration for leader election. +type LeaderElection struct { + // LockName holds the name of the leader election lock. + LockName string + // Identity is the unique name of the controller used for identifying the leader. + Identity string + // Enabled indicates whether leader election is enabled. + Enabled bool +} diff --git a/internal/mode/static/handler.go b/internal/mode/static/handler.go index d69f45cb6e..442d7ba171 100644 --- a/internal/mode/static/handler.go +++ b/internal/mode/static/handler.go @@ -114,7 +114,7 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, batch events.Ev } } - h.cfg.statusUpdater.Update(ctx, buildStatuses(graph, nginxReloadRes)) + h.cfg.statusUpdater.Update(ctx, buildGatewayAPIStatuses(graph, nginxReloadRes)) } func (h *eventHandlerImpl) updateNginx(ctx context.Context, conf dataplane.Configuration) error { @@ -157,15 +157,13 @@ func (h *eventHandlerImpl) updateControlPlaneAndSetStatus(ctx context.Context, c } if cfg != nil { - statuses := status.Statuses{ - NginxGatewayStatus: &status.NginxGatewayStatus{ - NsName: client.ObjectKeyFromObject(cfg), - Conditions: cond, - ObservedGeneration: cfg.Generation, - }, + nginxGatewayStatus := &status.NginxGatewayStatus{ + NsName: client.ObjectKeyFromObject(cfg), + Conditions: cond, + ObservedGeneration: cfg.Generation, } - h.cfg.statusUpdater.Update(ctx, statuses) + h.cfg.statusUpdater.Update(ctx, nginxGatewayStatus) h.cfg.logger.Info("Reconfigured control plane.") } } diff --git a/internal/mode/static/handler_test.go b/internal/mode/static/handler_test.go index 007a6cca52..04b68e3b74 100644 --- a/internal/mode/static/handler_test.go +++ b/internal/mode/static/handler_test.go @@ -169,16 +169,14 @@ var _ = Describe("eventHandler", func() { } } - expStatuses := func(cond conditions.Condition) status.Statuses { - return status.Statuses{ - NginxGatewayStatus: &status.NginxGatewayStatus{ - NsName: types.NamespacedName{ - Namespace: namespace, - Name: configName, - }, - Conditions: []conditions.Condition{cond}, - ObservedGeneration: 0, + expStatuses := func(cond conditions.Condition) *status.NginxGatewayStatus { + return &status.NginxGatewayStatus{ + NsName: types.NamespacedName{ + Namespace: namespace, + Name: configName, }, + Conditions: []conditions.Condition{cond}, + ObservedGeneration: 0, } } @@ -207,7 +205,8 @@ var _ = Describe("eventHandler", func() { event := <-fakeEventRecorder.Events Expect(event).To(Equal( "Warning UpdateFailed Failed to update control plane configuration: logging.level: Unsupported value: " + - "\"invalid\": supported values: \"info\", \"debug\", \"error\"")) + "\"invalid\": supported values: \"info\", \"debug\", \"error\"", + )) Expect(handler.cfg.logLevelSetter.Enabled(zap.InfoLevel)).To(BeTrue()) }) diff --git a/internal/mode/static/leader.go b/internal/mode/static/leader.go new file mode 100644 index 0000000000..f80f4087f6 --- /dev/null +++ b/internal/mode/static/leader.go @@ -0,0 +1,90 @@ +package static + +import ( + "context" + "fmt" + "time" + + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/leaderelection" + "k8s.io/client-go/tools/leaderelection/resourcelock" + "k8s.io/client-go/tools/record" +) + +const ( + // These values are the defaults used by the core client. + renewDeadline = 10 * time.Second + leaseDuration = 15 * time.Second + retryPeriod = 2 * time.Second +) + +// leaderElectorRunnableConfig holds all the configuration for the leaderElector struct. +type leaderElectorRunnableConfig struct { + // kubeConfig is the kube config for the cluster. Used to create coreV1 and coordinationV1 clients which are needed + // for leader election. + kubeConfig *rest.Config + // recorder is the Kubernetes event recorder. Used to record events on the lease lock. + recorder record.EventRecorder + // onStartedLeading is the callback that is invoked asynchronously when the Pod starts leading. + onStartedLeading func(ctx context.Context) + // onStoppedLeading is the callback that is invoked asynchronously when the Pod stops leading. + onStoppedLeading func() + // lockNs is the namespace where the LeaseLock resource lives. + lockNs string + // lockName is the name of the LeaseLock resource. + lockName string + // identity is the unique name of this Pod. Used to identify the leader. + identity string +} + +// leaderElectorRunnable wraps a leaderelection.LeaderElector so that it implements the manager.Runnable interface +// and can be managed by the manager. +type leaderElectorRunnable struct { + le *leaderelection.LeaderElector +} + +// Start runs the leaderelection.LeaderElector and blocks until the context is canceled or Run returns. +func (l *leaderElectorRunnable) Start(ctx context.Context) error { + l.le.Run(ctx) + return nil +} + +// IsLeader returns if the Pod is the current leader. +func (l *leaderElectorRunnable) IsLeader() bool { + return l.le.IsLeader() +} + +// newLeaderElector returns a new leader elector client. +func newLeaderElectorRunnable(config leaderElectorRunnableConfig) (*leaderElectorRunnable, error) { + lock, err := resourcelock.NewFromKubeconfig( + resourcelock.LeasesResourceLock, + config.lockNs, + config.lockName, + resourcelock.ResourceLockConfig{ + Identity: config.identity, + EventRecorder: config.recorder, + }, + config.kubeConfig, + renewDeadline, + ) + if err != nil { + return nil, fmt.Errorf("error creating lease lock for leader election: %w", err) + } + + leaderElector, err := leaderelection.NewLeaderElector(leaderelection.LeaderElectionConfig{ + Lock: lock, + LeaseDuration: leaseDuration, + RenewDeadline: renewDeadline, + RetryPeriod: retryPeriod, + Callbacks: leaderelection.LeaderCallbacks{ + OnStartedLeading: config.onStartedLeading, + OnStoppedLeading: config.onStoppedLeading, + }, + Name: lock.Describe(), + }) + if err != nil { + return nil, fmt.Errorf("error creating leader elector: %w", err) + } + + return &leaderElectorRunnable{le: leaderElector}, nil +} diff --git a/internal/mode/static/manager.go b/internal/mode/static/manager.go index 90e65db6d3..a140f581d9 100644 --- a/internal/mode/static/manager.go +++ b/internal/mode/static/manager.go @@ -133,15 +133,19 @@ func StartManager(cfg config.Config) error { Logger: cfg.Logger.WithName("statusUpdater"), Clock: status.NewRealClock(), UpdateGatewayClassStatus: cfg.UpdateGatewayClassStatus, + LeaderElectionEnabled: cfg.LeaderElection.Enabled, }) eventHandler := newEventHandlerImpl(eventHandlerConfig{ - processor: processor, - serviceResolver: resolver.NewServiceResolverImpl(mgr.GetClient()), - generator: ngxcfg.NewGeneratorImpl(), - logger: cfg.Logger.WithName("eventHandler"), - logLevelSetter: logLevelSetter, - nginxFileMgr: file.NewManagerImpl(cfg.Logger.WithName("nginxFileManager"), file.NewStdLibOSFileManager()), + processor: processor, + serviceResolver: resolver.NewServiceResolverImpl(mgr.GetClient()), + generator: ngxcfg.NewGeneratorImpl(), + logger: cfg.Logger.WithName("eventHandler"), + logLevelSetter: logLevelSetter, + nginxFileMgr: file.NewManagerImpl( + cfg.Logger.WithName("nginxFileManager"), + file.NewStdLibOSFileManager(), + ), nginxRuntimeMgr: ngxruntime.NewManagerImpl(), statusUpdater: statusUpdater, eventRecorder: recorder, @@ -162,6 +166,33 @@ func StartManager(cfg config.Config) error { return fmt.Errorf("cannot register event loop: %w", err) } + leaderElectorLogger := cfg.Logger.WithName("leaderElector") + + if cfg.LeaderElection.Enabled { + leaderElector, err := newLeaderElectorRunnable(leaderElectorRunnableConfig{ + kubeConfig: clusterCfg, + recorder: recorder, + onStartedLeading: func(ctx context.Context) { + leaderElectorLogger.Info("Started leading") + statusUpdater.Enable(ctx) + }, + onStoppedLeading: func() { + leaderElectorLogger.Info("Stopped leading") + statusUpdater.Disable() + }, + lockNs: cfg.Namespace, + lockName: cfg.LeaderElection.LockName, + identity: cfg.LeaderElection.Identity, + }) + if err != nil { + return err + } + + if err = mgr.Add(leaderElector); err != nil { + return fmt.Errorf("cannot register leader elector: %w", err) + } + } + // Ensure NGINX is running before registering metrics & starting the manager. if err := ngxruntime.EnsureNginxRunning(ctx); err != nil { return fmt.Errorf("NGINX is not running: %w", err) @@ -271,7 +302,6 @@ func registerControllers( return fmt.Errorf("cannot register controller for %T: %w", regCfg.objectType, err) } } - return nil }