From 1a5b69330c800a433bc3bd6825a2d44066995f18 Mon Sep 17 00:00:00 2001 From: Ignas Baranauskas Date: Wed, 24 Jul 2024 11:07:02 +0100 Subject: [PATCH 1/2] Add a check for head pod imagePullSecrets --- pkg/controllers/raycluster_controller.go | 52 +++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/pkg/controllers/raycluster_controller.go b/pkg/controllers/raycluster_controller.go index d0c3cbabf..5691a92f4 100644 --- a/pkg/controllers/raycluster_controller.go +++ b/pkg/controllers/raycluster_controller.go @@ -213,6 +213,10 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{RequeueAfter: requeueTime}, err } + if err := r.deleteHeadPodIfMissingImagePullSecrets(ctx, cluster); err != nil { + return ctrl.Result{RequeueAfter: requeueTime}, err + } + _, err = r.kubeClient.RbacV1().ClusterRoleBindings().Apply(ctx, desiredOAuthClusterRoleBinding(cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { logger.Error(err, "Failed to update OAuth ClusterRoleBinding") @@ -470,6 +474,7 @@ func generateCACertificate() ([]byte, []byte, error) { return privateKeyPem, certPem, nil } + func desiredWorkersNetworkPolicy(cluster *rayv1.RayCluster) *networkingv1ac.NetworkPolicyApplyConfiguration { return networkingv1ac.NetworkPolicy(cluster.Name+"-workers", cluster.Namespace). WithLabels(map[string]string{RayClusterNameLabel: cluster.Name}). @@ -486,6 +491,7 @@ func desiredWorkersNetworkPolicy(cluster *rayv1.RayCluster) *networkingv1ac.Netw metav1ac.OwnerReference().WithUID(cluster.UID).WithName(cluster.Name).WithKind(cluster.Kind).WithAPIVersion(cluster.APIVersion).WithController(true), ) } + func desiredHeadNetworkPolicy(cluster *rayv1.RayCluster, cfg *config.KubeRayConfiguration, kubeRayNamespaces []string) *networkingv1ac.NetworkPolicyApplyConfiguration { allSecuredPorts := []*networkingv1ac.NetworkPolicyPortApplyConfiguration{ networkingv1ac.NetworkPolicyPort().WithProtocol(corev1.ProtocolTCP).WithPort(intstr.FromInt(8443)), @@ -544,6 +550,49 @@ func desiredHeadNetworkPolicy(cluster *rayv1.RayCluster, cfg *config.KubeRayConf ) } +func (r *RayClusterReconciler) deleteHeadPodIfMissingImagePullSecrets(ctx context.Context, cluster *rayv1.RayCluster) error { + serviceAccount, err := r.kubeClient.CoreV1().ServiceAccounts(cluster.Namespace).Get(ctx, oauthServiceAccountNameFromCluster(cluster), metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get OAuth ServiceAccount: %w", err) + } + + headPod, err := getHeadPod(ctx, r, cluster) + if err != nil { + return fmt.Errorf("failed to get head pod: %w", err) + } + + if headPod == nil { + return nil + } + + missingSecrets := map[string]bool{} + for _, secret := range serviceAccount.ImagePullSecrets { + missingSecrets[secret.Name] = true + } + for _, secret := range headPod.Spec.ImagePullSecrets { + delete(missingSecrets, secret.Name) + } + if len(missingSecrets) > 0 { + if err := r.kubeClient.CoreV1().Pods(headPod.Namespace).Delete(ctx, headPod.Name, metav1.DeleteOptions{}); err != nil { + return fmt.Errorf("failed to delete head pod: %w", err) + } + } + return nil +} + +func getHeadPod(ctx context.Context, r *RayClusterReconciler, cluster *rayv1.RayCluster) (*corev1.Pod, error) { + podList, err := r.kubeClient.CoreV1().Pods(cluster.Namespace).List(ctx, metav1.ListOptions{ + LabelSelector: fmt.Sprintf("ray.io/node-type=head,ray.io/cluster=%s", cluster.Name), + }) + if err != nil { + return nil, err + } + if len(podList.Items) > 0 { + return &podList.Items[0], nil + } + return nil, nil +} + // SetupWithManager sets up the controller with the Manager. func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { r.kubeClient = kubernetes.NewForConfigOrDie(mgr.GetConfig()) @@ -577,7 +626,8 @@ func (r *RayClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { NamespacedName: client.ObjectKey{ Name: name, Namespace: namespace, - }}} + }, + }} }), ) if r.IsOpenShift { From 16441e224f618a89d21e0f927c0eafab761df077 Mon Sep 17 00:00:00 2001 From: Ignas Baranauskas Date: Thu, 25 Jul 2024 11:21:00 +0100 Subject: [PATCH 2/2] Unit test for head pod imagePullSecrets --- pkg/controllers/raycluster_controller_test.go | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/pkg/controllers/raycluster_controller_test.go b/pkg/controllers/raycluster_controller_test.go index 94958b334..e0a7e969c 100644 --- a/pkg/controllers/raycluster_controller_test.go +++ b/pkg/controllers/raycluster_controller_test.go @@ -33,7 +33,7 @@ import ( var _ = Describe("RayCluster controller", func() { Context("RayCluster controller test", func() { - var rayClusterName = "test-raycluster" + rayClusterName := "test-raycluster" var namespaceName string BeforeEach(func(ctx SpecContext) { By("Creating a namespace for running the tests.") @@ -145,6 +145,53 @@ var _ = Describe("RayCluster controller", func() { }).WithTimeout(time.Second * 10).Should(WithTransform(OwnerReferenceName, Equal(foundRayCluster.Name))) }) + It("should delete the head pod if missing image pull secrets", func(ctx SpecContext) { + foundRayCluster, err := rayClient.RayV1().RayClusters(namespaceName).Get(ctx, rayClusterName, metav1.GetOptions{}) + Expect(err).To(Not(HaveOccurred())) + + Eventually(func() (*corev1.ServiceAccount, error) { + return k8sClient.CoreV1().ServiceAccounts(namespaceName).Get(ctx, oauthServiceAccountNameFromCluster(foundRayCluster), metav1.GetOptions{}) + }).WithTimeout(time.Second * 10).Should(WithTransform(OwnerReferenceKind, Equal("RayCluster"))) + + headPodName := "head-pod" + headPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: headPodName, + Namespace: namespaceName, + Labels: map[string]string{ + "ray.io/node-type": "head", + "ray.io/cluster": foundRayCluster.Name, + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "head-container", + Image: "busybox", + }, + }, + }, + } + _, err = k8sClient.CoreV1().Pods(namespaceName).Create(ctx, headPod, metav1.CreateOptions{}) + Expect(err).To(Not(HaveOccurred())) + + Eventually(func() (*corev1.Pod, error) { + return k8sClient.CoreV1().Pods(namespaceName).Get(ctx, headPodName, metav1.GetOptions{}) + }).WithTimeout(time.Second * 10).ShouldNot(BeNil()) + + sa, err := k8sClient.CoreV1().ServiceAccounts(namespaceName).Get(ctx, oauthServiceAccountNameFromCluster(foundRayCluster), metav1.GetOptions{}) + Expect(err).To(Not(HaveOccurred())) + + sa.ImagePullSecrets = append(sa.ImagePullSecrets, corev1.LocalObjectReference{Name: "test-image-pull-secret"}) + _, err = k8sClient.CoreV1().ServiceAccounts(namespaceName).Update(ctx, sa, metav1.UpdateOptions{}) + Expect(err).To(Not(HaveOccurred())) + + Eventually(func() error { + _, err := k8sClient.CoreV1().Pods(namespaceName).Get(ctx, headPodName, metav1.GetOptions{}) + return err + }).WithTimeout(time.Second * 10).Should(Satisfy(errors.IsNotFound)) + }) + It("should remove CRB when the RayCluster is deleted", func(ctx SpecContext) { foundRayCluster, err := rayClient.RayV1().RayClusters(namespaceName).Get(ctx, rayClusterName, metav1.GetOptions{}) Expect(err).To(Not(HaveOccurred())) @@ -157,7 +204,6 @@ var _ = Describe("RayCluster controller", func() { return err }).WithTimeout(time.Second * 10).Should(Satisfy(errors.IsNotFound)) }) - }) })