From c3060b9c2f7d76b625d22c5226ca7fe7fc78d4c4 Mon Sep 17 00:00:00 2001 From: ChristianZaccaria Date: Thu, 11 Apr 2024 18:41:10 +0100 Subject: [PATCH 1/2] RC Controller - Avoid multiple calls to DiscoveryAPI --- main.go | 6 +++--- pkg/controllers/raycluster_controller.go | 25 ++++++++++++++---------- pkg/controllers/support.go | 25 +++++++++++++++--------- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/main.go b/main.go index 752f146f2..8a129e806 100644 --- a/main.go +++ b/main.go @@ -146,9 +146,9 @@ func main() { }) exitOnError(err, "unable to start manager") - v, err := HasAPIResourceForGVK(kubeClient.DiscoveryClient, rayv1.GroupVersion.WithKind("RayCluster")) - if v { - rayClusterController := controllers.RayClusterReconciler{Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Config: cfg} + ok, err := HasAPIResourceForGVK(kubeClient.DiscoveryClient, rayv1.GroupVersion.WithKind("RayCluster")) + if ok { + rayClusterController := controllers.RayClusterReconciler{Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Config: cfg.KubeRay} exitOnError(rayClusterController.SetupWithManager(mgr), "Error setting up RayCluster controller") } else if err != nil { exitOnError(err, "Could not determine if RayCluster CR present on cluster.") diff --git a/pkg/controllers/raycluster_controller.go b/pkg/controllers/raycluster_controller.go index a098f9b27..769a5e099 100644 --- a/pkg/controllers/raycluster_controller.go +++ b/pkg/controllers/raycluster_controller.go @@ -48,11 +48,13 @@ import ( // RayClusterReconciler reconciles a RayCluster object type RayClusterReconciler struct { client.Client - kubeClient *kubernetes.Clientset - routeClient *routev1client.RouteV1Client - Scheme *runtime.Scheme - CookieSalt string - Config *config.CodeFlareOperatorConfiguration + kubeClient *kubernetes.Clientset + routeClient *routev1client.RouteV1Client + Scheme *runtime.Scheme + CookieSalt string + Config *config.KubeRayConfiguration + IsOpenShift bool + IsOpenShiftInitialized bool } const ( @@ -105,8 +107,10 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) } isLocalInteractive := annotationBoolVal(ctx, &cluster, "sdk.codeflare.dev/local_interactive", false) - ingressDomain := "" // FIX - CFO will retrieve it. - isOpenShift, ingressHost := getClusterType(ctx, r.kubeClient, &cluster, ingressDomain) + if !r.IsOpenShiftInitialized { + r.IsOpenShift = isOpenShift(ctx, r.kubeClient, &cluster) + r.IsOpenShiftInitialized = true + } if cluster.ObjectMeta.DeletionTimestamp.IsZero() { if !controllerutil.ContainsFinalizer(&cluster, oAuthFinalizer) { @@ -141,7 +145,7 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } - if cluster.Status.State != "suspended" && r.isRayDashboardOAuthEnabled() && isOpenShift { + if cluster.Status.State != "suspended" && r.isRayDashboardOAuthEnabled() && r.IsOpenShift { logger.Info("Creating OAuth Objects") _, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredClusterRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { @@ -182,9 +186,10 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } - } else if cluster.Status.State != "suspended" && !r.isRayDashboardOAuthEnabled() && !isOpenShift { + } else if cluster.Status.State != "suspended" && !r.isRayDashboardOAuthEnabled() && !r.IsOpenShift { + ingressDomain := "" logger.Info("Creating Dashboard Ingress") - _, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredClusterIngress(&cluster, ingressHost), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + _, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredClusterIngress(&cluster, getIngressHost(ctx, r.kubeClient, &cluster, ingressDomain)), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { // This log is info level since errors are not fatal and are expected logger.Info("WARN: Failed to update Dashboard Ingress", "error", err.Error(), logRequeueing, true) diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index 348cd03de..44b21bda9 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -126,42 +126,49 @@ func getDiscoveryClient(config *rest.Config) (*discovery.DiscoveryClient, error) // Check where we are running. We are trying to distinguish here whether // this is vanilla kubernetes cluster or Openshift -func getClusterType(ctx context.Context, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster, ingressDomain string) (bool, string) { +func isOpenShift(ctx context.Context, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster) bool { // The discovery package is used to discover APIs supported by a Kubernetes API server. logger := ctrl.LoggerFrom(ctx) config, err := ctrl.GetConfig() if err != nil && config == nil { logger.Info("Cannot retrieve config, assuming we're on Vanilla Kubernetes") - return false, fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingressDomain) + return false } dclient, err := getDiscoveryClient(config) if err != nil && dclient == nil { logger.Info("Cannot retrieve a DiscoveryClient, assuming we're on Vanilla Kubernetes") - return false, fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingressDomain) + return false } apiGroupList, err := dclient.ServerGroups() if err != nil { logger.Info("Error while querying ServerGroups, assuming we're on Vanilla Kubernetes") - return false, "" + return false } for i := 0; i < len(apiGroupList.Groups); i++ { if strings.HasSuffix(apiGroupList.Groups[i].Name, ".openshift.io") { logger.Info("We detected being on OpenShift!") - return true, "" + return true } } + logger.Info("We detected being on Vanilla Kubernetes!") + return false +} + +// getIngressHost generates the cluster URL string based on the cluster type, RayCluster, and ingress domain. +func getIngressHost(ctx context.Context, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster, ingressDomain string) string { + logger := ctrl.LoggerFrom(ctx) onKind, _ := isOnKindCluster(clientset) if onKind && ingressDomain == "" { logger.Info("We detected being on a KinD cluster!") - return false, "kind" + return "kind" } logger.Info("We detected being on Vanilla Kubernetes!") - return false, fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingressDomain) + return fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingressDomain) } func (r *RayClusterReconciler) isRayDashboardOAuthEnabled() bool { - if r.Config != nil && r.Config.KubeRay != nil && r.Config.KubeRay.RayDashboardOAuthEnabled != nil { - return *r.Config.KubeRay.RayDashboardOAuthEnabled + if r.Config != nil && r.Config.RayDashboardOAuthEnabled != nil { + return *r.Config.RayDashboardOAuthEnabled } return true } From ca16f64502753f549d841ddd6875df88d9c63579 Mon Sep 17 00:00:00 2001 From: ChristianZaccaria Date: Fri, 12 Apr 2024 11:33:27 +0100 Subject: [PATCH 2/2] Add TODO notifier on ingressDomain --- pkg/controllers/raycluster_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controllers/raycluster_controller.go b/pkg/controllers/raycluster_controller.go index 769a5e099..db13f0854 100644 --- a/pkg/controllers/raycluster_controller.go +++ b/pkg/controllers/raycluster_controller.go @@ -187,7 +187,7 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } else if cluster.Status.State != "suspended" && !r.isRayDashboardOAuthEnabled() && !r.IsOpenShift { - ingressDomain := "" + ingressDomain := "" // TODO: ingressDomain should be retrieved by the CFO and used here to fix local_interactive. Jira: https://issues.redhat.com/browse/RHOAIENG-5330 logger.Info("Creating Dashboard Ingress") _, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredClusterIngress(&cluster, getIngressHost(ctx, r.kubeClient, &cluster, ingressDomain)), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil {