Commit 54a5a12

Minor fixes and unit test additions
1 parent: 2b2882c · commit: 54a5a12

File tree: 3 files changed (+47, -7 lines)


src/codeflare_sdk/cluster/cluster.py

Lines changed: 12 additions & 0 deletions
@@ -614,6 +614,15 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
         worker_gpu=0,  # hard to detect currently how many gpus, can override it with what the user asked for
         namespace=rc["metadata"]["namespace"],
         dashboard=ray_route,
+        head_cpus=rc["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][
+            "resources"
+        ]["limits"]["cpu"],
+        head_mem=rc["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][
+            "resources"
+        ]["limits"]["memory"],
+        head_gpu=rc["spec"]["headGroupSpec"]["template"]["spec"]["containers"][0][
+            "resources"
+        ]["limits"]["nvidia.com/gpu"],
     )
 
 
@@ -644,6 +653,9 @@ def _copy_to_ray(cluster: Cluster) -> RayCluster:
         worker_gpu=cluster.config.num_gpus,
         namespace=cluster.config.namespace,
         dashboard=cluster.cluster_dashboard_uri(),
+        head_cpus=cluster.config.head_cpus,
+        head_mem=cluster.config.head_memory,
+        head_gpu=cluster.config.head_gpus,
     )
     if ray.status == CodeFlareClusterStatus.READY:
         ray.status = RayClusterStatus.READY
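The cluster.py change surfaces the head node's resources on the RayCluster object: _map_to_ray_cluster reads them from the custom resource at spec.headGroupSpec.template.spec.containers[0].resources.limits, while _copy_to_ray takes them from the ClusterConfiguration. As a rough, hypothetical sketch of that lookup path (the helper name and sample dict below are illustrative only, not SDK code):

```python
# Hypothetical helper: walks the same path the SDK indexes directly, but with
# .get() fallbacks so missing keys return an empty dict instead of raising KeyError.
def head_limits(rc: dict) -> dict:
    containers = (
        rc.get("spec", {})
        .get("headGroupSpec", {})
        .get("template", {})
        .get("spec", {})
        .get("containers", [])
    )
    head = containers[0] if containers else {}
    return head.get("resources", {}).get("limits", {})


# Made-up RayCluster fragment, just to show the shape of the data:
rc = {
    "spec": {
        "headGroupSpec": {
            "template": {
                "spec": {
                    "containers": [
                        {
                            "resources": {
                                "limits": {"cpu": 2, "memory": "8G", "nvidia.com/gpu": 0}
                            }
                        }
                    ]
                }
            }
        }
    }
}

limits = head_limits(rc)
print(limits["cpu"], limits["memory"], limits["nvidia.com/gpu"])  # 2 8G 0
```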

src/codeflare_sdk/utils/generate_yaml.py

Lines changed: 29 additions & 7 deletions
@@ -107,7 +107,17 @@ def update_priority(yaml, item, dispatch_priority, priority_val):
 
 
 def update_custompodresources(
-    item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, head_cpus, head_memory, head_gpus):
+    item,
+    min_cpu,
+    max_cpu,
+    min_memory,
+    max_memory,
+    gpu,
+    workers,
+    head_cpus,
+    head_memory,
+    head_gpus,
+):
     if "custompodresources" in item.keys():
         custompodresources = item.get("custompodresources")
         for i in range(len(custompodresources)):

@@ -120,8 +130,8 @@ def update_custompodresources(
                 resource["limits"]["memory"] = str(head_memory) + "G"
                 resource["requests"]["nvidia.com/gpu"] = head_gpus
                 resource["limits"]["nvidia.com/gpu"] = head_gpus
-
-            else:
+
+            else:
                 for k, v in resource.items():
                     if k == "replicas" and i == 1:
                         resource[k] = workers

@@ -217,8 +227,8 @@ def update_nodes(
 ):
     if "generictemplate" in item.keys():
         head = item.get("generictemplate").get("spec").get("headGroupSpec")
-        head["rayStartParams"]["num_gpus"] = str(int(head_gpus))
-
+        head["rayStartParams"]["num-gpus"] = str(int(head_gpus))
+
         worker = item.get("generictemplate").get("spec").get("workerGroupSpecs")[0]
         # Head counts as first worker
         worker["replicas"] = workers

@@ -235,7 +245,9 @@ def update_nodes(
             update_env(spec, env)
             if comp == head:
                 # TODO: Eventually add head node configuration outside of template
-                update_resources(spec, head_cpus, head_cpus, head_memory, head_memory, head_gpus)
+                update_resources(
+                    spec, head_cpus, head_cpus, head_memory, head_memory, head_gpus
+                )
             else:
                 update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, gpu)
 

@@ -388,7 +400,17 @@ def generate_appwrapper(
     update_labels(user_yaml, instascale, instance_types)
     update_priority(user_yaml, item, dispatch_priority, priority_val)
     update_custompodresources(
-        item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, head_cpus, head_memory, head_gpus)
+        item,
+        min_cpu,
+        max_cpu,
+        min_memory,
+        max_memory,
+        gpu,
+        workers,
+        head_cpus,
+        head_memory,
+        head_gpus,
+    )
     update_nodes(
         item,
         appwrapper_name,
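Besides reflowing the long update_custompodresources signature and call to one argument per line, the generate_yaml.py change corrects the head group's rayStartParams key from num_gpus to num-gpus, the dashed form Ray's start parameters use. A minimal, hypothetical sketch of that rename (the template fragment below is illustrative, not the SDK's real generic template):

```python
# Illustrative headGroupSpec fragment; only the rayStartParams key matters here.
head = {"rayStartParams": {"dashboard-host": "0.0.0.0", "num_gpus": "0"}}

head_gpus = 1

# The fix: write the dashed key Ray understands and drop the stale underscore key.
head["rayStartParams"].pop("num_gpus", None)
head["rayStartParams"]["num-gpus"] = str(int(head_gpus))

print(head["rayStartParams"])  # {'dashboard-host': '0.0.0.0', 'num-gpus': '1'}
```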

tests/unit_test.py

Lines changed: 6 additions & 0 deletions
@@ -525,6 +525,9 @@ def test_ray_details(mocker, capsys):
         worker_gpu=0,
         namespace="ns",
         dashboard="fake-uri",
+        head_cpus=2,
+        head_mem=8,
+        head_gpu=0,
     )
     mocker.patch(
         "codeflare_sdk.cluster.cluster.Cluster.status",

@@ -1685,6 +1688,9 @@ def test_cluster_status(mocker):
         worker_gpu=0,
         namespace="ns",
         dashboard="fake-uri",
+        head_cpus=2,
+        head_mem=8,
+        head_gpu=0,
     )
     cf = Cluster(ClusterConfiguration(name="test", namespace="ns"))
     mocker.patch("codeflare_sdk.cluster.cluster._app_wrapper_status", return_value=None)
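Both unit tests now build the mocked RayCluster with the three new keyword arguments, so any other fixture constructing one needs head_cpus, head_mem, and head_gpu as well. A self-contained sketch of that pattern (the trimmed stand-in dataclass below is hypothetical; it is not the SDK's full RayCluster definition):

```python
from dataclasses import dataclass


# Hypothetical, trimmed stand-in for the SDK's RayCluster dataclass;
# only the fields this commit touches are listed.
@dataclass
class FakeRayCluster:
    namespace: str
    dashboard: str
    worker_gpu: int
    head_cpus: int
    head_mem: int
    head_gpu: int


def test_head_fields_round_trip():
    rc = FakeRayCluster(
        namespace="ns",
        dashboard="fake-uri",
        worker_gpu=0,
        head_cpus=2,
        head_mem=8,
        head_gpu=0,
    )
    assert (rc.head_cpus, rc.head_mem, rc.head_gpu) == (2, 8, 0)
```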
