Skip to content

Commit 18d125f

Browse files
make namespace configurable
1 parent f0b5377 commit 18d125f

File tree

3 files changed

+29
-19
lines changed

3 files changed

+29
-19
lines changed

src/codeflare_sdk/cluster/cluster.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
from argparse import Namespace
12
from os import stat
23
from typing import List, Optional, Tuple
4+
from unicodedata import name
35

46
import openshift as oc
57

@@ -18,6 +20,7 @@ def __init__(self, config: ClusterConfiguration):
1820

1921
def create_app_wrapper(self):
2022
name=self.config.name
23+
namespace=self.config.namespace
2124
min_cpu=self.config.min_cpus
2225
max_cpu=self.config.max_cpus
2326
min_memory=self.config.min_memory
@@ -29,21 +32,23 @@ def create_app_wrapper(self):
2932
instascale=self.config.instascale
3033
instance_types=self.config.machine_types
3134
env=self.config.envs
32-
return generate_appwrapper(name=name, min_cpu=min_cpu, max_cpu=max_cpu, min_memory=min_memory,
35+
return generate_appwrapper(name=name, namespace=namespace, min_cpu=min_cpu, max_cpu=max_cpu, min_memory=min_memory,
3336
max_memory=max_memory, gpu=gpu, workers=workers, template=template,
3437
image=image, instascale=instascale, instance_types=instance_types, env=env)
3538

36-
# creates a new cluster with the provided or default spec
37-
def up(self, namespace='default'):
39+
# creates a new cluster with the provided or default spec
40+
def up(self):
41+
namespace = self.config.namespace
3842
with oc.project(namespace):
3943
oc.invoke("apply", ["-f", self.app_wrapper_yaml])
4044

41-
def down(self, namespace='default'):
45+
def down(self):
46+
namespace = self.config.namespace
4247
with oc.project(namespace):
4348
oc.invoke("delete", ["AppWrapper", self.app_wrapper_name])
4449

4550
def status(self, print_to_console=True):
46-
cluster = _ray_cluster_status(self.config.name)
51+
cluster = _ray_cluster_status(self.config.name, self.config.namespace)
4752
if cluster:
4853
#overriding the number of gpus with requested
4954
cluster.worker_gpu = self.config.gpu
@@ -55,19 +60,19 @@ def status(self, print_to_console=True):
5560
pretty_print.print_no_resources_found()
5661
return None
5762

58-
def cluster_uri(self, namespace='default'):
59-
return f'ray://{self.config.name}-head-svc.{namespace}.svc:10001'
63+
def cluster_uri(self):
64+
return f'ray://{self.config.name}-head-svc.{self.config.namespace}.svc:10001'
6065

61-
def cluster_dashboard_uri(self, namespace='default'):
62-
return f'http://{self.config.name}-head-svc.{namespace}.svc:8265'
66+
def cluster_dashboard_uri(self):
67+
return f'http://{self.config.name}-head-svc.{self.config.namespace}.svc:8265'
6368

6469

6570
# checks whether the ray cluster is ready
6671
def is_ready(self, print_to_console=True):
6772
ready = False
6873
status = CodeFlareClusterStatus.UNKNOWN
6974
# check the app wrapper status
70-
appwrapper = _app_wrapper_status(self.config.name)
75+
appwrapper = _app_wrapper_status(self.config.name, self.config.namespace)
7176
if appwrapper:
7277
if appwrapper.status in [AppWrapperStatus.RUNNING, AppWrapperStatus.COMPLETED, AppWrapperStatus.RUNNING_HOLD_COMPLETION]:
7378
ready = False
@@ -84,7 +89,7 @@ def is_ready(self, print_to_console=True):
8489
return ready, status# no need to check the ray status since still in queue
8590

8691
# check the ray cluster status
87-
cluster = _ray_cluster_status(self.config.name)
92+
cluster = _ray_cluster_status(self.config.name, self.config.namespace)
8893
if cluster:
8994
if cluster.status == RayClusterStatus.READY:
9095
ready = True
@@ -99,16 +104,19 @@ def is_ready(self, print_to_console=True):
99104
pretty_print.print_clusters([cluster])
100105
return status, ready
101106

107+
def get_current_namespace():
108+
namespace = oc.invoke("project",["-q"]).actions()[0].out.strip()
109+
return namespace
102110

103-
def list_all_clusters(print_to_console=True):
104-
clusters = _get_ray_clusters()
111+
def list_all_clusters(namespace, print_to_console=True):
112+
clusters = _get_ray_clusters(namespace)
105113
if print_to_console:
106114
pretty_print.print_clusters(clusters)
107115
return clusters
108116

109117

110-
def list_all_queued(print_to_console=True):
111-
app_wrappers = _get_app_wrappers(filter=[AppWrapperStatus.RUNNING, AppWrapperStatus.PENDING])
118+
def list_all_queued(namespace, print_to_console=True):
119+
app_wrappers = _get_app_wrappers( namespace, filter=[AppWrapperStatus.RUNNING, AppWrapperStatus.PENDING])
112120
if print_to_console:
113121
pretty_print.print_app_wrappers_status(app_wrappers)
114122
return app_wrappers
@@ -151,7 +159,7 @@ def _get_ray_clusters(namespace='default') -> List[RayCluster]:
151159

152160

153161

154-
def _get_app_wrappers(filter:List[AppWrapperStatus], namespace='default') -> List[AppWrapper]:
162+
def _get_app_wrappers(namespace='default', filter=List[AppWrapperStatus]) -> List[AppWrapper]:
155163
list_of_app_wrappers = []
156164

157165
with oc.project(namespace), oc.timeout(10*60):

src/codeflare_sdk/cluster/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
@dataclass
77
class ClusterConfiguration:
88
name: str
9+
namespace: str = "default"
910
head_info: list = field(default_factory=list)
1011
machine_types: list = field(default_factory=list) #["m4.xlarge", "g4dn.xlarge"]
1112
min_cpus: int = 1

src/codeflare_sdk/utils/generate_yaml.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@ def gen_names(name):
1919
else:
2020
return name, name
2121

22-
def update_names(yaml, item, appwrapper_name, cluster_name):
22+
def update_names(yaml, item, appwrapper_name, cluster_name, namespace):
2323
metadata = yaml.get("metadata")
2424
metadata["name"] = appwrapper_name
25+
metadata["namespace"] = namespace
2526
lower_meta = item.get("generictemplate", {}).get("metadata")
2627
lower_meta["labels"]["appwrapper.mcad.ibm.com"] = appwrapper_name
2728
lower_meta["name"] = cluster_name
@@ -128,12 +129,12 @@ def write_user_appwrapper(user_yaml, output_file_name):
128129
yaml.dump(user_yaml, outfile, default_flow_style=False)
129130
print(f"Written to: {output_file_name}")
130131

131-
def generate_appwrapper(name, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, template, image, instascale, instance_types, env):
132+
def generate_appwrapper(name, namespace, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, template, image, instascale, instance_types, env):
132133
user_yaml = read_template(template)
133134
appwrapper_name, cluster_name = gen_names(name)
134135
resources = user_yaml.get("spec","resources")
135136
item = resources["resources"].get("GenericItems")[0]
136-
update_names(user_yaml, item, appwrapper_name, cluster_name)
137+
update_names(user_yaml, item, appwrapper_name, cluster_name, namespace)
137138
update_labels(user_yaml, instascale, instance_types)
138139
update_custompodresources(item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers)
139140
update_nodes(item, appwrapper_name, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, image, instascale, env)

0 commit comments

Comments
 (0)