Skip to content

Commit 10804fc

Browse files
make namespace configurable
1 parent f0b5377 commit 10804fc

File tree

3 files changed

+28
-19
lines changed

3 files changed

+28
-19
lines changed

src/codeflare_sdk/cluster/cluster.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def __init__(self, config: ClusterConfiguration):
1818

1919
def create_app_wrapper(self):
2020
name=self.config.name
21+
namespace=self.config.namespace
2122
min_cpu=self.config.min_cpus
2223
max_cpu=self.config.max_cpus
2324
min_memory=self.config.min_memory
@@ -29,21 +30,23 @@ def create_app_wrapper(self):
2930
instascale=self.config.instascale
3031
instance_types=self.config.machine_types
3132
env=self.config.envs
32-
return generate_appwrapper(name=name, min_cpu=min_cpu, max_cpu=max_cpu, min_memory=min_memory,
33+
return generate_appwrapper(name=name, namespace=namespace, min_cpu=min_cpu, max_cpu=max_cpu, min_memory=min_memory,
3334
max_memory=max_memory, gpu=gpu, workers=workers, template=template,
3435
image=image, instascale=instascale, instance_types=instance_types, env=env)
3536

36-
# creates a new cluster with the provided or default spec
37-
def up(self, namespace='default'):
37+
# creates a new cluster with the provided or default spec
38+
def up(self):
39+
namespace = self.config.namespace
3840
with oc.project(namespace):
3941
oc.invoke("apply", ["-f", self.app_wrapper_yaml])
4042

41-
def down(self, namespace='default'):
43+
def down(self):
44+
namespace = self.config.namespace
4245
with oc.project(namespace):
4346
oc.invoke("delete", ["AppWrapper", self.app_wrapper_name])
4447

4548
def status(self, print_to_console=True):
46-
cluster = _ray_cluster_status(self.config.name)
49+
cluster = _ray_cluster_status(self.config.name, self.config.namespace)
4750
if cluster:
4851
#overriding the number of gpus with requested
4952
cluster.worker_gpu = self.config.gpu
@@ -55,19 +58,19 @@ def status(self, print_to_console=True):
5558
pretty_print.print_no_resources_found()
5659
return None
5760

58-
def cluster_uri(self, namespace='default'):
59-
return f'ray://{self.config.name}-head-svc.{namespace}.svc:10001'
61+
def cluster_uri(self):
62+
return f'ray://{self.config.name}-head-svc.{self.config.namespace}.svc:10001'
6063

61-
def cluster_dashboard_uri(self, namespace='default'):
62-
return f'http://{self.config.name}-head-svc.{namespace}.svc:8265'
64+
def cluster_dashboard_uri(self):
65+
return f'http://{self.config.name}-head-svc.{self.config.namespace}.svc:8265'
6366

6467

6568
# checks whether the ray cluster is ready
6669
def is_ready(self, print_to_console=True):
6770
ready = False
6871
status = CodeFlareClusterStatus.UNKNOWN
6972
# check the app wrapper status
70-
appwrapper = _app_wrapper_status(self.config.name)
73+
appwrapper = _app_wrapper_status(self.config.name, self.config.namespace)
7174
if appwrapper:
7275
if appwrapper.status in [AppWrapperStatus.RUNNING, AppWrapperStatus.COMPLETED, AppWrapperStatus.RUNNING_HOLD_COMPLETION]:
7376
ready = False
@@ -84,7 +87,7 @@ def is_ready(self, print_to_console=True):
8487
return ready, status# no need to check the ray status since still in queue
8588

8689
# check the ray cluster status
87-
cluster = _ray_cluster_status(self.config.name)
90+
cluster = _ray_cluster_status(self.config.name, self.config.namespace)
8891
if cluster:
8992
if cluster.status == RayClusterStatus.READY:
9093
ready = True
@@ -99,16 +102,19 @@ def is_ready(self, print_to_console=True):
99102
pretty_print.print_clusters([cluster])
100103
return status, ready
101104

105+
def get_current_namespace():
106+
namespace = oc.invoke("project",["-q"]).actions()[0].out.strip()
107+
return namespace
102108

103-
def list_all_clusters(print_to_console=True):
104-
clusters = _get_ray_clusters()
109+
def list_all_clusters(namespace, print_to_console=True):
110+
clusters = _get_ray_clusters(namespace)
105111
if print_to_console:
106112
pretty_print.print_clusters(clusters)
107113
return clusters
108114

109115

110-
def list_all_queued(print_to_console=True):
111-
app_wrappers = _get_app_wrappers(filter=[AppWrapperStatus.RUNNING, AppWrapperStatus.PENDING])
116+
def list_all_queued(namespace, print_to_console=True):
117+
app_wrappers = _get_app_wrappers( namespace, filter=[AppWrapperStatus.RUNNING, AppWrapperStatus.PENDING])
112118
if print_to_console:
113119
pretty_print.print_app_wrappers_status(app_wrappers)
114120
return app_wrappers
@@ -151,7 +157,7 @@ def _get_ray_clusters(namespace='default') -> List[RayCluster]:
151157

152158

153159

154-
def _get_app_wrappers(filter:List[AppWrapperStatus], namespace='default') -> List[AppWrapper]:
160+
def _get_app_wrappers(namespace='default', filter=List[AppWrapperStatus]) -> List[AppWrapper]:
155161
list_of_app_wrappers = []
156162

157163
with oc.project(namespace), oc.timeout(10*60):

src/codeflare_sdk/cluster/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
@dataclass
77
class ClusterConfiguration:
88
name: str
9+
namespace: str = "default"
910
head_info: list = field(default_factory=list)
1011
machine_types: list = field(default_factory=list) #["m4.xlarge", "g4dn.xlarge"]
1112
min_cpus: int = 1

src/codeflare_sdk/utils/generate_yaml.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,14 @@ def gen_names(name):
1919
else:
2020
return name, name
2121

22-
def update_names(yaml, item, appwrapper_name, cluster_name):
22+
def update_names(yaml, item, appwrapper_name, cluster_name, namespace):
2323
metadata = yaml.get("metadata")
2424
metadata["name"] = appwrapper_name
25+
metadata["namespace"] = namespace
2526
lower_meta = item.get("generictemplate", {}).get("metadata")
2627
lower_meta["labels"]["appwrapper.mcad.ibm.com"] = appwrapper_name
2728
lower_meta["name"] = cluster_name
29+
lower_meta["namespace"] = namespace
2830

2931
def update_labels(yaml, instascale, instance_types):
3032
metadata = yaml.get("metadata")
@@ -128,12 +130,12 @@ def write_user_appwrapper(user_yaml, output_file_name):
128130
yaml.dump(user_yaml, outfile, default_flow_style=False)
129131
print(f"Written to: {output_file_name}")
130132

131-
def generate_appwrapper(name, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, template, image, instascale, instance_types, env):
133+
def generate_appwrapper(name, namespace, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, template, image, instascale, instance_types, env):
132134
user_yaml = read_template(template)
133135
appwrapper_name, cluster_name = gen_names(name)
134136
resources = user_yaml.get("spec","resources")
135137
item = resources["resources"].get("GenericItems")[0]
136-
update_names(user_yaml, item, appwrapper_name, cluster_name)
138+
update_names(user_yaml, item, appwrapper_name, cluster_name, namespace)
137139
update_labels(user_yaml, instascale, instance_types)
138140
update_custompodresources(item, min_cpu, max_cpu, min_memory, max_memory, gpu, workers)
139141
update_nodes(item, appwrapper_name, min_cpu, max_cpu, min_memory, max_memory, gpu, workers, image, instascale, env)

0 commit comments

Comments
 (0)