Skip to content

Commit b7a41b3

Browse files
committed
Finish converting AppWrappers to v1beta2
1 parent 16d7ad2 commit b7a41b3

File tree

8 files changed

+908
-933
lines changed

8 files changed

+908
-933
lines changed

src/codeflare_sdk/cluster/awload.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ def submit(self) -> None:
6262
api_instance = client.CustomObjectsApi(api_config_handler())
6363
api_instance.create_namespaced_custom_object(
6464
group="workload.codeflare.dev",
65-
version="v1beta1",
65+
version="v1beta2",
6666
namespace=self.namespace,
6767
plural="appwrappers",
6868
body=self.awyaml,
@@ -87,7 +87,7 @@ def remove(self) -> None:
8787
api_instance = client.CustomObjectsApi(api_config_handler())
8888
api_instance.delete_namespaced_custom_object(
8989
group="workload.codeflare.dev",
90-
version="v1beta1",
90+
version="v1beta2",
9191
namespace=self.namespace,
9292
plural="appwrappers",
9393
name=self.name,

src/codeflare_sdk/cluster/cluster.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def up(self):
199199
aw = yaml.load(f, Loader=yaml.FullLoader)
200200
api_instance.create_namespaced_custom_object(
201201
group="workload.codeflare.dev",
202-
version="v1beta1",
202+
version="v1beta2",
203203
namespace=namespace,
204204
plural="appwrappers",
205205
body=aw,
@@ -208,7 +208,7 @@ def up(self):
208208
aw = yaml.safe_load(self.app_wrapper_yaml)
209209
api_instance.create_namespaced_custom_object(
210210
group="workload.codeflare.dev",
211-
version="v1beta1",
211+
version="v1beta2",
212212
namespace=namespace,
213213
plural="appwrappers",
214214
body=aw,
@@ -250,7 +250,7 @@ def down(self):
250250
if self.config.appwrapper:
251251
api_instance.delete_namespaced_custom_object(
252252
group="workload.codeflare.dev",
253-
version="v1beta1",
253+
version="v1beta2",
254254
namespace=namespace,
255255
plural="appwrappers",
256256
name=self.app_wrapper_name,
@@ -666,7 +666,7 @@ def _check_aw_exists(name: str, namespace: str) -> bool:
666666
api_instance = client.CustomObjectsApi(api_config_handler())
667667
aws = api_instance.list_namespaced_custom_object(
668668
group="workload.codeflare.dev",
669-
version="v1beta1",
669+
version="v1beta2",
670670
namespace=namespace,
671671
plural="appwrappers",
672672
)
@@ -726,7 +726,7 @@ def _app_wrapper_status(name, namespace="default") -> Optional[AppWrapper]:
726726
api_instance = client.CustomObjectsApi(api_config_handler())
727727
aws = api_instance.list_namespaced_custom_object(
728728
group="workload.codeflare.dev",
729-
version="v1beta1",
729+
version="v1beta2",
730730
namespace=namespace,
731731
plural="appwrappers",
732732
)
@@ -796,7 +796,7 @@ def _get_app_wrappers(
796796
api_instance = client.CustomObjectsApi(api_config_handler())
797797
aws = api_instance.list_namespaced_custom_object(
798798
group="workload.codeflare.dev",
799-
version="v1beta1",
799+
version="v1beta2",
800800
namespace=namespace,
801801
plural="appwrappers",
802802
)
@@ -893,7 +893,7 @@ def _map_to_app_wrapper(aw) -> AppWrapper:
893893
if "status" in aw:
894894
return AppWrapper(
895895
name=aw["metadata"]["name"],
896-
status=AppWrapperStatus(aw["status"]["state"].lower()),
896+
status=AppWrapperStatus(aw["status"]["phase"].lower()),
897897
)
898898
return AppWrapper(
899899
name=aw["metadata"]["name"],

src/codeflare_sdk/templates/base-template.yaml

Lines changed: 202 additions & 204 deletions
Large diffs are not rendered by default.

src/codeflare_sdk/utils/generate_yaml.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def update_names(yaml, item, appwrapper_name, cluster_name, namespace):
8181
metadata = yaml.get("metadata")
8282
metadata["name"] = appwrapper_name
8383
metadata["namespace"] = namespace
84-
lower_meta = item.get("generictemplate", {}).get("metadata")
84+
lower_meta = item.get("template", {}).get("metadata")
8585
lower_meta["name"] = cluster_name
8686
lower_meta["namespace"] = namespace
8787

@@ -140,11 +140,11 @@ def update_nodes(
140140
head_memory,
141141
head_gpus,
142142
):
143-
if "generictemplate" in item.keys():
144-
head = item.get("generictemplate").get("spec").get("headGroupSpec")
143+
if "template" in item.keys():
144+
head = item.get("template").get("spec").get("headGroupSpec")
145145
head["rayStartParams"]["num-gpus"] = str(int(head_gpus))
146146

147-
worker = item.get("generictemplate").get("spec").get("workerGroupSpecs")[0]
147+
worker = item.get("template").get("spec").get("workerGroupSpecs")[0]
148148
# Head counts as first worker
149149
worker["replicas"] = workers
150150
worker["minReplicas"] = workers
@@ -220,20 +220,18 @@ def write_components(
220220
if not os.path.exists(directory_path):
221221
os.makedirs(directory_path)
222222

223-
components = user_yaml.get("spec", "resources")["resources"].get("GenericItems")
223+
components = user_yaml.get("spec", "resources").get("components")
224224
open(output_file_name, "w").close()
225225
lq_name = local_queue or get_default_kueue_name(namespace)
226226
cluster_labels = labels
227227
with open(output_file_name, "a") as outfile:
228228
for component in components:
229-
if "generictemplate" in component:
229+
if "template" in component:
230230
labels = component["generictemplate"]["metadata"]["labels"]
231231
labels.update({"kueue.x-k8s.io/queue-name": lq_name})
232232
labels.update(cluster_labels)
233233
outfile.write("---\n")
234-
yaml.dump(
235-
component["generictemplate"], outfile, default_flow_style=False
236-
)
234+
yaml.dump(component["template"], outfile, default_flow_style=False)
237235
print(f"Written to: {output_file_name}")
238236

239237

@@ -245,15 +243,15 @@ def load_components(
245243
labels: dict,
246244
):
247245
component_list = []
248-
components = user_yaml.get("spec", "resources")["resources"].get("GenericItems")
246+
components = user_yaml.get("spec", "resources").get("components")
249247
lq_name = local_queue or get_default_kueue_name(namespace)
250248
cluster_labels = labels
251249
for component in components:
252-
if "generictemplate" in component:
253-
labels = component["generictemplate"]["metadata"]["labels"]
250+
if "template" in component:
251+
labels = component["template"]["metadata"]["labels"]
254252
labels.update({"kueue.x-k8s.io/queue-name": lq_name})
255253
labels.update(cluster_labels)
256-
component_list.append(component["generictemplate"])
254+
component_list.append(component["template"])
257255

258256
resources = "---\n" + "---\n".join(
259257
[yaml.dump(component) for component in component_list]
@@ -295,7 +293,7 @@ def generate_appwrapper(
295293
user_yaml = read_template(template)
296294
appwrapper_name, cluster_name = gen_names(name)
297295
resources = user_yaml.get("spec", "resources")
298-
item = resources["resources"].get("GenericItems")[0]
296+
item = resources.get("components")[0]
299297
update_names(
300298
user_yaml,
301299
item,

tests/test-case-bad.yaml

Lines changed: 103 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,110 @@
1-
apiVersion: workload.codeflare.dev/v1beta1
1+
apiVersion: workload.codeflare.dev/v1beta2
22
kind: AppsWrapper
33
metadata:
44
labels:
55
orderedinstance: cpu.small_gpu.large
66
nam: unit-test-cluster
77
namspace: ns
88
spec:
9-
resources:
10-
GenericItems:
11-
- generictemplate:
12-
apiVersion: ray.io/v1
13-
kind: RayCluster
14-
metadata:
15-
labels:
16-
controller-tools.k8s.io: '1.0'
17-
name: unit-test-cluster
18-
namespace: ns
19-
spec:
20-
autoscalerOptions:
21-
idleTimeoutSeconds: 60
22-
imagePullPolicy: Always
23-
resources:
24-
limits:
25-
cpu: 500m
26-
memory: 512Mi
27-
requests:
28-
cpu: 500m
29-
memory: 512Mi
30-
upscalingMode: Default
31-
enableInTreeAutoscaling: false
32-
headGroupSpec:
33-
rayStartParams:
34-
block: 'true'
35-
dashboard-host: 0.0.0.0
36-
num-gpus: '0'
37-
serviceType: ClusterIP
38-
template:
39-
spec:
40-
containers:
41-
- env:
42-
- name: MY_POD_IP
43-
valueFrom:
44-
fieldRef:
45-
fieldPath: status.podIP
46-
image: quay.io/project-codeflare/ray:latest-py39-cu118
47-
imagePullPolicy: Always
48-
lifecycle:
49-
preStop:
50-
exec:
51-
command:
52-
- /bin/sh
53-
- -c
54-
- ray stop
55-
name: ray-head
56-
ports:
57-
- containerPort: 6379
58-
name: gcs
59-
- containerPort: 8265
60-
name: dashboard
61-
- containerPort: 10001
62-
name: client
63-
resources:
64-
limits:
65-
cpu: 2
66-
memory: 8G
67-
nvidia.com/gpu: 0
68-
requests:
69-
cpu: 2
70-
memory: 8G
71-
nvidia.com/gpu: 0
72-
rayVersion: 1.12.0
73-
workerGroupSpecs:
74-
- groupName: small-group-unit-test-cluster
75-
maxReplicas: 2
76-
minReplicas: 2
77-
rayStartParams:
78-
block: 'true'
79-
num-gpus: '7'
80-
replicas: 2
81-
template:
82-
metadata:
83-
annotations:
84-
key: value
85-
labels:
86-
key: value
87-
spec:
88-
containers:
89-
- env:
90-
- name: MY_POD_IP
91-
valueFrom:
92-
fieldRef:
93-
fieldPath: status.podIP
94-
image: quay.io/project-codeflare/ray:latest-py39-cu118
95-
lifecycle:
96-
preStop:
97-
exec:
98-
command:
99-
- /bin/sh
100-
- -c
101-
- ray stop
102-
name: machine-learning
103-
resources:
104-
limits:
105-
cpu: 4
106-
memory: 6G
107-
nvidia.com/gpu: 7
108-
requests:
109-
cpu: 3
110-
memory: 5G
111-
nvidia.com/gpu: 7
112-
replicas: 1
9+
components:
10+
- template:
11+
apiVersion: ray.io/v1
12+
kind: RayCluster
13+
metadata:
14+
labels:
15+
controller-tools.k8s.io: '1.0'
16+
name: unit-test-cluster
17+
namespace: ns
18+
spec:
19+
autoscalerOptions:
20+
idleTimeoutSeconds: 60
21+
imagePullPolicy: Always
22+
resources:
23+
limits:
24+
cpu: 500m
25+
memory: 512Mi
26+
requests:
27+
cpu: 500m
28+
memory: 512Mi
29+
upscalingMode: Default
30+
enableInTreeAutoscaling: false
31+
headGroupSpec:
32+
rayStartParams:
33+
block: 'true'
34+
dashboard-host: 0.0.0.0
35+
num-gpus: '0'
36+
serviceType: ClusterIP
37+
template:
38+
spec:
39+
containers:
40+
- env:
41+
- name: MY_POD_IP
42+
valueFrom:
43+
fieldRef:
44+
fieldPath: status.podIP
45+
image: quay.io/project-codeflare/ray:latest-py39-cu118
46+
imagePullPolicy: Always
47+
lifecycle:
48+
preStop:
49+
exec:
50+
command:
51+
- /bin/sh
52+
- -c
53+
- ray stop
54+
name: ray-head
55+
ports:
56+
- containerPort: 6379
57+
name: gcs
58+
- containerPort: 8265
59+
name: dashboard
60+
- containerPort: 10001
61+
name: client
62+
resources:
63+
limits:
64+
cpu: 2
65+
memory: 8G
66+
nvidia.com/gpu: 0
67+
requests:
68+
cpu: 2
69+
memory: 8G
70+
nvidia.com/gpu: 0
71+
rayVersion: 1.12.0
72+
workerGroupSpecs:
73+
- groupName: small-group-unit-test-cluster
74+
maxReplicas: 2
75+
minReplicas: 2
76+
rayStartParams:
77+
block: 'true'
78+
num-gpus: '7'
79+
replicas: 2
80+
template:
81+
metadata:
82+
annotations:
83+
key: value
84+
labels:
85+
key: value
86+
spec:
87+
containers:
88+
- env:
89+
- name: MY_POD_IP
90+
valueFrom:
91+
fieldRef:
92+
fieldPath: status.podIP
93+
image: quay.io/project-codeflare/ray:latest-py39-cu118
94+
lifecycle:
95+
preStop:
96+
exec:
97+
command:
98+
- /bin/sh
99+
- -c
100+
- ray stop
101+
name: machine-learning
102+
resources:
103+
limits:
104+
cpu: 4
105+
memory: 6G
106+
nvidia.com/gpu: 7
107+
requests:
108+
cpu: 3
109+
memory: 5G
110+
nvidia.com/gpu: 7

0 commit comments

Comments
 (0)