Skip to content

Commit e7827e4

Browse files
Convert Go raycluster_sdk_upgrade_test to Python test
1 parent 1497434 commit e7827e4

File tree

2 files changed

+184
-0
lines changed

2 files changed

+184
-0
lines changed
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
import requests
2+
from time import sleep
3+
4+
from torchx.specs.api import AppState, is_terminal
5+
6+
from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication
7+
from codeflare_sdk.job.jobs import DDPJobDefinition
8+
9+
import pytest
10+
11+
from support import *
12+
from codeflare_sdk.cluster.cluster import get_cluster
13+
14+
# This test creates a Ray cluster with openshift_oauth enabled and covers Ray job submission both with and without authentication
15+
# Namespace used by both test classes in this module: the first class creates
# it, the second looks up the "mnist" cluster inside it.
namespace = "test-ns-rayclusterupgrade"
16+
17+
18+
# Creates a Ray cluster
19+
@pytest.mark.openshift
class TestMNISTRayClusterUp:
    """Bring up the "mnist" Ray cluster with OpenShift OAuth enabled.

    The test creates the shared namespace, logs in with the current ``oc``
    token, starts the cluster and asserts that it reaches the ready state.
    """

    def setup_method(self):
        """Initialise the Kubernetes client objects on this test instance."""
        initialize_kubernetes_client(self)

    def teardown_method(self):
        """Delete the namespace recorded on this instance, if any.

        NOTE(review): pytest runs this after the test, so the namespace
        created by the test is deleted again. Confirm this is intended, since
        TestMnistJobSubmit looks the cluster up in the same namespace.
        """
        delete_namespace(self)

    def test_mnist_ray_cluster_sdk_auth(self):
        """Create the namespace and run the OAuth-enabled cluster scenario."""
        # pytest already invokes setup_method() before each test method, so it
        # is not called a second time here.
        create_namespace_with_name(self, namespace)
        self.run_mnist_raycluster_sdk_oauth()

    def run_mnist_raycluster_sdk_oauth(self):
        """Start the "mnist" cluster and assert that it becomes ready.

        Raises:
            AssertionError: if the AppWrapper is missing or the cluster does
                not report ready.
            Exception: any error raised by the SDK calls is logged and
                re-raised so that the test fails instead of passing silently.
        """
        ray_image = get_ray_image()

        auth = TokenAuthentication(
            token=run_oc_command(["whoami", "--show-token=true"]),
            server=run_oc_command(["whoami", "--show-server=true"]),
            skip_tls=True,
        )
        auth.login()

        cluster = Cluster(
            ClusterConfiguration(
                name="mnist",
                namespace=self.namespace,
                num_workers=1,
                head_cpus="1",
                head_memory=2,
                min_cpus="1",
                max_cpus=1,
                min_memory=1,
                max_memory=2,
                num_gpus=0,
                instascale=False,
                image=ray_image,
                openshift_oauth=True,
                write_to_file=True,
            )
        )

        try:
            cluster.up()
            # Check that the AppWrapper has been created in the namespace.
            self.assert_appwrapper_exists()
            cluster.status()
            # Wait for the Ray cluster to be ready.
            cluster.wait_ready()
            cluster.status()
            # Check cluster details.
            cluster.details()
            # Assert the cluster status is READY.
            _, ready = cluster.status()
            assert ready, "Cluster is not ready!"
        except Exception as e:
            print("An unexpected error occurred. Error: ", e)
            # Re-raise so the failure is reported. Swallowing it here made the
            # test pass even when an assertion above failed. Cleanup is left
            # to teardown_method(), which pytest runs after the test.
            raise

    def assert_appwrapper_exists(self):
        """Assert that the "mnist" AppWrapper exists in ``self.namespace``.

        Uses ``self.custom_api`` (presumably set by
        ``initialize_kubernetes_client`` -- confirm in support.py).

        Raises:
            AssertionError: if the AppWrapper lookup fails for any reason.
        """
        try:
            self.custom_api.get_namespaced_custom_object(
                "workload.codeflare.dev",
                "v1beta1",
                self.namespace,
                "appwrappers",
                "mnist",
            )
        except Exception as e:
            message = f"AppWrapper 'mnist' has not been created. Error: {e}"
            print(message)
            # An explicit raise keeps the message and is not stripped under -O,
            # unlike a bare `assert False`.
            raise AssertionError(message) from e
        print(
            f"AppWrapper 'mnist' has been created in the namespace: '{self.namespace}'"
        )
95+
96+
97+
class TestMnistJobSubmit:
    """Submit the MNIST job to the existing "mnist" Ray cluster.

    Covers two paths: an unauthenticated submission straight to the dashboard
    REST endpoint, which must be rejected with HTTP 403, and a submission
    through the SDK, which must finish in the SUCCEEDED state.
    """

    def setup_method(self):
        """Initialise the Kubernetes client and look up the "mnist" cluster.

        Raises:
            RuntimeError: if the cluster cannot be found in the namespace.
        """
        initialize_kubernetes_client(self)
        self.namespace = namespace
        self.cluster = get_cluster("mnist", self.namespace)
        if not self.cluster:
            raise RuntimeError(
                "TestMNISTRayClusterUp needs to be run before this test"
            )

    def teardown_method(self):
        """Delete the namespace recorded on this instance."""
        delete_namespace(self)

    def test_mnist_job_submission(self):
        """Run both submission checks, then bring the cluster down."""
        try:
            self.assert_jobsubmit_withoutLogin(self.cluster)
            self.assert_jobsubmit_withlogin(self.cluster)
            self.cluster.down()
        except Exception as e:
            print("An unexpected error occurred. Error: ", e)
            # Re-raise so assertion failures and timeouts fail the test instead
            # of being swallowed. Namespace cleanup is left to
            # teardown_method(), which pytest runs after the test; calling it
            # here as well deleted the namespace twice.
            raise

    # Assertions

    def assert_jobsubmit_withoutLogin(self, cluster):
        """Assert that an unauthenticated job submission is rejected with 403.

        Args:
            cluster: object exposing ``cluster_dashboard_uri()``.

        Raises:
            AssertionError: if the request fails or the response status is
                anything other than 403.
        """
        dashboard_url = cluster.cluster_dashboard_uri()
        jobdata = {
            "entrypoint": "python mnist.py",
            "runtime_env": {
                "working_dir": "./tests/e2e/",
                "pip": "mnist_pip_requirements.txt",
            },
        }
        try:
            response = requests.post(
                dashboard_url + "/api/jobs/", verify=False, json=jobdata
            )
        except requests.exceptions.RequestException as e:
            message = f"An unexpected error occurred. Error: {e}"
            print(message)
            raise AssertionError(message) from e

        if response.status_code != 403:
            message = (
                "Unauthenticated job submission was expected to return 403, "
                f"got {response.status_code}"
            )
            print(message)
            raise AssertionError(message)

    def assert_jobsubmit_withlogin(self, cluster):
        """Submit the MNIST job through the SDK and assert that it succeeds.

        Polls the job status every 5 seconds until it reaches a terminal
        state.

        Args:
            cluster: cluster object accepted by ``DDPJobDefinition.submit``.

        Raises:
            TimeoutError: if the job is not terminal after 900 seconds.
            AssertionError: if the job ends in a state other than SUCCEEDED.
        """
        jobdef = DDPJobDefinition(
            name="mnist",
            script="./tests/e2e/mnist.py",
            scheduler_args={"requirements": "./tests/e2e/mnist_pip_requirements.txt"},
        )
        job = jobdef.submit(cluster)

        poll_interval = 5
        timeout = 900
        elapsed = 0
        while True:
            status = job.status()
            if is_terminal(status.state):
                break
            print(status)
            if elapsed >= timeout:
                raise TimeoutError(f"job has timed out after waiting {timeout}s")
            sleep(poll_interval)
            elapsed += poll_interval

        print(job.status())
        self.assert_job_completion(status)

        print(job.logs())

    def assert_job_completion(self, status):
        """Assert that ``status.state`` is ``AppState.SUCCEEDED``.

        Args:
            status: job status object exposing a ``state`` attribute.

        Raises:
            AssertionError: if the state is anything other than SUCCEEDED.
        """
        print(f"Job has completed: '{status.state}'")
        if status.state != AppState.SUCCEEDED:
            raise AssertionError(
                f"Job finished in a non-successful state: '{status.state}'"
            )

tests/e2e/support.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ def create_namespace(self):
2424
self.api_instance.create_namespace(namespace_body)
2525

2626

27+
def create_namespace_with_name(self, namespace_name):
    """Record ``namespace_name`` on ``self`` and create that namespace.

    The name is stored as ``self.namespace`` before the API call, so it stays
    set on the instance even if the creation request raises.
    """
    self.namespace = namespace_name
    metadata = client.V1ObjectMeta(name=self.namespace)
    self.api_instance.create_namespace(client.V1Namespace(metadata=metadata))
33+
34+
2735
def delete_namespace(self):
2836
if hasattr(self, "namespace"):
2937
self.api_instance.delete_namespace(self.namespace)

0 commit comments

Comments
 (0)