Skip to content

Commit fa0e0d3

Browse files
committed
remove: DDPJobDefinition and update tests
1 parent a06cf4f commit fa0e0d3

File tree

8 files changed

+67
-644
lines changed

8 files changed

+67
-644
lines changed

src/codeflare_sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,6 @@
1616
list_all_clusters,
1717
)
1818

19-
from .job import JobDefinition, Job, DDPJobDefinition, DDPJob, RayJobClient
19+
from .job import RayJobClient
2020

2121
from .utils import generate_cert

src/codeflare_sdk/job/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
from .jobs import JobDefinition, Job, DDPJobDefinition, DDPJob
2-
31
from .ray_jobs import RayJobClient

src/codeflare_sdk/job/jobs.py

Lines changed: 0 additions & 207 deletions
This file was deleted.

tests/e2e/mnist_raycluster_sdk_oauth_test.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@
22

33
from time import sleep
44

5-
from torchx.specs.api import AppState, is_terminal
6-
75
from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication
8-
from codeflare_sdk.job.jobs import DDPJobDefinition
6+
from codeflare_sdk.job import RayJobClient
97

108
import pytest
119

@@ -79,7 +77,7 @@ def assert_jobsubmit_withoutLogin(self, cluster):
7977
"entrypoint": "python mnist.py",
8078
"runtime_env": {
8179
"working_dir": "./tests/e2e/",
82-
"pip": "mnist_pip_requirements.txt",
80+
"pip": "./tests/e2e/mnist_pip_requirements.txt",
8381
},
8482
}
8583
try:
@@ -98,19 +96,26 @@ def assert_jobsubmit_withoutLogin(self, cluster):
9896

9997
def assert_jobsubmit_withlogin(self, cluster):
10098
self.assert_appwrapper_exists()
101-
jobdef = DDPJobDefinition(
102-
name="mnist",
103-
script="./tests/e2e/mnist.py",
104-
scheduler_args={"requirements": "./tests/e2e/mnist_pip_requirements.txt"},
99+
auth_token = run_oc_command(["whoami", "--show-token=true"])
100+
ray_dashboard = cluster.cluster_dashboard_uri()
101+
header = {"Authorization": f"Bearer {auth_token}"}
102+
client = RayJobClient(address=ray_dashboard, headers=header, verify=True)
103+
104+
# Submit the job
105+
submission_id = client.submit_job(
106+
entrypoint="python mnist.py",
107+
runtime_env={
108+
"working_dir": "./tests/e2e/",
109+
"pip": "mnist_pip_requirements.txt",
110+
},
105111
)
106-
job = jobdef.submit(cluster)
107-
112+
print(f"Submitted job with ID: {submission_id}")
108113
done = False
109114
time = 0
110115
timeout = 900
111116
while not done:
112-
status = job.status()
113-
if is_terminal(status.state):
117+
status = client.get_job_status(submission_id)
118+
if status.is_terminal():
114119
break
115120
if not done:
116121
print(status)
@@ -119,11 +124,12 @@ def assert_jobsubmit_withlogin(self, cluster):
119124
sleep(5)
120125
time += 5
121126

122-
print(job.status())
123-
self.assert_job_completion(status)
127+
logs = client.get_job_logs(submission_id)
128+
print(logs)
124129

125-
print(job.logs())
130+
self.assert_job_completion(status)
126131

132+
client.delete_job(submission_id)
127133
cluster.down()
128134

129135
def assert_appwrapper_exists(self):
@@ -144,9 +150,9 @@ def assert_appwrapper_exists(self):
144150
assert False
145151

146152
def assert_job_completion(self, status):
147-
if status.state == AppState.SUCCEEDED:
148-
print(f"Job has completed: '{status.state}'")
153+
if status == "SUCCEEDED":
154+
print(f"Job has completed: '{status}'")
149155
assert True
150156
else:
151-
print(f"Job has completed: '{status.state}'")
157+
print(f"Job has completed: '{status}'")
152158
assert False

tests/e2e/mnist_raycluster_sdk_test.py

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,8 @@
77

88
import ray
99

10-
from torchx.specs.api import AppState, is_terminal
11-
1210
from codeflare_sdk.cluster.cluster import Cluster, ClusterConfiguration
13-
from codeflare_sdk.job.jobs import DDPJobDefinition
11+
from codeflare_sdk.job import RayJobClient
1412

1513
import pytest
1614

@@ -68,19 +66,26 @@ def run_mnist_raycluster_sdk(self):
6866

6967
cluster.details()
7068

71-
jobdef = DDPJobDefinition(
72-
name="mnist",
73-
script="./tests/e2e/mnist.py",
74-
scheduler_args={"requirements": "./tests/e2e/mnist_pip_requirements.txt"},
69+
auth_token = run_oc_command(["whoami", "--show-token=true"])
70+
ray_dashboard = cluster.cluster_dashboard_uri()
71+
header = {"Authorization": f"Bearer {auth_token}"}
72+
client = RayJobClient(address=ray_dashboard, headers=header, verify=True)
73+
74+
# Submit the job
75+
submission_id = client.submit_job(
76+
entrypoint="python mnist.py",
77+
runtime_env={
78+
"working_dir": "./tests/e2e/",
79+
"pip": "./tests/e2e/mnist_pip_requirements.txt",
80+
},
7581
)
76-
job = jobdef.submit(cluster)
77-
82+
print(f"Submitted job with ID: {submission_id}")
7883
done = False
7984
time = 0
8085
timeout = 900
8186
while not done:
82-
status = job.status()
83-
if is_terminal(status.state):
87+
status = client.get_job_status(submission_id)
88+
if status.is_terminal():
8489
break
8590
if not done:
8691
print(status)
@@ -89,11 +94,12 @@ def run_mnist_raycluster_sdk(self):
8994
sleep(5)
9095
time += 5
9196

92-
print(job.status())
93-
self.assert_job_completion(status)
97+
logs = client.get_job_logs(submission_id)
98+
print(logs)
9499

95-
print(job.logs())
100+
self.assert_job_completion(status)
96101

102+
client.delete_job(submission_id)
97103
cluster.down()
98104

99105
# Assertions
@@ -128,9 +134,9 @@ def assert_raycluster_exists(self):
128134
assert False
129135

130136
def assert_job_completion(self, status):
131-
if status.state == AppState.SUCCEEDED:
132-
print(f"Job has completed: '{status.state}'")
137+
if status == "SUCCEEDED":
138+
print(f"Job has completed: '{status}'")
133139
assert True
134140
else:
135-
print(f"Job has completed: '{status.state}'")
141+
print(f"Job has completed: '{status}'")
136142
assert False

0 commit comments

Comments
 (0)