21
21
from time import sleep
22
22
from typing import List , Optional , Tuple , Dict
23
23
24
+ import openshift as oc
25
+ from kubernetes import config
24
26
from ray .job_submission import JobSubmissionClient
27
+ import urllib3
25
28
26
29
from .auth import config_check , api_config_handler
27
30
from ..utils import pretty_print
28
31
from ..utils .generate_yaml import generate_appwrapper
29
32
from ..utils .kube_api_helpers import _kube_api_error_handling
33
+ from ..utils .openshift_oauth import create_openshift_oauth_objects , delete_openshift_oauth_objects , download_tls_cert
30
34
from .config import ClusterConfiguration
31
35
from .model import (
32
36
AppWrapper ,
41
45
import requests
42
46
43
47
48
# Shared Kubernetes API client, created once when this module is imported.
# Cluster.client reads the bearer token from its configuration to build the
# Authorization header for OAuth-protected dashboards.
# NOTE(review): because this runs at import time, importing the module
# presumably fails when no kubeconfig can be loaded -- confirm.
+ k8_client = config .new_client_from_config ()
49
+
50
+
44
51
class Cluster :
45
52
"""
46
53
An object for requesting, bringing up, and taking down resources.
@@ -61,6 +68,21 @@ def __init__(self, config: ClusterConfiguration):
61
68
self .config = config
62
69
self .app_wrapper_yaml = self .create_app_wrapper ()
63
70
self .app_wrapper_name = self .app_wrapper_yaml .split ("." )[0 ]
71
+ self ._client = None
72
+
73
@property
def client(self):
    """
    Return the Ray JobSubmissionClient for this cluster's dashboard, creating it on first use.

    The client is stored on ``self._client`` and reused on later accesses.
    When ``self.config.openshift_oauth`` is set, the client is built with an
    ``Authorization`` header taken from the module-level Kubernetes client's
    bearer token; otherwise it is built from the dashboard URI alone.
    """
    if not self._client:
        if self.config.openshift_oauth:
            dashboard_uri = self.cluster_dashboard_uri()
            bearer_token = k8_client.configuration.auth_settings()["BearerToken"]["value"]
            # NOTE(review): verify=False is passed straight to JobSubmissionClient,
            # which presumably turns off TLS certificate verification for the
            # OAuth-protected route -- confirm this is intended.
            self._client = JobSubmissionClient(
                dashboard_uri,
                headers={"Authorization": bearer_token},
                verify=False,
            )
        else:
            self._client = JobSubmissionClient(self.cluster_dashboard_uri())
    return self._client
64
86
65
87
def evaluate_dispatch_priority (self ):
66
88
priority_class = self .config .dispatch_priority
@@ -141,6 +163,7 @@ def create_app_wrapper(self):
141
163
image_pull_secrets = image_pull_secrets ,
142
164
dispatch_priority = dispatch_priority ,
143
165
priority_val = priority_val ,
166
+ openshift_oauth = self .config .openshift_oauth ,
144
167
)
145
168
146
169
# creates a new cluster with the provided or default spec
@@ -150,6 +173,9 @@ def up(self):
150
173
the MCAD queue.
151
174
"""
152
175
namespace = self .config .namespace
176
+ if self .config .openshift_oauth :
177
+ create_openshift_oauth_objects (cluster_name = self .config .name , namespace = namespace )
178
+
153
179
try :
154
180
config_check ()
155
181
api_instance = client .CustomObjectsApi (api_config_handler ())
@@ -184,6 +210,9 @@ def down(self):
184
210
except Exception as e : # pragma: no cover
185
211
return _kube_api_error_handling (e )
186
212
213
+ if self .config .openshift_oauth :
214
+ delete_openshift_oauth_objects (cluster_name = self .config .name , namespace = namespace )
215
+
187
216
def status (
188
217
self , print_to_console : bool = True
189
218
) -> Tuple [CodeFlareClusterStatus , bool ]:
@@ -252,7 +281,13 @@ def status(
252
281
return status , ready
253
282
254
283
def is_dashboard_ready (self ) -> bool :
255
- response = requests .get (self .cluster_dashboard_uri (), timeout = 5 )
284
+ try :
285
+ response = requests .get (
286
+ self .cluster_dashboard_uri (), headers = self .client ._headers , timeout = 5 , verify = self .client ._verify
287
+ )
288
+ except requests .exceptions .SSLError :
289
+ # SSL exception occurs when oauth ingress has been created but cluster is not up
290
+ return False
256
291
if response .status_code == 200 :
257
292
return True
258
293
else :
@@ -311,7 +346,8 @@ def cluster_dashboard_uri(self) -> str:
311
346
return _kube_api_error_handling (e )
312
347
313
348
for route in routes ["items" ]:
314
- if route ["metadata" ]["name" ] == f"ray-dashboard-{ self .config .name } " :
349
+ if route ["metadata" ]["name" ] == f"ray-dashboard-{ self .config .name } " or \
350
+ route ["metadata" ]["name" ].startswith (f"{ self .config .name } -ingress" ):
315
351
protocol = "https" if route ["spec" ].get ("tls" ) else "http"
316
352
return f"{ protocol } ://{ route ['spec' ]['host' ]} "
317
353
return "Dashboard route not available yet, have you run cluster.up()?"
@@ -320,30 +356,24 @@ def list_jobs(self) -> List:
320
356
"""
321
357
This method accesses the head ray node in your cluster and lists the running jobs.
322
358
"""
323
- dashboard_route = self .cluster_dashboard_uri ()
324
- client = JobSubmissionClient (dashboard_route )
325
- return client .list_jobs ()
359
+ return self .client .list_jobs ()
326
360
327
361
def job_status(self, job_id: str) -> str:
    """
    This method accesses the head ray node in your cluster and returns the job status for the provided job id.

    The request goes through ``self.client`` rather than a JobSubmissionClient
    built per call from the dashboard URI, so the client's configured headers
    and TLS settings are used.

    Args:
        job_id: Identifier of the job to look up.

    Returns:
        Whatever ``self.client.get_job_status(job_id)`` returns.
    """
    return self.client.get_job_status(job_id)
334
366
335
367
def job_logs(self, job_id: str) -> str:
    """
    This method accesses the head ray node in your cluster and returns the logs for the provided job id.

    The request goes through ``self.client`` rather than a JobSubmissionClient
    built per call from the dashboard URI, so the client's configured headers
    and TLS settings are used.

    Args:
        job_id: Identifier of the job whose logs are requested.

    Returns:
        Whatever ``self.client.get_job_logs(job_id)`` returns.
    """
    return self.client.get_job_logs(job_id)
342
372
343
373
def torchx_config (
344
374
self , working_dir : str = None , requirements : str = None
345
375
) -> Dict [str , str ]:
346
- dashboard_address = f" { self .cluster_dashboard_uri (). lstrip ( 'http://' ) } "
376
+ dashboard_address = urllib3 . util . parse_url ( self .cluster_dashboard_uri ()). host
347
377
to_return = {
348
378
"cluster_name" : self .config .name ,
349
379
"dashboard_address" : dashboard_address ,
@@ -587,7 +617,8 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
587
617
)
588
618
ray_route = None
589
619
for route in routes ["items" ]:
590
- if route ["metadata" ]["name" ] == f"ray-dashboard-{ rc ['metadata' ]['name' ]} " :
620
+ if route ["metadata" ]["name" ] == f"ray-dashboard-{ rc ['metadata' ]['name' ]} " or \
621
+ route ["metadata" ]["name" ].startswith (f"{ rc ['metadata' ]['name' ]} -ingress" ):
591
622
protocol = "https" if route ["spec" ].get ("tls" ) else "http"
592
623
ray_route = f"{ protocol } ://{ route ['spec' ]['host' ]} "
593
624
0 commit comments