Skip to content

Commit 761a2df

Browse files
committed
add functions for creating ray with oauth proxy in front of the dashboard
Signed-off-by: Kevin <kpostlet@redhat.com>
1 parent abec0ef commit 761a2df

File tree

7 files changed

+499
-87
lines changed

7 files changed

+499
-87
lines changed

src/codeflare_sdk/cluster/auth.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
import urllib3
2626
from ..utils.kube_api_helpers import _kube_api_error_handling
2727

28+
from typing import Optional
29+
2830
global api_client
2931
api_client = None
3032
global config_path
@@ -183,12 +185,13 @@ def config_check() -> str:
183185
raise PermissionError(
184186
"Action not permitted, have you put in correct/up-to-date auth credentials?"
185187
)
188+
api_client = config.new_client_from_config()
186189

187190
if config_path != None and api_client == None:
188191
return config_path
189192

190193

191-
def api_config_handler() -> str:
194+
def api_config_handler() -> Optional[client.ApiClient]:
192195
"""
193196
This function is used to load the api client if the user has logged in
194197
"""

src/codeflare_sdk/cluster/cluster.py

Lines changed: 85 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,20 @@
2121
from time import sleep
2222
from typing import List, Optional, Tuple, Dict
2323

24+
import openshift as oc
25+
from kubernetes import config
2426
from ray.job_submission import JobSubmissionClient
27+
import urllib3
2528

2629
from .auth import config_check, api_config_handler
2730
from ..utils import pretty_print
2831
from ..utils.generate_yaml import generate_appwrapper
2932
from ..utils.kube_api_helpers import _kube_api_error_handling
33+
from ..utils.openshift_oauth import (
34+
create_openshift_oauth_objects,
35+
delete_openshift_oauth_objects,
36+
download_tls_cert,
37+
)
3038
from .config import ClusterConfiguration
3139
from .model import (
3240
AppWrapper,
@@ -40,6 +48,8 @@
4048
import os
4149
import requests
4250

51+
from kubernetes import config
52+
4353

4454
class Cluster:
4555
"""
@@ -61,6 +71,38 @@ def __init__(self, config: ClusterConfiguration):
6171
self.config = config
6272
self.app_wrapper_yaml = self.create_app_wrapper()
6373
self.app_wrapper_name = self.app_wrapper_yaml.split(".")[0]
74+
self._client = None
75+
76+
@property
77+
def _client_headers(self):
78+
return {
79+
"Authorization": config.new_client_from_config().configuration.get_api_key_with_prefix(
80+
"authorization"
81+
)
82+
}
83+
84+
@property
85+
def _client_verify_tls(self):
86+
return not self.config.openshift_oauth
87+
88+
@property
89+
def client(self):
90+
if self._client:
91+
return self._client
92+
if self.config.openshift_oauth:
93+
print(
94+
config.new_client_from_config().configuration.get_api_key_with_prefix(
95+
"authorization"
96+
)
97+
)
98+
self._client = JobSubmissionClient(
99+
self.cluster_dashboard_uri(),
100+
headers=self._client_headers,
101+
verify=self._client_verify_tls,
102+
)
103+
else:
104+
self._client = JobSubmissionClient(self.cluster_dashboard_uri())
105+
return self._client
64106

65107
def evaluate_dispatch_priority(self):
66108
priority_class = self.config.dispatch_priority
@@ -141,6 +183,7 @@ def create_app_wrapper(self):
141183
image_pull_secrets=image_pull_secrets,
142184
dispatch_priority=dispatch_priority,
143185
priority_val=priority_val,
186+
openshift_oauth=self.config.openshift_oauth,
144187
)
145188

146189
# creates a new cluster with the provided or default spec
@@ -150,6 +193,11 @@ def up(self):
150193
the MCAD queue.
151194
"""
152195
namespace = self.config.namespace
196+
if self.config.openshift_oauth:
197+
create_openshift_oauth_objects(
198+
cluster_name=self.config.name, namespace=namespace
199+
)
200+
153201
try:
154202
config_check()
155203
api_instance = client.CustomObjectsApi(api_config_handler())
@@ -184,6 +232,11 @@ def down(self):
184232
except Exception as e: # pragma: no cover
185233
return _kube_api_error_handling(e)
186234

235+
if self.config.openshift_oauth:
236+
delete_openshift_oauth_objects(
237+
cluster_name=self.config.name, namespace=namespace
238+
)
239+
187240
def status(
188241
self, print_to_console: bool = True
189242
) -> Tuple[CodeFlareClusterStatus, bool]:
@@ -252,7 +305,16 @@ def status(
252305
return status, ready
253306

254307
def is_dashboard_ready(self) -> bool:
255-
response = requests.get(self.cluster_dashboard_uri(), timeout=5)
308+
try:
309+
response = requests.get(
310+
self.cluster_dashboard_uri(),
311+
headers=self._client_headers,
312+
timeout=5,
313+
verify=self._client_verify_tls,
314+
)
315+
except requests.exceptions.SSLError:
316+
# SSL exception occurs when oauth ingress has been created but cluster is not up
317+
return False
256318
if response.status_code == 200:
257319
return True
258320
else:
@@ -311,7 +373,13 @@ def cluster_dashboard_uri(self) -> str:
311373
return _kube_api_error_handling(e)
312374

313375
for route in routes["items"]:
314-
if route["metadata"]["name"] == f"ray-dashboard-{self.config.name}":
376+
if route["metadata"][
377+
"name"
378+
] == f"ray-dashboard-{self.config.name}" or route["metadata"][
379+
"name"
380+
].startswith(
381+
f"{self.config.name}-ingress"
382+
):
315383
protocol = "https" if route["spec"].get("tls") else "http"
316384
return f"{protocol}://{route['spec']['host']}"
317385
return "Dashboard route not available yet, have you run cluster.up()?"
@@ -320,30 +388,24 @@ def list_jobs(self) -> List:
320388
"""
321389
This method accesses the head ray node in your cluster and lists the running jobs.
322390
"""
323-
dashboard_route = self.cluster_dashboard_uri()
324-
client = JobSubmissionClient(dashboard_route)
325-
return client.list_jobs()
391+
return self.client.list_jobs()
326392

327393
def job_status(self, job_id: str) -> str:
328394
"""
329395
This method accesses the head ray node in your cluster and returns the job status for the provided job id.
330396
"""
331-
dashboard_route = self.cluster_dashboard_uri()
332-
client = JobSubmissionClient(dashboard_route)
333-
return client.get_job_status(job_id)
397+
return self.client.get_job_status(job_id)
334398

335399
def job_logs(self, job_id: str) -> str:
336400
"""
337401
This method accesses the head ray node in your cluster and returns the logs for the provided job id.
338402
"""
339-
dashboard_route = self.cluster_dashboard_uri()
340-
client = JobSubmissionClient(dashboard_route)
341-
return client.get_job_logs(job_id)
403+
return self.client.get_job_logs(job_id)
342404

343405
def torchx_config(
344406
self, working_dir: str = None, requirements: str = None
345407
) -> Dict[str, str]:
346-
dashboard_address = f"{self.cluster_dashboard_uri().lstrip('http://')}"
408+
dashboard_address = urllib3.util.parse_url(self.cluster_dashboard_uri()).host
347409
to_return = {
348410
"cluster_name": self.config.name,
349411
"dashboard_address": dashboard_address,
@@ -455,8 +517,8 @@ def get_current_namespace(): # pragma: no cover
455517

456518
def get_cluster(cluster_name: str, namespace: str = "default"):
457519
try:
458-
config_check()
459-
api_instance = client.CustomObjectsApi(api_config_handler())
520+
config.load_kube_config()
521+
api_instance = client.CustomObjectsApi()
460522
rcs = api_instance.list_namespaced_custom_object(
461523
group="ray.io",
462524
version="v1alpha1",
@@ -477,7 +539,7 @@ def get_cluster(cluster_name: str, namespace: str = "default"):
477539
# private methods
478540
def _get_ingress_domain():
479541
try:
480-
config_check()
542+
config.load_kube_config()
481543
api_client = client.CustomObjectsApi(api_config_handler())
482544
ingress = api_client.get_cluster_custom_object(
483545
"config.openshift.io", "v1", "ingresses", "cluster"
@@ -572,7 +634,7 @@ def _get_app_wrappers(
572634

573635

574636
def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
575-
if "status" in rc and "state" in rc["status"]:
637+
if "state" in rc["status"]:
576638
status = RayClusterStatus(rc["status"]["state"].lower())
577639
else:
578640
status = RayClusterStatus.UNKNOWN
@@ -587,7 +649,13 @@ def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
587649
)
588650
ray_route = None
589651
for route in routes["items"]:
590-
if route["metadata"]["name"] == f"ray-dashboard-{rc['metadata']['name']}":
652+
if route["metadata"][
653+
"name"
654+
] == f"ray-dashboard-{rc['metadata']['name']}" or route["metadata"][
655+
"name"
656+
].startswith(
657+
f"{rc['metadata']['name']}-ingress"
658+
):
591659
protocol = "https" if route["spec"].get("tls") else "http"
592660
ray_route = f"{protocol}://{route['spec']['host']}"
593661

src/codeflare_sdk/cluster/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,4 @@ class ClusterConfiguration:
4848
local_interactive: bool = False
4949
image_pull_secrets: list = field(default_factory=list)
5050
dispatch_priority: str = None
51+
openshift_oauth: bool = False # NOTE: to use the user must have permission to create a RoleBinding for system:auth-delegator

0 commit comments

Comments
 (0)