diff --git a/docs/cluster/auth.html b/docs/cluster/auth.html
index 0389fbf71..f1d8ad000 100644
--- a/docs/cluster/auth.html
+++ b/docs/cluster/auth.html
@@ -58,6 +58,8 @@

Module codeflare_sdk.cluster.auth

 import urllib3
 from ..utils.kube_api_helpers import _kube_api_error_handling
+from typing import Optional
+
 global api_client
 api_client = None
 global config_path
@@ -221,7 +223,7 @@

Module codeflare_sdk.cluster.auth

     return config_path

-def api_config_handler() -> str:
+def api_config_handler() -> Optional[client.ApiClient]:
     """
     This function is used to load the api client if the user has logged in
     """
@@ -239,7 +241,7 @@

Module codeflare_sdk.cluster.auth

Functions

-def api_config_handler() -> str
+def api_config_handler() -> Optional[kubernetes.client.api_client.ApiClient]

This function is used to load the api client if the user has logged in

@@ -247,7 +249,7 @@

Functions

Expand source code

-def api_config_handler() -> str:
+def api_config_handler() -> Optional[client.ApiClient]:
     """
     This function is used to load the api client if the user has logged in
     """
diff --git a/docs/cluster/cluster.html b/docs/cluster/cluster.html
index 3b3962215..7b391787d 100644
--- a/docs/cluster/cluster.html
+++ b/docs/cluster/cluster.html
@@ -53,12 +53,19 @@ 

Module codeflare_sdk.cluster.cluster

 from time import sleep
 from typing import List, Optional, Tuple, Dict

+import openshift as oc
+from kubernetes import config
 from ray.job_submission import JobSubmissionClient
+import urllib3

 from .auth import config_check, api_config_handler
 from ..utils import pretty_print
 from ..utils.generate_yaml import generate_appwrapper
 from ..utils.kube_api_helpers import _kube_api_error_handling
+from ..utils.openshift_oauth import (
+    create_openshift_oauth_objects,
+    delete_openshift_oauth_objects,
+)
 from .config import ClusterConfiguration
 from .model import (
     AppWrapper,
@@ -72,6 +79,8 @@

Module codeflare_sdk.cluster.cluster

 import os
 import requests
+from kubernetes import config
+

 class Cluster:
     """
@@ -93,6 +102,39 @@

Module codeflare_sdk.cluster.cluster

         self.config = config
         self.app_wrapper_yaml = self.create_app_wrapper()
         self.app_wrapper_name = self.app_wrapper_yaml.split(".")[0]
+        self._client = None
+
+    @property
+    def _client_headers(self):
+        k8_client = api_config_handler() or client.ApiClient()
+        return {
+            "Authorization": k8_client.configuration.get_api_key_with_prefix(
+                "authorization"
+            )
+        }
+
+    @property
+    def _client_verify_tls(self):
+        return not self.config.openshift_oauth
+
+    @property
+    def client(self):
+        if self._client:
+            return self._client
+        if self.config.openshift_oauth:
+            print(
+                api_config_handler().configuration.get_api_key_with_prefix(
+                    "authorization"
+                )
+            )
+            self._client = JobSubmissionClient(
+                self.cluster_dashboard_uri(),
+                headers=self._client_headers,
+                verify=self._client_verify_tls,
+            )
+        else:
+            self._client = JobSubmissionClient(self.cluster_dashboard_uri())
+        return self._client

     def evaluate_dispatch_priority(self):
         priority_class = self.config.dispatch_priority
@@ -179,6 +221,7 @@

Module codeflare_sdk.cluster.cluster

             image_pull_secrets=image_pull_secrets,
             dispatch_priority=dispatch_priority,
             priority_val=priority_val,
+            openshift_oauth=self.config.openshift_oauth,
         )

     # creates a new cluster with the provided or default spec
@@ -188,6 +231,11 @@

Module codeflare_sdk.cluster.cluster

the MCAD queue. """ namespace = self.config.namespace + if self.config.openshift_oauth: + create_openshift_oauth_objects( + cluster_name=self.config.name, namespace=namespace + ) + try: config_check() api_instance = client.CustomObjectsApi(api_config_handler()) @@ -222,6 +270,11 @@

Module codeflare_sdk.cluster.cluster

         except Exception as e:  # pragma: no cover
             return _kube_api_error_handling(e)

+        if self.config.openshift_oauth:
+            delete_openshift_oauth_objects(
+                cluster_name=self.config.name, namespace=namespace
+            )
+
     def status(
         self, print_to_console: bool = True
     ) -> Tuple[CodeFlareClusterStatus, bool]:
@@ -290,7 +343,16 @@

Module codeflare_sdk.cluster.cluster

         return status, ready

     def is_dashboard_ready(self) -> bool:
-        response = requests.get(self.cluster_dashboard_uri(), timeout=5)
+        try:
+            response = requests.get(
+                self.cluster_dashboard_uri(),
+                headers=self._client_headers,
+                timeout=5,
+                verify=self._client_verify_tls,
+            )
+        except requests.exceptions.SSLError:
+            # SSL exception occurs when oauth ingress has been created but cluster is not up
+            return False
         if response.status_code == 200:
             return True
         else:
@@ -362,7 +424,13 @@

Module codeflare_sdk.cluster.cluster

             return _kube_api_error_handling(e)

         for route in routes["items"]:
-            if route["metadata"]["name"] == f"ray-dashboard-{self.config.name}":
+            if route["metadata"][
+                "name"
+            ] == f"ray-dashboard-{self.config.name}" or route["metadata"][
+                "name"
+            ].startswith(
+                f"{self.config.name}-ingress"
+            ):
                 protocol = "https" if route["spec"].get("tls") else "http"
                 return f"{protocol}://{route['spec']['host']}"
         return "Dashboard route not available yet, have you run cluster.up()?"
@@ -371,30 +439,24 @@

Module codeflare_sdk.cluster.cluster

""" This method accesses the head ray node in your cluster and lists the running jobs. """ - dashboard_route = self.cluster_dashboard_uri() - client = JobSubmissionClient(dashboard_route) - return client.list_jobs() + return self.client.list_jobs() def job_status(self, job_id: str) -> str: """ This method accesses the head ray node in your cluster and returns the job status for the provided job id. """ - dashboard_route = self.cluster_dashboard_uri() - client = JobSubmissionClient(dashboard_route) - return client.get_job_status(job_id) + return self.client.get_job_status(job_id) def job_logs(self, job_id: str) -> str: """ This method accesses the head ray node in your cluster and returns the logs for the provided job id. """ - dashboard_route = self.cluster_dashboard_uri() - client = JobSubmissionClient(dashboard_route) - return client.get_job_logs(job_id) + return self.client.get_job_logs(job_id) def torchx_config( self, working_dir: str = None, requirements: str = None ) -> Dict[str, str]: - dashboard_address = f"{self.cluster_dashboard_uri().lstrip('http://')}" + dashboard_address = urllib3.util.parse_url(self.cluster_dashboard_uri()).host to_return = { "cluster_name": self.config.name, "dashboard_address": dashboard_address, @@ -623,7 +685,7 @@

Module codeflare_sdk.cluster.cluster

 def _map_to_ray_cluster(rc) -> Optional[RayCluster]:
-    if "status" in rc and "state" in rc["status"]:
+    if "state" in rc["status"]:
         status = RayClusterStatus(rc["status"]["state"].lower())
     else:
         status = RayClusterStatus.UNKNOWN
@@ -638,7 +700,13 @@

Module codeflare_sdk.cluster.cluster

     )
     ray_route = None
     for route in routes["items"]:
-        if route["metadata"]["name"] == f"ray-dashboard-{rc['metadata']['name']}":
+        if route["metadata"][
+            "name"
+        ] == f"ray-dashboard-{rc['metadata']['name']}" or route["metadata"][
+            "name"
+        ].startswith(
+            f"{rc['metadata']['name']}-ingress"
+        ):
             protocol = "https" if route["spec"].get("tls") else "http"
             ray_route = f"{protocol}://{route['spec']['host']}"
@@ -862,6 +930,39 @@

Classes

         self.config = config
         self.app_wrapper_yaml = self.create_app_wrapper()
         self.app_wrapper_name = self.app_wrapper_yaml.split(".")[0]
+        self._client = None
+
+    @property
+    def _client_headers(self):
+        k8_client = api_config_handler() or client.ApiClient()
+        return {
+            "Authorization": k8_client.configuration.get_api_key_with_prefix(
+                "authorization"
+            )
+        }
+
+    @property
+    def _client_verify_tls(self):
+        return not self.config.openshift_oauth
+
+    @property
+    def client(self):
+        if self._client:
+            return self._client
+        if self.config.openshift_oauth:
+            print(
+                api_config_handler().configuration.get_api_key_with_prefix(
+                    "authorization"
+                )
+            )
+            self._client = JobSubmissionClient(
+                self.cluster_dashboard_uri(),
+                headers=self._client_headers,
+                verify=self._client_verify_tls,
+            )
+        else:
+            self._client = JobSubmissionClient(self.cluster_dashboard_uri())
+        return self._client

     def evaluate_dispatch_priority(self):
         priority_class = self.config.dispatch_priority
@@ -948,6 +1049,7 @@

Classes

             image_pull_secrets=image_pull_secrets,
             dispatch_priority=dispatch_priority,
             priority_val=priority_val,
+            openshift_oauth=self.config.openshift_oauth,
         )

     # creates a new cluster with the provided or default spec
@@ -957,6 +1059,11 @@

Classes

the MCAD queue. """ namespace = self.config.namespace + if self.config.openshift_oauth: + create_openshift_oauth_objects( + cluster_name=self.config.name, namespace=namespace + ) + try: config_check() api_instance = client.CustomObjectsApi(api_config_handler()) @@ -991,6 +1098,11 @@

Classes

         except Exception as e:  # pragma: no cover
             return _kube_api_error_handling(e)

+        if self.config.openshift_oauth:
+            delete_openshift_oauth_objects(
+                cluster_name=self.config.name, namespace=namespace
+            )
+
     def status(
         self, print_to_console: bool = True
     ) -> Tuple[CodeFlareClusterStatus, bool]:
@@ -1059,7 +1171,16 @@

Classes

         return status, ready

     def is_dashboard_ready(self) -> bool:
-        response = requests.get(self.cluster_dashboard_uri(), timeout=5)
+        try:
+            response = requests.get(
+                self.cluster_dashboard_uri(),
+                headers=self._client_headers,
+                timeout=5,
+                verify=self._client_verify_tls,
+            )
+        except requests.exceptions.SSLError:
+            # SSL exception occurs when oauth ingress has been created but cluster is not up
+            return False
         if response.status_code == 200:
             return True
         else:
@@ -1131,7 +1252,13 @@

Classes

             return _kube_api_error_handling(e)

         for route in routes["items"]:
-            if route["metadata"]["name"] == f"ray-dashboard-{self.config.name}":
+            if route["metadata"][
+                "name"
+            ] == f"ray-dashboard-{self.config.name}" or route["metadata"][
+                "name"
+            ].startswith(
+                f"{self.config.name}-ingress"
+            ):
                 protocol = "https" if route["spec"].get("tls") else "http"
                 return f"{protocol}://{route['spec']['host']}"
         return "Dashboard route not available yet, have you run cluster.up()?"
@@ -1140,30 +1267,24 @@

Classes

""" This method accesses the head ray node in your cluster and lists the running jobs. """ - dashboard_route = self.cluster_dashboard_uri() - client = JobSubmissionClient(dashboard_route) - return client.list_jobs() + return self.client.list_jobs() def job_status(self, job_id: str) -> str: """ This method accesses the head ray node in your cluster and returns the job status for the provided job id. """ - dashboard_route = self.cluster_dashboard_uri() - client = JobSubmissionClient(dashboard_route) - return client.get_job_status(job_id) + return self.client.get_job_status(job_id) def job_logs(self, job_id: str) -> str: """ This method accesses the head ray node in your cluster and returns the logs for the provided job id. """ - dashboard_route = self.cluster_dashboard_uri() - client = JobSubmissionClient(dashboard_route) - return client.get_job_logs(job_id) + return self.client.get_job_logs(job_id) def torchx_config( self, working_dir: str = None, requirements: str = None ) -> Dict[str, str]: - dashboard_address = f"{self.cluster_dashboard_uri().lstrip('http://')}" + dashboard_address = urllib3.util.parse_url(self.cluster_dashboard_uri()).host to_return = { "cluster_name": self.config.name, "dashboard_address": dashboard_address, @@ -1230,6 +1351,36 @@

Class variables

+

Instance variables

+var client
+
+Expand source code
@property
+def client(self):
+    if self._client:
+        return self._client
+    if self.config.openshift_oauth:
+        print(
+            api_config_handler().configuration.get_api_key_with_prefix(
+                "authorization"
+            )
+        )
+        self._client = JobSubmissionClient(
+            self.cluster_dashboard_uri(),
+            headers=self._client_headers,
+            verify=self._client_verify_tls,
+        )
+    else:
+        self._client = JobSubmissionClient(self.cluster_dashboard_uri())
+    return self._client
+
+
+
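For illustration, a minimal usage sketch of the cached property (cluster name and namespace are hypothetical; assumes a logged-in session):

    from codeflare_sdk.cluster.cluster import Cluster
    from codeflare_sdk.cluster.config import ClusterConfiguration

    cluster = Cluster(ClusterConfiguration(name="demo", namespace="default"))
    # First access builds the JobSubmissionClient (with bearer-token headers
    # and verify=False when openshift_oauth is set); later accesses reuse it.
    print(cluster.client is cluster.client)  # True: the instance is cached
    job_id = cluster.client.submit_job(entrypoint="python train.py")  # hypothetical entrypoint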

Methods

@@ -1258,7 +1409,13 @@

Methods

             return _kube_api_error_handling(e)

         for route in routes["items"]:
-            if route["metadata"]["name"] == f"ray-dashboard-{self.config.name}":
+            if route["metadata"][
+                "name"
+            ] == f"ray-dashboard-{self.config.name}" or route["metadata"][
+                "name"
+            ].startswith(
+                f"{self.config.name}-ingress"
+            ):
                 protocol = "https" if route["spec"].get("tls") else "http"
                 return f"{protocol}://{route['spec']['host']}"
         return "Dashboard route not available yet, have you run cluster.up()?"
@@ -1355,6 +1512,7 @@

Methods

             image_pull_secrets=image_pull_secrets,
             dispatch_priority=dispatch_priority,
             priority_val=priority_val,
+            openshift_oauth=self.config.openshift_oauth,
         )
@@ -1401,7 +1559,12 @@

Methods

                 name=self.app_wrapper_name,
             )
         except Exception as e:  # pragma: no cover
-            return _kube_api_error_handling(e)
+            return _kube_api_error_handling(e)
+
+        if self.config.openshift_oauth:
+            delete_openshift_oauth_objects(
+                cluster_name=self.config.name, namespace=namespace
+            )
@@ -1496,7 +1659,16 @@

Methods

Expand source code
def is_dashboard_ready(self) -> bool:
-    response = requests.get(self.cluster_dashboard_uri(), timeout=5)
+    try:
+        response = requests.get(
+            self.cluster_dashboard_uri(),
+            headers=self._client_headers,
+            timeout=5,
+            verify=self._client_verify_tls,
+        )
+    except requests.exceptions.SSLError:
+        # SSL exception occurs when oauth ingress has been created but cluster is not up
+        return False
     if response.status_code == 200:
         return True
     else:
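A short usage sketch: the new SSLError guard makes a readiness poll safe during the window when the oauth ingress already exists but the Ray head is not yet serving (`cluster` as constructed in the earlier sketches):

    from time import sleep

    # is_dashboard_ready() now returns False on SSLError instead of raising
    while not cluster.is_dashboard_ready():
        sleep(5)
    print(cluster.cluster_dashboard_uri())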
@@ -1516,9 +1688,7 @@ 

Methods

""" This method accesses the head ray node in your cluster and returns the logs for the provided job id. """ - dashboard_route = self.cluster_dashboard_uri() - client = JobSubmissionClient(dashboard_route) - return client.get_job_logs(job_id)
+ return self.client.get_job_logs(job_id)
@@ -1534,9 +1704,7 @@

Methods

""" This method accesses the head ray node in your cluster and returns the job status for the provided job id. """ - dashboard_route = self.cluster_dashboard_uri() - client = JobSubmissionClient(dashboard_route) - return client.get_job_status(job_id)
+ return self.client.get_job_status(job_id)
@@ -1552,9 +1720,7 @@

Methods

""" This method accesses the head ray node in your cluster and lists the running jobs. """ - dashboard_route = self.cluster_dashboard_uri() - client = JobSubmissionClient(dashboard_route) - return client.list_jobs()
+ return self.client.list_jobs()
@@ -1664,7 +1830,7 @@

Methods

def torchx_config(
     self, working_dir: str = None, requirements: str = None
 ) -> Dict[str, str]:
-    dashboard_address = f"{self.cluster_dashboard_uri().lstrip('http://')}"
+    dashboard_address = urllib3.util.parse_url(self.cluster_dashboard_uri()).host
     to_return = {
         "cluster_name": self.config.name,
         "dashboard_address": dashboard_address,
@@ -1692,6 +1858,11 @@ 

Methods

the MCAD queue. """ namespace = self.config.namespace + if self.config.openshift_oauth: + create_openshift_oauth_objects( + cluster_name=self.config.name, namespace=namespace + ) + try: config_check() api_instance = client.CustomObjectsApi(api_config_handler()) @@ -1785,6 +1956,7 @@

Index

  • Cluster

+    • client
    • cluster_dashboard_uri
    • cluster_uri
    • create_app_wrapper
diff --git a/docs/cluster/config.html b/docs/cluster/config.html
index 2edf0a3c8..28830390a 100644
--- a/docs/cluster/config.html
+++ b/docs/cluster/config.html
@@ -82,7 +82,8 @@

      Module codeflare_sdk.cluster.config

      image: str = "quay.io/project-codeflare/ray:latest-py39-cu118" local_interactive: bool = False image_pull_secrets: list = field(default_factory=list) - dispatch_priority: str = None
  • + dispatch_priority: str = None + openshift_oauth: bool = False # NOTE: to use the user must have permission to create a RoleBinding for system:auth-delegator
    @@ -96,7 +97,7 @@

    Classes

class ClusterConfiguration
-(name: str, namespace: str = None, head_info: list = <factory>, head_cpus: int = 2, head_memory: int = 8, head_gpus: int = 0, machine_types: list = <factory>, min_cpus: int = 1, max_cpus: int = 1, num_workers: int = 1, min_memory: int = 2, max_memory: int = 2, num_gpus: int = 0, template: str = '/home/runner/work/codeflare-sdk/codeflare-sdk/src/codeflare_sdk/templates/base-template.yaml', instascale: bool = False, envs: dict = <factory>, image: str = 'quay.io/project-codeflare/ray:latest-py39-cu118', local_interactive: bool = False, image_pull_secrets: list = <factory>, dispatch_priority: str = None)
+(name: str, namespace: str = None, head_info: list = <factory>, head_cpus: int = 2, head_memory: int = 8, head_gpus: int = 0, machine_types: list = <factory>, min_cpus: int = 1, max_cpus: int = 1, num_workers: int = 1, min_memory: int = 2, max_memory: int = 2, num_gpus: int = 0, template: str = '/home/runner/work/codeflare-sdk/codeflare-sdk/src/codeflare_sdk/templates/base-template.yaml', instascale: bool = False, envs: dict = <factory>, image: str = 'quay.io/project-codeflare/ray:latest-py39-cu118', local_interactive: bool = False, image_pull_secrets: list = <factory>, dispatch_priority: str = None, openshift_oauth: bool = False)

This dataclass is used to specify resource requirements and other details, and
@@ -130,7 +131,8 @@

    Classes

    image: str = "quay.io/project-codeflare/ray:latest-py39-cu118" local_interactive: bool = False image_pull_secrets: list = field(default_factory=list) - dispatch_priority: str = None + dispatch_priority: str = None + openshift_oauth: bool = False # NOTE: to use the user must have permission to create a RoleBinding for system:auth-delegator

    Class variables

    @@ -210,6 +212,10 @@

    Class variables

+    var openshift_oauth : bool
+
    var template : str
    @@ -254,6 +260,7 @@

    namespace
  • num_gpus
  • num_workers
+  • openshift_oauth
  • template
diff --git a/docs/job/jobs.html b/docs/job/jobs.html
index 96ea47449..266da13ec 100644
--- a/docs/job/jobs.html
+++ b/docs/job/jobs.html
@@ -46,15 +46,19 @@

    Module codeflare_sdk.job.jobs

 from pathlib import Path

 from torchx.components.dist import ddp
-from torchx.runner import get_runner
+from torchx.runner import get_runner, Runner
+from torchx.schedulers.ray_scheduler import RayScheduler
 from torchx.specs import AppHandle, parse_app_handle, AppDryRunInfo
+from ray.job_submission import JobSubmissionClient
+
+import openshift as oc
+
 if TYPE_CHECKING:
     from ..cluster.cluster import Cluster
 from ..cluster.cluster import get_current_namespace

 all_jobs: List["Job"] = []
-torchx_runner = get_runner()

 class JobDefinition(metaclass=abc.ABCMeta):
@@ -120,30 +124,37 @@

    Module codeflare_sdk.job.jobs

     def _dry_run(self, cluster: "Cluster"):
         j = f"{cluster.config.num_workers}x{max(cluster.config.num_gpus, 1)}"  # # of proc. = # of gpus
-        return torchx_runner.dryrun(
-            app=ddp(
-                *self.script_args,
-                script=self.script,
-                m=self.m,
-                name=self.name,
-                h=self.h,
-                cpu=self.cpu if self.cpu is not None else cluster.config.max_cpus,
-                gpu=self.gpu if self.gpu is not None else cluster.config.num_gpus,
-                memMB=self.memMB
-                if self.memMB is not None
-                else cluster.config.max_memory * 1024,
-                j=self.j if self.j is not None else j,
-                env=self.env,
-                max_retries=self.max_retries,
-                rdzv_port=self.rdzv_port,
-                rdzv_backend=self.rdzv_backend
-                if self.rdzv_backend is not None
-                else "static",
-                mounts=self.mounts,
+        runner = get_runner(ray_client=cluster.client)
+        runner._scheduler_instances["ray"] = RayScheduler(
+            session_name=runner._name, ray_client=cluster.client
+        )
+        return (
+            runner.dryrun(
+                app=ddp(
+                    *self.script_args,
+                    script=self.script,
+                    m=self.m,
+                    name=self.name,
+                    h=self.h,
+                    cpu=self.cpu if self.cpu is not None else cluster.config.max_cpus,
+                    gpu=self.gpu if self.gpu is not None else cluster.config.num_gpus,
+                    memMB=self.memMB
+                    if self.memMB is not None
+                    else cluster.config.max_memory * 1024,
+                    j=self.j if self.j is not None else j,
+                    env=self.env,
+                    max_retries=self.max_retries,
+                    rdzv_port=self.rdzv_port,
+                    rdzv_backend=self.rdzv_backend
+                    if self.rdzv_backend is not None
+                    else "static",
+                    mounts=self.mounts,
+                ),
+                scheduler=cluster.torchx_scheduler,
+                cfg=cluster.torchx_config(**self.scheduler_args),
+                workspace=self.workspace,
             ),
-            scheduler=cluster.torchx_scheduler,
-            cfg=cluster.torchx_config(**self.scheduler_args),
-            workspace=self.workspace,
+            runner,
         )

     def _missing_spec(self, spec: str):
@@ -153,41 +164,47 @@

    Module codeflare_sdk.job.jobs

         if self.scheduler_args is not None:
             if self.scheduler_args.get("namespace") is None:
                 self.scheduler_args["namespace"] = get_current_namespace()
-        return torchx_runner.dryrun(
-            app=ddp(
-                *self.script_args,
-                script=self.script,
-                m=self.m,
-                name=self.name if self.name is not None else self._missing_spec("name"),
-                h=self.h,
-                cpu=self.cpu
-                if self.cpu is not None
-                else self._missing_spec("cpu (# cpus per worker)"),
-                gpu=self.gpu
-                if self.gpu is not None
-                else self._missing_spec("gpu (# gpus per worker)"),
-                memMB=self.memMB
-                if self.memMB is not None
-                else self._missing_spec("memMB (memory in MB)"),
-                j=self.j
-                if self.j is not None
-                else self._missing_spec(
-                    "j (`workers`x`procs`)"
-                ),  # # of proc. = # of gpus,
-                env=self.env,  # should this still exist?
-                max_retries=self.max_retries,
-                rdzv_port=self.rdzv_port,  # should this still exist?
-                rdzv_backend=self.rdzv_backend
-                if self.rdzv_backend is not None
-                else "c10d",
-                mounts=self.mounts,
-                image=self.image
-                if self.image is not None
-                else self._missing_spec("image"),
+        runner = get_runner()
+        return (
+            runner.dryrun(
+                app=ddp(
+                    *self.script_args,
+                    script=self.script,
+                    m=self.m,
+                    name=self.name
+                    if self.name is not None
+                    else self._missing_spec("name"),
+                    h=self.h,
+                    cpu=self.cpu
+                    if self.cpu is not None
+                    else self._missing_spec("cpu (# cpus per worker)"),
+                    gpu=self.gpu
+                    if self.gpu is not None
+                    else self._missing_spec("gpu (# gpus per worker)"),
+                    memMB=self.memMB
+                    if self.memMB is not None
+                    else self._missing_spec("memMB (memory in MB)"),
+                    j=self.j
+                    if self.j is not None
+                    else self._missing_spec(
+                        "j (`workers`x`procs`)"
+                    ),  # # of proc. = # of gpus,
+                    env=self.env,  # should this still exist?
+                    max_retries=self.max_retries,
+                    rdzv_port=self.rdzv_port,  # should this still exist?
+                    rdzv_backend=self.rdzv_backend
+                    if self.rdzv_backend is not None
+                    else "c10d",
+                    mounts=self.mounts,
+                    image=self.image
+                    if self.image is not None
+                    else self._missing_spec("image"),
+                ),
+                scheduler="kubernetes_mcad",
+                cfg=self.scheduler_args,
+                workspace="",
             ),
-            scheduler="kubernetes_mcad",
-            cfg=self.scheduler_args,
-            workspace="",
+            runner,
         )

     def submit(self, cluster: "Cluster" = None) -> "Job":
@@ -199,21 +216,23 @@

    Module codeflare_sdk.job.jobs

         self.job_definition = job_definition
         self.cluster = cluster
         if self.cluster:
-            self._app_handle = torchx_runner.schedule(job_definition._dry_run(cluster))
+            definition, runner = job_definition._dry_run(cluster)
+            self._app_handle = runner.schedule(definition)
+            self._runner = runner
         else:
-            self._app_handle = torchx_runner.schedule(
-                job_definition._dry_run_no_cluster()
-            )
+            definition, runner = job_definition._dry_run_no_cluster()
+            self._app_handle = runner.schedule(definition)
+            self._runner = runner
         all_jobs.append(self)

     def status(self) -> str:
-        return torchx_runner.status(self._app_handle)
+        return self._runner.status(self._app_handle)

     def logs(self) -> str:
-        return "".join(torchx_runner.log_lines(self._app_handle, None))
+        return "".join(self._runner.log_lines(self._app_handle, None))

     def cancel(self):
-        torchx_runner.cancel(self._app_handle)
+        self._runner.cancel(self._app_handle)
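Dropping the module-level torchx_runner means each Job now remembers the Runner that scheduled it, so status/logs/cancel go through the same (possibly authenticated) Ray client. A hedged sketch, assuming the DDPJobDefinition class from this module and the cluster from the earlier examples:

    from codeflare_sdk.job.jobs import DDPJobDefinition

    jobdef = DDPJobDefinition(name="mnist", script="train.py")  # hypothetical script
    job = jobdef.submit(cluster)  # keeps the per-job runner on job._runner
    print(job.status())           # routed through job._runner, not a shared global
    print(job.logs())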
    @@ -240,21 +259,23 @@

    Classes

         self.job_definition = job_definition
         self.cluster = cluster
         if self.cluster:
-            self._app_handle = torchx_runner.schedule(job_definition._dry_run(cluster))
+            definition, runner = job_definition._dry_run(cluster)
+            self._app_handle = runner.schedule(definition)
+            self._runner = runner
         else:
-            self._app_handle = torchx_runner.schedule(
-                job_definition._dry_run_no_cluster()
-            )
+            definition, runner = job_definition._dry_run_no_cluster()
+            self._app_handle = runner.schedule(definition)
+            self._runner = runner
         all_jobs.append(self)

     def status(self) -> str:
-        return torchx_runner.status(self._app_handle)
+        return self._runner.status(self._app_handle)

     def logs(self) -> str:
-        return "".join(torchx_runner.log_lines(self._app_handle, None))
+        return "".join(self._runner.log_lines(self._app_handle, None))

     def cancel(self):
-        torchx_runner.cancel(self._app_handle)
+        self._runner.cancel(self._app_handle)

    Ancestors

      @@ -272,7 +293,7 @@

      Methods

      Expand source code
      def cancel(self):
      -    torchx_runner.cancel(self._app_handle)
+    self._runner.cancel(self._app_handle)
      @@ -285,7 +306,7 @@

      Methods

      Expand source code
      def logs(self) -> str:
      -    return "".join(torchx_runner.log_lines(self._app_handle, None))
      + return "".join(self._runner.log_lines(self._app_handle, None))
      @@ -298,7 +319,7 @@

      Methods

      Expand source code
      def status(self) -> str:
      -    return torchx_runner.status(self._app_handle)
+    return self._runner.status(self._app_handle)
    @@ -360,30 +381,37 @@

    Methods

     def _dry_run(self, cluster: "Cluster"):
         j = f"{cluster.config.num_workers}x{max(cluster.config.num_gpus, 1)}"  # # of proc. = # of gpus
-        return torchx_runner.dryrun(
-            app=ddp(
-                *self.script_args,
-                script=self.script,
-                m=self.m,
-                name=self.name,
-                h=self.h,
-                cpu=self.cpu if self.cpu is not None else cluster.config.max_cpus,
-                gpu=self.gpu if self.gpu is not None else cluster.config.num_gpus,
-                memMB=self.memMB
-                if self.memMB is not None
-                else cluster.config.max_memory * 1024,
-                j=self.j if self.j is not None else j,
-                env=self.env,
-                max_retries=self.max_retries,
-                rdzv_port=self.rdzv_port,
-                rdzv_backend=self.rdzv_backend
-                if self.rdzv_backend is not None
-                else "static",
-                mounts=self.mounts,
+        runner = get_runner(ray_client=cluster.client)
+        runner._scheduler_instances["ray"] = RayScheduler(
+            session_name=runner._name, ray_client=cluster.client
+        )
+        return (
+            runner.dryrun(
+                app=ddp(
+                    *self.script_args,
+                    script=self.script,
+                    m=self.m,
+                    name=self.name,
+                    h=self.h,
+                    cpu=self.cpu if self.cpu is not None else cluster.config.max_cpus,
+                    gpu=self.gpu if self.gpu is not None else cluster.config.num_gpus,
+                    memMB=self.memMB
+                    if self.memMB is not None
+                    else cluster.config.max_memory * 1024,
+                    j=self.j if self.j is not None else j,
+                    env=self.env,
+                    max_retries=self.max_retries,
+                    rdzv_port=self.rdzv_port,
+                    rdzv_backend=self.rdzv_backend
+                    if self.rdzv_backend is not None
+                    else "static",
+                    mounts=self.mounts,
+                ),
+                scheduler=cluster.torchx_scheduler,
+                cfg=cluster.torchx_config(**self.scheduler_args),
+                workspace=self.workspace,
             ),
-            scheduler=cluster.torchx_scheduler,
-            cfg=cluster.torchx_config(**self.scheduler_args),
-            workspace=self.workspace,
+            runner,
         )

     def _missing_spec(self, spec: str):
@@ -393,41 +421,47 @@

    Methods

         if self.scheduler_args is not None:
             if self.scheduler_args.get("namespace") is None:
                 self.scheduler_args["namespace"] = get_current_namespace()
-        return torchx_runner.dryrun(
-            app=ddp(
-                *self.script_args,
-                script=self.script,
-                m=self.m,
-                name=self.name if self.name is not None else self._missing_spec("name"),
-                h=self.h,
-                cpu=self.cpu
-                if self.cpu is not None
-                else self._missing_spec("cpu (# cpus per worker)"),
-                gpu=self.gpu
-                if self.gpu is not None
-                else self._missing_spec("gpu (# gpus per worker)"),
-                memMB=self.memMB
-                if self.memMB is not None
-                else self._missing_spec("memMB (memory in MB)"),
-                j=self.j
-                if self.j is not None
-                else self._missing_spec(
-                    "j (`workers`x`procs`)"
-                ),  # # of proc. = # of gpus,
-                env=self.env,  # should this still exist?
-                max_retries=self.max_retries,
-                rdzv_port=self.rdzv_port,  # should this still exist?
-                rdzv_backend=self.rdzv_backend
-                if self.rdzv_backend is not None
-                else "c10d",
-                mounts=self.mounts,
-                image=self.image
-                if self.image is not None
-                else self._missing_spec("image"),
+        runner = get_runner()
+        return (
+            runner.dryrun(
+                app=ddp(
+                    *self.script_args,
+                    script=self.script,
+                    m=self.m,
+                    name=self.name
+                    if self.name is not None
+                    else self._missing_spec("name"),
+                    h=self.h,
+                    cpu=self.cpu
+                    if self.cpu is not None
+                    else self._missing_spec("cpu (# cpus per worker)"),
+                    gpu=self.gpu
+                    if self.gpu is not None
+                    else self._missing_spec("gpu (# gpus per worker)"),
+                    memMB=self.memMB
+                    if self.memMB is not None
+                    else self._missing_spec("memMB (memory in MB)"),
+                    j=self.j
+                    if self.j is not None
+                    else self._missing_spec(
+                        "j (`workers`x`procs`)"
+                    ),  # # of proc. = # of gpus,
+                    env=self.env,  # should this still exist?
+                    max_retries=self.max_retries,
+                    rdzv_port=self.rdzv_port,  # should this still exist?
+                    rdzv_backend=self.rdzv_backend
+                    if self.rdzv_backend is not None
+                    else "c10d",
+                    mounts=self.mounts,
+                    image=self.image
+                    if self.image is not None
+                    else self._missing_spec("image"),
+                ),
+                scheduler="kubernetes_mcad",
+                cfg=self.scheduler_args,
+                workspace="",
            ),
-            scheduler="kubernetes_mcad",
-            cfg=self.scheduler_args,
-            workspace="",
+            runner,
         )

     def submit(self, cluster: "Cluster" = None) -> "Job":
diff --git a/docs/utils/generate_yaml.html b/docs/utils/generate_yaml.html
index 791898a3b..2091f8b33 100644
--- a/docs/utils/generate_yaml.html
+++ b/docs/utils/generate_yaml.html
@@ -55,6 +55,13 @@

    Module codeflare_sdk.utils.generate_yaml

 from kubernetes import client, config
 from .kube_api_helpers import _kube_api_error_handling
 from ..cluster.auth import api_config_handler, config_check
+from os import urandom
+from base64 import b64encode
+from urllib3.util import parse_url
+
+from kubernetes import client, config
+
+from .kube_api_helpers import _get_api_host

 def read_template(template):
@@ -77,13 +84,17 @@

    Module codeflare_sdk.utils.generate_yaml

 def update_dashboard_route(route_item, cluster_name, namespace):
     metadata = route_item.get("generictemplate", {}).get("metadata")
-    metadata["name"] = f"ray-dashboard-{cluster_name}"
+    metadata["name"] = gen_dashboard_route_name(cluster_name)
     metadata["namespace"] = namespace
     metadata["labels"]["odh-ray-cluster-service"] = f"{cluster_name}-head-svc"
     spec = route_item.get("generictemplate", {}).get("spec")
     spec["to"]["name"] = f"{cluster_name}-head-svc"

+def gen_dashboard_route_name(cluster_name):
+    return f"ray-dashboard-{cluster_name}"
+
+
 # ToDo: refactor the update_x_route() functions
 def update_rayclient_route(route_item, cluster_name, namespace):
     metadata = route_item.get("generictemplate", {}).get("metadata")
@@ -400,6 +411,83 @@

    Module codeflare_sdk.utils.generate_yaml

    print(f"Written to: {output_file_name}") +def enable_openshift_oauth(user_yaml, cluster_name, namespace): + config_check() + k8_client = api_config_handler() or client.ApiClient() + tls_mount_location = "/etc/tls/private" + oauth_port = 8443 + oauth_sa_name = f"{cluster_name}-oauth-proxy" + tls_secret_name = f"{cluster_name}-proxy-tls-secret" + tls_volume_name = "proxy-tls-secret" + port_name = "oauth-proxy" + host = _get_api_host(k8_client) + host = host.replace( + "api.", f"{gen_dashboard_route_name(cluster_name)}-{namespace}.apps." + ) + oauth_sidecar = _create_oauth_sidecar_object( + namespace, + tls_mount_location, + oauth_port, + oauth_sa_name, + tls_volume_name, + port_name, + ) + tls_secret_volume = client.V1Volume( + name=tls_volume_name, + secret=client.V1SecretVolumeSource(secret_name=tls_secret_name), + ) + # allows for setting value of Cluster object when initializing object from an existing AppWrapper on cluster + user_yaml["metadata"]["annotations"] = user_yaml["metadata"].get("annotations", {}) + user_yaml["metadata"]["annotations"][ + "codeflare-sdk-use-oauth" + ] = "true" # if the user gets an + ray_headgroup_pod = user_yaml["spec"]["resources"]["GenericItems"][0][ + "generictemplate" + ]["spec"]["headGroupSpec"]["template"]["spec"] + user_yaml["spec"]["resources"]["GenericItems"].pop(1) + ray_headgroup_pod["serviceAccount"] = oauth_sa_name + ray_headgroup_pod["volumes"] = ray_headgroup_pod.get("volumes", []) + + # we use a generic api client here so that the serialization function doesn't need to be mocked for unit tests + ray_headgroup_pod["volumes"].append( + client.ApiClient().sanitize_for_serialization(tls_secret_volume) + ) + ray_headgroup_pod["containers"].append( + client.ApiClient().sanitize_for_serialization(oauth_sidecar) + ) + + +def _create_oauth_sidecar_object( + namespace: str, + tls_mount_location: str, + oauth_port: int, + oauth_sa_name: str, + tls_volume_name: str, + port_name: str, +) -> client.V1Container: + return client.V1Container( + args=[ + f"--https-address=:{oauth_port}", + "--provider=openshift", + f"--openshift-service-account={oauth_sa_name}", + "--upstream=http://localhost:8265", + f"--tls-cert={tls_mount_location}/tls.crt", + f"--tls-key={tls_mount_location}/tls.key", + f"--cookie-secret={b64encode(urandom(64)).decode('utf-8')}", # create random string for encrypting cookie + f'--openshift-delegate-urls={{"/":{{"resource":"pods","namespace":"{namespace}","verb":"get"}}}}', + ], + image="registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366", + name="oauth-proxy", + ports=[client.V1ContainerPort(container_port=oauth_port, name=port_name)], + resources=client.V1ResourceRequirements(limits=None, requests=None), + volume_mounts=[ + client.V1VolumeMount( + mount_path=tls_mount_location, name=tls_volume_name, read_only=True + ) + ], + ) + + def generate_appwrapper( name: str, namespace: str, @@ -421,6 +509,7 @@

    Module codeflare_sdk.utils.generate_yaml

     image_pull_secrets: list,
     dispatch_priority: str,
     priority_val: int,
+    openshift_oauth: bool,
 ):
     user_yaml = read_template(template)
     appwrapper_name, cluster_name = gen_names(name)
@@ -464,6 +553,10 @@

    Module codeflare_sdk.utils.generate_yaml

         enable_local_interactive(resources, cluster_name, namespace)
     else:
         disable_raycluster_tls(resources["resources"])
+
+    if openshift_oauth:
+        enable_openshift_oauth(user_yaml, cluster_name, namespace)
+
     outfile = appwrapper_name + ".yaml"
     write_user_appwrapper(user_yaml, outfile)
     return outfile
@@ -594,6 +687,74 @@

    Functions

    ][0].get("command")[2] = command +
    +def enable_openshift_oauth(user_yaml, cluster_name, namespace) +
    +
    +
    +
    + +Expand source code + +
    def enable_openshift_oauth(user_yaml, cluster_name, namespace):
    +    config_check()
    +    k8_client = api_config_handler() or client.ApiClient()
    +    tls_mount_location = "/etc/tls/private"
    +    oauth_port = 8443
    +    oauth_sa_name = f"{cluster_name}-oauth-proxy"
    +    tls_secret_name = f"{cluster_name}-proxy-tls-secret"
    +    tls_volume_name = "proxy-tls-secret"
    +    port_name = "oauth-proxy"
    +    host = _get_api_host(k8_client)
    +    host = host.replace(
    +        "api.", f"{gen_dashboard_route_name(cluster_name)}-{namespace}.apps."
    +    )
    +    oauth_sidecar = _create_oauth_sidecar_object(
    +        namespace,
    +        tls_mount_location,
    +        oauth_port,
    +        oauth_sa_name,
    +        tls_volume_name,
    +        port_name,
    +    )
    +    tls_secret_volume = client.V1Volume(
    +        name=tls_volume_name,
    +        secret=client.V1SecretVolumeSource(secret_name=tls_secret_name),
    +    )
    +    # allows for setting value of Cluster object when initializing object from an existing AppWrapper on cluster
    +    user_yaml["metadata"]["annotations"] = user_yaml["metadata"].get("annotations", {})
    +    user_yaml["metadata"]["annotations"][
    +        "codeflare-sdk-use-oauth"
    +    ] = "true"  # if the user gets an
    +    ray_headgroup_pod = user_yaml["spec"]["resources"]["GenericItems"][0][
    +        "generictemplate"
    +    ]["spec"]["headGroupSpec"]["template"]["spec"]
    +    user_yaml["spec"]["resources"]["GenericItems"].pop(1)
    +    ray_headgroup_pod["serviceAccount"] = oauth_sa_name
    +    ray_headgroup_pod["volumes"] = ray_headgroup_pod.get("volumes", [])
    +
    +    # we use a generic api client here so that the serialization function doesn't need to be mocked for unit tests
    +    ray_headgroup_pod["volumes"].append(
    +        client.ApiClient().sanitize_for_serialization(tls_secret_volume)
    +    )
    +    ray_headgroup_pod["containers"].append(
    +        client.ApiClient().sanitize_for_serialization(oauth_sidecar)
    +    )
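The host rewrite above assumes the usual OpenShift convention that the API server lives at api.&lt;cluster-domain&gt; and user-facing routes at apps.&lt;cluster-domain&gt;. A worked example with hypothetical values:

    cluster_name, namespace = "demo", "default"
    host = "api.mycluster.example.com"
    route_name = f"ray-dashboard-{cluster_name}"  # gen_dashboard_route_name()
    host = host.replace("api.", f"{route_name}-{namespace}.apps.")
    print(host)  # ray-dashboard-demo-default.apps.mycluster.example.com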
+
+def gen_dashboard_route_name(cluster_name)
+
+Expand source code
    def gen_dashboard_route_name(cluster_name):
    +    return f"ray-dashboard-{cluster_name}"
    +
    +
    def gen_names(name)
    @@ -614,7 +775,7 @@

    Functions

-def generate_appwrapper(name: str, namespace: str, head_cpus: int, head_memory: int, head_gpus: int, min_cpu: int, max_cpu: int, min_memory: int, max_memory: int, gpu: int, workers: int, template: str, image: str, instascale: bool, instance_types: list, env, local_interactive: bool, image_pull_secrets: list, dispatch_priority: str, priority_val: int)
+def generate_appwrapper(name: str, namespace: str, head_cpus: int, head_memory: int, head_gpus: int, min_cpu: int, max_cpu: int, min_memory: int, max_memory: int, gpu: int, workers: int, template: str, image: str, instascale: bool, instance_types: list, env, local_interactive: bool, image_pull_secrets: list, dispatch_priority: str, priority_val: int, openshift_oauth: bool)
    @@ -643,6 +804,7 @@

    Functions

     image_pull_secrets: list,
     dispatch_priority: str,
     priority_val: int,
+    openshift_oauth: bool,
 ):
     user_yaml = read_template(template)
     appwrapper_name, cluster_name = gen_names(name)
@@ -686,6 +848,10 @@

    Functions

         enable_local_interactive(resources, cluster_name, namespace)
     else:
         disable_raycluster_tls(resources["resources"])
+
+    if openshift_oauth:
+        enable_openshift_oauth(user_yaml, cluster_name, namespace)
+
     outfile = appwrapper_name + ".yaml"
     write_user_appwrapper(user_yaml, outfile)
     return outfile
@@ -821,7 +987,7 @@

    Functions

    def update_dashboard_route(route_item, cluster_name, namespace):
         metadata = route_item.get("generictemplate", {}).get("metadata")
    -    metadata["name"] = f"ray-dashboard-{cluster_name}"
    +    metadata["name"] = gen_dashboard_route_name(cluster_name)
         metadata["namespace"] = namespace
         metadata["labels"]["odh-ray-cluster-service"] = f"{cluster_name}-head-svc"
         spec = route_item.get("generictemplate", {}).get("spec")
    @@ -1078,6 +1244,8 @@ 

    Index

+codeflare_sdk.utils.openshift_oauth
+
    codeflare_sdk.utils.pretty_print

This sub-module exists primarily to be used internally by the Cluster object
@@ -70,6 +74,7 @@

    Index

  • codeflare_sdk.utils.generate_cert
  • codeflare_sdk.utils.generate_yaml
  • codeflare_sdk.utils.kube_api_helpers
+  • codeflare_sdk.utils.openshift_oauth
  • codeflare_sdk.utils.pretty_print
diff --git a/docs/utils/kube_api_helpers.html b/docs/utils/kube_api_helpers.html
index 4c2ecb781..37034ab78 100644
--- a/docs/utils/kube_api_helpers.html
+++ b/docs/utils/kube_api_helpers.html
@@ -50,6 +50,7 @@

    Module codeflare_sdk.utils.kube_api_helpers

 import executing
 from kubernetes import client, config
+from urllib3.util import parse_url

 # private methods
@@ -72,7 +73,11 @@

    Module codeflare_sdk.utils.kube_api_helpers

         raise PermissionError(perm_msg)
     elif e.reason == "Conflict":
         raise FileExistsError(exists_msg)
-    raise e
+    raise e
+
+
+def _get_api_host(api_client: client.ApiClient):  # pragma: no cover
+    return parse_url(api_client.configuration.host).host
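A small sketch of what the new helper extracts (the host value is hypothetical):

    from kubernetes import client
    from urllib3.util import parse_url

    cfg = client.Configuration()
    cfg.host = "https://api.mycluster.example.com:6443"
    api_client = client.ApiClient(configuration=cfg)
    # parse_url().host drops the scheme and port:
    print(parse_url(api_client.configuration.host).host)  # api.mycluster.example.com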
diff --git a/docs/utils/openshift_oauth.html b/docs/utils/openshift_oauth.html
new file mode 100644
index 000000000..0fc27b6b2
--- /dev/null
+++ b/docs/utils/openshift_oauth.html
@@ -0,0 +1,347 @@
+codeflare_sdk.utils.openshift_oauth API documentation
    +
    +
    +

    Module codeflare_sdk.utils.openshift_oauth

    +
    +
    +
+Expand source code
+from urllib3.util import parse_url
    +from .generate_yaml import gen_dashboard_route_name
    +from .kube_api_helpers import _get_api_host
    +from base64 import b64decode
    +
    +from ..cluster.auth import config_check, api_config_handler
    +
    +from kubernetes import client
    +
    +
    +def create_openshift_oauth_objects(cluster_name, namespace):
    +    config_check()
    +    api_client = api_config_handler() or client.ApiClient()
    +    oauth_port = 8443
    +    oauth_sa_name = f"{cluster_name}-oauth-proxy"
    +    tls_secret_name = _gen_tls_secret_name(cluster_name)
    +    service_name = f"{cluster_name}-oauth"
    +    port_name = "oauth-proxy"
    +    host = _get_api_host(api_client)
    +
    +    # replace "^api" with the expected host
    +    host = f"{gen_dashboard_route_name(cluster_name)}-{namespace}.apps" + host.lstrip(
    +        "api"
    +    )
    +
    +    _create_or_replace_oauth_sa(namespace, oauth_sa_name, host)
    +    _create_or_replace_oauth_service_obj(
    +        cluster_name, namespace, oauth_port, tls_secret_name, service_name, port_name
    +    )
    +    _create_or_replace_oauth_ingress_object(
    +        cluster_name, namespace, service_name, port_name, host
    +    )
    +    _create_or_replace_oauth_rb(cluster_name, namespace, oauth_sa_name)
    +
    +
    +def _create_or_replace_oauth_sa(namespace, oauth_sa_name, host):
    +    api_client = api_config_handler()
    +    oauth_sa = client.V1ServiceAccount(
    +        api_version="v1",
    +        kind="ServiceAccount",
    +        metadata=client.V1ObjectMeta(
    +            name=oauth_sa_name,
    +            namespace=namespace,
    +            annotations={
    +                "serviceaccounts.openshift.io/oauth-redirecturi.first": f"https://{host}"
    +            },
    +        ),
    +    )
    +    try:
    +        client.CoreV1Api(api_client).create_namespaced_service_account(
    +            namespace=namespace, body=oauth_sa
    +        )
    +    except client.ApiException as e:
    +        if e.reason == "Conflict":
    +            client.CoreV1Api(api_client).replace_namespaced_service_account(
    +                namespace=namespace,
    +                body=oauth_sa,
    +                name=oauth_sa_name,
    +            )
    +        else:
    +            raise e
    +
    +
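The service-account, role-binding, service, and ingress helpers all repeat this create-or-replace idiom. A generic sketch of the pattern (a possible refactor, not part of this diff):

    from kubernetes import client

    def _create_or_replace(create_fn, replace_fn, name, **kwargs):
        # Try to create; on 409 Conflict, replace the existing object.
        try:
            create_fn(**kwargs)
        except client.ApiException as e:
            if e.reason == "Conflict":
                replace_fn(name=name, **kwargs)
            else:
                raise

    # e.g. _create_or_replace(api.create_namespaced_service_account,
    #                         api.replace_namespaced_service_account,
    #                         name=oauth_sa_name, namespace=namespace, body=oauth_sa)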
    +def _create_or_replace_oauth_rb(cluster_name, namespace, oauth_sa_name):
    +    api_client = api_config_handler()
    +    oauth_crb = client.V1ClusterRoleBinding(
    +        api_version="rbac.authorization.k8s.io/v1",
    +        kind="ClusterRoleBinding",
    +        metadata=client.V1ObjectMeta(name=f"{cluster_name}-rb"),
    +        role_ref=client.V1RoleRef(
    +            api_group="rbac.authorization.k8s.io",
    +            kind="ClusterRole",
    +            name="system:auth-delegator",
    +        ),
    +        subjects=[
    +            client.V1Subject(
    +                kind="ServiceAccount", name=oauth_sa_name, namespace=namespace
    +            )
    +        ],
    +    )
    +    try:
    +        client.RbacAuthorizationV1Api(api_client).create_cluster_role_binding(
    +            body=oauth_crb
    +        )
    +    except client.ApiException as e:
    +        if e.reason == "Conflict":
    +            client.RbacAuthorizationV1Api(api_client).replace_cluster_role_binding(
    +                body=oauth_crb, name=f"{cluster_name}-rb"
    +            )
    +        else:
    +            raise e
    +
    +
    +def _gen_tls_secret_name(cluster_name):
    +    return f"{cluster_name}-proxy-tls-secret"
    +
    +
    +def delete_openshift_oauth_objects(cluster_name, namespace):
    +    # NOTE: it might be worth adding error handling here, but shouldn't be necessary because cluster.down(...) checks
    +    # for an existing cluster before calling this => the objects should never be deleted twice
    +    api_client = api_config_handler()
    +    oauth_sa_name = f"{cluster_name}-oauth-proxy"
    +    service_name = f"{cluster_name}-oauth"
    +    client.CoreV1Api(api_client).delete_namespaced_service_account(
    +        name=oauth_sa_name, namespace=namespace
    +    )
    +    client.CoreV1Api(api_client).delete_namespaced_service(
    +        name=service_name, namespace=namespace
    +    )
    +    client.NetworkingV1Api(api_client).delete_namespaced_ingress(
    +        name=f"{cluster_name}-ingress", namespace=namespace
    +    )
    +    client.RbacAuthorizationV1Api(api_client).delete_cluster_role_binding(
    +        name=f"{cluster_name}-rb"
    +    )
    +
    +
    +def _create_or_replace_oauth_service_obj(
    +    cluster_name: str,
    +    namespace: str,
    +    oauth_port: int,
    +    tls_secret_name: str,
    +    service_name: str,
    +    port_name: str,
    +) -> client.V1Service:
    +    api_client = api_config_handler()
    +    oauth_service = client.V1Service(
    +        api_version="v1",
    +        kind="Service",
    +        metadata=client.V1ObjectMeta(
    +            annotations={
    +                "service.beta.openshift.io/serving-cert-secret-name": tls_secret_name
    +            },
    +            name=service_name,
    +            namespace=namespace,
    +        ),
    +        spec=client.V1ServiceSpec(
    +            ports=[
    +                client.V1ServicePort(
    +                    name=port_name,
    +                    protocol="TCP",
    +                    port=443,
    +                    target_port=oauth_port,
    +                )
    +            ],
    +            selector={
    +                "app.kubernetes.io/created-by": "kuberay-operator",
    +                "app.kubernetes.io/name": "kuberay",
    +                "ray.io/cluster": cluster_name,
    +                "ray.io/identifier": f"{cluster_name}-head",
    +                "ray.io/node-type": "head",
    +            },
    +        ),
    +    )
    +    try:
    +        client.CoreV1Api(api_client).create_namespaced_service(
    +            namespace=namespace, body=oauth_service
    +        )
    +    except client.ApiException as e:
    +        if e.reason == "Conflict":
    +            client.CoreV1Api(api_client).replace_namespaced_service(
    +                namespace=namespace, body=oauth_service, name=service_name
    +            )
    +        else:
    +            raise e
    +
    +
    +def _create_or_replace_oauth_ingress_object(
    +    cluster_name: str,
    +    namespace: str,
    +    service_name: str,
    +    port_name: str,
    +    host: str,
    +) -> client.V1Ingress:
    +    api_client = api_config_handler()
    +    ingress = client.V1Ingress(
    +        api_version="networking.k8s.io/v1",
    +        kind="Ingress",
    +        metadata=client.V1ObjectMeta(
    +            annotations={"route.openshift.io/termination": "passthrough"},
    +            name=f"{cluster_name}-ingress",
    +            namespace=namespace,
    +        ),
    +        spec=client.V1IngressSpec(
    +            rules=[
    +                client.V1IngressRule(
    +                    host=host,
    +                    http=client.V1HTTPIngressRuleValue(
    +                        paths=[
    +                            client.V1HTTPIngressPath(
    +                                backend=client.V1IngressBackend(
    +                                    service=client.V1IngressServiceBackend(
    +                                        name=service_name,
    +                                        port=client.V1ServiceBackendPort(
    +                                            name=port_name
    +                                        ),
    +                                    )
    +                                ),
    +                                path_type="ImplementationSpecific",
    +                            )
    +                        ]
    +                    ),
    +                )
    +            ]
    +        ),
    +    )
    +    try:
    +        client.NetworkingV1Api(api_client).create_namespaced_ingress(
    +            namespace=namespace, body=ingress
    +        )
    +    except client.ApiException as e:
    +        if e.reason == "Conflict":
    +            client.NetworkingV1Api(api_client).replace_namespaced_ingress(
    +                namespace=namespace, body=ingress, name=f"{cluster_name}-ingress"
    +            )
    +        else:
    +            raise e
    +
    +
    +
    +
    +
    +
    +
    +

    Functions

    +
    +
+def create_openshift_oauth_objects(cluster_name, namespace)
    +
    +
    +
+Expand source code
    def create_openshift_oauth_objects(cluster_name, namespace):
    +    config_check()
    +    api_client = api_config_handler() or client.ApiClient()
    +    oauth_port = 8443
    +    oauth_sa_name = f"{cluster_name}-oauth-proxy"
    +    tls_secret_name = _gen_tls_secret_name(cluster_name)
    +    service_name = f"{cluster_name}-oauth"
    +    port_name = "oauth-proxy"
    +    host = _get_api_host(api_client)
    +
    +    # replace "^api" with the expected host
    +    host = f"{gen_dashboard_route_name(cluster_name)}-{namespace}.apps" + host.lstrip(
    +        "api"
    +    )
    +
    +    _create_or_replace_oauth_sa(namespace, oauth_sa_name, host)
    +    _create_or_replace_oauth_service_obj(
    +        cluster_name, namespace, oauth_port, tls_secret_name, service_name, port_name
    +    )
    +    _create_or_replace_oauth_ingress_object(
    +        cluster_name, namespace, service_name, port_name, host
    +    )
    +    _create_or_replace_oauth_rb(cluster_name, namespace, oauth_sa_name)
    +
    +
    +
+def delete_openshift_oauth_objects(cluster_name, namespace)
    +
    +
    +
+Expand source code
    def delete_openshift_oauth_objects(cluster_name, namespace):
    +    # NOTE: it might be worth adding error handling here, but shouldn't be necessary because cluster.down(...) checks
    +    # for an existing cluster before calling this => the objects should never be deleted twice
    +    api_client = api_config_handler()
    +    oauth_sa_name = f"{cluster_name}-oauth-proxy"
    +    service_name = f"{cluster_name}-oauth"
    +    client.CoreV1Api(api_client).delete_namespaced_service_account(
    +        name=oauth_sa_name, namespace=namespace
    +    )
    +    client.CoreV1Api(api_client).delete_namespaced_service(
    +        name=service_name, namespace=namespace
    +    )
    +    client.NetworkingV1Api(api_client).delete_namespaced_ingress(
    +        name=f"{cluster_name}-ingress", namespace=namespace
    +    )
    +    client.RbacAuthorizationV1Api(api_client).delete_cluster_role_binding(
    +        name=f"{cluster_name}-rb"
    +    )
    +
    +
    +
    +
    +
    +
    +