diff --git a/docs/cluster/cluster.html b/docs/cluster/cluster.html
index 7b391787d..e33b9cc2f 100644
--- a/docs/cluster/cluster.html
+++ b/docs/cluster/cluster.html
@@ -102,7 +102,7 @@
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
codeflare_sdk.cluster.cluster
var client
var job_client
@property
-def client(self):
- if self._client:
- return self._client
+def job_client(self):
+ if self._job_submission_client:
+ return self._job_submission_client
if self.config.openshift_oauth:
print(
api_config_handler().configuration.get_api_key_with_prefix(
"authorization"
)
)
- self._client = JobSubmissionClient(
+ self._job_submission_client = JobSubmissionClient(
self.cluster_dashboard_uri(),
headers=self._client_headers,
verify=self._client_verify_tls,
)
else:
- self._client = JobSubmissionClient(self.cluster_dashboard_uri())
- return self._client
+ self._job_submission_client = JobSubmissionClient(
+ self.cluster_dashboard_uri()
+ )
+ return self._job_submission_client
-def from_k8_cluster_object(rc)
+def from_k8_cluster_object(rc, mcad=True)
def from_k8_cluster_object(rc):
+def from_k8_cluster_object(rc, mcad=True):
machine_types = (
rc["metadata"]["labels"]["orderedinstance"].split("_")
if "orderedinstance" in rc["metadata"]["labels"]
@@ -1645,6 +1836,7 @@ Methods
0
]["image"],
local_interactive=local_interactive,
+ mcad=mcad,
)
return Cluster(cluster_config)
@@ -1688,7 +1880,7 @@ Methods
"""
This method accesses the head ray node in your cluster and returns the logs for the provided job id.
"""
- return self.client.get_job_logs(job_id)
+ return self.job_client.get_job_logs(job_id)
@@ -1704,7 +1896,7 @@ Methods
"""
This method accesses the head ray node in your cluster and returns the job status for the provided job id.
"""
- return self.client.get_job_status(job_id)
+ return self.job_client.get_job_status(job_id)
@@ -1720,7 +1912,7 @@ Methods
"""
This method accesses the head ray node in your cluster and lists the running jobs.
"""
- return self.client.list_jobs()
+ return self.job_client.list_jobs()
@@ -1759,42 +1951,46 @@ Methods
"""
ready = False
status = CodeFlareClusterStatus.UNKNOWN
- # check the app wrapper status
- appwrapper = _app_wrapper_status(self.config.name, self.config.namespace)
- if appwrapper:
- if appwrapper.status in [
- AppWrapperStatus.RUNNING,
- AppWrapperStatus.COMPLETED,
- AppWrapperStatus.RUNNING_HOLD_COMPLETION,
- ]:
- ready = False
- status = CodeFlareClusterStatus.STARTING
- elif appwrapper.status in [
- AppWrapperStatus.FAILED,
- AppWrapperStatus.DELETED,
- ]:
- ready = False
- status = CodeFlareClusterStatus.FAILED # should deleted be separate
- return status, ready # exit early, no need to check ray status
- elif appwrapper.status in [
- AppWrapperStatus.PENDING,
- AppWrapperStatus.QUEUEING,
- ]:
- ready = False
- if appwrapper.status == AppWrapperStatus.PENDING:
- status = CodeFlareClusterStatus.QUEUED
- else:
- status = CodeFlareClusterStatus.QUEUEING
- if print_to_console:
- pretty_print.print_app_wrappers_status([appwrapper])
- return (
- status,
- ready,
- ) # no need to check the ray status since still in queue
+ if self.config.mcad:
+ # check the app wrapper status
+ appwrapper = _app_wrapper_status(self.config.name, self.config.namespace)
+ if appwrapper:
+ if appwrapper.status in [
+ AppWrapperStatus.RUNNING,
+ AppWrapperStatus.COMPLETED,
+ AppWrapperStatus.RUNNING_HOLD_COMPLETION,
+ ]:
+ ready = False
+ status = CodeFlareClusterStatus.STARTING
+ elif appwrapper.status in [
+ AppWrapperStatus.FAILED,
+ AppWrapperStatus.DELETED,
+ ]:
+ ready = False
+ status = CodeFlareClusterStatus.FAILED # should deleted be separate
+ return status, ready # exit early, no need to check ray status
+ elif appwrapper.status in [
+ AppWrapperStatus.PENDING,
+ AppWrapperStatus.QUEUEING,
+ ]:
+ ready = False
+ if appwrapper.status == AppWrapperStatus.PENDING:
+ status = CodeFlareClusterStatus.QUEUED
+ else:
+ status = CodeFlareClusterStatus.QUEUEING
+ if print_to_console:
+ pretty_print.print_app_wrappers_status([appwrapper])
+ return (
+ status,
+ ready,
+ ) # no need to check the ray status since still in queue
# check the ray cluster status
cluster = _ray_cluster_status(self.config.name, self.config.namespace)
- if cluster and not cluster.status == RayClusterStatus.UNKNOWN:
+ if cluster:
+ if cluster.status == RayClusterStatus.UNKNOWN:
+ ready = False
+ status = CodeFlareClusterStatus.STARTING
if cluster.status == RayClusterStatus.READY:
ready = True
status = CodeFlareClusterStatus.READY
@@ -1866,15 +2062,18 @@ Methods
try:
config_check()
api_instance = client.CustomObjectsApi(api_config_handler())
- with open(self.app_wrapper_yaml) as f:
- aw = yaml.load(f, Loader=yaml.FullLoader)
- api_instance.create_namespaced_custom_object(
- group="workload.codeflare.dev",
- version="v1beta1",
- namespace=namespace,
- plural="appwrappers",
- body=aw,
- )
+ if self.config.mcad:
+ with open(self.app_wrapper_yaml) as f:
+ aw = yaml.load(f, Loader=yaml.FullLoader)
+ api_instance.create_namespaced_custom_object(
+ group="workload.codeflare.dev",
+ version="v1beta1",
+ namespace=namespace,
+ plural="appwrappers",
+ body=aw,
+ )
+ else:
+ self._component_resources_up(namespace, api_instance)
except Exception as e: # pragma: no cover
return _kube_api_error_handling(e)
@@ -1956,7 +2155,6 @@ Cluster
client
cluster_dashboard_uri
cluster_uri
create_app_wrapper
evaluate_dispatch_priority
from_k8_cluster_object
is_dashboard_ready
job_client
job_logs
job_status
list_jobs
codeflare_sdk.cluster.config
class ClusterConfiguration
-(name: str, namespace: str = None, head_info: list = <factory>, head_cpus: int = 2, head_memory: int = 8, head_gpus: int = 0, machine_types: list = <factory>, min_cpus: int = 1, max_cpus: int = 1, num_workers: int = 1, min_memory: int = 2, max_memory: int = 2, num_gpus: int = 0, template: str = '/home/runner/work/codeflare-sdk/codeflare-sdk/src/codeflare_sdk/templates/base-template.yaml', instascale: bool = False, envs: dict = <factory>, image: str = 'quay.io/project-codeflare/ray:latest-py39-cu118', local_interactive: bool = False, image_pull_secrets: list = <factory>, dispatch_priority: str = None, openshift_oauth: bool = False)
+(name: str, namespace: str = None, head_info: list = <factory>, head_cpus: int = 2, head_memory: int = 8, head_gpus: int = 0, machine_types: list = <factory>, min_cpus: int = 1, max_cpus: int = 1, num_workers: int = 1, min_memory: int = 2, max_memory: int = 2, num_gpus: int = 0, template: str = '/home/runner/work/codeflare-sdk/codeflare-sdk/src/codeflare_sdk/templates/base-template.yaml', instascale: bool = False, mcad: bool = True, envs: dict = <factory>, image: str = 'quay.io/project-codeflare/ray:latest-py39-cu118', local_interactive: bool = False, image_pull_secrets: list = <factory>, dispatch_priority: str = None, openshift_oauth: bool = False)
This dataclass is used to specify resource requirements and other details, and
@@ -127,6 +128,7 @@
var mcad : bool
var min_cpus : int
machine_types
max_cpus
max_memory
mcad
min_cpus
min_memory
name
codeflare_sdk.job.jobs
codeflare_sdk.job.jobs
codeflare_sdk.utils.generate_yaml
codeflare_sdk.utils.generate_yaml
codeflare_sdk.utils.generate_yaml
-def generate_appwrapper(name: str, namespace: str, head_cpus: int, head_memory: int, head_gpus: int, min_cpu: int, max_cpu: int, min_memory: int, max_memory: int, gpu: int, workers: int, template: str, image: str, instascale: bool, instance_types: list, env, local_interactive: bool, image_pull_secrets: list, dispatch_priority: str, priority_val: int, openshift_oauth: bool)
+def generate_appwrapper(name: str, namespace: str, head_cpus: int, head_memory: int, head_gpus: int, min_cpu: int, max_cpu: int, min_memory: int, max_memory: int, gpu: int, workers: int, template: str, image: str, instascale: bool, mcad: bool, instance_types: list, env, local_interactive: bool, image_pull_secrets: list, dispatch_priority: str, priority_val: int, openshift_oauth: bool)
+def write_components(user_yaml: dict, output_file_name: str)
+
def write_components(user_yaml: dict, output_file_name: str):
+    """
+    Write each "generictemplate" found under
+    user_yaml["spec"]["resources"]["GenericItems"] to output_file_name as a
+    multi-document YAML file (every document preceded by a "---" line).
+
+    The output file is always created or truncated, even when there are no
+    components to write.
+    """
+    # Chain lookups with dict defaults. The previous form,
+    # user_yaml.get("spec", "resources")["resources"], fell back to the string
+    # "resources" and raised TypeError whenever "spec" was missing.
+    spec = user_yaml.get("spec") or {}
+    resources = spec.get("resources") or {}
+    components = resources.get("GenericItems") or []
+    # Mode "w" truncates on open, so a separate truncate-then-append is not needed.
+    with open(output_file_name, "w") as outfile:
+        for component in components:
+            if "generictemplate" in component:
+                outfile.write("---\n")
+                yaml.dump(
+                    component["generictemplate"], outfile, default_flow_style=False
+                )
+    print(f"Written to: {output_file_name}")
+
def write_user_appwrapper(user_yaml, output_file_name)
update_priority
update_rayclient_route
update_resources
write_components
write_user_appwrapper
codeflare_sdk.utils.kube_api_helpers
codeflare_sdk.utils.kube_api_helpers