From f4400513aa85062ab8727cda16218a9081b2a2c2 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Fri, 12 Apr 2024 11:21:36 +0100 Subject: [PATCH 1/5] Removed create-cert init containers & side car container --- pyproject.toml | 1 + src/codeflare_sdk.egg-info/PKG-INFO | 2 +- src/codeflare_sdk.egg-info/SOURCES.txt | 2 - .../templates/base-template.yaml | 75 +---------- src/codeflare_sdk/utils/generate_yaml.py | 127 +----------------- tests/test-case-no-mcad.yamls | 53 ++++---- tests/test-case-prio.yaml | 54 ++++---- tests/test-case.yaml | 54 ++++---- tests/test-default-appwrapper.yaml | 54 ++++---- tests/unit_test.py | 115 ++++++++++------ 10 files changed, 189 insertions(+), 348 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c47e466a6..ee50a48c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ pytest-timeout = "2.2.0" [tool.pytest.ini_options] filterwarnings = [ "ignore::DeprecationWarning:pkg_resources", + "ignore::DeprecationWarning", "ignore:pkg_resources is deprecated as an API:DeprecationWarning", ] markers = [ diff --git a/src/codeflare_sdk.egg-info/PKG-INFO b/src/codeflare_sdk.egg-info/PKG-INFO index c4061c623..27ec5cbfa 100644 --- a/src/codeflare_sdk.egg-info/PKG-INFO +++ b/src/codeflare_sdk.egg-info/PKG-INFO @@ -1,4 +1,4 @@ Metadata-Version: 2.1 -Name: codeflare-sdk +Name: codeflare_sdk Version: 0.0.0 License-File: LICENSE diff --git a/src/codeflare_sdk.egg-info/SOURCES.txt b/src/codeflare_sdk.egg-info/SOURCES.txt index d922d0dbe..42541f1d2 100644 --- a/src/codeflare_sdk.egg-info/SOURCES.txt +++ b/src/codeflare_sdk.egg-info/SOURCES.txt @@ -13,11 +13,9 @@ src/codeflare_sdk/cluster/cluster.py src/codeflare_sdk/cluster/config.py src/codeflare_sdk/cluster/model.py src/codeflare_sdk/job/__init__.py -src/codeflare_sdk/job/jobs.py src/codeflare_sdk/job/ray_jobs.py src/codeflare_sdk/utils/__init__.py src/codeflare_sdk/utils/generate_cert.py src/codeflare_sdk/utils/generate_yaml.py src/codeflare_sdk/utils/kube_api_helpers.py -src/codeflare_sdk/utils/openshift_oauth.py src/codeflare_sdk/utils/pretty_print.py diff --git a/src/codeflare_sdk/templates/base-template.yaml b/src/codeflare_sdk/templates/base-template.yaml index 5f6036ac8..a93ec3a70 100644 --- a/src/codeflare_sdk/templates/base-template.yaml +++ b/src/codeflare_sdk/templates/base-template.yaml @@ -117,20 +117,7 @@ spec: - "aw-kuberay" containers: # The Ray head pod - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: "0" - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - name: ray-head + - name: ray-head image: quay.io/project-codeflare/ray:latest-py39-cu118 imagePullPolicy: Always ports: @@ -172,30 +159,7 @@ spec: - mountPath: /etc/ssl/certs/odh-ca-bundle.crt name: odh-ca-cert subPath: odh-ca-bundle.crt - initContainers: - - command: - - sh - - -c - - cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\nDNS.5 = rayclient-deployment-name-$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).server-name">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext - image: quay.io/project-codeflare/ray:latest-py39-cu118 - name: create-cert - # securityContext: - # runAsUser: 1000 - # runAsGroup: 1000 - volumeMounts: - - name: ca-vol - mountPath: "/home/ray/workspace/ca" - readOnly: true - - name: server-cert - mountPath: "/home/ray/workspace/tls" - readOnly: false volumes: - - name: ca-vol - secret: - secretName: ca-secret-deployment-name - optional: false - - name: server-cert - emptyDir: {} - name: odh-trusted-ca-cert configMap: name: odh-trusted-ca-bundle @@ -250,40 +214,9 @@ spec: operator: In values: - "aw-kuberay" - initContainers: - # the env var $RAY_IP is set by the operator if missing, with the value of the head service name - - name: create-cert - image: quay.io/project-codeflare/ray:latest-py39-cu118 - command: - - sh - - -c - - cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf "authorityKeyIdentifier=keyid,issuer\nbasicConstraints=CA:FALSE\nsubjectAltName = @alt_names\n[alt_names]\nDNS.1 = 127.0.0.1\nDNS.2 = localhost\nDNS.3 = ${FQ_RAY_IP}\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext - # securityContext: - # runAsUser: 1000 - # runAsGroup: 1000 - volumeMounts: - - name: ca-vol - mountPath: "/home/ray/workspace/ca" - readOnly: true - - name: server-cert - mountPath: "/home/ray/workspace/tls" - readOnly: false containers: - name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' image: quay.io/project-codeflare/ray:latest-py39-cu118 - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: "0" - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt # environment variables to set in the container.Optional. # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/ lifecycle: @@ -319,12 +252,6 @@ spec: name: odh-ca-cert subPath: odh-ca-bundle.crt volumes: - - name: ca-vol - secret: - secretName: ca-secret-deployment-name - optional: false - - name: server-cert - emptyDir: {} - name: odh-trusted-ca-cert configMap: name: odh-trusted-ca-bundle diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index 2088b9102..91fd68114 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -85,20 +85,6 @@ def update_names(yaml, item, appwrapper_name, cluster_name, namespace): lower_meta["labels"]["workload.codeflare.dev/appwrapper"] = appwrapper_name lower_meta["name"] = cluster_name lower_meta["namespace"] = namespace - lower_spec = item.get("generictemplate", {}).get("spec") - if is_openshift_cluster(): - cookie_secret_env_var = { - "name": "COOKIE_SECRET", - "valueFrom": { - "secretKeyRef": { - "key": "cookie_secret", - "name": f"{cluster_name}-oauth-config", - } - }, - } - lower_spec["headGroupSpec"]["template"]["spec"]["containers"][0]["env"].append( - cookie_secret_env_var - ) def update_labels(yaml, instascale, instance_types): @@ -291,44 +277,13 @@ def update_ca_secret(ca_secret_item, cluster_name, namespace): data["ca.key"], data["ca.crt"] = generate_cert.generate_ca_cert(365) -def enable_local_interactive(resources, cluster_name, namespace): # pragma: no cover - from ..cluster.cluster import _get_ingress_domain - - ca_secret_item = resources["resources"].get("GenericItems")[1] +def enable_local_interactive(resources): # pragma: no cover item = resources["resources"].get("GenericItems")[0] - update_ca_secret(ca_secret_item, cluster_name, namespace) - # update_ca_secret_volumes - item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"]["volumes"][0][ - "secret" - ]["secretName"] = f"ca-secret-{cluster_name}" - item["generictemplate"]["spec"]["workerGroupSpecs"][0]["template"]["spec"][ - "volumes" - ][0]["secret"]["secretName"] = f"ca-secret-{cluster_name}" - # update_tls_env - item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"]["containers"][ - 0 - ]["env"][1]["value"] = "1" - item["generictemplate"]["spec"]["workerGroupSpecs"][0]["template"]["spec"][ - "containers" - ][0]["env"][1]["value"] = "1" - # update_init_container - command = item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"][ - "initContainers" - ][0].get("command")[2] - - command = command.replace("deployment-name", cluster_name) - - domain = "" ## FIX - We can't retrieve ingress domain - move init container to CFO - command = command.replace("server-name", domain) item["generictemplate"]["metadata"]["annotations"][ "sdk.codeflare.dev/local_interactive" ] = "True" - item["generictemplate"]["spec"]["headGroupSpec"]["template"]["spec"][ - "initContainers" - ][0].get("command")[2] = command - def del_from_list_by_name(l: list, target: typing.List[str]) -> list: return [x for x in l if x["name"] not in target] @@ -392,75 +347,6 @@ def write_user_appwrapper(user_yaml, output_file_name): print(f"Written to: {output_file_name}") -def enable_openshift_oauth(user_yaml, cluster_name, namespace): - config_check() - k8_client = api_config_handler() or client.ApiClient() - tls_mount_location = "/etc/tls/private" - oauth_port = 8443 - oauth_sa_name = f"{cluster_name}-oauth-proxy" - tls_secret_name = f"{cluster_name}-proxy-tls-secret" - tls_volume_name = "proxy-tls-secret" - port_name = "oauth-proxy" - oauth_sidecar = _create_oauth_sidecar_object( - namespace, - tls_mount_location, - oauth_port, - oauth_sa_name, - tls_volume_name, - port_name, - ) - tls_secret_volume = client.V1Volume( - name=tls_volume_name, - secret=client.V1SecretVolumeSource(secret_name=tls_secret_name), - ) - # allows for setting value of Cluster object when initializing object from an existing AppWrapper on cluster - user_yaml["metadata"]["annotations"] = user_yaml["metadata"].get("annotations", {}) - ray_headgroup_pod = user_yaml["spec"]["resources"]["GenericItems"][0][ - "generictemplate" - ]["spec"]["headGroupSpec"]["template"]["spec"] - ray_headgroup_pod["serviceAccount"] = oauth_sa_name - ray_headgroup_pod["volumes"] = ray_headgroup_pod.get("volumes", []) - - # we use a generic api client here so that the serialization function doesn't need to be mocked for unit tests - ray_headgroup_pod["volumes"].append( - client.ApiClient().sanitize_for_serialization(tls_secret_volume) - ) - ray_headgroup_pod["containers"].append( - client.ApiClient().sanitize_for_serialization(oauth_sidecar) - ) - - -def _create_oauth_sidecar_object( - namespace: str, - tls_mount_location: str, - oauth_port: int, - oauth_sa_name: str, - tls_volume_name: str, - port_name: str, -) -> client.V1Container: - return client.V1Container( - args=[ - f"--https-address=:{oauth_port}", - "--provider=openshift", - f"--openshift-service-account={oauth_sa_name}", - "--upstream=http://localhost:8265", - f"--tls-cert={tls_mount_location}/tls.crt", - f"--tls-key={tls_mount_location}/tls.key", - "--cookie-secret=$(COOKIE_SECRET)", - f'--openshift-delegate-urls={{"/":{{"resource":"pods","namespace":"{namespace}","verb":"get"}}}}', - ], - image="registry.redhat.io/openshift4/ose-oauth-proxy@sha256:1ea6a01bf3e63cdcf125c6064cbd4a4a270deaf0f157b3eabb78f60556840366", - name="oauth-proxy", - ports=[client.V1ContainerPort(container_port=oauth_port, name=port_name)], - resources=client.V1ResourceRequirements(limits=None, requests=None), - volume_mounts=[ - client.V1VolumeMount( - mount_path=tls_mount_location, name=tls_volume_name, read_only=True - ) - ], - ) - - def get_default_kueue_name(namespace: str): # If the local queue is set, use it. Otherwise, try to use the default queue. try: @@ -620,12 +506,13 @@ def generate_appwrapper( ) if local_interactive: - enable_local_interactive(resources, cluster_name, namespace) - else: - disable_raycluster_tls(resources["resources"]) + enable_local_interactive(resources) - if is_openshift_cluster(): - enable_openshift_oauth(user_yaml, cluster_name, namespace) + # else: + # disable_raycluster_tls(resources["resources"]) + + ca_secret_item = resources["resources"].get("GenericItems")[1] + update_ca_secret(ca_secret_item, cluster_name, namespace) directory_path = os.path.expanduser("~/.codeflare/resources/") outfile = os.path.join(directory_path, appwrapper_name + ".yaml") diff --git a/tests/test-case-no-mcad.yamls b/tests/test-case-no-mcad.yamls index e13752a44..bde0af91b 100644 --- a/tests/test-case-no-mcad.yamls +++ b/tests/test-case-no-mcad.yamls @@ -41,20 +41,7 @@ spec: values: - unit-test-cluster-ray containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:latest-py39-cu118 + - image: quay.io/project-codeflare/ray:latest-py39-cu118 imagePullPolicy: Always lifecycle: preStop: @@ -81,6 +68,12 @@ spec: memory: 8G nvidia.com/gpu: 0 volumeMounts: + - mountPath: /home/ray/workspace/ca + name: ca-vol + readOnly: true + - mountPath: /home/ray/workspace/tls + name: server-cert + readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -136,20 +129,7 @@ spec: values: - unit-test-cluster-ray containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:latest-py39-cu118 + - image: quay.io/project-codeflare/ray:latest-py39-cu118 lifecycle: preStop: exec: @@ -168,6 +148,12 @@ spec: memory: 5G nvidia.com/gpu: 7 volumeMounts: + - mountPath: /home/ray/workspace/ca + name: ca-vol + readOnly: true + - mountPath: /home/ray/workspace/tls + name: server-cert + readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -197,3 +183,14 @@ spec: name: odh-trusted-ca-bundle optional: true name: odh-ca-cert +--- +apiVersion: v1 +data: + ca.crt: ca-field + ca.key: ca-field +kind: Secret +metadata: + labels: + odh-ray-cluster-service: unit-test-cluster-ray-head-svc + name: ca-secret-unit-test-cluster-ray + namespace: ns diff --git a/tests/test-case-prio.yaml b/tests/test-case-prio.yaml index 10e161dee..b3df12704 100644 --- a/tests/test-case-prio.yaml +++ b/tests/test-case-prio.yaml @@ -71,20 +71,7 @@ spec: values: - prio-test-cluster containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:latest-py39-cu118 + - image: quay.io/project-codeflare/ray:latest-py39-cu118 imagePullPolicy: Always lifecycle: preStop: @@ -111,6 +98,12 @@ spec: memory: 8G nvidia.com/gpu: 0 volumeMounts: + - mountPath: /home/ray/workspace/ca + name: ca-vol + readOnly: true + - mountPath: /home/ray/workspace/tls + name: server-cert + readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -167,20 +160,7 @@ spec: values: - prio-test-cluster containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:latest-py39-cu118 + - image: quay.io/project-codeflare/ray:latest-py39-cu118 lifecycle: preStop: exec: @@ -199,6 +179,12 @@ spec: memory: 5G nvidia.com/gpu: 7 volumeMounts: + - mountPath: /home/ray/workspace/ca + name: ca-vol + readOnly: true + - mountPath: /home/ray/workspace/tls + name: server-cert + readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -230,4 +216,16 @@ spec: optional: true name: odh-ca-cert replicas: 1 + - generictemplate: + apiVersion: v1 + data: + ca.crt: ca-field + ca.key: ca-field + kind: Secret + metadata: + labels: + odh-ray-cluster-service: prio-test-cluster-head-svc + name: ca-secret-prio-test-cluster + namespace: ns + replicas: 1 Items: [] diff --git a/tests/test-case.yaml b/tests/test-case.yaml index 78d2e4a54..5ff666445 100644 --- a/tests/test-case.yaml +++ b/tests/test-case.yaml @@ -70,20 +70,7 @@ spec: values: - unit-test-cluster containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:latest-py39-cu118 + - image: quay.io/project-codeflare/ray:latest-py39-cu118 imagePullPolicy: Always lifecycle: preStop: @@ -110,6 +97,12 @@ spec: memory: 8G nvidia.com/gpu: 0 volumeMounts: + - mountPath: /home/ray/workspace/ca + name: ca-vol + readOnly: true + - mountPath: /home/ray/workspace/tls + name: server-cert + readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -165,20 +158,7 @@ spec: values: - unit-test-cluster containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:latest-py39-cu118 + - image: quay.io/project-codeflare/ray:latest-py39-cu118 lifecycle: preStop: exec: @@ -197,6 +177,12 @@ spec: memory: 5G nvidia.com/gpu: 7 volumeMounts: + - mountPath: /home/ray/workspace/ca + name: ca-vol + readOnly: true + - mountPath: /home/ray/workspace/tls + name: server-cert + readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -227,4 +213,16 @@ spec: optional: true name: odh-ca-cert replicas: 1 + - generictemplate: + apiVersion: v1 + data: + ca.crt: ca-field + ca.key: ca-field + kind: Secret + metadata: + labels: + odh-ray-cluster-service: unit-test-cluster-head-svc + name: ca-secret-unit-test-cluster + namespace: ns + replicas: 1 Items: [] diff --git a/tests/test-default-appwrapper.yaml b/tests/test-default-appwrapper.yaml index ecab5eac6..a4d5648c4 100644 --- a/tests/test-default-appwrapper.yaml +++ b/tests/test-default-appwrapper.yaml @@ -59,20 +59,7 @@ spec: template: spec: containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:latest-py39-cu118 + - image: quay.io/project-codeflare/ray:latest-py39-cu118 imagePullPolicy: Always lifecycle: preStop: @@ -99,6 +86,12 @@ spec: memory: 8G nvidia.com/gpu: 0 volumeMounts: + - mountPath: /home/ray/workspace/ca + name: ca-vol + readOnly: true + - mountPath: /home/ray/workspace/tls + name: server-cert + readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -144,20 +137,7 @@ spec: key: value spec: containers: - - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: RAY_USE_TLS - value: '0' - - name: RAY_TLS_SERVER_CERT - value: /home/ray/workspace/tls/server.crt - - name: RAY_TLS_SERVER_KEY - value: /home/ray/workspace/tls/server.key - - name: RAY_TLS_CA_CERT - value: /home/ray/workspace/tls/ca.crt - image: quay.io/project-codeflare/ray:latest-py39-cu118 + - image: quay.io/project-codeflare/ray:latest-py39-cu118 lifecycle: preStop: exec: @@ -176,6 +156,12 @@ spec: memory: 2G nvidia.com/gpu: 0 volumeMounts: + - mountPath: /home/ray/workspace/ca + name: ca-vol + readOnly: true + - mountPath: /home/ray/workspace/tls + name: server-cert + readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -205,4 +191,16 @@ spec: optional: true name: odh-ca-cert replicas: 1 + - generictemplate: + apiVersion: v1 + data: + ca.crt: ca-field + ca.key: ca-field + kind: Secret + metadata: + labels: + odh-ray-cluster-service: unit-test-default-cluster-head-svc + name: ca-secret-unit-test-default-cluster + namespace: opendatahub + replicas: 1 Items: [] diff --git a/tests/unit_test.py b/tests/unit_test.py index 935cdd100..19a920819 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -75,6 +75,7 @@ is_openshift_cluster, read_template, enable_local_interactive, + write_components, ) import openshift @@ -260,9 +261,65 @@ def test_config_creation(): assert config.local_interactive == False +def ca_secret_support(path, mcad: bool): + # Given that the secret is always random we need to set it to a static value for the tests to pass + if mcad: + with open(path, "r") as file: + try: + yaml_file = yaml.safe_load(file) + except yaml.YAMLError as exc: + print(exc) + resources = yaml_file.get("spec", "resources") + ca_secret_item = resources["resources"].get("GenericItems")[1] + data = ca_secret_item.get("generictemplate", {}).get("data") + data["ca.key"] = "ca-field" + data["ca.crt"] = "ca-field" + with open(path, "w") as outfile: + yaml.dump(yaml_file, outfile, default_flow_style=False) + else: + # Load the YAML file + with open(path, "r") as f: + data = list(yaml.safe_load_all(f)) + + # Find the Secret entry and update the fields + for item in data: + if item.get("kind") == "Secret": + item["data"]["ca.crt"] = "ca-field" + item["data"]["ca.key"] = "ca-field" + break + with open(path, "w") as f: + for item in data: + f.write("---\n") + yaml.dump(item, f, default_flow_style=False) + + +def ca_secret_support_no_write(yaml_file, mcad: bool): + if mcad: + file = yaml.safe_load(yaml_file) + resources = file.get("spec", "resources") + + ca_secret_item = resources["resources"].get("GenericItems")[1] + data = ca_secret_item.get("generictemplate", {}).get("data") + data["ca.key"] = "ca-field" + data["ca.crt"] = "ca-field" + return file + + else: + data = list(yaml.safe_load_all(yaml_file)) + for item in data: + if item.get("kind") == "Secret": + item["data"]["ca.crt"] = "ca-field" + item["data"]["ca.key"] = "ca-field" + break + + resources = "---\n" + "---\n".join([yaml.dump(item) for item in data]) + return resources + + def test_cluster_creation(mocker): mocker.patch("kubernetes.client.ApisApi.get_api_versions") cluster = createClusterWithConfig(mocker) + ca_secret_support(f"{aw_dir}unit-test-cluster.yaml", True) assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster.yaml" assert cluster.app_wrapper_name == "unit-test-cluster" assert filecmp.cmp( @@ -326,8 +383,10 @@ def test_cluster_creation_no_mcad(mocker): config.write_to_file = True config.mcad = False cluster = Cluster(config) + assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-ray.yaml" assert cluster.app_wrapper_name == "unit-test-cluster-ray" + ca_secret_support(cluster.app_wrapper_yaml, False) assert filecmp.cmp( f"{aw_dir}unit-test-cluster-ray.yaml", f"{parent}/tests/test-case-no-mcad.yamls", @@ -349,6 +408,7 @@ def test_cluster_creation_no_mcad_local_queue(mocker): config.write_to_file = True config.local_queue = "local-queue-default" cluster = Cluster(config) + ca_secret_support(cluster.app_wrapper_yaml, False) assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-ray.yaml" assert cluster.app_wrapper_name == "unit-test-cluster-ray" assert filecmp.cmp( @@ -377,7 +437,11 @@ def test_cluster_creation_no_mcad_local_queue(mocker): cluster = Cluster(config) test_resources = [] expected_resources = [] - test_aw = yaml.load_all(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) + + test_aw = yaml.load_all( + ca_secret_support_no_write(cluster.app_wrapper_yaml, False), + Loader=yaml.FullLoader, + ) for resource in test_aw: test_resources.append(resource) with open( @@ -404,6 +468,7 @@ def test_cluster_creation_priority(mocker): return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, ) cluster = Cluster(config) + ca_secret_support(cluster.app_wrapper_yaml, True) assert cluster.app_wrapper_yaml == f"{aw_dir}prio-test-cluster.yaml" assert cluster.app_wrapper_name == "prio-test-cluster" assert filecmp.cmp( @@ -425,7 +490,8 @@ def test_default_cluster_creation(mocker): mcad=True, ) cluster = Cluster(default_config) - test_aw = yaml.safe_load(cluster.app_wrapper_yaml) + test_aw = ca_secret_support_no_write(cluster.app_wrapper_yaml, True) + with open( f"{parent}/tests/test-default-appwrapper.yaml", ) as f: @@ -534,16 +600,12 @@ def test_cluster_up_down(mocker): def test_cluster_up_down_no_mcad(mocker): + mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") + mocker.patch("kubernetes.client.ApisApi.get_api_versions") mocker.patch( "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object", return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"), ) - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch("kubernetes.config.load_kube_config", return_value="ignore") - mocker.patch( - "kubernetes.client.CustomObjectsApi.get_cluster_custom_object", - return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, - ) mocker.patch( "kubernetes.client.CustomObjectsApi.create_namespaced_custom_object", side_effect=arg_check_apply_effect, @@ -552,6 +614,12 @@ def test_cluster_up_down_no_mcad(mocker): "kubernetes.client.CustomObjectsApi.delete_namespaced_custom_object", side_effect=arg_check_del_effect, ) + mocker.patch( + "kubernetes.client.CoreV1Api.create_namespaced_secret", + ) + mocker.patch( + "kubernetes.client.CoreV1Api.delete_namespaced_secret", + ) mocker.patch( "kubernetes.client.CustomObjectsApi.list_cluster_custom_object", return_value={"items": []}, @@ -3128,37 +3196,6 @@ def test_export_env(): # assert ca_secret["metadata"]["name"] == f"ca-secret-{cluster_name}" # assert ca_secret["metadata"]["namespace"] == namespace - -def test_gen_app_wrapper_with_oauth(mocker: MockerFixture): - mocker.patch("kubernetes.client.ApisApi.get_api_versions") - mocker.patch( - "codeflare_sdk.cluster.cluster.get_current_namespace", - return_value="opendatahub", - ) - mocker.patch( - "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", return_value=True - ) - write_user_appwrapper = MagicMock() - mocker.patch( - "codeflare_sdk.utils.generate_yaml.write_user_appwrapper", write_user_appwrapper - ) - Cluster( - ClusterConfiguration( - "test_cluster", - image="quay.io/project-codeflare/ray:latest-py39-cu118", - write_to_file=True, - mcad=True, - ) - ) - user_yaml = write_user_appwrapper.call_args.args[0] - assert any( - container["name"] == "oauth-proxy" - for container in user_yaml["spec"]["resources"]["GenericItems"][0][ - "generictemplate" - ]["spec"]["headGroupSpec"]["template"]["spec"]["containers"] - ) - - """ Ray Jobs tests """ From 9cee6115bec478913b9b8758f36c7cff35805c71 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Thu, 18 Apr 2024 17:12:03 +0100 Subject: [PATCH 2/5] Review changes --- pyproject.toml | 1 - src/codeflare_sdk.egg-info/PKG-INFO | 2 +- src/codeflare_sdk/utils/generate_yaml.py | 3 --- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ee50a48c6..c47e466a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,6 @@ pytest-timeout = "2.2.0" [tool.pytest.ini_options] filterwarnings = [ "ignore::DeprecationWarning:pkg_resources", - "ignore::DeprecationWarning", "ignore:pkg_resources is deprecated as an API:DeprecationWarning", ] markers = [ diff --git a/src/codeflare_sdk.egg-info/PKG-INFO b/src/codeflare_sdk.egg-info/PKG-INFO index 27ec5cbfa..c4061c623 100644 --- a/src/codeflare_sdk.egg-info/PKG-INFO +++ b/src/codeflare_sdk.egg-info/PKG-INFO @@ -1,4 +1,4 @@ Metadata-Version: 2.1 -Name: codeflare_sdk +Name: codeflare-sdk Version: 0.0.0 License-File: LICENSE diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index 91fd68114..7c00defcf 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -508,9 +508,6 @@ def generate_appwrapper( if local_interactive: enable_local_interactive(resources) - # else: - # disable_raycluster_tls(resources["resources"]) - ca_secret_item = resources["resources"].get("GenericItems")[1] update_ca_secret(ca_secret_item, cluster_name, namespace) From 043fdf29c24afe17f30587c3e5e3497367f5a23c Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Thu, 18 Apr 2024 18:46:51 +0100 Subject: [PATCH 3/5] Remove local_interactive --- docs/cluster-configuration.md | 28 ---- src/codeflare_sdk/cluster/cluster.py | 17 +-- src/codeflare_sdk/cluster/config.py | 1 - .../templates/base-template.yaml | 2 - src/codeflare_sdk/utils/generate_yaml.py | 12 -- tests/test-case-bad.yaml | 2 - tests/test-case-no-mcad.yamls | 2 - tests/test-case-prio.yaml | 2 - tests/test-case.yaml | 2 - tests/test-default-appwrapper.yaml | 2 - tests/unit_test.py | 128 ------------------ 11 files changed, 2 insertions(+), 196 deletions(-) diff --git a/docs/cluster-configuration.md b/docs/cluster-configuration.md index bb058fa4e..7684db2ca 100644 --- a/docs/cluster-configuration.md +++ b/docs/cluster-configuration.md @@ -22,39 +22,11 @@ cluster = Cluster(ClusterConfiguration( image="quay.io/project-codeflare/ray:latest-py39-cu118", # Mandatory Field instascale=False, # Default False machine_types=["m5.xlarge", "g4dn.xlarge"], - ingress_domain="example.com" # Default None, Mandatory for Vanilla Kubernetes Clusters - ingress_domain is ignored on OpenShift Clusters as a route is created. - local_interactive=False, # Default False )) ``` -Note: On OpenShift, the `ingress_domain` is only required when `local_interactive` is enabled. - This may change soon. Upon creating a cluster configuration with `mcad=True` an appwrapper will be created featuring the Ray Cluster and any Routes, Ingresses or Secrets that are needed to be created along side it.
From there a user can call `cluster.up()` and `cluster.down()` to create and remove the appwrapper thus creating and removing the Ray Cluster. In cases where `mcad=False` a yaml file will be created with the individual Ray Cluster, Route/Ingress and Secret included.
The Ray Cluster and service will be created by KubeRay directly and the other components will be individually created. - -## Ray Cluster Configuration in a Vanilla Kubernetes environment (Non-OpenShift) -To create a Ray Cluster using the CodeFlare SDK in a Vanilla Kubernetes environment an `ingress_domain` must be passed in the Cluster Configuration. -This is used for the creation of the Ray Dashboard and Client ingresses. - -`ingress_options` can be passed to create a custom Ray Dashboard ingress, `ingress_domain` is still a required variable for the Client route/ingress. -An example of `ingress_options` would look like this. - -``` -ingress_options = { - "ingresses": [ - { - "ingressName": "", - "port": , - "pathType": "", - "path": "", - "host":"", - "annotations": { - "foo": "bar", - "foo": "bar", - } - } - ] -} -``` diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index 24cbf9a71..68474eeaa 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -179,7 +179,6 @@ def create_app_wrapper(self): mcad = self.config.mcad instance_types = self.config.machine_types env = self.config.envs - local_interactive = self.config.local_interactive image_pull_secrets = self.config.image_pull_secrets dispatch_priority = self.config.dispatch_priority write_to_file = self.config.write_to_file @@ -203,7 +202,6 @@ def create_app_wrapper(self): mcad=mcad, instance_types=instance_types, env=env, - local_interactive=local_interactive, image_pull_secrets=image_pull_secrets, dispatch_priority=dispatch_priority, priority_val=priority_val, @@ -479,13 +477,6 @@ def from_k8_cluster_object( verify_tls=True, ): config_check() - if ( - rc["metadata"]["annotations"]["sdk.codeflare.dev/local_interactive"] - == "True" - ): - local_interactive = True - else: - local_interactive = False machine_types = ( rc["metadata"]["labels"]["orderedinstance"].split("_") if "orderedinstance" in rc["metadata"]["labels"] @@ -526,7 +517,6 @@ def from_k8_cluster_object( image=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][ 0 ]["image"], - local_interactive=local_interactive, mcad=mcad, write_to_file=write_to_file, verify_tls=verify_tls, @@ -534,11 +524,8 @@ def from_k8_cluster_object( return Cluster(cluster_config) def local_client_url(self): - if self.config.local_interactive == True: - ingress_domain = _get_ingress_domain(self) - return f"ray://{ingress_domain}" - else: - return "None" + ingress_domain = _get_ingress_domain(self) + return f"ray://{ingress_domain}" def _component_resources_up( self, namespace: str, api_instance: client.CustomObjectsApi diff --git a/src/codeflare_sdk/cluster/config.py b/src/codeflare_sdk/cluster/config.py index f6bcac89c..e4d046f93 100644 --- a/src/codeflare_sdk/cluster/config.py +++ b/src/codeflare_sdk/cluster/config.py @@ -49,7 +49,6 @@ class ClusterConfiguration: mcad: bool = False envs: dict = field(default_factory=dict) image: str = "" - local_interactive: bool = False image_pull_secrets: list = field(default_factory=list) dispatch_priority: str = None write_to_file: bool = False diff --git a/src/codeflare_sdk/templates/base-template.yaml b/src/codeflare_sdk/templates/base-template.yaml index a93ec3a70..d034ab3bc 100644 --- a/src/codeflare_sdk/templates/base-template.yaml +++ b/src/codeflare_sdk/templates/base-template.yaml @@ -40,8 +40,6 @@ spec: apiVersion: ray.io/v1 kind: RayCluster metadata: - annotations: - sdk.codeflare.dev/local_interactive: "False" labels: workload.codeflare.dev/appwrapper: "aw-kuberay" controller-tools.k8s.io: "1.0" diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index 7c00defcf..6d59196ba 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -277,14 +277,6 @@ def update_ca_secret(ca_secret_item, cluster_name, namespace): data["ca.key"], data["ca.crt"] = generate_cert.generate_ca_cert(365) -def enable_local_interactive(resources): # pragma: no cover - item = resources["resources"].get("GenericItems")[0] - - item["generictemplate"]["metadata"]["annotations"][ - "sdk.codeflare.dev/local_interactive" - ] = "True" - - def del_from_list_by_name(l: list, target: typing.List[str]) -> list: return [x for x in l if x["name"] not in target] @@ -454,7 +446,6 @@ def generate_appwrapper( mcad: bool, instance_types: list, env, - local_interactive: bool, image_pull_secrets: list, dispatch_priority: str, priority_val: int, @@ -505,9 +496,6 @@ def generate_appwrapper( head_gpus, ) - if local_interactive: - enable_local_interactive(resources) - ca_secret_item = resources["resources"].get("GenericItems")[1] update_ca_secret(ca_secret_item, cluster_name, namespace) diff --git a/tests/test-case-bad.yaml b/tests/test-case-bad.yaml index aeccf5194..6e969e01b 100644 --- a/tests/test-case-bad.yaml +++ b/tests/test-case-bad.yaml @@ -32,8 +32,6 @@ spec: apiVersion: ray.io/v1 kind: RayCluster metadata: - annotations: - sdk.codeflare.dev/local_interactive: 'False' labels: workload.codeflare.dev/appwrapper: unit-test-cluster controller-tools.k8s.io: '1.0' diff --git a/tests/test-case-no-mcad.yamls b/tests/test-case-no-mcad.yamls index bde0af91b..5ce787b2f 100644 --- a/tests/test-case-no-mcad.yamls +++ b/tests/test-case-no-mcad.yamls @@ -2,8 +2,6 @@ apiVersion: ray.io/v1 kind: RayCluster metadata: - annotations: - sdk.codeflare.dev/local_interactive: 'False' labels: controller-tools.k8s.io: '1.0' kueue.x-k8s.io/queue-name: local-queue-default diff --git a/tests/test-case-prio.yaml b/tests/test-case-prio.yaml index b3df12704..4c715fe08 100644 --- a/tests/test-case-prio.yaml +++ b/tests/test-case-prio.yaml @@ -32,8 +32,6 @@ spec: apiVersion: ray.io/v1 kind: RayCluster metadata: - annotations: - sdk.codeflare.dev/local_interactive: 'False' labels: controller-tools.k8s.io: '1.0' workload.codeflare.dev/appwrapper: prio-test-cluster diff --git a/tests/test-case.yaml b/tests/test-case.yaml index 5ff666445..120f1907c 100644 --- a/tests/test-case.yaml +++ b/tests/test-case.yaml @@ -31,8 +31,6 @@ spec: apiVersion: ray.io/v1 kind: RayCluster metadata: - annotations: - sdk.codeflare.dev/local_interactive: 'False' labels: controller-tools.k8s.io: '1.0' workload.codeflare.dev/appwrapper: unit-test-cluster diff --git a/tests/test-default-appwrapper.yaml b/tests/test-default-appwrapper.yaml index a4d5648c4..9a7c581c5 100644 --- a/tests/test-default-appwrapper.yaml +++ b/tests/test-default-appwrapper.yaml @@ -29,8 +29,6 @@ spec: apiVersion: ray.io/v1 kind: RayCluster metadata: - annotations: - sdk.codeflare.dev/local_interactive: 'False' labels: controller-tools.k8s.io: '1.0' workload.codeflare.dev/appwrapper: unit-test-default-cluster diff --git a/tests/unit_test.py b/tests/unit_test.py index 19a920819..74133cc31 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -74,7 +74,6 @@ gen_names, is_openshift_cluster, read_template, - enable_local_interactive, write_components, ) @@ -258,7 +257,6 @@ def test_config_creation(): assert config.image_pull_secrets == ["unit-test-pull-secret"] assert config.dispatch_priority == None assert config.mcad == True - assert config.local_interactive == False def ca_secret_support(path, mcad: bool): @@ -746,7 +744,6 @@ def test_local_client_url(mocker): cluster_config = ClusterConfiguration( name="unit-test-cluster-localinter", namespace="ns", - local_interactive=True, write_to_file=True, ) cluster = Cluster(cluster_config) @@ -1062,9 +1059,6 @@ def get_ray_obj(group, version, namespace, plural, cls=None): "metadata": { "creationTimestamp": "2024-03-05T09:55:37Z", "generation": 1, - "annotations": { - "sdk.codeflare.dev/local_interactive": "True", - }, "labels": { "appwrapper.mcad.ibm.com": "quicktest", "controller-tools.k8s.io": "1.0", @@ -1874,9 +1868,6 @@ def get_aw_obj(group, version, namespace, plural): "apiVersion": "ray.io/v1", "kind": "RayCluster", "metadata": { - "annotations": { - "sdk.codeflare.dev/local_interactive": "False" - }, "labels": { "workload.codeflare.dev/appwrapper": "quicktest1", "controller-tools.k8s.io": "1.0", @@ -2204,9 +2195,6 @@ def get_aw_obj(group, version, namespace, plural): "apiVersion": "ray.io/v1", "kind": "RayCluster", "metadata": { - "annotations": { - "sdk.codeflare.dev/local_interactive": "False" - }, "labels": { "workload.codeflare.dev/appwrapper": "quicktest2", "controller-tools.k8s.io": "1.0", @@ -2518,7 +2506,6 @@ def custom_side_effect(group, version, namespace, plural, **kwargs): assert cluster_config.min_cpus == 1 and cluster_config.max_cpus == 1 assert cluster_config.min_memory == 2 and cluster_config.max_memory == 2 assert cluster_config.num_gpus == 0 - assert cluster_config.local_interactive == True assert ( cluster_config.image == "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103" @@ -2552,7 +2539,6 @@ def test_get_cluster(mocker): assert cluster_config.min_memory == 2 and cluster_config.max_memory == 2 assert cluster_config.num_gpus == 0 assert cluster_config.instascale - assert cluster_config.local_interactive assert ( cluster_config.image == "ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103" @@ -3082,120 +3068,6 @@ def test_export_env(): ) -# def test_enable_local_interactive(mocker): -# template = f"{parent}/src/codeflare_sdk/templates/base-template.yaml" -# user_yaml = read_template(template) -# aw_spec = user_yaml.get("spec", None) -# cluster_name = "test-enable-local" -# namespace = "default" -# ingress_domain = "mytest.domain" -# mocker.patch("kubernetes.client.ApisApi.get_api_versions") -# mocker.patch( -# "codeflare_sdk.utils.generate_yaml.is_openshift_cluster", return_value=False -# ) -# volume_mounts = [ -# {"name": "ca-vol", "mountPath": "/home/ray/workspace/ca", "readOnly": True}, -# { -# "name": "server-cert", -# "mountPath": "/home/ray/workspace/tls", -# "readOnly": False, -# }, -# ] -# volumes = [ -# { -# "name": "ca-vol", -# "secret": {"secretName": "ca-secret-test-enable-local"}, -# "optional": False, -# }, -# {"name": "server-cert", "emptyDir": {}}, -# { -# "name": "odh-trusted-ca-cert", -# "configMap": { -# "name": "odh-trusted-ca-bundle", -# "items": [ -# {"key": "ca-bundle.crt", "path": "odh-trusted-ca-bundle.crt"} -# ], -# "optional": True, -# }, -# }, -# { -# "name": "odh-ca-cert", -# "configMap": { -# "name": "odh-trusted-ca-bundle", -# "items": [{"key": "odh-ca-bundle.crt", "path": "odh-ca-bundle.crt"}], -# "optional": True, -# }, -# }, -# ] -# tls_env = [ -# {"name": "RAY_USE_TLS", "value": "1"}, -# {"name": "RAY_TLS_SERVER_CERT", "value": "/home/ray/workspace/tls/server.crt"}, -# {"name": "RAY_TLS_SERVER_KEY", "value": "/home/ray/workspace/tls/server.key"}, -# {"name": "RAY_TLS_CA_CERT", "value": "/home/ray/workspace/tls/ca.crt"}, -# ] -# assert aw_spec != None -# enable_local_interactive(aw_spec, cluster_name, namespace, ingress_domain) -# head_group_spec = aw_spec["resources"]["GenericItems"][0]["generictemplate"][ -# "spec" -# ]["headGroupSpec"] -# worker_group_spec = aw_spec["resources"]["GenericItems"][0]["generictemplate"][ -# "spec" -# ]["workerGroupSpecs"] -# ca_secret = aw_spec["resources"]["GenericItems"][1]["generictemplate"] -# # At a minimal, make sure the following items are presented in the appwrapper spec.resources. -# # 1. headgroup has the initContainers command to generated TLS cert from the mounted CA cert. -# # Note: In this particular command, the DNS.5 in [alt_name] must match the exposed local_client_url: rayclient-{cluster_name}.{namespace}.{ingress_domain} -# assert ( -# head_group_spec["template"]["spec"]["initContainers"][0]["command"][2] -# == f"cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf \"authorityKeyIdentifier=keyid,issuer\\nbasicConstraints=CA:FALSE\\nsubjectAltName = @alt_names\\n[alt_names]\\nDNS.1 = 127.0.0.1\\nDNS.2 = localhost\\nDNS.3 = ${{FQ_RAY_IP}}\\nDNS.4 = $(awk 'END{{print $1}}' /etc/hosts)\\nDNS.5 = rayclient-{cluster_name}-$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).{ingress_domain}\">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext" -# ) -# assert ( -# head_group_spec["template"]["spec"]["initContainers"][0]["volumeMounts"] -# == volume_mounts -# ) -# assert head_group_spec["template"]["spec"]["volumes"] == volumes - -# # 2. workerGroupSpec has the initContainers command to generated TLS cert from the mounted CA cert. -# assert ( -# worker_group_spec[0]["template"]["spec"]["initContainers"][0]["command"][2] -# == "cd /home/ray/workspace/tls && openssl req -nodes -newkey rsa:2048 -keyout server.key -out server.csr -subj '/CN=ray-head' && printf \"authorityKeyIdentifier=keyid,issuer\\nbasicConstraints=CA:FALSE\\nsubjectAltName = @alt_names\\n[alt_names]\\nDNS.1 = 127.0.0.1\\nDNS.2 = localhost\\nDNS.3 = ${FQ_RAY_IP}\\nDNS.4 = $(awk 'END{print $1}' /etc/hosts)\">./domain.ext && cp /home/ray/workspace/ca/* . && openssl x509 -req -CA ca.crt -CAkey ca.key -in server.csr -out server.crt -days 365 -CAcreateserial -extfile domain.ext" -# ) -# assert ( -# worker_group_spec[0]["template"]["spec"]["initContainers"][0]["volumeMounts"] -# == volume_mounts -# ) -# assert worker_group_spec[0]["template"]["spec"]["volumes"] == volumes - -# # 3. Required Envs to enable TLS encryption between head and workers -# for i in range(len(tls_env)): -# assert ( -# head_group_spec["template"]["spec"]["containers"][0]["env"][i + 1]["name"] -# == tls_env[i]["name"] -# ) -# assert ( -# head_group_spec["template"]["spec"]["containers"][0]["env"][i + 1]["value"] -# == tls_env[i]["value"] -# ) -# assert ( -# worker_group_spec[0]["template"]["spec"]["containers"][0]["env"][i + 1][ -# "name" -# ] -# == tls_env[i]["name"] -# ) -# assert ( -# worker_group_spec[0]["template"]["spec"]["containers"][0]["env"][i + 1][ -# "value" -# ] -# == tls_env[i]["value"] -# ) - -# # 4. Secret with ca.crt and ca.key -# assert ca_secret["kind"] == "Secret" -# assert ca_secret["data"]["ca.crt"] != None -# assert ca_secret["data"]["ca.key"] != None -# assert ca_secret["metadata"]["name"] == f"ca-secret-{cluster_name}" -# assert ca_secret["metadata"]["namespace"] == namespace - """ Ray Jobs tests """ From 930bfb800d1b4bfde90ac60fe48c847c9b7fb1b7 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Fri, 19 Apr 2024 13:52:54 +0100 Subject: [PATCH 4/5] Removed ca generation from SDK --- src/codeflare_sdk/cluster/cluster.py | 13 --- .../templates/base-template.yaml | 12 --- src/codeflare_sdk/utils/generate_yaml.py | 60 ------------- tests/test-case-no-mcad.yamls | 11 --- tests/test-case-prio.yaml | 12 --- tests/test-case.yaml | 12 --- tests/test-default-appwrapper.yaml | 12 --- tests/unit_test.py | 84 ++----------------- 8 files changed, 8 insertions(+), 208 deletions(-) diff --git a/src/codeflare_sdk/cluster/cluster.py b/src/codeflare_sdk/cluster/cluster.py index 68474eeaa..295332ae4 100644 --- a/src/codeflare_sdk/cluster/cluster.py +++ b/src/codeflare_sdk/cluster/cluster.py @@ -665,13 +665,6 @@ def _delete_resources( plural="rayclusters", name=name, ) - elif resource["kind"] == "Secret": - name = resource["metadata"]["name"] - secret_instance = client.CoreV1Api(api_config_handler()) - secret_instance.delete_namespaced_secret( - namespace=namespace, - name=name, - ) def _create_resources(yamls, namespace: str, api_instance: client.CustomObjectsApi): @@ -684,12 +677,6 @@ def _create_resources(yamls, namespace: str, api_instance: client.CustomObjectsA plural="rayclusters", body=resource, ) - elif resource["kind"] == "Secret": - secret_instance = client.CoreV1Api(api_config_handler()) - secret_instance.create_namespaced_secret( - namespace=namespace, - body=resource, - ) def _check_aw_exists(name: str, namespace: str) -> bool: diff --git a/src/codeflare_sdk/templates/base-template.yaml b/src/codeflare_sdk/templates/base-template.yaml index d034ab3bc..2b83a8a5d 100644 --- a/src/codeflare_sdk/templates/base-template.yaml +++ b/src/codeflare_sdk/templates/base-template.yaml @@ -264,15 +264,3 @@ spec: - key: odh-ca-bundle.crt path: odh-ca-bundle.crt optional: true - - replicas: 1 - generictemplate: - apiVersion: v1 - data: - ca.crt: generated_crt - ca.key: generated_key - kind: Secret - metadata: - name: ca-secret-deployment-name - labels: - # allows me to return name of service that Ray operator creates - odh-ray-cluster-service: deployment-name-head-svc diff --git a/src/codeflare_sdk/utils/generate_yaml.py b/src/codeflare_sdk/utils/generate_yaml.py index 6d59196ba..95c3d04f0 100755 --- a/src/codeflare_sdk/utils/generate_yaml.py +++ b/src/codeflare_sdk/utils/generate_yaml.py @@ -266,67 +266,10 @@ def update_nodes( update_resources(spec, min_cpu, max_cpu, min_memory, max_memory, gpu) -def update_ca_secret(ca_secret_item, cluster_name, namespace): - from . import generate_cert - - metadata = ca_secret_item.get("generictemplate", {}).get("metadata") - metadata["name"] = f"ca-secret-{cluster_name}" - metadata["namespace"] = namespace - metadata["labels"]["odh-ray-cluster-service"] = f"{cluster_name}-head-svc" - data = ca_secret_item.get("generictemplate", {}).get("data") - data["ca.key"], data["ca.crt"] = generate_cert.generate_ca_cert(365) - - def del_from_list_by_name(l: list, target: typing.List[str]) -> list: return [x for x in l if x["name"] not in target] -def disable_raycluster_tls(resources): - generic_template_spec = resources["GenericItems"][0]["generictemplate"]["spec"] - - headGroupTemplateSpec = generic_template_spec["headGroupSpec"]["template"]["spec"] - headGroupTemplateSpec["volumes"] = del_from_list_by_name( - headGroupTemplateSpec.get("volumes", []), - ["ca-vol", "server-cert"], - ) - - c: dict - for c in generic_template_spec["headGroupSpec"]["template"]["spec"]["containers"]: - c["volumeMounts"] = del_from_list_by_name( - c.get("volumeMounts", []), ["ca-vol", "server-cert"] - ) - - if "initContainers" in generic_template_spec["headGroupSpec"]["template"]["spec"]: - del generic_template_spec["headGroupSpec"]["template"]["spec"]["initContainers"] - - for workerGroup in generic_template_spec.get("workerGroupSpecs", []): - workerGroupSpec = workerGroup["template"]["spec"] - workerGroupSpec["volumes"] = del_from_list_by_name( - workerGroupSpec.get("volumes", []), - ["ca-vol", "server-cert"], - ) - for c in workerGroup["template"]["spec"].get("containers", []): - c["volumeMounts"] = del_from_list_by_name( - c.get("volumeMounts", []), ["ca-vol", "server-cert"] - ) - - del generic_template_spec["workerGroupSpecs"][0]["template"]["spec"][ - "initContainers" - ] - - updated_items = [] - for i in resources["GenericItems"][:]: - if "rayclient-deployment-ingress" in i["generictemplate"]["metadata"]["name"]: - continue - if "rayclient-deployment-route" in i["generictemplate"]["metadata"]["name"]: - continue - if "ca-secret-deployment-name" in i["generictemplate"]["metadata"]["name"]: - continue - updated_items.append(i) - - resources["GenericItems"] = updated_items - - def write_user_appwrapper(user_yaml, output_file_name): # Create the directory if it doesn't exist directory_path = os.path.dirname(output_file_name) @@ -496,9 +439,6 @@ def generate_appwrapper( head_gpus, ) - ca_secret_item = resources["resources"].get("GenericItems")[1] - update_ca_secret(ca_secret_item, cluster_name, namespace) - directory_path = os.path.expanduser("~/.codeflare/resources/") outfile = os.path.join(directory_path, appwrapper_name + ".yaml") diff --git a/tests/test-case-no-mcad.yamls b/tests/test-case-no-mcad.yamls index 5ce787b2f..1883e9c5c 100644 --- a/tests/test-case-no-mcad.yamls +++ b/tests/test-case-no-mcad.yamls @@ -181,14 +181,3 @@ spec: name: odh-trusted-ca-bundle optional: true name: odh-ca-cert ---- -apiVersion: v1 -data: - ca.crt: ca-field - ca.key: ca-field -kind: Secret -metadata: - labels: - odh-ray-cluster-service: unit-test-cluster-ray-head-svc - name: ca-secret-unit-test-cluster-ray - namespace: ns diff --git a/tests/test-case-prio.yaml b/tests/test-case-prio.yaml index 4c715fe08..13485ed07 100644 --- a/tests/test-case-prio.yaml +++ b/tests/test-case-prio.yaml @@ -214,16 +214,4 @@ spec: optional: true name: odh-ca-cert replicas: 1 - - generictemplate: - apiVersion: v1 - data: - ca.crt: ca-field - ca.key: ca-field - kind: Secret - metadata: - labels: - odh-ray-cluster-service: prio-test-cluster-head-svc - name: ca-secret-prio-test-cluster - namespace: ns - replicas: 1 Items: [] diff --git a/tests/test-case.yaml b/tests/test-case.yaml index 120f1907c..060c354c6 100644 --- a/tests/test-case.yaml +++ b/tests/test-case.yaml @@ -211,16 +211,4 @@ spec: optional: true name: odh-ca-cert replicas: 1 - - generictemplate: - apiVersion: v1 - data: - ca.crt: ca-field - ca.key: ca-field - kind: Secret - metadata: - labels: - odh-ray-cluster-service: unit-test-cluster-head-svc - name: ca-secret-unit-test-cluster - namespace: ns - replicas: 1 Items: [] diff --git a/tests/test-default-appwrapper.yaml b/tests/test-default-appwrapper.yaml index 9a7c581c5..14261d0ce 100644 --- a/tests/test-default-appwrapper.yaml +++ b/tests/test-default-appwrapper.yaml @@ -189,16 +189,4 @@ spec: optional: true name: odh-ca-cert replicas: 1 - - generictemplate: - apiVersion: v1 - data: - ca.crt: ca-field - ca.key: ca-field - kind: Secret - metadata: - labels: - odh-ray-cluster-service: unit-test-default-cluster-head-svc - name: ca-secret-unit-test-default-cluster - namespace: opendatahub - replicas: 1 Items: [] diff --git a/tests/unit_test.py b/tests/unit_test.py index 74133cc31..3892f8e57 100644 --- a/tests/unit_test.py +++ b/tests/unit_test.py @@ -259,65 +259,9 @@ def test_config_creation(): assert config.mcad == True -def ca_secret_support(path, mcad: bool): - # Given that the secret is always random we need to set it to a static value for the tests to pass - if mcad: - with open(path, "r") as file: - try: - yaml_file = yaml.safe_load(file) - except yaml.YAMLError as exc: - print(exc) - resources = yaml_file.get("spec", "resources") - ca_secret_item = resources["resources"].get("GenericItems")[1] - data = ca_secret_item.get("generictemplate", {}).get("data") - data["ca.key"] = "ca-field" - data["ca.crt"] = "ca-field" - with open(path, "w") as outfile: - yaml.dump(yaml_file, outfile, default_flow_style=False) - else: - # Load the YAML file - with open(path, "r") as f: - data = list(yaml.safe_load_all(f)) - - # Find the Secret entry and update the fields - for item in data: - if item.get("kind") == "Secret": - item["data"]["ca.crt"] = "ca-field" - item["data"]["ca.key"] = "ca-field" - break - with open(path, "w") as f: - for item in data: - f.write("---\n") - yaml.dump(item, f, default_flow_style=False) - - -def ca_secret_support_no_write(yaml_file, mcad: bool): - if mcad: - file = yaml.safe_load(yaml_file) - resources = file.get("spec", "resources") - - ca_secret_item = resources["resources"].get("GenericItems")[1] - data = ca_secret_item.get("generictemplate", {}).get("data") - data["ca.key"] = "ca-field" - data["ca.crt"] = "ca-field" - return file - - else: - data = list(yaml.safe_load_all(yaml_file)) - for item in data: - if item.get("kind") == "Secret": - item["data"]["ca.crt"] = "ca-field" - item["data"]["ca.key"] = "ca-field" - break - - resources = "---\n" + "---\n".join([yaml.dump(item) for item in data]) - return resources - - def test_cluster_creation(mocker): mocker.patch("kubernetes.client.ApisApi.get_api_versions") cluster = createClusterWithConfig(mocker) - ca_secret_support(f"{aw_dir}unit-test-cluster.yaml", True) assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster.yaml" assert cluster.app_wrapper_name == "unit-test-cluster" assert filecmp.cmp( @@ -384,7 +328,6 @@ def test_cluster_creation_no_mcad(mocker): assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-ray.yaml" assert cluster.app_wrapper_name == "unit-test-cluster-ray" - ca_secret_support(cluster.app_wrapper_yaml, False) assert filecmp.cmp( f"{aw_dir}unit-test-cluster-ray.yaml", f"{parent}/tests/test-case-no-mcad.yamls", @@ -406,7 +349,6 @@ def test_cluster_creation_no_mcad_local_queue(mocker): config.write_to_file = True config.local_queue = "local-queue-default" cluster = Cluster(config) - ca_secret_support(cluster.app_wrapper_yaml, False) assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-ray.yaml" assert cluster.app_wrapper_name == "unit-test-cluster-ray" assert filecmp.cmp( @@ -428,27 +370,18 @@ def test_cluster_creation_no_mcad_local_queue(mocker): machine_types=["cpu.small", "gpu.large"], image_pull_secrets=["unit-test-pull-secret"], image="quay.io/project-codeflare/ray:latest-py39-cu118", - write_to_file=False, + write_to_file=True, mcad=False, local_queue="local-queue-default", ) cluster = Cluster(config) - test_resources = [] - expected_resources = [] - - test_aw = yaml.load_all( - ca_secret_support_no_write(cluster.app_wrapper_yaml, False), - Loader=yaml.FullLoader, - ) - for resource in test_aw: - test_resources.append(resource) - with open( + assert cluster.app_wrapper_yaml == f"{aw_dir}unit-test-cluster-ray.yaml" + assert cluster.app_wrapper_name == "unit-test-cluster-ray" + assert filecmp.cmp( + f"{aw_dir}unit-test-cluster-ray.yaml", f"{parent}/tests/test-case-no-mcad.yamls", - ) as f: - default_aw = yaml.load_all(f, Loader=yaml.FullLoader) - for resource in default_aw: - expected_resources.append(resource) - assert test_resources == expected_resources + shallow=True, + ) def test_cluster_creation_priority(mocker): @@ -466,7 +399,6 @@ def test_cluster_creation_priority(mocker): return_value={"spec": {"domain": "apps.cluster.awsroute.org"}}, ) cluster = Cluster(config) - ca_secret_support(cluster.app_wrapper_yaml, True) assert cluster.app_wrapper_yaml == f"{aw_dir}prio-test-cluster.yaml" assert cluster.app_wrapper_name == "prio-test-cluster" assert filecmp.cmp( @@ -488,7 +420,7 @@ def test_default_cluster_creation(mocker): mcad=True, ) cluster = Cluster(default_config) - test_aw = ca_secret_support_no_write(cluster.app_wrapper_yaml, True) + test_aw = yaml.load(cluster.app_wrapper_yaml, Loader=yaml.FullLoader) with open( f"{parent}/tests/test-default-appwrapper.yaml", From cf371dfe1e6a0c6976023a39f9f93db81736ce14 Mon Sep 17 00:00:00 2001 From: Bobbins228 Date: Fri, 19 Apr 2024 15:23:31 +0100 Subject: [PATCH 5/5] Removed ca volume mounts --- src/codeflare_sdk/templates/base-template.yaml | 12 ------------ tests/test-case-no-mcad.yamls | 12 ------------ tests/test-case-prio.yaml | 12 ------------ tests/test-case.yaml | 12 ------------ tests/test-default-appwrapper.yaml | 12 ------------ 5 files changed, 60 deletions(-) diff --git a/src/codeflare_sdk/templates/base-template.yaml b/src/codeflare_sdk/templates/base-template.yaml index 2b83a8a5d..356e3494e 100644 --- a/src/codeflare_sdk/templates/base-template.yaml +++ b/src/codeflare_sdk/templates/base-template.yaml @@ -139,12 +139,6 @@ spec: memory: "8G" nvidia.com/gpu: 0 volumeMounts: - - name: ca-vol - mountPath: "/home/ray/workspace/ca" - readOnly: true - - name: server-cert - mountPath: "/home/ray/workspace/tls" - readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -231,12 +225,6 @@ spec: memory: "12G" nvidia.com/gpu: "1" volumeMounts: - - name: ca-vol - mountPath: "/home/ray/workspace/ca" - readOnly: true - - name: server-cert - mountPath: "/home/ray/workspace/tls" - readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt diff --git a/tests/test-case-no-mcad.yamls b/tests/test-case-no-mcad.yamls index 1883e9c5c..aaf9324e6 100644 --- a/tests/test-case-no-mcad.yamls +++ b/tests/test-case-no-mcad.yamls @@ -66,12 +66,6 @@ spec: memory: 8G nvidia.com/gpu: 0 volumeMounts: - - mountPath: /home/ray/workspace/ca - name: ca-vol - readOnly: true - - mountPath: /home/ray/workspace/tls - name: server-cert - readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -146,12 +140,6 @@ spec: memory: 5G nvidia.com/gpu: 7 volumeMounts: - - mountPath: /home/ray/workspace/ca - name: ca-vol - readOnly: true - - mountPath: /home/ray/workspace/tls - name: server-cert - readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt diff --git a/tests/test-case-prio.yaml b/tests/test-case-prio.yaml index 13485ed07..a4d6e68f2 100644 --- a/tests/test-case-prio.yaml +++ b/tests/test-case-prio.yaml @@ -96,12 +96,6 @@ spec: memory: 8G nvidia.com/gpu: 0 volumeMounts: - - mountPath: /home/ray/workspace/ca - name: ca-vol - readOnly: true - - mountPath: /home/ray/workspace/tls - name: server-cert - readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -177,12 +171,6 @@ spec: memory: 5G nvidia.com/gpu: 7 volumeMounts: - - mountPath: /home/ray/workspace/ca - name: ca-vol - readOnly: true - - mountPath: /home/ray/workspace/tls - name: server-cert - readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt diff --git a/tests/test-case.yaml b/tests/test-case.yaml index 060c354c6..b97d12a49 100644 --- a/tests/test-case.yaml +++ b/tests/test-case.yaml @@ -95,12 +95,6 @@ spec: memory: 8G nvidia.com/gpu: 0 volumeMounts: - - mountPath: /home/ray/workspace/ca - name: ca-vol - readOnly: true - - mountPath: /home/ray/workspace/tls - name: server-cert - readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -175,12 +169,6 @@ spec: memory: 5G nvidia.com/gpu: 7 volumeMounts: - - mountPath: /home/ray/workspace/ca - name: ca-vol - readOnly: true - - mountPath: /home/ray/workspace/tls - name: server-cert - readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt diff --git a/tests/test-default-appwrapper.yaml b/tests/test-default-appwrapper.yaml index 14261d0ce..c390f619b 100644 --- a/tests/test-default-appwrapper.yaml +++ b/tests/test-default-appwrapper.yaml @@ -84,12 +84,6 @@ spec: memory: 8G nvidia.com/gpu: 0 volumeMounts: - - mountPath: /home/ray/workspace/ca - name: ca-vol - readOnly: true - - mountPath: /home/ray/workspace/tls - name: server-cert - readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt @@ -154,12 +148,6 @@ spec: memory: 2G nvidia.com/gpu: 0 volumeMounts: - - mountPath: /home/ray/workspace/ca - name: ca-vol - readOnly: true - - mountPath: /home/ray/workspace/tls - name: server-cert - readOnly: true - mountPath: /etc/pki/tls/certs/odh-trusted-ca-bundle.crt name: odh-trusted-ca-cert subPath: odh-trusted-ca-bundle.crt