From 05469cb719b3576d4167a8769e0bf5b1d0bb3968 Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Tue, 20 May 2025 09:21:43 +0100 Subject: [PATCH 1/2] fix: port forward for local interactive tests --- .github/workflows/e2e_tests.yaml | 8 ++- tests/e2e/local_interactive_sdk_kind_test.py | 59 +++++++++++++++----- tests/e2e/support.py | 2 - 3 files changed, 52 insertions(+), 17 deletions(-) diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml index fca6d6e72..cb6d4b8d1 100644 --- a/.github/workflows/e2e_tests.yaml +++ b/.github/workflows/e2e_tests.yaml @@ -104,8 +104,12 @@ jobs: kubectl create clusterrolebinding sdk-user-localqueue-creator --clusterrole=localqueue-creator --user=sdk-user kubectl create clusterrole list-secrets --verb=get,list --resource=secrets kubectl create clusterrolebinding sdk-user-list-secrets --clusterrole=list-secrets --user=sdk-user - kubectl create clusterrole pod-creator --verb=get,list --resource=pods + kubectl create clusterrole pod-creator --verb=get,list,watch --resource=pods kubectl create clusterrolebinding sdk-user-pod-creator --clusterrole=pod-creator --user=sdk-user + kubectl create clusterrole service-reader --verb=get,list,watch --resource=services + kubectl create clusterrolebinding sdk-user-service-reader --clusterrole=service-reader --user=sdk-user + kubectl create clusterrole port-forward-pods --verb=create --resource=pods/portforward + kubectl create clusterrolebinding sdk-user-port-forward-pods-binding --clusterrole=port-forward-pods --user=sdk-user kubectl config use-context sdk-user - name: Run e2e tests @@ -117,7 +121,7 @@ jobs: pip install poetry poetry install --with test,docs echo "Running e2e tests..." - poetry run pytest -v -s ./tests/e2e -m 'kind and nvidia_gpu' > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1 + poetry run pytest -v -s --log-cli-level=INFO ./tests/e2e/local_interactive_sdk_kind_test.py::TestRayLocalInteractiveKind::test_local_interactives_nvidia_gpu > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1 env: GRPC_DNS_RESOLVER: "native" diff --git a/tests/e2e/local_interactive_sdk_kind_test.py b/tests/e2e/local_interactive_sdk_kind_test.py index c20fd8793..eb5066db0 100644 --- a/tests/e2e/local_interactive_sdk_kind_test.py +++ b/tests/e2e/local_interactive_sdk_kind_test.py @@ -1,23 +1,31 @@ from codeflare_sdk import ( Cluster, ClusterConfiguration, - TokenAuthentication, generate_cert, ) import pytest import ray import math +import subprocess from support import * @pytest.mark.kind -class TestRayLocalInteractiveOauth: +class TestRayLocalInteractiveKind: def setup_method(self): initialize_kubernetes_client(self) + self.port_forward_process = None + + def cleanup_port_forward(self): + if self.port_forward_process: + self.port_forward_process.terminate() + self.port_forward_process.wait(timeout=10) + self.port_forward_process = None def teardown_method(self): + self.cleanup_port_forward() delete_namespace(self) delete_kueue_resources(self) @@ -39,6 +47,8 @@ def run_local_interactives( ): cluster_name = "test-ray-cluster-li" + ray.shutdown() + cluster = Cluster( ClusterConfiguration( name=cluster_name, @@ -49,25 +59,24 @@ def run_local_interactives( head_memory_requests=2, head_memory_limits=2, worker_cpu_requests="500m", - worker_cpu_limits=1, + worker_cpu_limits="500m", worker_memory_requests=1, worker_memory_limits=4, worker_extended_resource_requests={gpu_resource_name: number_of_gpus}, - write_to_file=True, verify_tls=False, ) ) + cluster.up() + cluster.wait_ready() + cluster.status() generate_cert.generate_tls_cert(cluster_name, self.namespace) generate_cert.export_env(cluster_name, self.namespace) print(cluster.local_client_url()) - ray.shutdown() - ray.init(address=cluster.local_client_url(), logging_level="DEBUG") - @ray.remote(num_gpus=number_of_gpus / 2) def heavy_calculation_part(num_iterations): result = 0.0 @@ -84,10 +93,34 @@ def heavy_calculation(num_iterations): ) return sum(results) - ref = heavy_calculation.remote(3000) - result = ray.get(ref) - assert result == 1789.4644387076714 - ray.cancel(ref) - ray.shutdown() + # Attempt to port forward + try: + local_port = "20001" + ray_client_port = "10001" + + port_forward_cmd = [ + "kubectl", + "port-forward", + "-n", + self.namespace, + f"svc/{cluster_name}-head-svc", + f"{local_port}:{ray_client_port}", + ] + self.port_forward_process = subprocess.Popen( + port_forward_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) + + client_url = f"ray://localhost:{local_port}" + cluster.status() + + ray.init(address=client_url, logging_level="INFO") + + ref = heavy_calculation.remote(3000) + result = ray.get(ref) + assert result == 1789.4644387076714 + ray.cancel(ref) + ray.shutdown() - cluster.down() + cluster.down() + finally: + self.cleanup_port_forward() diff --git a/tests/e2e/support.py b/tests/e2e/support.py index d7bee8054..46dd4e703 100644 --- a/tests/e2e/support.py +++ b/tests/e2e/support.py @@ -1,11 +1,9 @@ -import json import os import random import string import subprocess from codeflare_sdk import get_cluster from kubernetes import client, config -import kubernetes.client from codeflare_sdk.common.kubernetes_cluster.kube_api_helpers import ( _kube_api_error_handling, ) From 7ec22e2a7d4bb7268872aface7cae269b18da1cf Mon Sep 17 00:00:00 2001 From: kryanbeane Date: Tue, 3 Jun 2025 16:27:18 +0100 Subject: [PATCH 2/2] fix: run all E2Es --- .github/workflows/e2e_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e_tests.yaml b/.github/workflows/e2e_tests.yaml index cb6d4b8d1..fc80af56f 100644 --- a/.github/workflows/e2e_tests.yaml +++ b/.github/workflows/e2e_tests.yaml @@ -121,7 +121,7 @@ jobs: pip install poetry poetry install --with test,docs echo "Running e2e tests..." - poetry run pytest -v -s --log-cli-level=INFO ./tests/e2e/local_interactive_sdk_kind_test.py::TestRayLocalInteractiveKind::test_local_interactives_nvidia_gpu > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1 + poetry run pytest -v -s ./tests/e2e -m 'kind and nvidia_gpu' > ${CODEFLARE_TEST_OUTPUT_DIR}/pytest_output.log 2>&1 env: GRPC_DNS_RESOLVER: "native"