Skip to content

Commit 6f60dfc

Browse files
ChristianZaccariaSrihari1192
authored and committed
Added local_interactive to cluster-configuration doc
1 parent 587f4c4 commit 6f60dfc

File tree

3 files changed

+83
-79
lines changed

3 files changed

+83
-79
lines changed

docs/cluster-configuration.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ cluster = Cluster(ClusterConfiguration(
2323
instascale=False, # Default False
2424
machine_types=["m5.xlarge", "g4dn.xlarge"],
2525
ingress_domain="example.com", # Default None, Mandatory for Vanilla Kubernetes Clusters - ingress_domain is ignored on OpenShift Clusters as a route is created.
26+
local_interactive=False, # Default False
2627
))
2728
```
29+
Note: On OpenShift, `ingress_domain` is only required when `local_interactive` is enabled. This may change soon.
2830

2931
Upon creating a cluster configuration with `mcad=True`, an appwrapper will be created featuring the Ray Cluster and any Routes, Ingresses or Secrets that need to be created alongside it.<br>
3032
From there a user can call `cluster.up()` and `cluster.down()` to create and remove the appwrapper thus creating and removing the Ray Cluster.
@@ -36,7 +38,7 @@ The Ray Cluster and service will be created by KubeRay directly and the other co
3638
To create a Ray Cluster using the CodeFlare SDK in a Vanilla Kubernetes environment, an `ingress_domain` must be passed in the Cluster Configuration.
3739
This is used for the creation of the Ray Dashboard and Client ingresses.
3840

39-
`ingress_options` can be passed to create a custom Ray Dashboard ingress, `ingress_domain` is still a required variable for the Client ingress.
41+
`ingress_options` can be passed to create a custom Ray Dashboard ingress; `ingress_domain` is still a required variable for the Client route/ingress.
4042
An example of `ingress_options` would look like this.
4143

4244
```
Lines changed: 62 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import sys
22
import os
3+
import requests
34

45
from time import sleep
56

@@ -9,6 +10,63 @@
910
from codeflare_sdk.job.jobs import DDPJobDefinition
1011
from codeflare_sdk.cluster.auth import TokenAuthentication
1112

13+
14+
def submit_ddp_job(cluster, cluster_token, server_url):
15+
auth = TokenAuthentication(token=cluster_token, server=server_url, skip_tls=True)
16+
auth.login()
17+
18+
jobdef = DDPJobDefinition(
19+
name="mnist",
20+
script="mnist.py",
21+
scheduler_args={"requirements": "requirements.txt"},
22+
)
23+
job = jobdef.submit(cluster)
24+
25+
done = False
26+
time = 0
27+
timeout = 900
28+
while not done:
29+
status = job.status()
30+
if is_terminal(status.state):
31+
break
32+
if not done:
33+
print(status)
34+
if timeout and time >= timeout:
35+
raise TimeoutError(f"job has timed out after waiting {timeout}s")
36+
sleep(5)
37+
time += 5
38+
39+
print(f"Job has completed: {status.state}")
40+
print(job.logs())
41+
cluster.down()
42+
43+
if not status.state == AppState.SUCCEEDED:
44+
exit(1)
45+
else:
46+
exit(0)
47+
48+
49+
def submit_job_post_request(cluster, dashboard_url):
50+
jobdata = {
51+
"entrypoint": "python mnist.py",
52+
"runtime_env": {"working_dir": "workdir", "pip": "requirements.txt"},
53+
}
54+
try:
55+
response = requests.post(
56+
dashboard_url + "/api/jobs/", verify=False, json=jobdata
57+
)
58+
cluster.down()
59+
if response.status_code == 403:
60+
exit(0)
61+
else:
62+
response.raise_for_status()
63+
exit(1)
64+
65+
except Exception as e:
66+
print(f"An unexpected error occurred: {e}")
67+
exit(1)
68+
69+
1270
namespace = sys.argv[1]
1371
ray_image = os.getenv("RAY_IMAGE")
1472
isAuthLoginEnable = os.getenv("OAUTH_LOGIN")
@@ -33,10 +91,6 @@
3391
)
3492
)
3593

36-
if isAuthLoginEnable == "true":
37-
auth = TokenAuthentication(token=cluster_token, server=server_url, skip_tls=True)
38-
auth.login()
39-
4094
cluster.up()
4195

4296
cluster.status()
@@ -47,34 +101,8 @@
47101

48102
cluster.details()
49103

50-
jobdef = DDPJobDefinition(
51-
name="mnist",
52-
script="mnist.py",
53-
scheduler_args={"requirements": "requirements.txt"},
54-
)
55-
job = jobdef.submit(cluster)
56-
57-
done = False
58-
time = 0
59-
timeout = 900
60-
while not done:
61-
status = job.status()
62-
if is_terminal(status.state):
63-
break
64-
if not done:
65-
print(status)
66-
if timeout and time >= timeout:
67-
raise TimeoutError(f"job has timed out after waiting {timeout}s")
68-
sleep(5)
69-
time += 5
70-
71-
print(f"Job has completed: {status.state}")
72-
73-
print(job.logs())
74-
75-
cluster.down()
76-
77-
if not status.state == AppState.SUCCEEDED:
78-
exit(1)
104+
if isAuthLoginEnable == "true":
105+
submit_ddp_job(cluster, cluster_token, server_url)
79106
else:
80-
exit(0)
107+
dashboard_url = cluster.cluster_dashboard_uri()
108+
submit_job_post_request(cluster, dashboard_url)

tests/e2e/mnist_raycluster_sdk_auth_test.go

Lines changed: 18 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,8 @@ limitations under the License.
1717
package e2e
1818

1919
import (
20-
"bytes"
2120
"testing"
22-
"time"
2321

24-
"github.com/onsi/gomega"
2522
. "github.com/onsi/gomega"
2623
. "github.com/project-codeflare/codeflare-common/support"
2724
mcadv1beta1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1"
@@ -33,7 +30,7 @@ import (
3330
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3431
)
3532

36-
// This test covers the Ray Cluster creation authentication functionality of openshift_oauth without login and assert the warning only authorised users can submit jobs
33+
// This test covers the Ray Cluster creation authentication functionality of openshift_oauth without login and asserts that job submission is rejected with a 403 Forbidden error
3734
func TestMNISTRayClusterSDKWithoutAuthLogin(t *testing.T) {
3835
test := With(t)
3936
test.T().Parallel()
@@ -44,6 +41,7 @@ func TestMNISTRayClusterSDKWithoutAuthLogin(t *testing.T) {
4441

4542
// Create a namespace
4643
namespace := test.NewTestNamespace()
44+
4745
// Test configuration
4846
config := CreateConfigMap(test, namespace.Name, map[string][]byte{
4947
// SDK script
@@ -69,7 +67,7 @@ func TestMNISTRayClusterSDKWithoutAuthLogin(t *testing.T) {
6967
Resources: []string{"rayclusters", "rayclusters/status"},
7068
},
7169
{
72-
Verbs: []string{"get", "create", "delete", "list", "patch", "update"},
70+
Verbs: []string{"get", "list", "create", "update", "delete"},
7371
APIGroups: []string{"route.openshift.io"},
7472
Resources: []string{"routes"},
7573
},
@@ -83,13 +81,7 @@ func TestMNISTRayClusterSDKWithoutAuthLogin(t *testing.T) {
8381
// Create cluster wide RBAC, required for SDK OpenShift Oauth check
8482
clusterPolicyRules := []rbacv1.PolicyRule{
8583
{
86-
Verbs: []string{"get", "list"},
87-
APIGroups: []string{"config.openshift.io"},
88-
Resources: []string{"ingresses"},
89-
ResourceNames: []string{"cluster"},
90-
},
91-
{
92-
Verbs: []string{"create", "update"},
84+
Verbs: []string{"create", "update", "delete"},
9385
APIGroups: []string{""},
9486
Resources: []string{"services", "serviceaccounts"},
9587
},
@@ -108,6 +100,11 @@ func TestMNISTRayClusterSDKWithoutAuthLogin(t *testing.T) {
108100
APIGroups: []string{"authentication.k8s.io"},
109101
Resources: []string{"tokenreviews"},
110102
},
103+
{
104+
Verbs: []string{"get"},
105+
APIGroups: []string{""},
106+
Resources: []string{"pods"},
107+
},
111108
}
112109
sa := CreateServiceAccount(test, namespace.Name)
113110
role := CreateRole(test, namespace.Name, policyRules)
@@ -210,10 +207,16 @@ func TestMNISTRayClusterSDKWithoutAuthLogin(t *testing.T) {
210207
// Setup the codeflare-sdk inside the pod associated to the created job
211208
SetupCodeflareSDKInsidePod(test, namespace, job.Name)
212209

213-
test.T().Logf("Checking pod logs contains insecure requests warning")
214-
// Assert the job pod as warning of insecurerequests
215-
assertPodLogs(test, namespace, job.Name, "InsecureRequestWarning: Unverified HTTPS request is being made to host", 3*time.Minute)
210+
test.T().Logf("Waiting for Job %s/%s to complete", job.Namespace, job.Name)
211+
test.Eventually(Job(test, job.Namespace, job.Name), TestTimeoutLong).Should(
212+
Or(
213+
WithTransform(ConditionStatus(batchv1.JobComplete), Equal(corev1.ConditionTrue)),
214+
WithTransform(ConditionStatus(batchv1.JobFailed), Equal(corev1.ConditionTrue)),
215+
))
216216

217+
// Assert the job has completed successfully
218+
test.Expect(GetJob(test, job.Namespace, job.Name)).
219+
To(WithTransform(ConditionStatus(batchv1.JobComplete), Equal(corev1.ConditionTrue)))
217220
}
218221

219222
// This test covers the Ray Cluster creation authentication functionality with openshift_oauth login and checks that the Ray job is successful
@@ -264,12 +267,6 @@ func TestMNISTRayClusterSDKWithAuthLogin(t *testing.T) {
264267

265268
// Create cluster wide RBAC, required for SDK OpenShift Oauth check
266269
clusterPolicyRules := []rbacv1.PolicyRule{
267-
{
268-
Verbs: []string{"get", "list"},
269-
APIGroups: []string{"config.openshift.io"},
270-
Resources: []string{"ingresses"},
271-
ResourceNames: []string{"cluster"},
272-
},
273270
{
274271
Verbs: []string{"create", "update", "delete"},
275272
APIGroups: []string{""},
@@ -414,26 +411,3 @@ func TestMNISTRayClusterSDKWithAuthLogin(t *testing.T) {
414411
test.Expect(GetJob(test, job.Namespace, job.Name)).
415412
To(WithTransform(ConditionStatus(batchv1.JobComplete), Equal(corev1.ConditionTrue)))
416413
}
417-
418-
func assertPodLogs(test Test, namespace *corev1.Namespace, labelName string, expectedLogSubstring string, timeout time.Duration) {
419-
420-
// Get pod
421-
pod := GetPod(test, namespace, labelName)
422-
423-
startTime := time.Now()
424-
for {
425-
// Get the current pod logs
426-
podLogs := GetPodLogs(test, pod, corev1.PodLogOptions{})
427-
428-
// Check if the expected log substring is present
429-
if bytes.Contains(podLogs, []byte(expectedLogSubstring)) {
430-
test.Expect(podLogs).To(gomega.ContainSubstring(expectedLogSubstring))
431-
break
432-
}
433-
434-
if time.Since(startTime) > timeout {
435-
test.T().Fatal("Timeout waiting for expected Incorrect Requests Warning log not found!")
436-
}
437-
time.Sleep(5 * time.Second)
438-
}
439-
}

0 commit comments

Comments
 (0)