Skip to content

Commit 86e2e6f

Browse files
committed
Change demo notebook to use new job submission
Signed-off-by: Kevin <kpostlet@redhat.com>
1 parent dccad02 commit 86e2e6f

File tree

1 file changed

+148
-0
lines changed

1 file changed

+148
-0
lines changed
Lines changed: 148 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,148 @@
---
# MCAD AppWrapper "mnisttest": wraps a KubeRay RayCluster (one head pod and
# two worker pods, no GPUs) plus an OpenShift Route pointing at the head
# service, so both resources are submitted as one unit.
apiVersion: mcad.ibm.com/v1beta1
kind: AppWrapper
metadata:
  name: mnisttest
  namespace: default
spec:
  priority: 9
  resources:
    GenericItems:
    # Generic item 1: the RayCluster itself.
    - custompodresources:
      # Mirrors the head container's resources (see headGroupSpec below).
      - limits:
          cpu: 2
          memory: 8G
          nvidia.com/gpu: 0
        replicas: 1
        requests:
          cpu: 2
          memory: 8G
          nvidia.com/gpu: 0
      # Mirrors the worker container's resources, times two workers
      # (see workerGroupSpecs below).
      - limits:
          cpu: 1
          memory: 1G
          nvidia.com/gpu: 0
        replicas: 2
        requests:
          cpu: 1
          memory: 1G
          nvidia.com/gpu: 0
      generictemplate:
        apiVersion: ray.io/v1alpha1
        kind: RayCluster
        metadata:
          labels:
            appwrapper.mcad.ibm.com: mnisttest
            controller-tools.k8s.io: '1.0'
          name: mnisttest
          namespace: default
        spec:
          # Autoscaler settings are present, but in-tree autoscaling is
          # switched off just below.
          autoscalerOptions:
            idleTimeoutSeconds: 60
            imagePullPolicy: Always
            resources:
              limits:
                cpu: 500m
                memory: 512Mi
              requests:
                cpu: 500m
                memory: 512Mi
            upscalingMode: Default
          enableInTreeAutoscaling: false
          headGroupSpec:
            # Values are quoted so that every entry is a string.
            rayStartParams:
              block: 'true'
              dashboard-host: '0.0.0.0'
              num-gpus: '0'
            serviceType: ClusterIP
            template:
              spec:
                containers:
                - image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103
                  imagePullPolicy: Always
                  lifecycle:
                    # Run `ray stop` before the container is terminated.
                    preStop:
                      exec:
                        command:
                        - /bin/sh
                        - -c
                        - ray stop
                  name: ray-head
                  ports:
                  - containerPort: 6379
                    name: gcs
                  - containerPort: 8265
                    name: dashboard
                  - containerPort: 10001
                    name: client
                  resources:
                    limits:
                      cpu: 2
                      memory: 8G
                      nvidia.com/gpu: 0
                    requests:
                      cpu: 2
                      memory: 8G
                      nvidia.com/gpu: 0
          # Matches the Ray release in the image tags (ray2.1.0); the value
          # was previously 1.12.0, which disagreed with both images.
          rayVersion: '2.1.0'
          workerGroupSpecs:
          - groupName: small-group-mnisttest
            # min == max == replicas: the worker group is fixed at two pods.
            maxReplicas: 2
            minReplicas: 2
            rayStartParams:
              block: 'true'
              num-gpus: '0'
            replicas: 2
            template:
              metadata:
                annotations:
                  key: value
                labels:
                  key: value
              spec:
                containers:
                - env:
                  # Expose the pod's own IP to the container.
                  - name: MY_POD_IP
                    valueFrom:
                      fieldRef:
                        fieldPath: status.podIP
                  image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103
                  lifecycle:
                    # Run `ray stop` before the container is terminated.
                    preStop:
                      exec:
                        command:
                        - /bin/sh
                        - -c
                        - ray stop
                  name: machine-learning
                  resources:
                    limits:
                      cpu: 1
                      memory: 1G
                      nvidia.com/gpu: 0
                    requests:
                      cpu: 1
                      memory: 1G
                      nvidia.com/gpu: 0
                initContainers:
                # Block worker start-up until the service name built from
                # $RAY_IP resolves in cluster DNS. RAY_IP is not set in this
                # manifest - presumably injected by the Ray operator; verify.
                - command:
                  - sh
                  - -c
                  # Folded scalar: the two lines are joined with one space.
                  - >-
                    until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local;
                    do echo waiting for myservice; sleep 2; done
                  image: quay.io/kpostlet/busybox:1.28
                  name: init-myservice
      replicas: 1
    # Generic item 2: Route targeting the head service (named for the Ray
    # dashboard).
    - generictemplate:
        apiVersion: route.openshift.io/v1
        kind: Route
        metadata:
          labels:
            odh-ray-cluster-service: mnisttest-head-svc
          name: ray-dashboard-mnisttest
          namespace: default
        spec:
          to:
            kind: Service
            name: mnisttest-head-svc
      # NOTE(review): this key was `replica`; renamed to `replicas` to match
      # the sibling generic item - confirm against the AppWrapper CRD.
      replicas: 1
    Items: []

0 commit comments

Comments
 (0)