Change demo notebook to use new job submission

KPostOffice · KPostOffice · commit 86e2e6f2112c · 2023-02-21T15:10:59.000-05:00
Signed-off-by: Kevin <kpostlet@redhat.com>
diff --git a/demo-notebooks/batch-job/mnisttest.yaml b/demo-notebooks/batch-job/mnisttest.yaml
@@ -0,0 +1,148 @@
+apiVersion: mcad.ibm.com/v1beta1
+kind: AppWrapper  # bundles the resources listed under spec.resources
+metadata:
+  name: mnisttest
+  namespace: default
+spec:
+  priority: 9
+  resources:
+    GenericItems:  # two entries: a RayCluster and an OpenShift Route
+    - custompodresources:
+      - replicas: 1  # same cpu/memory figures as the ray-head container
+        requests:
+          cpu: 2
+          memory: 8G
+          nvidia.com/gpu: 0
+        limits:
+          cpu: 2
+          memory: 8G
+          nvidia.com/gpu: 0
+      - replicas: 2  # same figures and count as the worker group
+        requests:
+          cpu: 1
+          memory: 1G
+          nvidia.com/gpu: 0
+        limits:
+          cpu: 1
+          memory: 1G
+          nvidia.com/gpu: 0
+      generictemplate:
+        apiVersion: ray.io/v1alpha1
+        kind: RayCluster
+        metadata:
+          name: mnisttest
+          namespace: default
+          labels:
+            appwrapper.mcad.ibm.com: mnisttest
+            controller-tools.k8s.io: '1.0'  # quoted so the label stays a string
+        spec:
+          enableInTreeAutoscaling: false  # NOTE(review): autoscalerOptions is still set; presumably unused while false
+          autoscalerOptions:
+            upscalingMode: Default
+            idleTimeoutSeconds: 60
+            imagePullPolicy: Always
+            resources:
+              requests:
+                cpu: 500m
+                memory: 512Mi
+              limits:
+                cpu: 500m
+                memory: 512Mi
+          headGroupSpec:
+            serviceType: ClusterIP
+            rayStartParams:
+              block: 'true'
+              dashboard-host: '0.0.0.0'
+              num-gpus: '0'
+            template:
+              spec:
+                containers:
+                - name: ray-head
+                  image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103
+                  imagePullPolicy: Always
+                  ports:
+                  - name: gcs
+                    containerPort: 6379
+                  - name: dashboard
+                    containerPort: 8265
+                  - name: client
+                    containerPort: 10001
+                  resources:
+                    requests:
+                      cpu: 2
+                      memory: 8G
+                      nvidia.com/gpu: 0
+                    limits:
+                      cpu: 2
+                      memory: 8G
+                      nvidia.com/gpu: 0
+                  lifecycle:
+                    preStop:  # runs `ray stop` in the container before it is terminated
+                      exec:
+                        command:
+                        - /bin/sh
+                        - -c
+                        - ray stop
+          rayVersion: '2.1.0'  # matches the ray2.1.0 release named in the head/worker image tags; keep in sync
+          workerGroupSpecs:
+          - groupName: small-group-mnisttest
+            replicas: 2  # fixed-size group: replicas == minReplicas == maxReplicas
+            minReplicas: 2
+            maxReplicas: 2
+            rayStartParams:
+              block: 'true'
+              num-gpus: '0'
+            template:
+              metadata:
+                labels:
+                  key: value
+                annotations:
+                  key: value
+              spec:
+                initContainers:  # loops until the $RAY_IP service name resolves via nslookup
+                - name: init-myservice
+                  image: quay.io/kpostlet/busybox:1.28
+                  command:
+                  - sh
+                  - -c
+                  - until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local;
+                    do echo waiting for myservice; sleep 2; done
+                containers:
+                - name: machine-learning
+                  image: ghcr.io/foundation-model-stack/base:ray2.1.0-py38-gpu-pytorch1.12.0cu116-20221213-193103
+                  env:
+                  - name: MY_POD_IP  # populated from the pod's own status.podIP
+                    valueFrom:
+                      fieldRef:
+                        fieldPath: status.podIP
+                  resources:
+                    requests:
+                      cpu: 1
+                      memory: 1G
+                      nvidia.com/gpu: 0
+                    limits:
+                      cpu: 1
+                      memory: 1G
+                      nvidia.com/gpu: 0
+                  lifecycle:
+                    preStop:  # runs `ray stop` in the container before it is terminated
+                      exec:
+                        command:
+                        - /bin/sh
+                        - -c
+                        - ray stop
+      replicas: 1  # item-level count for the RayCluster template above (presumably one cluster; confirm against MCAD docs)
+    - generictemplate:  # OpenShift Route pointing at the mnisttest-head-svc Service
+        apiVersion: route.openshift.io/v1
+        kind: Route
+        metadata:
+          labels:
+            odh-ray-cluster-service: mnisttest-head-svc
+          name: ray-dashboard-mnisttest
+          namespace: default
+        spec:  # NOTE(review): no port.targetPort is set; confirm the route reaches the dashboard port (8265)
+          to:
+            kind: Service
+            name: mnisttest-head-svc
+      replicas: 1  # plural key, spelled the same as on the RayCluster item
+    Items: []  # explicitly empty; every resource in this AppWrapper is declared under GenericItems